From aeab82ff306b12ae980f40109d9b0152f77b1f12 Mon Sep 17 00:00:00 2001 From: Dheemanth Date: Wed, 13 May 2026 15:13:18 -0700 Subject: [PATCH] CUDA 13.2 samples update (#432) - Added Python samples for CUDA Python 1.0 release - Renamed top-level `Samples` directory to `cpp` to accommodate Python samples. --- .pre-commit-config.yaml | 10 + CHANGELOG.md | 4 + CMakeLists.txt | 2 +- Common/helper_nvJPEG.hxx | 6 +- Common/helper_string.h | 96 +-- README.md | 52 +- .../UnifiedMemoryStreams/.vscode/launch.json | 10 - .../asyncAPI/.vscode/launch.json | 10 - .../0_Introduction/clock/.vscode/launch.json | 10 - .../clock_nvrtc/.vscode/launch.json | 10 - .../cudaOpenMP/.vscode/launch.json | 10 - .../fp16ScalarProduct/.vscode/launch.json | 10 - .../matrixMul/.vscode/launch.json | 10 - .../matrixMulDrv/.vscode/launch.json | 10 - .../matrixMulDynlinkJIT/.vscode/launch.json | 10 - .../matrixMul_nvrtc/.vscode/launch.json | 10 - .../mergeSort/.vscode/launch.json | 10 - .../simpleAWBarrier/.vscode/launch.json | 10 - .../simpleAssert/.vscode/launch.json | 10 - .../simpleAssert_nvrtc/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../simpleAttributes/.vscode/launch.json | 10 - .../simpleCUDA2GL/.vscode/launch.json | 10 - .../simpleCallback/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../simpleCubemapTexture/.vscode/launch.json | 10 - .../simpleDrvRuntime/.vscode/launch.json | 10 - .../simpleHyperQ/.vscode/launch.json | 10 - .../simpleIPC/.vscode/launch.json | 10 - .../simpleLayeredTexture/.vscode/launch.json | 10 - .../simpleMPI/.vscode/launch.json | 10 - .../simpleMultiCopy/.vscode/launch.json | 10 - .../simpleMultiGPU/.vscode/launch.json | 10 - .../simpleOccupancy/.vscode/launch.json | 10 - .../simpleP2P/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../simplePrintf/.vscode/launch.json | 10 - .../simpleStreams/.vscode/launch.json | 10 - .../simpleSurfaceWrite/.vscode/launch.json | 10 - .../simpleTemplates/.vscode/launch.json | 10 - .../simpleTexture/.vscode/launch.json | 10 - .../simpleTexture3D/.vscode/launch.json | 10 - .../simpleTextureDrv/.vscode/launch.json | 10 - .../simpleVoteIntrinsics/.vscode/launch.json | 10 - .../simpleZeroCopy/.vscode/launch.json | 10 - .../systemWideAtomics/.vscode/launch.json | 10 - .../template/.vscode/launch.json | 10 - .../vectorAdd/.vscode/launch.json | 10 - .../vectorAddDrv/.vscode/launch.json | 10 - .../vectorAddMMAP/.vscode/launch.json | 10 - .../vectorAdd_nvrtc/.vscode/launch.json | 10 - .../deviceQuery/.vscode/launch.json | 10 - .../deviceQueryDrv/.vscode/launch.json | 10 - .../topologyQuery/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../FunctionPointers/.vscode/launch.json | 10 - .../MC_EstimatePiInlineP/.vscode/launch.json | 10 - .../MC_EstimatePiInlineQ/.vscode/launch.json | 10 - .../MC_EstimatePiP/.vscode/launch.json | 10 - .../MC_EstimatePiQ/.vscode/launch.json | 10 - .../MC_SingleAsianOptionP/.vscode/launch.json | 10 - .../boxFilter/.vscode/launch.json | 10 - .../convolutionSeparable/.vscode/launch.json | 10 - .../convolutionTexture/.vscode/launch.json | 10 - .../dct8x8/.vscode/launch.json | 10 - .../eigenvalues/.vscode/launch.json | 10 - .../histogram/.vscode/launch.json | 10 - .../imageDenoising/.vscode/launch.json | 10 - .../inlinePTX/.vscode/launch.json | 10 - .../inlinePTX_nvrtc/.vscode/launch.json | 10 - .../interval/.vscode/launch.json | 10 - .../particles/.vscode/launch.json | 10 - .../radixSortThrust/.vscode/launch.json | 10 - .../reduction/.vscode/launch.json | 10 - .../reductionMultiBlockCG/.vscode/launch.json | 10 - .../scalarProd/.vscode/launch.json | 10 - .../scan/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../shfl_scan/.vscode/launch.json | 10 - .../sortingNetworks/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../threadFenceReduction/.vscode/launch.json | 10 - .../threadMigration/.vscode/launch.json | 10 - .../StreamPriorities/.vscode/launch.json | 10 - .../bf16TensorCoreGemm/.vscode/launch.json | 10 - .../binaryPartitionCG/.vscode/launch.json | 10 - .../bindlessTexture/.vscode/launch.json | 10 - .../cdpAdvancedQuicksort/.vscode/launch.json | 10 - .../cdpBezierTessellation/.vscode/launch.json | 10 - .../cdpQuadtree/.vscode/launch.json | 10 - .../cdpSimplePrint/.vscode/launch.json | 10 - .../cdpSimpleQuicksort/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../cudaTensorCoreGemm/.vscode/launch.json | 10 - .../dmmaTensorCoreGemm/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../graphConditionalNodes/.vscode/launch.json | 10 - .../graphMemoryFootprint/.vscode/launch.json | 10 - .../graphMemoryNodes/.vscode/launch.json | 10 - .../immaTensorCoreGemm/.vscode/launch.json | 10 - .../jacobiCudaGraphs/.vscode/launch.json | 10 - .../memMapIPCDrv/.vscode/launch.json | 10 - .../newdelete/.vscode/launch.json | 10 - .../ptxjit/.vscode/launch.json | 10 - .../simpleCudaGraphs/.vscode/launch.json | 10 - .../tf32TensorCoreGemm/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../batchCUBLAS/.vscode/launch.json | 10 - .../boxFilterNPP/.vscode/launch.json | 10 - .../cannyEdgeDetectorNPP/.vscode/launch.json | 10 - .../conjugateGradient/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../conjugateGradientUM/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../cuSolverRf/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../cuSolverSp_LowlevelQR/.vscode/launch.json | 10 - .../cudaNvSci/.vscode/launch.json | 10 - .../freeImageInteropNPP/.vscode/launch.json | 10 - .../histEqualizationNPP/.vscode/launch.json | 10 - .../jitLto/.vscode/launch.json | 10 - .../lineOfSight/.vscode/launch.json | 10 - .../matrixMulCUBLAS/.vscode/launch.json | 10 - .../nvJPEG/.vscode/launch.json | 10 - .../nvJPEG_encoder/.vscode/launch.json | 10 - .../oceanFFT/.vscode/launch.json | 10 - .../randomFog/.vscode/launch.json | 10 - .../simpleCUBLAS/.vscode/launch.json | 10 - .../simpleCUBLASXT/.vscode/launch.json | 10 - .../simpleCUBLAS_LU/.vscode/launch.json | 10 - .../simpleCUFFT/.vscode/launch.json | 10 - .../simpleCUFFT_2d_MGPU/.vscode/launch.json | 10 - .../simpleCUFFT_MGPU/.vscode/launch.json | 10 - .../simpleCUFFT_callback/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../BlackScholes/.vscode/launch.json | 10 - .../BlackScholes_nvrtc/.vscode/launch.json | 10 - .../FDTD3d/.vscode/launch.json | 10 - .../HSOpticalFlow/.vscode/launch.json | 10 - .../Mandelbrot/.vscode/launch.json | 10 - .../MonteCarloMultiGPU/.vscode/launch.json | 10 - .../NV12toBGRandResize/.vscode/launch.json | 10 - .../SobelFilter/.vscode/launch.json | 10 - .../SobolQRNG/.vscode/launch.json | 10 - .../bicubicTexture/.vscode/launch.json | 10 - .../bilateralFilter/.vscode/launch.json | 10 - .../binomialOptions/.vscode/launch.json | 10 - .../binomialOptions_nvrtc/.vscode/launch.json | 10 - .../convolutionFFT2D/.vscode/launch.json | 10 - .../dwtHaar1D/.vscode/launch.json | 10 - .../dxtc/.vscode/launch.json | 10 - .../fastWalshTransform/.vscode/launch.json | 10 - .../fluidsGL/.vscode/launch.json | 10 - .../marchingCubes/.vscode/launch.json | 10 - .../nbody/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../postProcessGL/.vscode/launch.json | 10 - .../quasirandomGenerator/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../recursiveGaussian/.vscode/launch.json | 10 - .../simpleGL/.vscode/launch.json | 10 - .../simpleVulkan/.vscode/launch.json | 10 - .../simpleVulkanMMAP/.vscode/launch.json | 10 - .../smokeParticles/.vscode/launch.json | 10 - .../stereoDisparity/.vscode/launch.json | 10 - .../volumeFiltering/.vscode/launch.json | 10 - .../volumeRender/.vscode/launch.json | 10 - .../vulkanImageCUDA/.vscode/launch.json | 10 - .../LargeKernelParameter/.vscode/launch.json | 10 - .../UnifiedMemoryPerf/.vscode/launch.json | 10 - .../alignedTypes/.vscode/launch.json | 10 - .../cudaGraphsPerfScaling/.vscode/launch.json | 10 - .../transpose/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../cuDLAErrorReporting/.vscode/launch.json | 10 - .../Tegra/cuDLAHybridMode/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../cuDLAStandaloneMode/.vscode/launch.json | 10 - .../.vscode/launch.json | 10 - .../cudaNvSciNvMedia/.vscode/launch.json | 10 - .../Tegra/fluidsGLES/.vscode/launch.json | 10 - .../Tegra/nbody_opengles/.vscode/launch.json | 10 - .../Tegra/simpleGLES/.vscode/launch.json | 10 - .../simpleGLES_EGLOutput/.vscode/launch.json | 10 - .../toolchains/toolchain-aarch64-linux.cmake | 5 +- .../0_Introduction/CMakeLists.txt | 0 {Samples => cpp}/0_Introduction/README.md | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../UnifiedMemoryStreams/CMakeLists.txt | 12 +- .../UnifiedMemoryStreams/README.md | 0 .../UnifiedMemoryStreams.cu | 0 .../asyncAPI/.vscode/c_cpp_properties.json | 0 .../asyncAPI/.vscode/extensions.json | 0 .../0_Introduction/asyncAPI/CMakeLists.txt | 0 .../0_Introduction/asyncAPI/README.md | 0 .../0_Introduction/asyncAPI/asyncAPI.cu | 0 .../clock/.vscode/c_cpp_properties.json | 0 .../clock/.vscode/extensions.json | 0 .../0_Introduction/clock/CMakeLists.txt | 0 .../0_Introduction/clock/README.md | 0 .../0_Introduction/clock/clock.cu | 0 .../clock_nvrtc/.vscode/c_cpp_properties.json | 0 .../clock_nvrtc/.vscode/extensions.json | 0 .../0_Introduction/clock_nvrtc/CMakeLists.txt | 0 .../0_Introduction/clock_nvrtc/README.md | 0 .../0_Introduction/clock_nvrtc/clock.cpp | 0 .../clock_nvrtc/clock_kernel.cu | 0 .../cudaOpenMP/.vscode/c_cpp_properties.json | 0 .../cudaOpenMP/.vscode/extensions.json | 0 .../0_Introduction/cudaOpenMP/CMakeLists.txt | 0 .../0_Introduction/cudaOpenMP/README.md | 0 .../0_Introduction/cudaOpenMP/cudaOpenMP.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../fp16ScalarProduct/.vscode/extensions.json | 0 .../fp16ScalarProduct/CMakeLists.txt | 0 .../fp16ScalarProduct/README.md | 0 .../fp16ScalarProduct/fp16ScalarProduct.cu | 0 .../matrixMul/.vscode/c_cpp_properties.json | 0 .../matrixMul/.vscode/extensions.json | 0 .../0_Introduction/matrixMul/CMakeLists.txt | 0 .../0_Introduction/matrixMul/README.md | 0 .../0_Introduction/matrixMul/matrixMul.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../matrixMulDrv/.vscode/extensions.json | 0 .../matrixMulDrv/CMakeLists.txt | 0 .../0_Introduction/matrixMulDrv/README.md | 0 .../0_Introduction/matrixMulDrv/matrixMul.h | 0 .../matrixMulDrv/matrixMulDrv.cpp | 0 .../matrixMulDrv/matrixMul_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../matrixMulDynlinkJIT/CMakeLists.txt | 0 .../matrixMulDynlinkJIT/README.md | 0 .../matrixMulDynlinkJIT/cuda_drvapi_dynlink.c | 0 .../matrixMulDynlinkJIT/cuda_drvapi_dynlink.h | 0 .../cuda_drvapi_dynlink_cuda.h | 0 .../matrixMulDynlinkJIT/extras/README.TXT | 0 .../extras/matrixMul_kernel_32.ptx | 0 .../extras/matrixMul_kernel_64.ptx | 0 .../matrixMulDynlinkJIT/extras/ptx2c.py | 0 .../matrixMulDynlinkJIT/helper_cuda_drvapi.h | 0 .../matrixMulDynlinkJIT/matrixMul.h | 0 .../matrixMulDynlinkJIT.cpp | 0 .../matrixMulDynlinkJIT/matrixMul_gold.cpp | 0 .../matrixMul_kernel_32_ptxdump.c | 0 .../matrixMul_kernel_32_ptxdump.h | 0 .../matrixMul_kernel_64_ptxdump.c | 0 .../matrixMul_kernel_64_ptxdump.h | 0 .../.vscode/c_cpp_properties.json | 0 .../matrixMul_nvrtc/.vscode/extensions.json | 0 .../matrixMul_nvrtc/CMakeLists.txt | 0 .../0_Introduction/matrixMul_nvrtc/README.md | 0 .../matrixMul_nvrtc/matrixMul.cpp | 0 .../matrixMul_nvrtc/matrixMul_kernel.cu | 0 .../mergeSort/.vscode/c_cpp_properties.json | 0 .../mergeSort/.vscode/extensions.json | 0 .../0_Introduction/mergeSort/CMakeLists.txt | 0 .../0_Introduction/mergeSort/README.md | 0 .../0_Introduction/mergeSort/bitonic.cu | 0 .../0_Introduction/mergeSort/main.cpp | 0 .../0_Introduction/mergeSort/mergeSort.cu | 0 .../mergeSort/mergeSort_common.h | 0 .../mergeSort/mergeSort_host.cpp | 0 .../mergeSort/mergeSort_validate.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleAWBarrier/.vscode/extensions.json | 0 .../simpleAWBarrier/CMakeLists.txt | 0 .../0_Introduction/simpleAWBarrier/README.md | 0 .../simpleAWBarrier/simpleAWBarrier.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleAssert/.vscode/extensions.json | 0 .../simpleAssert/CMakeLists.txt | 0 .../0_Introduction/simpleAssert/README.md | 0 .../simpleAssert/simpleAssert.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleAssert_nvrtc/CMakeLists.txt | 0 .../simpleAssert_nvrtc/README.md | 0 .../simpleAssert_nvrtc/simpleAssert.cpp | 0 .../simpleAssert_nvrtc/simpleAssert_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleAtomicIntrinsics/CMakeLists.txt | 0 .../simpleAtomicIntrinsics/README.md | 0 .../simpleAtomicIntrinsics.cu | 0 .../simpleAtomicIntrinsics_cpu.cpp | 0 .../simpleAtomicIntrinsics_kernel.cuh | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../CMakeLists.txt | 0 .../simpleAtomicIntrinsics_nvrtc/README.md | 0 .../simpleAtomicIntrinsics.cpp | 0 .../simpleAtomicIntrinsics_cpu.cpp | 0 .../simpleAtomicIntrinsics_kernel.cuh | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleAttributes/.vscode/extensions.json | 0 .../simpleAttributes/CMakeLists.txt | 0 .../0_Introduction/simpleAttributes/README.md | 0 .../simpleAttributes/simpleAttributes.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleCUDA2GL/.vscode/extensions.json | 0 .../simpleCUDA2GL/CMakeLists.txt | 0 .../0_Introduction/simpleCUDA2GL/README.md | 0 .../simpleCUDA2GL/data/ref_simpleCUDA2GL.ppm | Bin .../0_Introduction/simpleCUDA2GL/main.cpp | 0 .../simpleCUDA2GL/simpleCUDA2GL.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleCallback/.vscode/extensions.json | 0 .../simpleCallback/CMakeLists.txt | 0 .../0_Introduction/simpleCallback/README.md | 0 .../simpleCallback/multithreading.cpp | 0 .../simpleCallback/multithreading.h | 0 .../simpleCallback/simpleCallback.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleCooperativeGroups/CMakeLists.txt | 0 .../simpleCooperativeGroups/README.md | 0 .../simpleCooperativeGroups.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleCubemapTexture/CMakeLists.txt | 0 .../simpleCubemapTexture/README.md | 0 .../simpleCubemapTexture.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleDrvRuntime/.vscode/extensions.json | 0 .../simpleDrvRuntime/CMakeLists.txt | 0 .../0_Introduction/simpleDrvRuntime/README.md | 0 .../simpleDrvRuntime/simpleDrvRuntime.cpp | 0 .../simpleDrvRuntime/vectorAdd_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleHyperQ/.vscode/extensions.json | 0 .../simpleHyperQ/CMakeLists.txt | 0 .../0_Introduction/simpleHyperQ/README.md | 0 .../simpleHyperQ/doc/HyperQ.docx | Bin .../simpleHyperQ/doc/HyperQ.pdf | Bin .../simpleHyperQ/simpleHyperQ.cu | 0 .../simpleIPC/.vscode/c_cpp_properties.json | 0 .../simpleIPC/.vscode/extensions.json | 0 .../0_Introduction/simpleIPC/CMakeLists.txt | 0 .../0_Introduction/simpleIPC/README.md | 0 .../0_Introduction/simpleIPC/simpleIPC.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleLayeredTexture/CMakeLists.txt | 0 .../simpleLayeredTexture/README.md | 0 .../simpleLayeredTexture.cu | 0 .../simpleMPI/.vscode/c_cpp_properties.json | 0 .../simpleMPI/.vscode/extensions.json | 0 .../0_Introduction/simpleMPI/CMakeLists.txt | 0 .../0_Introduction/simpleMPI/README.md | 0 .../0_Introduction/simpleMPI/simpleMPI.cpp | 0 .../0_Introduction/simpleMPI/simpleMPI.cu | 0 .../0_Introduction/simpleMPI/simpleMPI.h | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleMultiCopy/.vscode/extensions.json | 0 .../simpleMultiCopy/CMakeLists.txt | 0 .../0_Introduction/simpleMultiCopy/README.md | 0 .../simpleMultiCopy/simpleMultiCopy.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleMultiGPU/.vscode/extensions.json | 0 .../simpleMultiGPU/CMakeLists.txt | 0 .../0_Introduction/simpleMultiGPU/README.md | 0 .../simpleMultiGPU/simpleMultiGPU.cu | 0 .../simpleMultiGPU/simpleMultiGPU.h | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleOccupancy/.vscode/extensions.json | 0 .../simpleOccupancy/CMakeLists.txt | 0 .../0_Introduction/simpleOccupancy/README.md | 0 .../simpleOccupancy/simpleOccupancy.cu | 0 .../simpleP2P/.vscode/c_cpp_properties.json | 0 .../simpleP2P/.vscode/extensions.json | 0 .../0_Introduction/simpleP2P/CMakeLists.txt | 0 .../0_Introduction/simpleP2P/README.md | 0 .../0_Introduction/simpleP2P/simpleP2P.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simplePitchLinearTexture/CMakeLists.txt | 0 .../simplePitchLinearTexture/README.md | 0 .../simplePitchLinearTexture.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simplePrintf/.vscode/extensions.json | 0 .../simplePrintf/CMakeLists.txt | 0 .../0_Introduction/simplePrintf/README.md | 0 .../simplePrintf/simplePrintf.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleStreams/.vscode/extensions.json | 0 .../simpleStreams/CMakeLists.txt | 0 .../0_Introduction/simpleStreams/README.md | 0 .../simpleStreams/simpleStreams.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleSurfaceWrite/CMakeLists.txt | 0 .../simpleSurfaceWrite/README.md | 0 .../simpleSurfaceWrite/data/ref_rotated.pgm | Bin .../simpleSurfaceWrite/data/teapot512.pgm | Bin .../simpleSurfaceWrite/simpleSurfaceWrite.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleTemplates/.vscode/extensions.json | 0 .../simpleTemplates/CMakeLists.txt | 0 .../0_Introduction/simpleTemplates/README.md | 0 .../simpleTemplates/sharedmem.cuh | 0 .../simpleTemplates/simpleTemplates.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleTexture/.vscode/extensions.json | 0 .../simpleTexture/CMakeLists.txt | 0 .../0_Introduction/simpleTexture/README.md | 0 .../simpleTexture/data/ref_rotated.pgm | Bin .../simpleTexture/data/teapot512.pgm | Bin .../simpleTexture/data/teapot512_out.pgm | Bin .../simpleTexture/simpleTexture.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleTexture3D/.vscode/extensions.json | 0 .../simpleTexture3D/CMakeLists.txt | 0 .../0_Introduction/simpleTexture3D/README.md | 0 .../simpleTexture3D/data/Bucky.raw | Bin .../simpleTexture3D/data/ref_texture3D.bin | Bin .../simpleTexture3D/doc/sshot_lg.JPG | Bin .../simpleTexture3D/doc/sshot_md.JPG | Bin .../simpleTexture3D/doc/sshot_sm.JPG | Bin .../simpleTexture3D/simpleTexture3D.cpp | 0 .../simpleTexture3D/simpleTexture3D_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleTextureDrv/.vscode/extensions.json | 0 .../simpleTextureDrv/CMakeLists.txt | 0 .../0_Introduction/simpleTextureDrv/README.md | 0 .../simpleTextureDrv/data/ref_rotated.pgm | Bin .../simpleTextureDrv/data/teapot512.pgm | Bin .../simpleTextureDrv/data/teapot512_out.pgm | Bin .../simpleTextureDrv/simpleTextureDrv.cpp | 0 .../simpleTextureDrv/simpleTexture_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleVoteIntrinsics/CMakeLists.txt | 0 .../simpleVoteIntrinsics/README.md | 0 .../simpleVoteIntrinsics.cu | 0 .../simpleVote_kernel.cuh | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleZeroCopy/.vscode/extensions.json | 0 .../simpleZeroCopy/CMakeLists.txt | 0 .../0_Introduction/simpleZeroCopy/README.md | 0 .../simpleZeroCopy/simpleZeroCopy.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../systemWideAtomics/.vscode/extensions.json | 0 .../systemWideAtomics/CMakeLists.txt | 0 .../systemWideAtomics/README.md | 0 .../systemWideAtomics/systemWideAtomics.cu | 0 .../template/.vscode/c_cpp_properties.json | 0 .../template/.vscode/extensions.json | 0 .../0_Introduction/template/CMakeLists.txt | 0 .../0_Introduction/template/README.md | 0 .../0_Introduction/template/template.cu | 0 .../0_Introduction/template/template_cpu.cpp | 0 .../vectorAdd/.vscode/c_cpp_properties.json | 0 .../vectorAdd/.vscode/extensions.json | 0 .../0_Introduction/vectorAdd/CMakeLists.txt | 0 .../0_Introduction/vectorAdd/README.md | 0 .../0_Introduction/vectorAdd/vectorAdd.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../vectorAddDrv/.vscode/extensions.json | 0 .../vectorAddDrv/CMakeLists.txt | 0 .../0_Introduction/vectorAddDrv/README.md | 0 .../vectorAddDrv/vectorAddDrv.cpp | 0 .../vectorAddDrv/vectorAdd_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../vectorAddMMAP/.vscode/extensions.json | 0 .../vectorAddMMAP/CMakeLists.txt | 0 .../0_Introduction/vectorAddMMAP/README.md | 0 .../vectorAddMMAP/multidevicealloc_memmap.cpp | 0 .../vectorAddMMAP/multidevicealloc_memmap.hpp | 0 .../vectorAddMMAP/vectorAddMMAP.cpp | 0 .../vectorAddMMAP/vectorAdd_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../vectorAdd_nvrtc/.vscode/extensions.json | 0 .../vectorAdd_nvrtc/CMakeLists.txt | 0 .../0_Introduction/vectorAdd_nvrtc/README.md | 0 .../vectorAdd_nvrtc/vectorAdd.cpp | 0 .../vectorAdd_nvrtc/vectorAdd_kernel.cu | 0 {Samples => cpp}/1_Utilities/CMakeLists.txt | 0 {Samples => cpp}/1_Utilities/README.md | 0 .../deviceQuery/.vscode/c_cpp_properties.json | 0 .../deviceQuery/.vscode/extensions.json | 0 .../1_Utilities/deviceQuery/CMakeLists.txt | 0 .../1_Utilities/deviceQuery/README.md | 0 .../1_Utilities/deviceQuery/deviceQuery.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../deviceQueryDrv/.vscode/extensions.json | 0 .../1_Utilities/deviceQueryDrv/CMakeLists.txt | 0 .../1_Utilities/deviceQueryDrv/README.md | 0 .../deviceQueryDrv/deviceQueryDrv.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../topologyQuery/.vscode/extensions.json | 0 .../1_Utilities/topologyQuery/CMakeLists.txt | 0 .../1_Utilities/topologyQuery/README.md | 0 .../topologyQuery/topologyQuery.cu | 0 .../2_Concepts_and_Techniques/CMakeLists.txt | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../EGLStream_CUDA_CrossGPU/CMakeLists.txt | 0 .../EGLStream_CUDA_CrossGPU/README.md | 0 .../EGLStream_CUDA_CrossGPU/cuda_consumer.cpp | 0 .../EGLStream_CUDA_CrossGPU/cuda_consumer.h | 0 .../EGLStream_CUDA_CrossGPU/cuda_producer.cpp | 0 .../EGLStream_CUDA_CrossGPU/cuda_producer.h | 0 .../eglstrm_common.cpp | 0 .../EGLStream_CUDA_CrossGPU/eglstrm_common.h | 0 .../EGLStream_CUDA_CrossGPU/helper.h | 0 .../EGLStream_CUDA_CrossGPU/kernel.cu | 0 .../EGLStream_CUDA_CrossGPU/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../EGLStream_CUDA_Interop/CMakeLists.txt | 0 .../EGLStream_CUDA_Interop/README.md | 0 .../EGLStream_CUDA_Interop/cuda_consumer.cpp | 0 .../EGLStream_CUDA_Interop/cuda_consumer.h | 0 .../EGLStream_CUDA_Interop/cuda_f_1.yuv | Bin .../EGLStream_CUDA_Interop/cuda_f_2.yuv | Bin .../EGLStream_CUDA_Interop/cuda_producer.cpp | 0 .../EGLStream_CUDA_Interop/cuda_producer.h | 0 .../EGLStream_CUDA_Interop/cuda_yuv_f_1.yuv | Bin .../EGLStream_CUDA_Interop/cuda_yuv_f_2.yuv | Bin .../EGLStream_CUDA_Interop/eglstrm_common.cpp | 0 .../EGLStream_CUDA_Interop/eglstrm_common.h | 0 .../EGLStream_CUDA_Interop/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../FunctionPointers/.vscode/extensions.json | 0 .../FunctionPointers/CMakeLists.txt | 0 .../FunctionPointers/FunctionPointers.cpp | 0 .../FunctionPointers_kernels.cu | 0 .../FunctionPointers_kernels.h | 0 .../FunctionPointers/README.md | 0 .../FunctionPointers/data/ref_orig.pgm | Bin .../FunctionPointers/data/ref_shared.pgm | Bin .../FunctionPointers/data/ref_tex.pgm | Bin .../FunctionPointers/data/teapot512.pgm | Bin .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../MC_EstimatePiInlineP/CMakeLists.txt | 0 .../MC_EstimatePiInlineP/README.md | 0 .../MC_EstimatePiInlineP/inc/cudasharedmem.h | 0 .../MC_EstimatePiInlineP/inc/piestimator.h | 0 .../MC_EstimatePiInlineP/inc/test.h | 0 .../MC_EstimatePiInlineP/src/main.cpp | 0 .../MC_EstimatePiInlineP/src/piestimator.cu | 0 .../MC_EstimatePiInlineP/src/test.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../MC_EstimatePiInlineQ/CMakeLists.txt | 0 .../MC_EstimatePiInlineQ/README.md | 0 .../MC_EstimatePiInlineQ/inc/cudasharedmem.h | 0 .../MC_EstimatePiInlineQ/inc/piestimator.h | 0 .../MC_EstimatePiInlineQ/inc/test.h | 0 .../MC_EstimatePiInlineQ/src/main.cpp | 0 .../MC_EstimatePiInlineQ/src/piestimator.cu | 0 .../MC_EstimatePiInlineQ/src/test.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../MC_EstimatePiP/.vscode/extensions.json | 0 .../MC_EstimatePiP/CMakeLists.txt | 0 .../MC_EstimatePiP/README.md | 0 .../MC_EstimatePiP/inc/cudasharedmem.h | 0 .../MC_EstimatePiP/inc/piestimator.h | 0 .../MC_EstimatePiP/inc/test.h | 0 .../MC_EstimatePiP/src/main.cpp | 0 .../MC_EstimatePiP/src/piestimator.cu | 0 .../MC_EstimatePiP/src/test.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../MC_EstimatePiQ/.vscode/extensions.json | 0 .../MC_EstimatePiQ/CMakeLists.txt | 0 .../MC_EstimatePiQ/README.md | 0 .../MC_EstimatePiQ/inc/cudasharedmem.h | 0 .../MC_EstimatePiQ/inc/piestimator.h | 0 .../MC_EstimatePiQ/inc/test.h | 0 .../MC_EstimatePiQ/src/main.cpp | 0 .../MC_EstimatePiQ/src/piestimator.cu | 0 .../MC_EstimatePiQ/src/test.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../MC_SingleAsianOptionP/CMakeLists.txt | 0 .../MC_SingleAsianOptionP/README.md | 0 .../MC_SingleAsianOptionP/inc/asianoption.h | 0 .../MC_SingleAsianOptionP/inc/cudasharedmem.h | 0 .../MC_SingleAsianOptionP/inc/pricingengine.h | 0 .../MC_SingleAsianOptionP/inc/test.h | 0 .../MC_SingleAsianOptionP/src/main.cpp | 0 .../src/pricingengine.cu | 0 .../MC_SingleAsianOptionP/src/test.cpp | 0 .../2_Concepts_and_Techniques/README.md | 0 .../boxFilter/.vscode/c_cpp_properties.json | 0 .../boxFilter/.vscode/extensions.json | 0 .../boxFilter/CMakeLists.txt | 0 .../boxFilter/README.md | 0 .../boxFilter/boxFilter.cpp | 0 .../boxFilter/boxFilter_cpu.cpp | 0 .../boxFilter/boxFilter_kernel.cu | 0 .../boxFilter/data/ref_14.ppm | Bin .../boxFilter/data/ref_22.ppm | Bin .../boxFilter/data/teapot1024.ppm | Bin .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../convolutionSeparable/CMakeLists.txt | 0 .../convolutionSeparable/README.md | 0 .../convolutionSeparable.cu | 0 .../convolutionSeparable_common.h | 0 .../convolutionSeparable_gold.cpp | 0 .../doc/convolutionSeparable.doc | Bin .../doc/convolutionSeparable.pdf | Bin .../doc/convolutionSeparable.vsd | Bin .../convolutionSeparable/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../convolutionTexture/CMakeLists.txt | 0 .../convolutionTexture/README.md | 0 .../convolutionTexture/convolutionTexture.cu | 0 .../convolutionTexture_common.h | 0 .../convolutionTexture_gold.cpp | 0 .../convolutionTexture/doc/Performance.xls | Bin .../convolutionTexture/main.cpp | 0 .../dct8x8/.vscode/c_cpp_properties.json | 0 .../dct8x8/.vscode/extensions.json | 0 .../dct8x8/BmpUtil.cpp | 0 .../dct8x8/BmpUtil.h | 0 .../dct8x8/CMakeLists.txt | 0 .../2_Concepts_and_Techniques/dct8x8/Common.h | 0 .../dct8x8/DCT8x8_Gold.cpp | 0 .../dct8x8/DCT8x8_Gold.h | 0 .../dct8x8/README.md | 0 .../dct8x8/data/teapot512.bmp | Bin .../dct8x8/data/teapot512.ppm | Bin .../dct8x8/dct8x8.cu | 0 .../dct8x8/dct8x8_kernel1.cuh | 0 .../dct8x8/dct8x8_kernel2.cuh | 0 .../dct8x8/dct8x8_kernel_quantization.cuh | 0 .../dct8x8/dct8x8_kernel_short.cuh | 0 .../dct8x8/doc/BarbaraBlocks1.bmp | Bin .../dct8x8/doc/BarbaraBlocks2.bmp | Bin .../dct8x8/doc/BarbaraBlocks3.bmp | Bin .../dct8x8/doc/CosineBasis.png | Bin .../dct8x8/doc/Cosines.xls | Bin .../dct8x8/doc/DctJpeg.png | Bin .../dct8x8/doc/barbara.bmp | Bin .../dct8x8/doc/barbara_lg.png | Bin .../dct8x8/doc/barbara_md.png | Bin .../dct8x8/doc/barbara_sm.png | Bin .../dct8x8/doc/dct8x8.doc | Bin .../dct8x8/doc/dct8x8.pdf | Bin .../dct8x8/teapot512_cuda1.bmp | Bin .../dct8x8/teapot512_cuda2.bmp | Bin .../dct8x8/teapot512_cuda_short.bmp | Bin .../dct8x8/teapot512_gold1.bmp | Bin .../dct8x8/teapot512_gold2.bmp | Bin .../eigenvalues/.vscode/c_cpp_properties.json | 0 .../eigenvalues/.vscode/extensions.json | 0 .../eigenvalues/CMakeLists.txt | 0 .../eigenvalues/README.md | 0 .../eigenvalues/bisect_kernel_large.cuh | 0 .../eigenvalues/bisect_kernel_large_multi.cuh | 0 .../eigenvalues/bisect_kernel_large_onei.cuh | 0 .../eigenvalues/bisect_kernel_small.cuh | 0 .../eigenvalues/bisect_large.cu | 0 .../eigenvalues/bisect_large.cuh | 0 .../eigenvalues/bisect_small.cu | 0 .../eigenvalues/bisect_small.cuh | 0 .../eigenvalues/bisect_util.cu | 0 .../eigenvalues/config.h | 0 .../eigenvalues/data/diagonal.dat | 0 .../eigenvalues/data/reference.dat | 0 .../eigenvalues/data/superdiagonal.dat | 0 .../eigenvalues/doc/eigenvalues.doc | Bin .../eigenvalues/doc/eigenvalues.pdf | Bin .../eigenvalues/gerschgorin.cpp | 0 .../eigenvalues/gerschgorin.h | 0 .../eigenvalues/main.cu | 0 .../eigenvalues/matlab.cpp | 0 .../eigenvalues/matlab.h | 0 .../eigenvalues/structs.h | 0 .../eigenvalues/util.h | 0 .../histogram/.vscode/c_cpp_properties.json | 0 .../histogram/.vscode/extensions.json | 0 .../histogram/CMakeLists.txt | 0 .../histogram/README.md | 0 .../histogram/doc/histogram.doc | Bin .../histogram/doc/histogram.pdf | Bin .../histogram/doc/histogram.vsd | Bin .../histogram/histogram256.cu | 0 .../histogram/histogram64.cu | 0 .../histogram/histogram_common.h | 0 .../histogram/histogram_gold.cpp | 0 .../histogram/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../imageDenoising/.vscode/extensions.json | 0 .../imageDenoising/CMakeLists.txt | 0 .../imageDenoising/README.md | 0 .../imageDenoising/bmploader.cpp | 0 .../imageDenoising/data/portrait_noise.bmp | Bin .../imageDenoising/data/ref_knn.ppm | 0 .../imageDenoising/data/ref_nlm.ppm | 0 .../imageDenoising/data/ref_nlm2.ppm | 0 .../imageDenoising/data/ref_passthru.ppm | Bin .../imageDenoising/doc/NLM_lg.png | Bin .../imageDenoising/doc/NLM_md.png | Bin .../imageDenoising/doc/NLM_sm.png | Bin .../imageDenoising/doc/imageDenoising.doc | Bin .../imageDenoising/doc/imageDenoising.pdf | Bin .../imageDenoising/imageDenoising.cu | 0 .../imageDenoising/imageDenoising.h | 0 .../imageDenoising/imageDenoisingGL.cpp | 0 .../imageDenoising_copy_kernel.cuh | 0 .../imageDenoising_knn_kernel.cuh | 0 .../imageDenoising_nlm2_kernel.cuh | 0 .../imageDenoising_nlm_kernel.cuh | 0 .../inlinePTX/.vscode/c_cpp_properties.json | 0 .../inlinePTX/.vscode/extensions.json | 0 .../inlinePTX/CMakeLists.txt | 0 .../inlinePTX/README.md | 0 .../inlinePTX/inlinePTX.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../inlinePTX_nvrtc/.vscode/extensions.json | 0 .../inlinePTX_nvrtc/CMakeLists.txt | 0 .../inlinePTX_nvrtc/README.md | 0 .../inlinePTX_nvrtc/inlinePTX.cpp | 0 .../inlinePTX_nvrtc/inlinePTX_kernel.cu | 0 .../interval/.vscode/c_cpp_properties.json | 0 .../interval/.vscode/extensions.json | 0 .../interval/CMakeLists.txt | 0 .../interval/README.md | 0 .../interval/boost/config.hpp | 0 .../boost/config/abi/borland_prefix.hpp | 0 .../boost/config/abi/borland_suffix.hpp | 0 .../interval/boost/config/abi/msvc_prefix.hpp | 0 .../interval/boost/config/abi/msvc_suffix.hpp | 0 .../interval/boost/config/abi_prefix.hpp | 0 .../interval/boost/config/abi_suffix.hpp | 0 .../interval/boost/config/auto_link.hpp | 0 .../boost/config/compiler/borland.hpp | 0 .../boost/config/compiler/codegear.hpp | 0 .../interval/boost/config/compiler/comeau.hpp | 0 .../boost/config/compiler/common_edg.hpp | 0 .../boost/config/compiler/compaq_cxx.hpp | 0 .../boost/config/compiler/digitalmars.hpp | 0 .../interval/boost/config/compiler/gcc.hpp | 0 .../boost/config/compiler/gcc_xml.hpp | 0 .../boost/config/compiler/greenhills.hpp | 0 .../interval/boost/config/compiler/hp_acc.hpp | 0 .../interval/boost/config/compiler/intel.hpp | 0 .../interval/boost/config/compiler/kai.hpp | 0 .../boost/config/compiler/metrowerks.hpp | 0 .../interval/boost/config/compiler/mpw.hpp | 0 .../interval/boost/config/compiler/pgi.hpp | 0 .../boost/config/compiler/sgi_mipspro.hpp | 0 .../boost/config/compiler/sunpro_cc.hpp | 0 .../interval/boost/config/compiler/vacpp.hpp | 0 .../boost/config/compiler/visualc.hpp | 0 .../interval/boost/config/no_tr1/cmath.hpp | 0 .../interval/boost/config/no_tr1/complex.hpp | 0 .../boost/config/no_tr1/functional.hpp | 0 .../interval/boost/config/no_tr1/memory.hpp | 0 .../interval/boost/config/no_tr1/utility.hpp | 0 .../interval/boost/config/platform/aix.hpp | 0 .../boost/config/platform/amigaos.hpp | 0 .../interval/boost/config/platform/beos.hpp | 0 .../interval/boost/config/platform/bsd.hpp | 0 .../interval/boost/config/platform/cygwin.hpp | 0 .../interval/boost/config/platform/hpux.hpp | 0 .../interval/boost/config/platform/irix.hpp | 0 .../interval/boost/config/platform/linux.hpp | 0 .../interval/boost/config/platform/macos.hpp | 0 .../interval/boost/config/platform/qnxnto.hpp | 0 .../boost/config/platform/solaris.hpp | 0 .../boost/config/platform/vxworks.hpp | 0 .../interval/boost/config/platform/win32.hpp | 0 .../interval/boost/config/posix_features.hpp | 0 .../boost/config/requires_threads.hpp | 0 .../boost/config/select_compiler_config.hpp | 0 .../boost/config/select_platform_config.hpp | 0 .../boost/config/select_stdlib_config.hpp | 0 .../boost/config/stdlib/dinkumware.hpp | 0 .../interval/boost/config/stdlib/libcomo.hpp | 0 .../boost/config/stdlib/libstdcpp3.hpp | 0 .../interval/boost/config/stdlib/modena.hpp | 0 .../interval/boost/config/stdlib/msl.hpp | 0 .../boost/config/stdlib/roguewave.hpp | 0 .../interval/boost/config/stdlib/sgi.hpp | 0 .../interval/boost/config/stdlib/stlport.hpp | 0 .../interval/boost/config/stdlib/vacpp.hpp | 0 .../interval/boost/config/suffix.hpp | 0 .../interval/boost/config/user.hpp | 0 .../interval/boost/config/warning_disable.hpp | 0 .../interval/boost/limits.hpp | 0 .../interval/boost/numeric/interval.hpp | 0 .../interval/boost/numeric/interval/arith.hpp | 0 .../boost/numeric/interval/arith2.hpp | 0 .../boost/numeric/interval/arith3.hpp | 0 .../boost/numeric/interval/checking.hpp | 0 .../boost/numeric/interval/compare.hpp | 0 .../numeric/interval/compare/certain.hpp | 0 .../numeric/interval/compare/explicit.hpp | 0 .../interval/compare/lexicographic.hpp | 0 .../numeric/interval/compare/possible.hpp | 0 .../boost/numeric/interval/compare/set.hpp | 0 .../numeric/interval/compare/tribool.hpp | 0 .../boost/numeric/interval/constants.hpp | 0 .../detail/alpha_rounding_control.hpp | 0 .../interval/detail/bcc_rounding_control.hpp | 0 .../boost/numeric/interval/detail/bugs.hpp | 0 .../interval/detail/c99_rounding_control.hpp | 0 .../detail/c99sub_rounding_control.hpp | 0 .../numeric/interval/detail/division.hpp | 0 .../interval/detail/ia64_rounding_control.hpp | 0 .../interval/detail/interval_prototype.hpp | 0 .../interval/detail/msvc_rounding_control.hpp | 0 .../interval/detail/ppc_rounding_control.hpp | 0 .../detail/sparc_rounding_control.hpp | 0 .../numeric/interval/detail/test_input.hpp | 0 .../interval/detail/x86_rounding_control.hpp | 0 .../detail/x86gcc_rounding_control.hpp | 0 .../boost/numeric/interval/ext/integer.hpp | 0 .../ext/x86_fast_rounding_control.hpp | 0 .../boost/numeric/interval/hw_rounding.hpp | 0 .../boost/numeric/interval/interval.hpp | 0 .../interval/boost/numeric/interval/io.hpp | 0 .../boost/numeric/interval/limits.hpp | 0 .../boost/numeric/interval/policies.hpp | 0 .../boost/numeric/interval/rounded_arith.hpp | 0 .../boost/numeric/interval/rounded_transc.hpp | 0 .../boost/numeric/interval/rounding.hpp | 0 .../boost/numeric/interval/transc.hpp | 0 .../boost/numeric/interval/utility.hpp | 0 .../interval/cpu_interval.h | 0 .../interval/cuda_interval.h | 0 .../interval/cuda_interval_lib.h | 0 .../interval/cuda_interval_rounded_arith.h | 0 .../interval/interval.cu | 0 .../interval/interval.h | 0 .../particles/.vscode/c_cpp_properties.json | 0 .../particles/.vscode/extensions.json | 0 .../particles/CMakeLists.txt | 0 .../particles/README.md | 0 .../particles/data/ref_particles.bin | Bin .../particles/doc/particles.doc | Bin .../particles/doc/particles.pdf | Bin .../particles/doc/screenshot_lg.png | Bin .../particles/doc/screenshot_md.png | Bin .../particles/doc/screenshot_sm.png | Bin .../particles/particleSystem.cpp | 0 .../particles/particleSystem.cuh | 0 .../particles/particleSystem.h | 0 .../particles/particleSystem_cuda.cu | 0 .../particles/particles.cpp | 0 .../particles/particles_kernel.cuh | 0 .../particles/particles_kernel_impl.cuh | 0 .../particles/render_particles.cpp | 0 .../particles/render_particles.h | 0 .../particles/shaders.cpp | 0 .../particles/shaders.h | 0 .../.vscode/c_cpp_properties.json | 0 .../radixSortThrust/.vscode/extensions.json | 0 .../radixSortThrust/CMakeLists.txt | 0 .../radixSortThrust/README.md | 0 .../radixSortThrust/doc/readme.txt | 0 .../radixSortThrust/radixSortThrust.cu | 0 .../reduction/.vscode/c_cpp_properties.json | 0 .../reduction/.vscode/extensions.json | 0 .../reduction/CMakeLists.txt | 0 .../reduction/README.md | 0 .../reduction/reduction.cpp | 0 .../reduction/reduction.h | 0 .../reduction/reduction_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../reductionMultiBlockCG/CMakeLists.txt | 0 .../reductionMultiBlockCG/README.md | 0 .../reductionMultiBlockCG.cu | 0 .../scalarProd/.vscode/c_cpp_properties.json | 0 .../scalarProd/.vscode/extensions.json | 0 .../scalarProd/CMakeLists.txt | 0 .../scalarProd/README.md | 0 .../scalarProd/scalarProd.cu | 0 .../scalarProd/scalarProd_cpu.cpp | 0 .../scalarProd/scalarProd_kernel.cuh | 0 .../scan/.vscode/c_cpp_properties.json | 0 .../scan/.vscode/extensions.json | 0 .../scan/CMakeLists.txt | 0 .../2_Concepts_and_Techniques/scan/README.md | 0 .../2_Concepts_and_Techniques/scan/main.cpp | 0 .../2_Concepts_and_Techniques/scan/scan.cu | 0 .../scan/scan_common.h | 0 .../scan/scan_gold.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../segmentationTreeThrust/CMakeLists.txt | 0 .../segmentationTreeThrust/README.md | 0 .../segmentationTreeThrust/common.cuh | 0 .../segmentationTreeThrust/data/ref_00.ppm | 0 .../segmentationTreeThrust/data/ref_09.ppm | 0 .../segmentationTreeThrust/data/test.ppm | 0 .../segmentationTreeThrust/kernels.cuh | 0 .../segmentationTree.cu | 0 .../shfl_scan/.vscode/c_cpp_properties.json | 0 .../shfl_scan/.vscode/extensions.json | 0 .../shfl_scan/CMakeLists.txt | 0 .../shfl_scan/README.md | 0 .../shfl_scan/shfl_integral_image.cuh | 0 .../shfl_scan/shfl_scan.cu | 0 .../shfl_scan/util.h | 0 .../.vscode/c_cpp_properties.json | 0 .../sortingNetworks/.vscode/extensions.json | 0 .../sortingNetworks/CMakeLists.txt | 0 .../sortingNetworks/README.md | 0 .../sortingNetworks/bitonicSort.cu | 0 .../sortingNetworks/main.cpp | 0 .../sortingNetworks/oddEvenMergeSort.cu | 0 .../sortingNetworks_common.cuh | 0 .../sortingNetworks/sortingNetworks_common.h | 0 .../sortingNetworks_validate.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../streamOrderedAllocation/CMakeLists.txt | 0 .../streamOrderedAllocation/README.md | 0 .../streamOrderedAllocation.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../streamOrderedAllocationIPC/CMakeLists.txt | 0 .../streamOrderedAllocationIPC/README.md | 0 .../streamOrderedAllocationIPC.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../streamOrderedAllocationP2P/CMakeLists.txt | 0 .../streamOrderedAllocationP2P/README.md | 0 .../streamOrderedAllocationP2P.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../threadFenceReduction/CMakeLists.txt | 0 .../threadFenceReduction/README.md | 0 .../threadFenceReduction.cu | 0 .../threadFenceReduction.h | 0 .../threadFenceReduction_kernel.cuh | 0 .../.vscode/c_cpp_properties.json | 0 .../threadMigration/.vscode/extensions.json | 0 .../threadMigration/CMakeLists.txt | 0 .../threadMigration/README.md | 0 .../threadMigration/threadMigration.cpp | 0 .../threadMigration/threadMigration_kernel.cu | 0 .../3_CUDA_Features/CMakeLists.txt | 0 {Samples => cpp}/3_CUDA_Features/README.md | 0 .../.vscode/c_cpp_properties.json | 0 .../StreamPriorities/.vscode/extensions.json | 0 .../StreamPriorities/CMakeLists.txt | 0 .../StreamPriorities/README.md | 0 .../StreamPriorities/StreamPriorities.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../bf16TensorCoreGemm/CMakeLists.txt | 0 .../bf16TensorCoreGemm/README.md | 0 .../bf16TensorCoreGemm/bf16TensorCoreGemm.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../binaryPartitionCG/.vscode/extensions.json | 0 .../binaryPartitionCG/CMakeLists.txt | 0 .../binaryPartitionCG/README.md | 0 .../binaryPartitionCG/binaryPartitionCG.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../bindlessTexture/.vscode/extensions.json | 0 .../bindlessTexture/CMakeLists.txt | 0 .../3_CUDA_Features/bindlessTexture/README.md | 0 .../bindlessTexture/bindlessTexture.cpp | 0 .../bindlessTexture/bindlessTexture.h | 0 .../bindlessTexture/bindlessTexture_kernel.cu | 0 .../bindlessTexture/data/flower.ppm | 0 .../bindlessTexture/data/person.ppm | Bin .../data/ref_bindlessTexture.bin | Bin .../bindlessTexture/data/sponge.ppm | Bin .../bindlessTexture/doc/sshot_lg.JPG | Bin .../bindlessTexture/doc/sshot_md.JPG | Bin .../bindlessTexture/doc/sshot_sm.JPG | Bin .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cdpAdvancedQuicksort/CMakeLists.txt | 0 .../cdpAdvancedQuicksort/README.md | 0 .../cdpAdvancedQuicksort.cu | 0 .../cdpAdvancedQuicksort/cdpBitonicSort.cu | 0 .../cdpAdvancedQuicksort/cdpQuicksort.h | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cdpBezierTessellation/BezierLineCDP.cu | 0 .../cdpBezierTessellation/CMakeLists.txt | 0 .../cdpBezierTessellation/README.md | 0 .../cdpQuadtree/.vscode/c_cpp_properties.json | 0 .../cdpQuadtree/.vscode/extensions.json | 0 .../cdpQuadtree/CMakeLists.txt | 0 .../3_CUDA_Features/cdpQuadtree/README.md | 0 .../cdpQuadtree/cdpQuadtree.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../cdpSimplePrint/.vscode/extensions.json | 0 .../cdpSimplePrint/CMakeLists.txt | 0 .../3_CUDA_Features/cdpSimplePrint/README.md | 0 .../cdpSimplePrint/cdpSimplePrint.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cdpSimpleQuicksort/CMakeLists.txt | 0 .../cdpSimpleQuicksort/README.md | 0 .../cdpSimpleQuicksort/cdpSimpleQuicksort.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cudaCompressibleMemory/CMakeLists.txt | 0 .../cudaCompressibleMemory/README.md | 0 .../cudaCompressibleMemory/compMalloc.cpp | 0 .../cudaCompressibleMemory/compMalloc.h | 0 .../cudaCompressibleMemory/saxpy.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cudaTensorCoreGemm/CMakeLists.txt | 0 .../cudaTensorCoreGemm/README.md | 0 .../cudaTensorCoreGemm/cudaTensorCoreGemm.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../dmmaTensorCoreGemm/CMakeLists.txt | 0 .../dmmaTensorCoreGemm/README.md | 0 .../dmmaTensorCoreGemm/dmmaTensorCoreGemm.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../globalToShmemAsyncCopy/CMakeLists.txt | 0 .../globalToShmemAsyncCopy/README.md | 0 .../globalToShmemAsyncCopy.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../graphConditionalNodes/CMakeLists.txt | 0 .../graphConditionalNodes/README.md | 0 .../graphConditionalNodes.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../graphMemoryFootprint/CMakeLists.txt | 0 .../graphMemoryFootprint/README.md | 0 .../graphMemoryFootprint.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../graphMemoryNodes/.vscode/extensions.json | 0 .../graphMemoryNodes/CMakeLists.txt | 0 .../graphMemoryNodes/README.md | 0 .../graphMemoryNodes/graphMemoryNodes.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../immaTensorCoreGemm/CMakeLists.txt | 0 .../immaTensorCoreGemm/README.md | 0 .../immaTensorCoreGemm/immaTensorCoreGemm.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../jacobiCudaGraphs/.vscode/extensions.json | 0 .../jacobiCudaGraphs/CMakeLists.txt | 0 .../jacobiCudaGraphs/README.md | 0 .../jacobiCudaGraphs/jacobi.cu | 0 .../3_CUDA_Features/jacobiCudaGraphs/jacobi.h | 0 .../3_CUDA_Features/jacobiCudaGraphs/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../memMapIPCDrv/.vscode/extensions.json | 0 .../memMapIPCDrv/CMakeLists.txt | 0 .../3_CUDA_Features/memMapIPCDrv/README.md | 0 .../memMapIPCDrv/memMapIpc.cpp | 0 .../memMapIPCDrv/memMapIpc_kernel.cu | 0 .../newdelete/.vscode/c_cpp_properties.json | 0 .../newdelete/.vscode/extensions.json | 0 .../3_CUDA_Features/newdelete/CMakeLists.txt | 0 .../3_CUDA_Features/newdelete/README.md | 0 .../3_CUDA_Features/newdelete/container.hpp | 0 .../3_CUDA_Features/newdelete/newdelete.cu | 0 .../ptxjit/.vscode/c_cpp_properties.json | 0 .../ptxjit/.vscode/extensions.json | 0 .../3_CUDA_Features/ptxjit/CMakeLists.txt | 0 .../3_CUDA_Features/ptxjit/README.md | 0 .../3_CUDA_Features/ptxjit/ptxjit.cpp | 0 .../3_CUDA_Features/ptxjit/ptxjit_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleCudaGraphs/.vscode/extensions.json | 0 .../simpleCudaGraphs/CMakeLists.txt | 0 .../simpleCudaGraphs/README.md | 0 .../simpleCudaGraphs/simpleCudaGraphs.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../tf32TensorCoreGemm/CMakeLists.txt | 0 .../tf32TensorCoreGemm/README.md | 0 .../tf32TensorCoreGemm/tf32TensorCoreGemm.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../warpAggregatedAtomicsCG/CMakeLists.txt | 0 .../warpAggregatedAtomicsCG/README.md | 0 .../warpAggregatedAtomicsCG.cu | 0 .../4_CUDA_Libraries/CMakeLists.txt | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../FilterBorderControlNPP/CMakeLists.txt | 0 .../FilterBorderControlNPP.cpp | 0 .../FilterBorderControlNPP/README.md | 0 .../FilterBorderControlNPP/data/teapot512.pgm | Bin .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../MersenneTwisterGP11213/CMakeLists.txt | 0 .../MersenneTwister.cpp | 0 .../MersenneTwisterGP11213/README.md | 0 {Samples => cpp}/4_CUDA_Libraries/README.md | 0 .../batchCUBLAS/.vscode/c_cpp_properties.json | 0 .../batchCUBLAS/.vscode/extensions.json | 0 .../batchCUBLAS/CMakeLists.txt | 0 .../4_CUDA_Libraries/batchCUBLAS/README.md | 0 .../batchCUBLAS/batchCUBLAS.cpp | 0 .../batchCUBLAS/batchCUBLAS.h | 0 .../.vscode/c_cpp_properties.json | 0 .../boxFilterNPP/.vscode/extensions.json | 0 .../boxFilterNPP/CMakeLists.txt | 0 .../4_CUDA_Libraries/boxFilterNPP/README.md | 0 .../boxFilterNPP/boxFilterNPP.cpp | 0 .../boxFilterNPP/teapot512.pgm | Bin .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cannyEdgeDetectorNPP/CMakeLists.txt | 0 .../cannyEdgeDetectorNPP/README.md | 0 .../cannyEdgeDetectorNPP.cpp | 0 .../cannyEdgeDetectorNPP/teapot512.pgm | Bin .../.vscode/c_cpp_properties.json | 0 .../conjugateGradient/.vscode/extensions.json | 0 .../conjugateGradient/CMakeLists.txt | 0 .../conjugateGradient/README.md | 0 .../conjugateGradient/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../CMakeLists.txt | 0 .../conjugateGradientCudaGraphs/README.md | 0 .../conjugateGradientCudaGraphs.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../CMakeLists.txt | 0 .../conjugateGradientMultiBlockCG/README.md | 0 .../conjugateGradientMultiBlockCG.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../CMakeLists.txt | 0 .../conjugateGradientMultiDeviceCG/README.md | 0 .../conjugateGradientMultiDeviceCG.cu | 16 +- .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../conjugateGradientPrecond/CMakeLists.txt | 0 .../conjugateGradientPrecond/README.md | 0 .../conjugateGradientPrecond/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../conjugateGradientUM/CMakeLists.txt | 0 .../conjugateGradientUM/README.md | 0 .../conjugateGradientUM/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cuSolverDn_LinearSolver/CMakeLists.txt | 0 .../cuSolverDn_LinearSolver/README.md | 0 .../cuSolverDn_LinearSolver.cpp | 0 .../gr_900_900_crg.mtx | 0 .../cuSolverDn_LinearSolver/lap3D_7pt_n20.mtx | 0 .../cuSolverDn_LinearSolver/mmio.c | 0 .../cuSolverDn_LinearSolver/mmio.h | 0 .../cuSolverDn_LinearSolver/mmio_wrapper.cpp | 0 .../cuSolverRf/.vscode/c_cpp_properties.json | 0 .../cuSolverRf/.vscode/extensions.json | 0 .../cuSolverRf/CMakeLists.txt | 0 .../4_CUDA_Libraries/cuSolverRf/README.md | 0 .../cuSolverRf/cuSolverRf.cpp | 0 .../cuSolverRf/lap2D_5pt_n100.mtx | 0 .../cuSolverRf/lap3D_7pt_n20.mtx | 0 .../4_CUDA_Libraries/cuSolverRf/mmio.c | 0 .../4_CUDA_Libraries/cuSolverRf/mmio.h | 0 .../cuSolverRf/mmio_wrapper.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cuSolverSp_LinearSolver/CMakeLists.txt | 0 .../cuSolverSp_LinearSolver/README.md | 0 .../cuSolverSp_LinearSolver.cpp | 0 .../lap2D_5pt_n100.mtx | 0 .../cuSolverSp_LinearSolver/lap3D_7pt_n20.mtx | 0 .../cuSolverSp_LinearSolver/mmio.c | 0 .../cuSolverSp_LinearSolver/mmio.h | 0 .../cuSolverSp_LinearSolver/mmio_wrapper.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../CMakeLists.txt | 0 .../cuSolverSp_LowlevelCholesky/README.md | 0 .../cuSolverSp_LowlevelCholesky.cpp | 0 .../lap2D_5pt_n100.mtx | 0 .../lap3D_7pt_n20.mtx | 0 .../cuSolverSp_LowlevelCholesky/mmio.c | 0 .../cuSolverSp_LowlevelCholesky/mmio.h | 0 .../mmio_wrapper.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cuSolverSp_LowlevelQR/CMakeLists.txt | 0 .../cuSolverSp_LowlevelQR/README.md | 0 .../cuSolverSp_LowlevelQR.cpp | 0 .../cuSolverSp_LowlevelQR/lap2D_5pt_n100.mtx | 0 .../cuSolverSp_LowlevelQR/lap2D_5pt_n32.mtx | 0 .../cuSolverSp_LowlevelQR/lap3D_7pt_n20.mtx | 0 .../cuSolverSp_LowlevelQR/mmio.c | 0 .../cuSolverSp_LowlevelQR/mmio.h | 0 .../cuSolverSp_LowlevelQR/mmio_wrapper.cpp | 0 .../cudaNvSci/.vscode/c_cpp_properties.json | 0 .../cudaNvSci/.vscode/extensions.json | 0 .../4_CUDA_Libraries/cudaNvSci/CMakeLists.txt | 0 .../4_CUDA_Libraries/cudaNvSci/README.md | 0 .../4_CUDA_Libraries/cudaNvSci/cudaNvSci.cpp | 0 .../4_CUDA_Libraries/cudaNvSci/cudaNvSci.h | 0 .../cudaNvSci/imageKernels.cu | 0 .../4_CUDA_Libraries/cudaNvSci/main.cpp | 0 .../4_CUDA_Libraries/cudaNvSci/teapot1024.ppm | Bin .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../freeImageInteropNPP/CMakeLists.txt | 0 .../freeImageInteropNPP/README.md | 0 .../freeImageInteropNPP.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../histEqualizationNPP/CMakeLists.txt | 0 .../histEqualizationNPP/README.md | 0 .../histEqualizationNPP.cpp | 0 .../jitLto/.vscode/c_cpp_properties.json | 0 .../jitLto/.vscode/extensions.json | 0 .../4_CUDA_Libraries/jitLto/CMakeLists.txt | 0 .../4_CUDA_Libraries/jitLto/README.md | 0 .../4_CUDA_Libraries/jitLto/jitLto.cpp | 0 .../lineOfSight/.vscode/c_cpp_properties.json | 0 .../lineOfSight/.vscode/extensions.json | 0 .../lineOfSight/CMakeLists.txt | 0 .../4_CUDA_Libraries/lineOfSight/README.md | 0 .../lineOfSight/lineOfSight.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../matrixMulCUBLAS/.vscode/extensions.json | 0 .../matrixMulCUBLAS/CMakeLists.txt | 0 .../matrixMulCUBLAS/README.md | 0 .../matrixMulCUBLAS/matrixMulCUBLAS.cpp | 0 .../nvJPEG/.vscode/c_cpp_properties.json | 0 .../nvJPEG/.vscode/extensions.json | 0 .../4_CUDA_Libraries/nvJPEG/CMakeLists.txt | 0 .../4_CUDA_Libraries/nvJPEG/README.md | 0 .../4_CUDA_Libraries/nvJPEG/images/img1.jpg | Bin .../4_CUDA_Libraries/nvJPEG/images/img2.jpg | Bin .../4_CUDA_Libraries/nvJPEG/images/img3.jpg | Bin .../4_CUDA_Libraries/nvJPEG/images/img4.jpg | Bin .../4_CUDA_Libraries/nvJPEG/images/img5.jpg | Bin .../4_CUDA_Libraries/nvJPEG/images/img6.jpg | Bin .../4_CUDA_Libraries/nvJPEG/images/img7.jpg | Bin .../4_CUDA_Libraries/nvJPEG/images/img8.jpg | Bin .../4_CUDA_Libraries/nvJPEG/nvJPEG.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../nvJPEG_encoder/.vscode/extensions.json | 0 .../nvJPEG_encoder/CMakeLists.txt | 0 .../4_CUDA_Libraries/nvJPEG_encoder/README.md | 0 .../nvJPEG_encoder/encode_output/img1.jpg | Bin .../nvJPEG_encoder/encode_output/img2.jpg | Bin .../nvJPEG_encoder/encode_output/img3.jpg | Bin .../nvJPEG_encoder/encode_output/img4.jpg | Bin .../nvJPEG_encoder/encode_output/img5.jpg | Bin .../nvJPEG_encoder/encode_output/img6.jpg | Bin .../nvJPEG_encoder/encode_output/img7.jpg | Bin .../nvJPEG_encoder/encode_output/img8.jpg | Bin .../nvJPEG_encoder/images/img1.jpg | Bin .../nvJPEG_encoder/images/img2.jpg | Bin .../nvJPEG_encoder/images/img3.jpg | Bin .../nvJPEG_encoder/images/img4.jpg | Bin .../nvJPEG_encoder/images/img5.jpg | Bin .../nvJPEG_encoder/images/img6.jpg | Bin .../nvJPEG_encoder/images/img7.jpg | Bin .../nvJPEG_encoder/images/img8.jpg | Bin .../nvJPEG_encoder/nvJPEG_encoder.cpp | 0 .../oceanFFT/.vscode/c_cpp_properties.json | 0 .../oceanFFT/.vscode/extensions.json | 0 .../4_CUDA_Libraries/oceanFFT/CMakeLists.txt | 0 .../4_CUDA_Libraries/oceanFFT/README.md | 0 .../4_CUDA_Libraries/oceanFFT/data/ocean.frag | 0 .../4_CUDA_Libraries/oceanFFT/data/ocean.vert | 0 .../oceanFFT/data/ref_slopeShading.bin | Bin .../oceanFFT/data/ref_spatialDomain.bin | Bin .../oceanFFT/data/reference.ppm | Bin .../oceanFFT/doc/sshot_lg.png | Bin .../oceanFFT/doc/sshot_md.png | Bin .../oceanFFT/doc/sshot_sm.png | Bin .../4_CUDA_Libraries/oceanFFT/oceanFFT.cpp | 0 .../oceanFFT/oceanFFT_kernel.cu | 0 .../randomFog/.vscode/c_cpp_properties.json | 0 .../randomFog/.vscode/extensions.json | 0 .../4_CUDA_Libraries/randomFog/CMakeLists.txt | 0 .../4_CUDA_Libraries/randomFog/README.md | 0 .../randomFog/data/ref_randomFog.bin | Bin .../4_CUDA_Libraries/randomFog/randomFog.cpp | 0 .../4_CUDA_Libraries/randomFog/rng.cpp | 0 .../4_CUDA_Libraries/randomFog/rng.h | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleCUBLAS/.vscode/extensions.json | 0 .../simpleCUBLAS/CMakeLists.txt | 0 .../4_CUDA_Libraries/simpleCUBLAS/README.md | 0 .../simpleCUBLAS/simpleCUBLAS.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleCUBLASXT/.vscode/extensions.json | 0 .../simpleCUBLASXT/CMakeLists.txt | 0 .../4_CUDA_Libraries/simpleCUBLASXT/README.md | 0 .../simpleCUBLASXT/simpleCUBLASXT.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleCUBLAS_LU/.vscode/extensions.json | 0 .../simpleCUBLAS_LU/CMakeLists.txt | 0 .../simpleCUBLAS_LU/README.md | 0 .../simpleCUBLAS_LU/simpleCUBLAS_LU.cpp | 0 .../simpleCUFFT/.vscode/c_cpp_properties.json | 0 .../simpleCUFFT/.vscode/extensions.json | 0 .../simpleCUFFT/CMakeLists.txt | 0 .../4_CUDA_Libraries/simpleCUFFT/README.md | 0 .../simpleCUFFT/simpleCUFFT.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleCUFFT_2d_MGPU/CMakeLists.txt | 0 .../simpleCUFFT_2d_MGPU/README.md | 0 .../simpleCUFFT_2d_MGPU.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleCUFFT_MGPU/.vscode/extensions.json | 0 .../simpleCUFFT_MGPU/CMakeLists.txt | 0 .../simpleCUFFT_MGPU/README.md | 0 .../simpleCUFFT_MGPU/simpleCUFFT_MGPU.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../simpleCUFFT_callback/CMakeLists.txt | 0 .../simpleCUFFT_callback/README.md | 0 .../simpleCUFFT_callback.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../watershedSegmentationNPP/CMakeLists.txt | 0 .../watershedSegmentationNPP/README.md | 0 .../watershedSegmentationNPP.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../BlackScholes/.vscode/extensions.json | 0 .../BlackScholes/BlackScholes.cu | 0 .../BlackScholes/BlackScholes_gold.cpp | 0 .../BlackScholes/BlackScholes_kernel.cuh | 0 .../BlackScholes/CMakeLists.txt | 0 .../5_Domain_Specific/BlackScholes/README.md | 0 .../BlackScholes/doc/BlackScholes.doc | Bin .../BlackScholes/doc/BlackScholes.pdf | Bin .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../BlackScholes_nvrtc/BlackScholes.cpp | 0 .../BlackScholes_nvrtc/BlackScholes_gold.cpp | 0 .../BlackScholes_kernel.cuh | 0 .../BlackScholes_nvrtc/CMakeLists.txt | 0 .../BlackScholes_nvrtc/README.md | 0 .../5_Domain_Specific/CMakeLists.txt | 0 .../FDTD3d/.vscode/c_cpp_properties.json | 0 .../FDTD3d/.vscode/extensions.json | 0 .../5_Domain_Specific/FDTD3d/CMakeLists.txt | 0 .../5_Domain_Specific/FDTD3d/README.md | 0 .../5_Domain_Specific/FDTD3d/inc/FDTD3d.h | 0 .../5_Domain_Specific/FDTD3d/inc/FDTD3dGPU.h | 0 .../FDTD3d/inc/FDTD3dGPUKernel.cuh | 0 .../FDTD3d/inc/FDTD3dReference.h | 0 .../5_Domain_Specific/FDTD3d/src/FDTD3d.cpp | 0 .../5_Domain_Specific/FDTD3d/src/FDTD3dGPU.cu | 0 .../FDTD3d/src/FDTD3dReference.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../HSOpticalFlow/.vscode/extensions.json | 0 .../HSOpticalFlow/CMakeLists.txt | 0 .../HSOpticalFlow/FlowCPU.flo | Bin .../HSOpticalFlow/FlowGPU.flo | Bin .../5_Domain_Specific/HSOpticalFlow/README.md | 0 .../HSOpticalFlow/addKernel.cuh | 0 .../5_Domain_Specific/HSOpticalFlow/common.h | 0 .../HSOpticalFlow/data/frame10.ppm | 0 .../HSOpticalFlow/data/frame11.ppm | 0 .../HSOpticalFlow/derivativesKernel.cuh | 0 .../HSOpticalFlow/doc/OpticalFlow.docx | Bin .../HSOpticalFlow/doc/OpticalFlow.pdf | Bin .../HSOpticalFlow/downscaleKernel.cuh | 0 .../HSOpticalFlow/flowCUDA.cu | 0 .../HSOpticalFlow/flowCUDA.h | 0 .../HSOpticalFlow/flowGold.cpp | 0 .../HSOpticalFlow/flowGold.h | 0 .../5_Domain_Specific/HSOpticalFlow/main.cpp | 0 .../HSOpticalFlow/solverKernel.cuh | 0 .../HSOpticalFlow/upscaleKernel.cuh | 0 .../HSOpticalFlow/warpingKernel.cuh | 0 .../Mandelbrot/.vscode/c_cpp_properties.json | 0 .../Mandelbrot/.vscode/extensions.json | 0 .../Mandelbrot/CMakeLists.txt | 0 .../Mandelbrot/Mandelbrot.cpp | 0 .../Mandelbrot/Mandelbrot_cuda.cu | 0 .../Mandelbrot/Mandelbrot_gold.cpp | 0 .../Mandelbrot/Mandelbrot_gold.h | 0 .../Mandelbrot/Mandelbrot_kernel.cuh | 0 .../Mandelbrot/Mandelbrot_kernel.h | 0 .../5_Domain_Specific/Mandelbrot/README.md | 0 .../Mandelbrot/data/Mandelbrot_fp32.ppm | 0 .../Mandelbrot/data/Mandelbrot_fp64.ppm | 0 .../Mandelbrot/data/params.txt | 0 .../Mandelbrot/data/referenceJulia_fp32.ppm | 0 .../Mandelbrot/data/referenceJulia_fp64.ppm | 0 .../Mandelbrot/doc/sshot_lg.JPG | Bin .../Mandelbrot/doc/sshot_md.JPG | Bin .../Mandelbrot/doc/sshot_sm.JPG | Bin .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../MonteCarloMultiGPU/CMakeLists.txt | 0 .../MonteCarloMultiGPU/MonteCarloMultiGPU.cpp | 0 .../MonteCarloMultiGPU/MonteCarlo_common.h | 0 .../MonteCarloMultiGPU/MonteCarlo_gold.cpp | 0 .../MonteCarloMultiGPU/MonteCarlo_kernel.cu | 0 .../MonteCarlo_reduction.cuh | 0 .../MonteCarloMultiGPU/README.md | 0 .../MonteCarloMultiGPU/doc/MonteCarlo.doc | Bin .../MonteCarloMultiGPU/doc/MonteCarlo.pdf | Bin .../MonteCarloMultiGPU/multithreading.cpp | 0 .../MonteCarloMultiGPU/multithreading.h | 0 .../MonteCarloMultiGPU/realtype.h | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../NV12toBGRandResize/CMakeLists.txt | 0 .../NV12toBGRandResize/README.md | 0 .../NV12toBGRandResize/bgr_resize.cu | 0 .../NV12toBGRandResize/data/test1280x720.nv12 | 0 .../data/test1920x1080.nv12 | 0 .../NV12toBGRandResize/data/test640x480.nv12 | 0 .../NV12toBGRandResize/nv12_resize.cu | 0 .../NV12toBGRandResize/nv12_to_bgr_planar.cu | 0 .../NV12toBGRandResize/resize_convert.h | 0 .../resize_convert_main.cpp | 0 .../NV12toBGRandResize/utils.cu | 0 .../NV12toBGRandResize/utils.h | 0 {Samples => cpp}/5_Domain_Specific/README.md | 0 .../SobelFilter/.vscode/c_cpp_properties.json | 0 .../SobelFilter/.vscode/extensions.json | 0 .../SobelFilter/CMakeLists.txt | 0 .../5_Domain_Specific/SobelFilter/README.md | 0 .../SobelFilter/SobelFilter.cpp | 0 .../SobelFilter/SobelFilter_kernels.cu | 0 .../SobelFilter/SobelFilter_kernels.h | 0 .../SobelFilter/data/ref_orig.pgm | Bin .../SobelFilter/data/ref_shared.pgm | Bin .../SobelFilter/data/ref_tex.pgm | Bin .../SobelFilter/data/teapot.pgm | Bin .../SobelFilter/doc/sshot_lg.JPG | Bin .../SobelFilter/doc/sshot_md.JPG | Bin .../SobelFilter/doc/sshot_sm.JPG | Bin .../SobolQRNG/.vscode/c_cpp_properties.json | 0 .../SobolQRNG/.vscode/extensions.json | 0 .../SobolQRNG/CMakeLists.txt | 0 .../5_Domain_Specific/SobolQRNG/README.md | 0 .../5_Domain_Specific/SobolQRNG/sobol.cpp | 0 .../5_Domain_Specific/SobolQRNG/sobol.h | 0 .../SobolQRNG/sobol_gold.cpp | 0 .../5_Domain_Specific/SobolQRNG/sobol_gold.h | 0 .../5_Domain_Specific/SobolQRNG/sobol_gpu.cu | 0 .../5_Domain_Specific/SobolQRNG/sobol_gpu.h | 0 .../SobolQRNG/sobol_primitives.cpp | 0 .../SobolQRNG/sobol_primitives.h | 0 .../.vscode/c_cpp_properties.json | 0 .../bicubicTexture/.vscode/extensions.json | 0 .../bicubicTexture/CMakeLists.txt | 0 .../bicubicTexture/README.md | 0 .../bicubicTexture/bicubicTexture.cpp | 0 .../bicubicTexture/bicubicTexture_cuda.cu | 0 .../bicubicTexture/bicubicTexture_kernel.cuh | 0 .../bicubicTexture/data/0_nearest.ppm | 0 .../bicubicTexture/data/1_bilinear.ppm | 0 .../bicubicTexture/data/2_bicubic.ppm | 0 .../bicubicTexture/data/3_fastbicubic.ppm | 0 .../bicubicTexture/data/4_catmull-rom.ppm | 0 .../bicubicTexture/data/teapot512.pgm | Bin .../.vscode/c_cpp_properties.json | 0 .../bilateralFilter/.vscode/extensions.json | 0 .../bilateralFilter/CMakeLists.txt | 0 .../bilateralFilter/README.md | 0 .../bilateralFilter/bilateralFilter.cpp | 0 .../bilateralFilter/bilateralFilter_cpu.cpp | 0 .../bilateralFilter/bilateral_kernel.cu | 0 .../bilateralFilter/bmploader.cpp | 0 .../bilateralFilter/data/nature_monte.bmp | Bin .../bilateralFilter/data/ref_05.ppm | 0 .../bilateralFilter/data/ref_06.ppm | 0 .../bilateralFilter/data/ref_07.ppm | 0 .../bilateralFilter/data/ref_08.ppm | 0 .../.vscode/c_cpp_properties.json | 0 .../binomialOptions/.vscode/extensions.json | 0 .../binomialOptions/CMakeLists.txt | 0 .../binomialOptions/README.md | 0 .../binomialOptions/binomialOptions.cpp | 0 .../binomialOptions/binomialOptions_common.h | 0 .../binomialOptions/binomialOptions_gold.cpp | 0 .../binomialOptions/binomialOptions_kernel.cu | 0 .../binomialOptions/doc/binomialOptions.doc | Bin .../binomialOptions/doc/binomialOptions.pdf | Bin .../binomialOptions/realtype.h | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../binomialOptions_nvrtc/CMakeLists.txt | 0 .../binomialOptions_nvrtc/README.md | 0 .../binomialOptions_nvrtc/binomialOptions.cpp | 0 .../binomialOptions_common.h | 0 .../binomialOptions_gold.cpp | 0 .../binomialOptions_gpu.cpp | 0 .../binomialOptions_kernel.cu | 0 .../binomialOptions_nvrtc/common_gpu_header.h | 0 .../binomialOptions_nvrtc/realtype.h | 0 .../.vscode/c_cpp_properties.json | 0 .../convolutionFFT2D/.vscode/extensions.json | 0 .../convolutionFFT2D/CMakeLists.txt | 0 .../convolutionFFT2D/README.md | 0 .../convolutionFFT2D/convolutionFFT2D.cu | 0 .../convolutionFFT2D/convolutionFFT2D.cuh | 0 .../convolutionFFT2D_common.h | 0 .../convolutionFFT2D_gold.cpp | 0 .../convolutionFFT2D/main.cpp | 0 .../dwtHaar1D/.vscode/c_cpp_properties.json | 0 .../dwtHaar1D/.vscode/extensions.json | 0 .../dwtHaar1D/CMakeLists.txt | 0 .../5_Domain_Specific/dwtHaar1D/README.md | 0 .../dwtHaar1D/data/regression.gold.dat | 0 .../dwtHaar1D/data/regression_2_14.gold.dat | 0 .../dwtHaar1D/data/regression_2_18.gold.dat | 0 .../dwtHaar1D/data/signal.dat | 0 .../dwtHaar1D/data/signal_2_14.dat | 0 .../dwtHaar1D/data/signal_2_18.dat | 0 .../5_Domain_Specific/dwtHaar1D/dwtHaar1D.cu | 0 .../dwtHaar1D/dwtHaar1D_kernel.cuh | 0 .../dxtc/.vscode/c_cpp_properties.json | 0 .../dxtc/.vscode/extensions.json | 0 .../5_Domain_Specific/dxtc/CMakeLists.txt | 0 .../5_Domain_Specific/dxtc/CudaMath.h | 0 .../5_Domain_Specific/dxtc/README.md | 0 .../dxtc/data/teapot512_ref.dds | Bin .../dxtc/data/teapot512_std.dds | Bin .../dxtc/data/teapot512_std.ppm | Bin {Samples => cpp}/5_Domain_Specific/dxtc/dds.h | 0 .../5_Domain_Specific/dxtc/doc/cuda_dxtc.doc | Bin .../5_Domain_Specific/dxtc/doc/cuda_dxtc.pdf | Bin .../5_Domain_Specific/dxtc/dxtc.cu | 0 .../5_Domain_Specific/dxtc/permutations.h | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../fastWalshTransform/CMakeLists.txt | 0 .../fastWalshTransform/README.md | 0 .../fastWalshTransform/doc/FWT.doc | Bin .../fastWalshTransform/fastWalshTransform.cu | 0 .../fastWalshTransform_gold.cpp | 0 .../fastWalshTransform_kernel.cuh | 0 .../fluidsGL/.vscode/c_cpp_properties.json | 0 .../fluidsGL/.vscode/extensions.json | 0 .../5_Domain_Specific/fluidsGL/CMakeLists.txt | 0 .../5_Domain_Specific/fluidsGL/README.md | 0 .../fluidsGL/data/ref_fluidsGL.ppm | Bin .../5_Domain_Specific/fluidsGL/defines.h | 0 .../fluidsGL/doc/fluidsGL.doc | Bin .../fluidsGL/doc/fluidsGL.pdf | Bin .../fluidsGL/doc/fluidsGL_lg.gif | Bin .../fluidsGL/doc/fluidsGL_md.gif | Bin .../fluidsGL/doc/fluidsGL_sm.gif | Bin .../5_Domain_Specific/fluidsGL/fluidsGL.cpp | 0 .../fluidsGL/fluidsGL_kernels.cu | 0 .../fluidsGL/fluidsGL_kernels.cuh | 0 .../fluidsGL/fluidsGL_kernels.h | 0 .../.vscode/c_cpp_properties.json | 0 .../marchingCubes/.vscode/extensions.json | 0 .../marchingCubes/CMakeLists.txt | 0 .../5_Domain_Specific/marchingCubes/README.md | 0 .../marchingCubes/data/Bucky.raw | Bin .../marchingCubes/data/compVoxelArray.bin | Bin .../marchingCubes/data/normalArray.bin | Bin .../marchingCubes/data/posArray.bin | Bin .../marchingCubes/data/ref_march_cubes.ppm | 0 .../5_Domain_Specific/marchingCubes/defines.h | 0 .../marchingCubes/doc/screenshot_lg.png | Bin .../marchingCubes/doc/screenshot_md.png | Bin .../marchingCubes/doc/screenshot_sm.png | Bin .../marchingCubes/marchingCubes.cpp | 0 .../marchingCubes/marchingCubes_kernel.cu | 0 .../5_Domain_Specific/marchingCubes/tables.h | 0 .../nbody/.vscode/c_cpp_properties.json | 0 .../nbody/.vscode/extensions.json | 0 .../5_Domain_Specific/nbody/CMakeLists.txt | 0 .../5_Domain_Specific/nbody/README.md | 0 .../5_Domain_Specific/nbody/bodysystem.h | 0 .../5_Domain_Specific/nbody/bodysystemcpu.h | 0 .../nbody/bodysystemcpu_impl.h | 0 .../5_Domain_Specific/nbody/bodysystemcuda.cu | 0 .../5_Domain_Specific/nbody/bodysystemcuda.h | 0 .../nbody/bodysystemcuda_impl.h | 0 .../nbody/doc/nbody_gems3_ch31.pdf | Bin .../nbody/doc/screenshot_lg.jpg | Bin .../nbody/doc/screenshot_md.jpg | Bin .../nbody/doc/screenshot_sm.jpg | Bin .../5_Domain_Specific/nbody/nbody.cpp | 0 .../nbody/render_particles.cpp | 0 .../nbody/render_particles.h | 0 .../5_Domain_Specific/nbody/tipsy.h | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../p2pBandwidthLatencyTest/CMakeLists.txt | 0 .../p2pBandwidthLatencyTest/README.md | 0 .../p2pBandwidthLatencyTest.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../postProcessGL/.vscode/extensions.json | 0 .../postProcessGL/CMakeLists.txt | 0 .../5_Domain_Specific/postProcessGL/README.md | 0 .../postProcessGL/data/teapot_2.ppm | 0 .../postProcessGL/data/teapot_4.ppm | 0 .../postProcessGL/data/teapot_8.ppm | 0 .../postProcessGL/data/teapot_orig.ppm | 0 .../postProcessGL/doc/postProcessGL_lg.gif | Bin .../postProcessGL/doc/postProcessGL_md.gif | Bin .../postProcessGL/doc/postProcessGL_sm.gif | Bin .../5_Domain_Specific/postProcessGL/main.cpp | 0 .../postProcessGL/postProcessGL.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../quasirandomGenerator/CMakeLists.txt | 0 .../quasirandomGenerator/README.md | 0 .../quasirandomGenerator.cpp | 0 .../quasirandomGenerator_common.h | 0 .../quasirandomGenerator_gold.cpp | 0 .../quasirandomGenerator_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../quasirandomGenerator_nvrtc/CMakeLists.txt | 0 .../quasirandomGenerator_nvrtc/README.md | 0 .../quasirandomGenerator.cpp | 0 .../quasirandomGenerator_common.h | 0 .../quasirandomGenerator_gold.cpp | 0 .../quasirandomGenerator_gpu.cuh | 0 .../quasirandomGenerator_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../recursiveGaussian/.vscode/extensions.json | 0 .../recursiveGaussian/CMakeLists.txt | 0 .../recursiveGaussian/README.md | 0 .../recursiveGaussian/data/ref_10.ppm | Bin .../recursiveGaussian/data/ref_14.ppm | Bin .../recursiveGaussian/data/ref_18.ppm | Bin .../recursiveGaussian/data/ref_22.ppm | Bin .../recursiveGaussian/data/teapot512.ppm | Bin .../recursiveGaussian/recursiveGaussian.cpp | 0 .../recursiveGaussian_cuda.cu | 0 .../recursiveGaussian_kernel.cuh | 0 .../simpleD3D11/CMakeLists.txt | 0 .../5_Domain_Specific/simpleD3D11/README.md | 0 .../simpleD3D11/ShaderStructs.h | 0 .../simpleD3D11/data/ref_simpleD3D11.ppm | 0 .../simpleD3D11/simpleD3D11.cpp | 0 .../simpleD3D11/sinewave_cuda.cu | 0 .../simpleD3D11/sinewave_cuda.h | 0 .../simpleD3D11Texture/CMakeLists.txt | 0 .../simpleD3D11Texture/README.md | 0 .../d3dx11effect/d3dx11effect.h | 0 .../data/ref_simpleD3D11Texture.ppm | 0 .../simpleD3D11Texture/simpleD3D11Texture.cpp | 0 .../simpleD3D11Texture/texture_2d.cu | 0 .../simpleD3D11Texture/texture_3d.cu | 0 .../simpleD3D11Texture/texture_cube.cu | 0 .../simpleD3D12/CMakeLists.txt | 0 .../simpleD3D12/DX12CudaSample.cpp | 0 .../simpleD3D12/DX12CudaSample.h | 0 .../simpleD3D12/DXSampleHelper.h | 0 .../5_Domain_Specific/simpleD3D12/Main.cpp | 0 .../5_Domain_Specific/simpleD3D12/README.md | 0 .../simpleD3D12/ShaderStructs.h | 0 .../simpleD3D12/Win32Application.cpp | 0 .../simpleD3D12/Win32Application.h | 0 .../5_Domain_Specific/simpleD3D12/d3dx12.h | 0 .../simpleD3D12/shaders.hlsl | 0 .../simpleD3D12/simpleD3D12.cpp | 0 .../simpleD3D12/simpleD3D12.h | 0 .../simpleD3D12/sinewave_cuda.cu | 0 .../5_Domain_Specific/simpleD3D12/stdafx.cpp | 0 .../5_Domain_Specific/simpleD3D12/stdafx.h | 0 .../simpleGL/.vscode/c_cpp_properties.json | 0 .../simpleGL/.vscode/extensions.json | 0 .../5_Domain_Specific/simpleGL/CMakeLists.txt | 0 .../5_Domain_Specific/simpleGL/README.md | 0 .../simpleGL/data/ref_simpleGL.bin | Bin .../5_Domain_Specific/simpleGL/simpleGL.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../simpleVulkan/.vscode/extensions.json | 0 .../simpleVulkan/Build_instructions.txt | 0 .../simpleVulkan/CMakeLists.txt | 0 .../5_Domain_Specific/simpleVulkan/README.md | 0 .../simpleVulkan/SineWaveSimulation.cu | 0 .../simpleVulkan/SineWaveSimulation.h | 0 .../simpleVulkan/VulkanBaseApp.cpp | 0 .../simpleVulkan/VulkanBaseApp.h | 0 .../5_Domain_Specific/simpleVulkan/frag.spv | Bin .../5_Domain_Specific/simpleVulkan/linmath.h | 0 .../5_Domain_Specific/simpleVulkan/main.cpp | 0 .../simpleVulkan/sinewave.frag | 0 .../simpleVulkan/sinewave.vert | 0 .../5_Domain_Specific/simpleVulkan/vert.spv | Bin .../.vscode/c_cpp_properties.json | 0 .../simpleVulkanMMAP/.vscode/extensions.json | 0 .../simpleVulkanMMAP/Build_instructions.txt | 0 .../simpleVulkanMMAP/CMakeLists.txt | 0 .../simpleVulkanMMAP/MonteCarloPi.cu | 0 .../simpleVulkanMMAP/MonteCarloPi.h | 0 .../simpleVulkanMMAP/README.md | 0 .../simpleVulkanMMAP/VulkanBaseApp.cpp | 0 .../simpleVulkanMMAP/VulkanBaseApp.h | 0 .../simpleVulkanMMAP/VulkanCudaInterop.h | 0 .../simpleVulkanMMAP/frag.spv | Bin .../simpleVulkanMMAP/main.cpp | 0 .../simpleVulkanMMAP/montecarlo.frag | 0 .../simpleVulkanMMAP/montecarlo.vert | 0 .../simpleVulkanMMAP/vert.spv | Bin .../.vscode/c_cpp_properties.json | 0 .../smokeParticles/.vscode/extensions.json | 0 .../smokeParticles/CMakeLists.txt | 0 .../smokeParticles/GLSLProgram.cpp | 0 .../smokeParticles/GLSLProgram.h | 0 .../smokeParticles/GpuArray.h | 0 .../smokeParticles/ParticleSystem.cpp | 0 .../smokeParticles/ParticleSystem.cuh | 0 .../smokeParticles/ParticleSystem.h | 0 .../smokeParticles/ParticleSystem_cuda.cu | 0 .../smokeParticles/README.md | 0 .../smokeParticles/SmokeRenderer.cpp | 0 .../smokeParticles/SmokeRenderer.h | 0 .../smokeParticles/SmokeShaders.cpp | 0 .../smokeParticles/SmokeShaders.h | 0 .../smokeParticles/data/floortile.ppm | Bin .../smokeParticles/data/ref_smokePart_pos.bin | Bin .../smokeParticles/data/ref_smokePart_vel.bin | Bin .../smokeParticles/doc/screenshot_lg.png | Bin .../smokeParticles/doc/screenshot_md.png | Bin .../smokeParticles/doc/screenshot_sm.png | Bin .../smokeParticles/doc/smokeParticles.doc | Bin .../smokeParticles/doc/smokeParticles.pdf | Bin .../smokeParticles/framebufferObject.cpp | 0 .../smokeParticles/framebufferObject.h | 0 .../5_Domain_Specific/smokeParticles/nvMath.h | 0 .../smokeParticles/nvMatrix.h | 0 .../smokeParticles/nvQuaternion.h | 0 .../smokeParticles/nvVector.h | 0 .../smokeParticles/particleDemo.cpp | 0 .../smokeParticles/particles_kernel.cuh | 0 .../particles_kernel_device.cuh | 0 .../smokeParticles/renderbuffer.cpp | 0 .../smokeParticles/renderbuffer.h | 0 .../.vscode/c_cpp_properties.json | 0 .../stereoDisparity/.vscode/extensions.json | 0 .../stereoDisparity/CMakeLists.txt | 0 .../stereoDisparity/README.md | 0 .../data/stereo.im0.640x533.ppm | Bin .../data/stereo.im1.640x533.ppm | Bin .../stereoDisparity/stereoDisparity.cu | 0 .../stereoDisparity_kernel.cuh | 0 .../.vscode/c_cpp_properties.json | 0 .../volumeFiltering/.vscode/extensions.json | 0 .../volumeFiltering/CMakeLists.txt | 0 .../volumeFiltering/README.md | 0 .../volumeFiltering/data/Bucky.raw | Bin .../volumeFiltering/data/ref_volumefilter.ppm | Bin .../volumeFiltering/doc/sshot_lg.JPG | Bin .../volumeFiltering/doc/sshot_md.JPG | Bin .../volumeFiltering/doc/sshot_sm.JPG | Bin .../volumeFiltering/volume.cpp | 0 .../volumeFiltering/volume.h | 0 .../volumeFiltering/volumeFilter.h | 0 .../volumeFiltering/volumeFilter_kernel.cu | 0 .../volumeFiltering/volumeFiltering.cpp | 0 .../volumeFiltering/volumeRender.h | 0 .../volumeFiltering/volumeRender_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../volumeRender/.vscode/extensions.json | 0 .../volumeRender/CMakeLists.txt | 0 .../5_Domain_Specific/volumeRender/README.md | 0 .../volumeRender/data/Bucky.raw | Bin .../volumeRender/data/ref_volume.ppm | Bin .../volumeRender/doc/sshot_lg.JPG | Bin .../volumeRender/doc/sshot_md.jpg | Bin .../volumeRender/doc/sshot_sm.JPG | Bin .../5_Domain_Specific/volumeRender/volume.ppm | Bin .../volumeRender/volumeRender.cpp | 0 .../volumeRender/volumeRender_kernel.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../vulkanImageCUDA/.vscode/extensions.json | 0 .../vulkanImageCUDA/Build_instructions.txt | 0 .../vulkanImageCUDA/CMakeLists.txt | 0 .../vulkanImageCUDA/README.md | 0 .../vulkanImageCUDA/frag.spv | Bin .../vulkanImageCUDA/linmath.h | 0 .../vulkanImageCUDA/shader.frag | 0 .../vulkanImageCUDA/shader.vert | 0 .../vulkanImageCUDA/teapot1024.ppm | Bin .../vulkanImageCUDA/vert.spv | Bin .../vulkanImageCUDA/vulkanImageCUDA.cu | 0 {Samples => cpp}/6_Performance/CMakeLists.txt | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../LargeKernelParameter/CMakeLists.txt | 0 .../LargeKernelParameter.cu | 0 .../LargeKernelParameter/README.md | 0 {Samples => cpp}/6_Performance/README.md | 0 .../.vscode/c_cpp_properties.json | 0 .../UnifiedMemoryPerf/.vscode/extensions.json | 0 .../UnifiedMemoryPerf/CMakeLists.txt | 0 .../6_Performance/UnifiedMemoryPerf/README.md | 0 .../UnifiedMemoryPerf/commonDefs.hpp | 0 .../UnifiedMemoryPerf/commonKernels.cu | 0 .../UnifiedMemoryPerf/commonKernels.hpp | 0 .../UnifiedMemoryPerf/helperFunctions.cpp | 0 .../UnifiedMemoryPerf/matrixMultiplyPerf.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../alignedTypes/.vscode/extensions.json | 0 .../6_Performance/alignedTypes/CMakeLists.txt | 0 .../6_Performance/alignedTypes/README.md | 0 .../alignedTypes/alignedTypes.cu | 0 .../alignedTypes/doc/alignedTypes.txt | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cudaGraphsPerfScaling/CMakeLists.txt | 0 .../cudaGraphsPerfScaling/README.md | 0 .../cudaGraphPerfScaling.cu | 0 .../cudaGraphsPerfScaling/dataCollection.bash | 0 .../transpose/.vscode/c_cpp_properties.json | 0 .../transpose/.vscode/extensions.json | 0 .../6_Performance/transpose/CMakeLists.txt | 0 .../6_Performance/transpose/README.md | 0 .../transpose/doc/MatrixTranspose.docx | Bin .../transpose/doc/MatrixTranspose.pdf | Bin .../6_Performance/transpose/transpose.cu | 0 {Samples => cpp}/7_libNVVM/CMakeLists.txt | 0 {Samples => cpp}/7_libNVVM/README.md | 0 .../7_libNVVM/common/include/DDSWriter.h | 0 .../7_libNVVM/cuda-c-linking/CMakeLists.txt | 0 .../7_libNVVM/cuda-c-linking/README.md | 0 .../cuda-c-linking/cuda-c-linking.cpp | 0 .../7_libNVVM/cuda-c-linking/math-funcs.cu | 0 .../cuda-shared-memory/CMakeLists.txt | 0 .../extern_shared_memory.ll | 0 .../cuda-shared-memory/shared_memory.ll | 0 .../device-side-launch/CMakeLists.txt | 0 .../7_libNVVM/device-side-launch/README.md | 0 .../7_libNVVM/device-side-launch/dsl-gpu64.ll | 0 .../7_libNVVM/device-side-launch/dsl.c | 0 .../7_libNVVM/ptxgen/CMakeLists.txt | 0 {Samples => cpp}/7_libNVVM/ptxgen/README.md | 0 {Samples => cpp}/7_libNVVM/ptxgen/ptxgen.c | 0 {Samples => cpp}/7_libNVVM/ptxgen/test.ll | 0 .../7_libNVVM/simple/CMakeLists.txt | 0 {Samples => cpp}/7_libNVVM/simple/README.md | 0 .../7_libNVVM/simple/simple-gpu64.ll | 0 {Samples => cpp}/7_libNVVM/simple/simple.c | 0 .../7_libNVVM/syscalls/CMakeLists.txt | 0 .../7_libNVVM/syscalls/malloc-free.ll | 0 .../7_libNVVM/syscalls/vprintf.ll | 0 {Samples => cpp}/7_libNVVM/utils/build.bat | 0 {Samples => cpp}/7_libNVVM/utils/build.sh | 0 .../7_libNVVM/uvmlite/CMakeLists.txt | 0 {Samples => cpp}/7_libNVVM/uvmlite/README.md | 0 {Samples => cpp}/7_libNVVM/uvmlite/uvmlite.c | 0 .../7_libNVVM/uvmlite/uvmlite64.ll | 0 .../8_Platform_Specific/Tegra/CMakeLists.txt | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../EGLSync_CUDAEvent_Interop/CMakeLists.txt | 0 .../EGLSync_CUDAEvent_Interop.cu | 0 .../Tegra/EGLSync_CUDAEvent_Interop/README.md | 0 .../EGLSync_CUDAEvent_Interop/egl_common.h | 0 .../graphics_interface.h | 0 .../8_Platform_Specific/Tegra/README.md | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../Tegra/cuDLAErrorReporting/CMakeLists.txt | 0 .../Tegra/cuDLAErrorReporting/README.md | 0 .../Tegra/cuDLAErrorReporting/main.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../cuDLAHybridMode/.vscode/extensions.json | 0 .../Tegra/cuDLAHybridMode/CMakeLists.txt | 0 .../Tegra/cuDLAHybridMode/README.md | 0 .../Tegra/cuDLAHybridMode/main.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cuDLALayerwiseStatsHybrid/CMakeLists.txt | 0 .../Tegra/cuDLALayerwiseStatsHybrid/README.md | 0 .../Tegra/cuDLALayerwiseStatsHybrid/main.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../CMakeLists.txt | 0 .../cuDLALayerwiseStatsStandalone/README.md | 0 .../cuDLALayerwiseStatsStandalone/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../Tegra/cuDLAStandaloneMode/CMakeLists.txt | 0 .../Tegra/cuDLAStandaloneMode/README.md | 0 .../Tegra/cuDLAStandaloneMode/main.cpp | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../cudaNvSciBufMultiplanar/CMakeLists.txt | 0 .../Tegra/cudaNvSciBufMultiplanar/README.md | 0 .../cudaNvSciBufMultiplanar.cpp | 0 .../cudaNvSciBufMultiplanar.h | 0 .../cudaNvSciBufMultiplanar/imageKernels.cu | 0 .../Tegra/cudaNvSciBufMultiplanar/main.cpp | 0 .../yuv_planar_img1.yuv | Bin .../.vscode/c_cpp_properties.json | 0 .../cudaNvSciNvMedia/.vscode/extensions.json | 0 .../Tegra/cudaNvSciNvMedia/CMakeLists.txt | 0 .../Tegra/cudaNvSciNvMedia/README.md | 0 .../cudaNvSciNvMedia_Readme.pdf | Bin .../Tegra/cudaNvSciNvMedia/cuda_consumer.cu | 0 .../Tegra/cudaNvSciNvMedia/cuda_consumer.h | 0 .../Tegra/cudaNvSciNvMedia/main.cpp | 0 .../cudaNvSciNvMedia/nvmedia_producer.cpp | 0 .../Tegra/cudaNvSciNvMedia/nvmedia_producer.h | 0 .../nvmedia_utils/cmdline.cpp | 0 .../cudaNvSciNvMedia/nvmedia_utils/cmdline.h | 0 .../nvmedia_utils/config_parser.cpp | 0 .../nvmedia_utils/config_parser.h | 0 .../nvmedia_utils/image_utils.cpp | 0 .../nvmedia_utils/image_utils.h | 0 .../nvmedia_utils/log_utils.cpp | 0 .../nvmedia_utils/log_utils.h | 0 .../nvmedia_utils/misc_utils.cpp | 0 .../nvmedia_utils/misc_utils.h | 0 .../Tegra/cudaNvSciNvMedia/nvsci_setup.cpp | 0 .../Tegra/cudaNvSciNvMedia/nvsci_setup.h | 0 .../Tegra/cudaNvSciNvMedia/sample.cfg | 0 .../Tegra/cudaNvSciNvMedia/teapot.rgba | Bin .../fluidsGLES/.vscode/c_cpp_properties.json | 0 .../Tegra/fluidsGLES/.vscode/extensions.json | 0 .../Tegra/fluidsGLES/CMakeLists.txt | 0 .../Tegra/fluidsGLES/README.md | 0 .../Tegra/fluidsGLES/data/ref_fluidsGLES.ppm | Bin .../Tegra/fluidsGLES/defines.h | 0 .../Tegra/fluidsGLES/fluidsGLES.cpp | 0 .../Tegra/fluidsGLES/fluidsGLES_kernels.cu | 0 .../Tegra/fluidsGLES/fluidsGLES_kernels.cuh | 0 .../Tegra/fluidsGLES/fluidsGLES_kernels.h | 0 .../Tegra/fluidsGLES/graphics_interface.h | 0 .../Tegra/fluidsGLES/mesh.frag.glsl | 0 .../Tegra/fluidsGLES/mesh.vert.glsl | 0 .../.vscode/c_cpp_properties.json | 0 .../nbody_opengles/.vscode/extensions.json | 0 .../Tegra/nbody_opengles/CMakeLists.txt | 0 .../Tegra/nbody_opengles/README.md | 0 .../Tegra/nbody_opengles/bodysystem.h | 0 .../Tegra/nbody_opengles/bodysystemcpu.h | 0 .../Tegra/nbody_opengles/bodysystemcpu_impl.h | 0 .../Tegra/nbody_opengles/bodysystemcuda.cu | 0 .../Tegra/nbody_opengles/bodysystemcuda.h | 0 .../nbody_opengles/bodysystemcuda_impl.h | 0 .../Tegra/nbody_opengles/galaxy_20K.bin | Bin .../Tegra/nbody_opengles/nbody_opengles.cpp | 0 .../Tegra/nbody_opengles/render_particles.cpp | 0 .../Tegra/nbody_opengles/render_particles.h | 0 .../Tegra/nbody_opengles/tipsy.h | 0 .../simpleGLES/.vscode/c_cpp_properties.json | 0 .../Tegra/simpleGLES/.vscode/extensions.json | 0 .../Tegra/simpleGLES/CMakeLists.txt | 0 .../Tegra/simpleGLES/README.md | 0 .../Tegra/simpleGLES/data/ref_simpleGL.bin | Bin .../Tegra/simpleGLES/graphics_interface.c | 0 .../Tegra/simpleGLES/mesh.frag.glsl | 0 .../Tegra/simpleGLES/mesh.vert.glsl | 0 .../Tegra/simpleGLES/simpleGLES.cu | 0 .../.vscode/c_cpp_properties.json | 0 .../.vscode/extensions.json | 0 .../Tegra/simpleGLES_EGLOutput/CMakeLists.txt | 0 .../Tegra/simpleGLES_EGLOutput/README.md | 0 .../data/ref_simpleGLES_EGLOutput.bin | Bin .../graphics_interface_egloutput_via_egl.c | 0 .../Tegra/simpleGLES_EGLOutput/mesh.frag.glsl | 0 .../Tegra/simpleGLES_EGLOutput/mesh.vert.glsl | 0 .../simpleGLES_EGLOutput.cu | 0 {Samples => cpp}/CMakeLists.txt | 0 pyproject.toml | 59 ++ python/.gitignore | 220 +++++ .../blurImageUnifiedMemory/README.md | 168 ++++ .../blurImageUnifiedMemory.py | 269 +++++++ .../blurImageUnifiedMemory/requirements.txt | 6 + .../copyImageArraytoGPU/README.md | 119 +++ .../copyImageArraytoGPU.py | 239 ++++++ .../copyImageArraytoGPU/requirements.txt | 6 + python/1_GettingStarted/deviceQuery/README.md | 189 +++++ .../deviceQuery/deviceQuery.py | 389 +++++++++ .../deviceQuery/requirements.txt | 4 + .../kernelNsysProfile/README.md | 72 ++ .../kernelNsysProfile/kernelNsysProfile.py | 327 ++++++++ .../kernelNsysProfile/requirements.txt | 7 + python/1_GettingStarted/numpyVsCupy/README.md | 73 ++ .../numpyVsCupy/numpyVsCupy.py | 141 ++++ .../numpyVsCupy/requirements.txt | 7 + python/1_GettingStarted/simplePrint/README.md | 263 ++++++ .../simplePrint/requirements.txt | 7 + .../simplePrint/simplePrint.py | 287 +++++++ python/1_GettingStarted/systemInfo/README.md | 152 ++++ .../systemInfo/requirements.txt | 4 + .../1_GettingStarted/systemInfo/systemInfo.py | 215 +++++ python/1_GettingStarted/vectorAdd/README.md | 130 +++ .../vectorAdd/requirements.txt | 5 + .../1_GettingStarted/vectorAdd/vectorAdd.py | 196 +++++ python/2_CoreConcepts/blockwiseSum/README.md | 102 +++ .../blockwiseSum/blockwiseSum.py | 259 ++++++ .../blockwiseSum/requirements.txt | 6 + python/2_CoreConcepts/cudaGraphs/README.md | 140 ++++ .../2_CoreConcepts/cudaGraphs/cudaGraphs.py | 266 +++++++ .../cudaGraphs/requirements.txt | 5 + .../fftSignalAnalysis/README.md | 136 ++++ .../fftSignalAnalysis/fftSignalAnalysis.py | 318 ++++++++ .../fftSignalAnalysis/requirements.txt | 6 + python/2_CoreConcepts/greenContext/README.md | 250 ++++++ .../greenContext/greenContext.py | 752 ++++++++++++++++++ .../greenContext/requirements.txt | 3 + python/2_CoreConcepts/jitLtoLinking/README.md | 133 ++++ .../jitLtoLinking/jitLtoLinking.py | 223 ++++++ .../jitLtoLinking/requirements.txt | 5 + .../launchConfigTuning/README.md | 184 +++++ .../launchConfigTuning/launchConfigTuning.py | 388 +++++++++ .../launchConfigTuning/requirements.txt | 6 + .../matrixMulSharedMem/README.md | 163 ++++ .../matrixMulSharedMem/matrixMulSharedMem.py | 243 ++++++ .../matrixMulSharedMem/requirements.txt | 7 + .../2_CoreConcepts/memoryResources/README.md | 139 ++++ .../memoryResources/memoryResources.py | 248 ++++++ .../memoryResources/requirements.txt | 5 + python/2_CoreConcepts/pageRank/README.md | 158 ++++ python/2_CoreConcepts/pageRank/pageRank.py | 346 ++++++++ .../2_CoreConcepts/pageRank/requirements.txt | 8 + .../parallelHistogram/README.md | 117 +++ .../parallelHistogram/parallelHistogram.py | 237 ++++++ .../parallelHistogram/requirements.txt | 7 + .../parallelReduction/README.md | 119 +++ .../parallelReduction/parallelReduction.py | 375 +++++++++ .../parallelReduction/requirements.txt | 7 + python/2_CoreConcepts/prefixSum/README.md | 83 ++ python/2_CoreConcepts/prefixSum/prefixSum.py | 199 +++++ .../2_CoreConcepts/prefixSum/requirements.txt | 8 + .../processCheckpoint/README.md | 206 +++++ .../processCheckpoint/processCheckpoint.py | 248 ++++++ .../processCheckpoint/requirements.txt | 3 + python/2_CoreConcepts/reduction/README.md | 137 ++++ python/2_CoreConcepts/reduction/reduction.py | 485 +++++++++++ .../2_CoreConcepts/reduction/requirements.txt | 8 + .../reductionMultiBlockCG/README.md | 129 +++ .../reductionMultiBlockCG.py | 472 +++++++++++ .../reductionMultiBlockCG/requirements.txt | 10 + .../2_CoreConcepts/simpleZeroCopy/README.md | 141 ++++ .../simpleZeroCopy/requirements.txt | 6 + .../simpleZeroCopy/simpleZeroCopy.py | 275 +++++++ .../streamingCopyComputeOverlap/README.md | 98 +++ .../requirements.txt | 6 + .../streamingCopyComputeOverlap.py | 312 ++++++++ python/2_CoreConcepts/tmaTensorMap/README.md | 138 ++++ .../tmaTensorMap/requirements.txt | 4 + .../tmaTensorMap/tmaTensorMap.py | 281 +++++++ .../customPyTorchKernel/README.md | 54 ++ .../customPyTorchKernel.py | 390 +++++++++ .../customPyTorchKernel/requirements.txt | 5 + .../customTensorFlowKernel/README.md | 81 ++ .../customTensorFlowKernel.py | 430 ++++++++++ .../customTensorFlowKernel/requirements.txt | 14 + .../ipcMemoryPool/README.md | 140 ++++ .../ipcMemoryPool/ipcMemoryPool.py | 220 +++++ .../ipcMemoryPool/requirements.txt | 4 + .../multiGPUGradientAverage/README.md | 110 +++ .../multiGPUGradientAverage.py | 416 ++++++++++ .../multiGPUGradientAverage/requirements.txt | 21 + .../simpleP2P/README.md | 190 +++++ .../simpleP2P/requirements.txt | 6 + .../simpleP2P/simpleP2P.py | 377 +++++++++ python/Utilities/README.md | 134 ++++ python/Utilities/__init__.py | 47 ++ python/Utilities/cuda_samples_utils.py | 144 ++++ python/requirements.txt | 7 + 2072 files changed, 15077 insertions(+), 1970 deletions(-) delete mode 100644 Samples/0_Introduction/UnifiedMemoryStreams/.vscode/launch.json delete mode 100644 Samples/0_Introduction/asyncAPI/.vscode/launch.json delete mode 100644 Samples/0_Introduction/clock/.vscode/launch.json delete mode 100644 Samples/0_Introduction/clock_nvrtc/.vscode/launch.json delete mode 100644 Samples/0_Introduction/cudaOpenMP/.vscode/launch.json delete mode 100644 Samples/0_Introduction/fp16ScalarProduct/.vscode/launch.json delete mode 100644 Samples/0_Introduction/matrixMul/.vscode/launch.json delete mode 100644 Samples/0_Introduction/matrixMulDrv/.vscode/launch.json delete mode 100644 Samples/0_Introduction/matrixMulDynlinkJIT/.vscode/launch.json delete mode 100644 Samples/0_Introduction/matrixMul_nvrtc/.vscode/launch.json delete mode 100644 Samples/0_Introduction/mergeSort/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleAWBarrier/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleAssert/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleAssert_nvrtc/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleAtomicIntrinsics/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleAttributes/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleCUDA2GL/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleCallback/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleCooperativeGroups/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleCubemapTexture/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleDrvRuntime/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleHyperQ/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleIPC/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleLayeredTexture/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleMPI/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleMultiCopy/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleMultiGPU/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleOccupancy/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleP2P/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simplePitchLinearTexture/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simplePrintf/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleStreams/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleSurfaceWrite/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleTemplates/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleTexture/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleTexture3D/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleTextureDrv/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleVoteIntrinsics/.vscode/launch.json delete mode 100644 Samples/0_Introduction/simpleZeroCopy/.vscode/launch.json delete mode 100644 Samples/0_Introduction/systemWideAtomics/.vscode/launch.json delete mode 100644 Samples/0_Introduction/template/.vscode/launch.json delete mode 100644 Samples/0_Introduction/vectorAdd/.vscode/launch.json delete mode 100644 Samples/0_Introduction/vectorAddDrv/.vscode/launch.json delete mode 100644 Samples/0_Introduction/vectorAddMMAP/.vscode/launch.json delete mode 100644 Samples/0_Introduction/vectorAdd_nvrtc/.vscode/launch.json delete mode 100644 Samples/1_Utilities/deviceQuery/.vscode/launch.json delete mode 100644 Samples/1_Utilities/deviceQueryDrv/.vscode/launch.json delete mode 100644 Samples/1_Utilities/topologyQuery/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/FunctionPointers/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/boxFilter/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/convolutionSeparable/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/convolutionTexture/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/dct8x8/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/eigenvalues/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/histogram/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/imageDenoising/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/inlinePTX/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/interval/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/particles/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/radixSortThrust/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/reduction/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/scalarProd/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/scan/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/shfl_scan/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/sortingNetworks/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/threadFenceReduction/.vscode/launch.json delete mode 100644 Samples/2_Concepts_and_Techniques/threadMigration/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/StreamPriorities/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/bf16TensorCoreGemm/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/binaryPartitionCG/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/bindlessTexture/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/cdpBezierTessellation/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/cdpQuadtree/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/cdpSimplePrint/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/cdpSimpleQuicksort/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/cudaCompressibleMemory/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/cudaTensorCoreGemm/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/graphConditionalNodes/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/graphMemoryFootprint/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/graphMemoryNodes/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/immaTensorCoreGemm/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/jacobiCudaGraphs/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/memMapIPCDrv/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/newdelete/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/ptxjit/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/simpleCudaGraphs/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/tf32TensorCoreGemm/.vscode/launch.json delete mode 100644 Samples/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/batchCUBLAS/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/boxFilterNPP/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/conjugateGradient/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/conjugateGradientUM/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/cuSolverRf/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/cudaNvSci/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/freeImageInteropNPP/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/histEqualizationNPP/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/jitLto/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/lineOfSight/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/nvJPEG/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/nvJPEG_encoder/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/oceanFFT/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/randomFog/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/simpleCUBLAS/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/simpleCUBLASXT/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/simpleCUFFT/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/launch.json delete mode 100644 Samples/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/BlackScholes/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/BlackScholes_nvrtc/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/FDTD3d/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/HSOpticalFlow/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/Mandelbrot/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/MonteCarloMultiGPU/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/NV12toBGRandResize/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/SobelFilter/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/SobolQRNG/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/bicubicTexture/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/bilateralFilter/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/binomialOptions/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/binomialOptions_nvrtc/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/convolutionFFT2D/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/dwtHaar1D/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/dxtc/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/fastWalshTransform/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/fluidsGL/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/marchingCubes/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/nbody/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/postProcessGL/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/quasirandomGenerator/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/recursiveGaussian/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/simpleGL/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/simpleVulkan/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/simpleVulkanMMAP/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/smokeParticles/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/stereoDisparity/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/volumeFiltering/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/volumeRender/.vscode/launch.json delete mode 100644 Samples/5_Domain_Specific/vulkanImageCUDA/.vscode/launch.json delete mode 100644 Samples/6_Performance/LargeKernelParameter/.vscode/launch.json delete mode 100644 Samples/6_Performance/UnifiedMemoryPerf/.vscode/launch.json delete mode 100644 Samples/6_Performance/alignedTypes/.vscode/launch.json delete mode 100644 Samples/6_Performance/cudaGraphsPerfScaling/.vscode/launch.json delete mode 100644 Samples/6_Performance/transpose/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/fluidsGLES/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/nbody_opengles/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/simpleGLES/.vscode/launch.json delete mode 100644 Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/launch.json rename {Samples => cpp}/0_Introduction/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/README.md (100%) rename {Samples => cpp}/0_Introduction/UnifiedMemoryStreams/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/UnifiedMemoryStreams/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt (82%) rename {Samples => cpp}/0_Introduction/UnifiedMemoryStreams/README.md (100%) rename {Samples => cpp}/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu (100%) rename {Samples => cpp}/0_Introduction/asyncAPI/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/asyncAPI/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/asyncAPI/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/asyncAPI/README.md (100%) rename {Samples => cpp}/0_Introduction/asyncAPI/asyncAPI.cu (100%) rename {Samples => cpp}/0_Introduction/clock/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/clock/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/clock/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/clock/README.md (100%) rename {Samples => cpp}/0_Introduction/clock/clock.cu (100%) rename {Samples => cpp}/0_Introduction/clock_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/clock_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/clock_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/clock_nvrtc/README.md (100%) rename {Samples => cpp}/0_Introduction/clock_nvrtc/clock.cpp (100%) rename {Samples => cpp}/0_Introduction/clock_nvrtc/clock_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/cudaOpenMP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/cudaOpenMP/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/cudaOpenMP/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/cudaOpenMP/README.md (100%) rename {Samples => cpp}/0_Introduction/cudaOpenMP/cudaOpenMP.cu (100%) rename {Samples => cpp}/0_Introduction/fp16ScalarProduct/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/fp16ScalarProduct/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/fp16ScalarProduct/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/fp16ScalarProduct/README.md (100%) rename {Samples => cpp}/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu (100%) rename {Samples => cpp}/0_Introduction/matrixMul/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/matrixMul/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/matrixMul/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/matrixMul/README.md (100%) rename {Samples => cpp}/0_Introduction/matrixMul/matrixMul.cu (100%) rename {Samples => cpp}/0_Introduction/matrixMulDrv/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/matrixMulDrv/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/matrixMulDrv/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/matrixMulDrv/README.md (100%) rename {Samples => cpp}/0_Introduction/matrixMulDrv/matrixMul.h (100%) rename {Samples => cpp}/0_Introduction/matrixMulDrv/matrixMulDrv.cpp (100%) rename {Samples => cpp}/0_Introduction/matrixMulDrv/matrixMul_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/README.md (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.h (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink_cuda.h (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/extras/README.TXT (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_32.ptx (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_64.ptx (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/extras/ptx2c.py (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/matrixMul.h (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT.cpp (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/matrixMul_gold.cpp (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.c (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.h (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.c (100%) rename {Samples => cpp}/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.h (100%) rename {Samples => cpp}/0_Introduction/matrixMul_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/matrixMul_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/matrixMul_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/matrixMul_nvrtc/README.md (100%) rename {Samples => cpp}/0_Introduction/matrixMul_nvrtc/matrixMul.cpp (100%) rename {Samples => cpp}/0_Introduction/matrixMul_nvrtc/matrixMul_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/mergeSort/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/mergeSort/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/mergeSort/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/mergeSort/README.md (100%) rename {Samples => cpp}/0_Introduction/mergeSort/bitonic.cu (100%) rename {Samples => cpp}/0_Introduction/mergeSort/main.cpp (100%) rename {Samples => cpp}/0_Introduction/mergeSort/mergeSort.cu (100%) rename {Samples => cpp}/0_Introduction/mergeSort/mergeSort_common.h (100%) rename {Samples => cpp}/0_Introduction/mergeSort/mergeSort_host.cpp (100%) rename {Samples => cpp}/0_Introduction/mergeSort/mergeSort_validate.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleAWBarrier/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleAWBarrier/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleAWBarrier/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleAWBarrier/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu (100%) rename {Samples => cpp}/0_Introduction/simpleAssert/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleAssert/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleAssert/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleAssert/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleAssert/simpleAssert.cu (100%) rename {Samples => cpp}/0_Introduction/simpleAssert_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleAssert_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleAssert_nvrtc/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleAssert_nvrtc/simpleAssert.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleAssert_nvrtc/simpleAssert_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics.cu (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_cpu.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_kernel.cuh (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_cpu.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_kernel.cuh (100%) rename {Samples => cpp}/0_Introduction/simpleAttributes/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleAttributes/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleAttributes/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleAttributes/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleAttributes/simpleAttributes.cu (100%) rename {Samples => cpp}/0_Introduction/simpleCUDA2GL/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleCUDA2GL/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleCUDA2GL/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleCUDA2GL/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleCUDA2GL/data/ref_simpleCUDA2GL.ppm (100%) rename {Samples => cpp}/0_Introduction/simpleCUDA2GL/main.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleCUDA2GL/simpleCUDA2GL.cu (100%) rename {Samples => cpp}/0_Introduction/simpleCallback/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleCallback/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleCallback/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleCallback/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleCallback/multithreading.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleCallback/multithreading.h (100%) rename {Samples => cpp}/0_Introduction/simpleCallback/simpleCallback.cu (100%) rename {Samples => cpp}/0_Introduction/simpleCooperativeGroups/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleCooperativeGroups/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleCooperativeGroups/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleCooperativeGroups/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups.cu (100%) rename {Samples => cpp}/0_Introduction/simpleCubemapTexture/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleCubemapTexture/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleCubemapTexture/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleCubemapTexture/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleCubemapTexture/simpleCubemapTexture.cu (100%) rename {Samples => cpp}/0_Introduction/simpleDrvRuntime/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleDrvRuntime/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleDrvRuntime/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleDrvRuntime/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleDrvRuntime/vectorAdd_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/simpleHyperQ/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleHyperQ/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleHyperQ/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleHyperQ/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleHyperQ/doc/HyperQ.docx (100%) rename {Samples => cpp}/0_Introduction/simpleHyperQ/doc/HyperQ.pdf (100%) rename {Samples => cpp}/0_Introduction/simpleHyperQ/simpleHyperQ.cu (100%) rename {Samples => cpp}/0_Introduction/simpleIPC/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleIPC/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleIPC/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleIPC/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleIPC/simpleIPC.cu (100%) rename {Samples => cpp}/0_Introduction/simpleLayeredTexture/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleLayeredTexture/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleLayeredTexture/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleLayeredTexture/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleLayeredTexture/simpleLayeredTexture.cu (100%) rename {Samples => cpp}/0_Introduction/simpleMPI/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleMPI/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleMPI/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleMPI/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleMPI/simpleMPI.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleMPI/simpleMPI.cu (100%) rename {Samples => cpp}/0_Introduction/simpleMPI/simpleMPI.h (100%) rename {Samples => cpp}/0_Introduction/simpleMultiCopy/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleMultiCopy/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleMultiCopy/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleMultiCopy/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu (100%) rename {Samples => cpp}/0_Introduction/simpleMultiGPU/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleMultiGPU/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleMultiGPU/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleMultiGPU/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleMultiGPU/simpleMultiGPU.cu (100%) rename {Samples => cpp}/0_Introduction/simpleMultiGPU/simpleMultiGPU.h (100%) rename {Samples => cpp}/0_Introduction/simpleOccupancy/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleOccupancy/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleOccupancy/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleOccupancy/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleOccupancy/simpleOccupancy.cu (100%) rename {Samples => cpp}/0_Introduction/simpleP2P/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleP2P/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleP2P/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleP2P/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleP2P/simpleP2P.cu (100%) rename {Samples => cpp}/0_Introduction/simplePitchLinearTexture/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simplePitchLinearTexture/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simplePitchLinearTexture/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simplePitchLinearTexture/README.md (100%) rename {Samples => cpp}/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu (100%) rename {Samples => cpp}/0_Introduction/simplePrintf/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simplePrintf/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simplePrintf/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simplePrintf/README.md (100%) rename {Samples => cpp}/0_Introduction/simplePrintf/simplePrintf.cu (100%) rename {Samples => cpp}/0_Introduction/simpleStreams/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleStreams/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleStreams/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleStreams/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleStreams/simpleStreams.cu (100%) rename {Samples => cpp}/0_Introduction/simpleSurfaceWrite/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleSurfaceWrite/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleSurfaceWrite/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleSurfaceWrite/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleSurfaceWrite/data/ref_rotated.pgm (100%) rename {Samples => cpp}/0_Introduction/simpleSurfaceWrite/data/teapot512.pgm (100%) rename {Samples => cpp}/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite.cu (100%) rename {Samples => cpp}/0_Introduction/simpleTemplates/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleTemplates/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleTemplates/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleTemplates/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleTemplates/sharedmem.cuh (100%) rename {Samples => cpp}/0_Introduction/simpleTemplates/simpleTemplates.cu (100%) rename {Samples => cpp}/0_Introduction/simpleTexture/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleTexture/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleTexture/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleTexture/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleTexture/data/ref_rotated.pgm (100%) rename {Samples => cpp}/0_Introduction/simpleTexture/data/teapot512.pgm (100%) rename {Samples => cpp}/0_Introduction/simpleTexture/data/teapot512_out.pgm (100%) rename {Samples => cpp}/0_Introduction/simpleTexture/simpleTexture.cu (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/data/Bucky.raw (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/data/ref_texture3D.bin (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/doc/sshot_lg.JPG (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/doc/sshot_md.JPG (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/doc/sshot_sm.JPG (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/simpleTexture3D.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleTexture3D/simpleTexture3D_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/data/ref_rotated.pgm (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/data/teapot512.pgm (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/data/teapot512_out.pgm (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp (100%) rename {Samples => cpp}/0_Introduction/simpleTextureDrv/simpleTexture_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/simpleVoteIntrinsics/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleVoteIntrinsics/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleVoteIntrinsics/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics.cu (100%) rename {Samples => cpp}/0_Introduction/simpleVoteIntrinsics/simpleVote_kernel.cuh (100%) rename {Samples => cpp}/0_Introduction/simpleZeroCopy/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/simpleZeroCopy/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/simpleZeroCopy/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/simpleZeroCopy/README.md (100%) rename {Samples => cpp}/0_Introduction/simpleZeroCopy/simpleZeroCopy.cu (100%) rename {Samples => cpp}/0_Introduction/systemWideAtomics/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/systemWideAtomics/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/systemWideAtomics/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/systemWideAtomics/README.md (100%) rename {Samples => cpp}/0_Introduction/systemWideAtomics/systemWideAtomics.cu (100%) rename {Samples => cpp}/0_Introduction/template/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/template/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/template/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/template/README.md (100%) rename {Samples => cpp}/0_Introduction/template/template.cu (100%) rename {Samples => cpp}/0_Introduction/template/template_cpu.cpp (100%) rename {Samples => cpp}/0_Introduction/vectorAdd/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/vectorAdd/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/vectorAdd/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/vectorAdd/README.md (100%) rename {Samples => cpp}/0_Introduction/vectorAdd/vectorAdd.cu (100%) rename {Samples => cpp}/0_Introduction/vectorAddDrv/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/vectorAddDrv/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/vectorAddDrv/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/vectorAddDrv/README.md (100%) rename {Samples => cpp}/0_Introduction/vectorAddDrv/vectorAddDrv.cpp (100%) rename {Samples => cpp}/0_Introduction/vectorAddDrv/vectorAdd_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/vectorAddMMAP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/vectorAddMMAP/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/vectorAddMMAP/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/vectorAddMMAP/README.md (100%) rename {Samples => cpp}/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.cpp (100%) rename {Samples => cpp}/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.hpp (100%) rename {Samples => cpp}/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp (100%) rename {Samples => cpp}/0_Introduction/vectorAddMMAP/vectorAdd_kernel.cu (100%) rename {Samples => cpp}/0_Introduction/vectorAdd_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/0_Introduction/vectorAdd_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/0_Introduction/vectorAdd_nvrtc/README.md (100%) rename {Samples => cpp}/0_Introduction/vectorAdd_nvrtc/vectorAdd.cpp (100%) rename {Samples => cpp}/0_Introduction/vectorAdd_nvrtc/vectorAdd_kernel.cu (100%) rename {Samples => cpp}/1_Utilities/CMakeLists.txt (100%) rename {Samples => cpp}/1_Utilities/README.md (100%) rename {Samples => cpp}/1_Utilities/deviceQuery/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/1_Utilities/deviceQuery/.vscode/extensions.json (100%) rename {Samples => cpp}/1_Utilities/deviceQuery/CMakeLists.txt (100%) rename {Samples => cpp}/1_Utilities/deviceQuery/README.md (100%) rename {Samples => cpp}/1_Utilities/deviceQuery/deviceQuery.cpp (100%) rename {Samples => cpp}/1_Utilities/deviceQueryDrv/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/1_Utilities/deviceQueryDrv/.vscode/extensions.json (100%) rename {Samples => cpp}/1_Utilities/deviceQueryDrv/CMakeLists.txt (100%) rename {Samples => cpp}/1_Utilities/deviceQueryDrv/README.md (100%) rename {Samples => cpp}/1_Utilities/deviceQueryDrv/deviceQueryDrv.cpp (100%) rename {Samples => cpp}/1_Utilities/topologyQuery/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/1_Utilities/topologyQuery/.vscode/extensions.json (100%) rename {Samples => cpp}/1_Utilities/topologyQuery/CMakeLists.txt (100%) rename {Samples => cpp}/1_Utilities/topologyQuery/README.md (100%) rename {Samples => cpp}/1_Utilities/topologyQuery/topologyQuery.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/helper.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/kernel.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_1.yuv (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_2.yuv (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_1.yuv (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_2.yuv (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/data/ref_orig.pgm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/data/ref_shared.pgm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/data/ref_tex.pgm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/FunctionPointers/data/teapot512.pgm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/cudasharedmem.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/piestimator.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/test.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/piestimator.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/test.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/cudasharedmem.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/piestimator.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/test.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/piestimator.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/test.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/inc/cudasharedmem.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/inc/piestimator.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/inc/test.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/src/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/src/piestimator.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiP/src/test.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/cudasharedmem.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/piestimator.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/test.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/src/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/src/piestimator.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_EstimatePiQ/src/test.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/asianoption.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/cudasharedmem.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/pricingengine.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/test.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/pricingengine.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/test.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/boxFilter.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/boxFilter_cpu.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/boxFilter_kernel.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/data/ref_14.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/data/ref_22.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/boxFilter/data/teapot1024.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_common.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_gold.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.doc (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.pdf (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.vsd (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionSeparable/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_common.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_gold.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/doc/Performance.xls (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/convolutionTexture/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/BmpUtil.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/Common.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/data/teapot512.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/data/teapot512.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/dct8x8.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel1.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel2.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_quantization.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_short.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks1.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks2.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks3.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/CosineBasis.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/Cosines.xls (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/DctJpeg.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/barbara.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/barbara_lg.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/barbara_md.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/barbara_sm.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.doc (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.pdf (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/teapot512_cuda1.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/teapot512_cuda2.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/teapot512_cuda_short.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/teapot512_gold1.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/dct8x8/teapot512_gold2.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_multi.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_onei.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_small.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_large.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_large.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_small.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_small.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/bisect_util.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/config.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/data/diagonal.dat (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/data/reference.dat (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/data/superdiagonal.dat (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.doc (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.pdf (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/gerschgorin.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/gerschgorin.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/main.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/matlab.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/matlab.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/structs.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/eigenvalues/util.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/doc/histogram.doc (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/doc/histogram.pdf (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/doc/histogram.vsd (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/histogram256.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/histogram64.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/histogram_common.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/histogram_gold.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/histogram/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/bmploader.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/data/portrait_noise.bmp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/data/ref_knn.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm2.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/data/ref_passthru.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/doc/NLM_lg.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/doc/NLM_md.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/doc/NLM_sm.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.doc (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.pdf (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/imageDenoising.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/imageDenoising.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/imageDenoisingGL.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/imageDenoising_copy_kernel.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/imageDenoising_knn_kernel.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm2_kernel.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm_kernel.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX/inlinePTX.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_kernel.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/abi/borland_prefix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/abi/borland_suffix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_prefix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_suffix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/abi_prefix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/abi_suffix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/auto_link.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/borland.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/codegear.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/comeau.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/common_edg.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/compaq_cxx.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/digitalmars.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc_xml.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/greenhills.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/hp_acc.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/intel.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/kai.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/metrowerks.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/mpw.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/pgi.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/sgi_mipspro.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/sunpro_cc.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/vacpp.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/compiler/visualc.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/no_tr1/cmath.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/no_tr1/complex.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/no_tr1/functional.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/no_tr1/memory.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/no_tr1/utility.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/aix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/amigaos.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/beos.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/bsd.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/cygwin.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/hpux.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/irix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/linux.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/macos.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/qnxnto.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/solaris.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/vxworks.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/platform/win32.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/posix_features.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/requires_threads.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/select_compiler_config.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/select_platform_config.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/select_stdlib_config.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/dinkumware.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/libcomo.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/libstdcpp3.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/modena.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/msl.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/roguewave.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/sgi.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/stlport.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/stdlib/vacpp.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/suffix.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/user.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/config/warning_disable.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/limits.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith2.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith3.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/checking.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/certain.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/explicit.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/lexicographic.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/possible.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/set.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/tribool.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/constants.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/alpha_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bcc_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bugs.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99sub_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/division.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ia64_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/interval_prototype.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/msvc_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ppc_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/sparc_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/test_input.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86gcc_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/integer.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/x86_fast_rounding_control.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/hw_rounding.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/interval.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/io.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/limits.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/policies.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_arith.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_transc.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounding.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/transc.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/boost/numeric/interval/utility.hpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/cpu_interval.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/cuda_interval.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/cuda_interval_lib.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/cuda_interval_rounded_arith.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/interval.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/interval/interval.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/data/ref_particles.bin (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/doc/particles.doc (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/doc/particles.pdf (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/doc/screenshot_lg.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/doc/screenshot_md.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/doc/screenshot_sm.png (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/particleSystem.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/particleSystem.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/particleSystem.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/particles.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/particles_kernel.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/render_particles.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/render_particles.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/shaders.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/particles/shaders.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/radixSortThrust/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/radixSortThrust/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/radixSortThrust/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/radixSortThrust/doc/readme.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reduction/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reduction/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reduction/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reduction/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reduction/reduction.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reduction/reduction.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reduction/reduction_kernel.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scalarProd/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scalarProd/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scalarProd/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scalarProd/scalarProd.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scalarProd/scalarProd_cpu.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scalarProd/scalarProd_kernel.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scan/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scan/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scan/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scan/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scan/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scan/scan.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scan/scan_common.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/scan/scan_gold.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/common.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_00.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_09.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/data/test.ppm (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/kernels.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/shfl_scan/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/shfl_scan/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/shfl_scan/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/shfl_scan/shfl_integral_image.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/shfl_scan/util.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/bitonicSort.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/main.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/oddEvenMergeSort.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_validate.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocation/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadFenceReduction/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadFenceReduction/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadFenceReduction/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.cu (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.h (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_kernel.cuh (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadMigration/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadMigration/.vscode/extensions.json (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadMigration/README.md (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp (100%) rename {Samples => cpp}/2_Concepts_and_Techniques/threadMigration/threadMigration_kernel.cu (100%) rename {Samples => cpp}/3_CUDA_Features/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/StreamPriorities/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/StreamPriorities/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/StreamPriorities/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/StreamPriorities/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/StreamPriorities/StreamPriorities.cu (100%) rename {Samples => cpp}/3_CUDA_Features/bf16TensorCoreGemm/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/bf16TensorCoreGemm/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/bf16TensorCoreGemm/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu (100%) rename {Samples => cpp}/3_CUDA_Features/binaryPartitionCG/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/binaryPartitionCG/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/binaryPartitionCG/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG.cu (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/bindlessTexture.cpp (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/bindlessTexture.h (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/bindlessTexture_kernel.cu (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/data/flower.ppm (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/data/person.ppm (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/data/ref_bindlessTexture.bin (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/data/sponge.ppm (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/doc/sshot_lg.JPG (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/doc/sshot_md.JPG (100%) rename {Samples => cpp}/3_CUDA_Features/bindlessTexture/doc/sshot_sm.JPG (100%) rename {Samples => cpp}/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/cdpAdvancedQuicksort/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort.cu (100%) rename {Samples => cpp}/3_CUDA_Features/cdpAdvancedQuicksort/cdpBitonicSort.cu (100%) rename {Samples => cpp}/3_CUDA_Features/cdpAdvancedQuicksort/cdpQuicksort.h (100%) rename {Samples => cpp}/3_CUDA_Features/cdpBezierTessellation/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpBezierTessellation/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpBezierTessellation/BezierLineCDP.cu (100%) rename {Samples => cpp}/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/cdpBezierTessellation/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/cdpQuadtree/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpQuadtree/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpQuadtree/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/cdpQuadtree/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimplePrint/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimplePrint/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimplePrint/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint.cu (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimpleQuicksort/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimpleQuicksort/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimpleQuicksort/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort.cu (100%) rename {Samples => cpp}/3_CUDA_Features/cudaCompressibleMemory/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/cudaCompressibleMemory/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/cudaCompressibleMemory/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/cudaCompressibleMemory/compMalloc.cpp (100%) rename {Samples => cpp}/3_CUDA_Features/cudaCompressibleMemory/compMalloc.h (100%) rename {Samples => cpp}/3_CUDA_Features/cudaCompressibleMemory/saxpy.cu (100%) rename {Samples => cpp}/3_CUDA_Features/cudaTensorCoreGemm/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/cudaTensorCoreGemm/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/cudaTensorCoreGemm/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu (100%) rename {Samples => cpp}/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/dmmaTensorCoreGemm/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm.cu (100%) rename {Samples => cpp}/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/globalToShmemAsyncCopy/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu (100%) rename {Samples => cpp}/3_CUDA_Features/graphConditionalNodes/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/graphConditionalNodes/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/graphConditionalNodes/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryFootprint/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryFootprint/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryFootprint/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryNodes/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryNodes/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryNodes/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes.cu (100%) rename {Samples => cpp}/3_CUDA_Features/immaTensorCoreGemm/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/immaTensorCoreGemm/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/immaTensorCoreGemm/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm.cu (100%) rename {Samples => cpp}/3_CUDA_Features/jacobiCudaGraphs/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/jacobiCudaGraphs/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/jacobiCudaGraphs/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/jacobiCudaGraphs/jacobi.cu (100%) rename {Samples => cpp}/3_CUDA_Features/jacobiCudaGraphs/jacobi.h (100%) rename {Samples => cpp}/3_CUDA_Features/jacobiCudaGraphs/main.cpp (100%) rename {Samples => cpp}/3_CUDA_Features/memMapIPCDrv/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/memMapIPCDrv/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/memMapIPCDrv/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp (100%) rename {Samples => cpp}/3_CUDA_Features/memMapIPCDrv/memMapIpc_kernel.cu (100%) rename {Samples => cpp}/3_CUDA_Features/newdelete/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/newdelete/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/newdelete/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/newdelete/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/newdelete/container.hpp (100%) rename {Samples => cpp}/3_CUDA_Features/newdelete/newdelete.cu (100%) rename {Samples => cpp}/3_CUDA_Features/ptxjit/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/ptxjit/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/ptxjit/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/ptxjit/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/ptxjit/ptxjit.cpp (100%) rename {Samples => cpp}/3_CUDA_Features/ptxjit/ptxjit_kernel.cu (100%) rename {Samples => cpp}/3_CUDA_Features/simpleCudaGraphs/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/simpleCudaGraphs/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/simpleCudaGraphs/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs.cu (100%) rename {Samples => cpp}/3_CUDA_Features/tf32TensorCoreGemm/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/tf32TensorCoreGemm/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/tf32TensorCoreGemm/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm.cu (100%) rename {Samples => cpp}/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/extensions.json (100%) rename {Samples => cpp}/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt (100%) rename {Samples => cpp}/3_CUDA_Features/warpAggregatedAtomicsCG/README.md (100%) rename {Samples => cpp}/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/FilterBorderControlNPP/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512.pgm (100%) rename {Samples => cpp}/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwister.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/MersenneTwisterGP11213/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/batchCUBLAS/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/batchCUBLAS/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/batchCUBLAS/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.h (100%) rename {Samples => cpp}/4_CUDA_Libraries/boxFilterNPP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/boxFilterNPP/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/boxFilterNPP/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/boxFilterNPP/teapot512.pgm (100%) rename {Samples => cpp}/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cannyEdgeDetectorNPP/teapot512.pgm (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradient/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradient/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradient/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradient/main.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu (98%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientPrecond/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientUM/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientUM/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientUM/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/conjugateGradientUM/main.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/gr_900_900_crg.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/lap3D_7pt_n20.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.c (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.h (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio_wrapper.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/lap2D_5pt_n100.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/lap3D_7pt_n20.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/mmio.c (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/mmio.h (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverRf/mmio_wrapper.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap2D_5pt_n100.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap3D_7pt_n20.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.c (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.h (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio_wrapper.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap2D_5pt_n100.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap3D_7pt_n20.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.c (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.h (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio_wrapper.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n100.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n32.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap3D_7pt_n20.mtx (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.c (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.h (100%) rename {Samples => cpp}/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio_wrapper.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/cudaNvSci.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/cudaNvSci.h (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/imageKernels.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/main.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/cudaNvSci/teapot1024.ppm (100%) rename {Samples => cpp}/4_CUDA_Libraries/freeImageInteropNPP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/freeImageInteropNPP/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/freeImageInteropNPP/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/histEqualizationNPP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/histEqualizationNPP/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/histEqualizationNPP/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/jitLto/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/jitLto/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/jitLto/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/jitLto/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/jitLto/jitLto.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/lineOfSight/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/lineOfSight/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/lineOfSight/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/lineOfSight/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/lineOfSight/lineOfSight.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/matrixMulCUBLAS/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/images/img1.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/images/img2.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/images/img3.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/images/img4.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/images/img5.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/images/img6.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/images/img7.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/images/img8.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG/nvJPEG.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img1.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img2.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img3.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img4.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img5.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img6.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img7.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img8.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/images/img1.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/images/img2.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/images/img3.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/images/img4.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/images/img5.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/images/img6.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/images/img7.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/images/img8.jpg (100%) rename {Samples => cpp}/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/data/ocean.frag (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/data/ocean.vert (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/data/ref_slopeShading.bin (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/data/ref_spatialDomain.bin (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/data/reference.ppm (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/doc/sshot_lg.png (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/doc/sshot_md.png (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/doc/sshot_sm.png (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/oceanFFT.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/oceanFFT/oceanFFT_kernel.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/randomFog/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/randomFog/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/randomFog/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/randomFog/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/randomFog/data/ref_randomFog.bin (100%) rename {Samples => cpp}/4_CUDA_Libraries/randomFog/randomFog.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/randomFog/rng.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/randomFog/rng.h (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLASXT/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLASXT/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLASXT/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS_LU/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU.cpp (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_callback/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/simpleCUFFT_callback/simpleCUFFT_callback.cu (100%) rename {Samples => cpp}/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/extensions.json (100%) rename {Samples => cpp}/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt (100%) rename {Samples => cpp}/4_CUDA_Libraries/watershedSegmentationNPP/README.md (100%) rename {Samples => cpp}/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/BlackScholes.cu (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/BlackScholes_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/BlackScholes_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/doc/BlackScholes.doc (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes/doc/BlackScholes.pdf (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/BlackScholes_nvrtc/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/inc/FDTD3d.h (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/inc/FDTD3dGPU.h (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/inc/FDTD3dGPUKernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/inc/FDTD3dReference.h (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/src/FDTD3d.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/src/FDTD3dGPU.cu (100%) rename {Samples => cpp}/5_Domain_Specific/FDTD3d/src/FDTD3dReference.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/addKernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/common.h (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/data/frame10.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/data/frame11.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/derivativesKernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.docx (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.pdf (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/downscaleKernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/flowCUDA.cu (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/flowCUDA.h (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/flowGold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/flowGold.h (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/main.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/solverKernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/upscaleKernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/HSOpticalFlow/warpingKernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/Mandelbrot.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/Mandelbrot_cuda.cu (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.h (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.h (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp32.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp64.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/data/params.txt (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp32.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp64.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/doc/sshot_lg.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/doc/sshot_md.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/Mandelbrot/doc/sshot_sm.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_common.h (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_reduction.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.doc (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.pdf (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/multithreading.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/multithreading.h (100%) rename {Samples => cpp}/5_Domain_Specific/MonteCarloMultiGPU/realtype.h (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/bgr_resize.cu (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/data/test1280x720.nv12 (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/data/test1920x1080.nv12 (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/data/test640x480.nv12 (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/nv12_resize.cu (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/nv12_to_bgr_planar.cu (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/resize_convert.h (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/resize_convert_main.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/utils.cu (100%) rename {Samples => cpp}/5_Domain_Specific/NV12toBGRandResize/utils.h (100%) rename {Samples => cpp}/5_Domain_Specific/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/SobelFilter.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/SobelFilter_kernels.cu (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/SobelFilter_kernels.h (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/data/ref_orig.pgm (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/data/ref_shared.pgm (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/data/ref_tex.pgm (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/data/teapot.pgm (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/doc/sshot_lg.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/doc/sshot_md.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/SobelFilter/doc/sshot_sm.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/sobol.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/sobol.h (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/sobol_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/sobol_gold.h (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/sobol_gpu.cu (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/sobol_gpu.h (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/sobol_primitives.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/SobolQRNG/sobol_primitives.h (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/bicubicTexture.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/bicubicTexture_cuda.cu (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/bicubicTexture_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/data/0_nearest.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/data/1_bilinear.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/data/2_bicubic.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/data/3_fastbicubic.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/data/4_catmull-rom.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/bicubicTexture/data/teapot512.pgm (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/bilateralFilter.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/bilateralFilter_cpu.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/bilateral_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/bmploader.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/data/nature_monte.bmp (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/data/ref_05.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/data/ref_06.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/data/ref_07.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/bilateralFilter/data/ref_08.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/binomialOptions.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/binomialOptions_common.h (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/doc/binomialOptions.doc (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/doc/binomialOptions.pdf (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions/realtype.h (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_common.h (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gpu.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/common_gpu_header.h (100%) rename {Samples => cpp}/5_Domain_Specific/binomialOptions_nvrtc/realtype.h (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cu (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_common.h (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/convolutionFFT2D/main.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/data/regression.gold.dat (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/data/regression_2_14.gold.dat (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/data/regression_2_18.gold.dat (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/data/signal.dat (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/data/signal_2_14.dat (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/data/signal_2_18.dat (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/dwtHaar1D.cu (100%) rename {Samples => cpp}/5_Domain_Specific/dwtHaar1D/dwtHaar1D_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/CudaMath.h (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/data/teapot512_ref.dds (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/data/teapot512_std.dds (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/data/teapot512_std.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/dds.h (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/doc/cuda_dxtc.doc (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/doc/cuda_dxtc.pdf (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/dxtc.cu (100%) rename {Samples => cpp}/5_Domain_Specific/dxtc/permutations.h (100%) rename {Samples => cpp}/5_Domain_Specific/fastWalshTransform/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/fastWalshTransform/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/fastWalshTransform/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/fastWalshTransform/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/fastWalshTransform/doc/FWT.doc (100%) rename {Samples => cpp}/5_Domain_Specific/fastWalshTransform/fastWalshTransform.cu (100%) rename {Samples => cpp}/5_Domain_Specific/fastWalshTransform/fastWalshTransform_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/fastWalshTransform/fastWalshTransform_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/data/ref_fluidsGL.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/defines.h (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/doc/fluidsGL.doc (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/doc/fluidsGL.pdf (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/doc/fluidsGL_lg.gif (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/doc/fluidsGL_md.gif (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/doc/fluidsGL_sm.gif (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/fluidsGL.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cu (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/fluidsGL/fluidsGL_kernels.h (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/data/Bucky.raw (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/data/compVoxelArray.bin (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/data/normalArray.bin (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/data/posArray.bin (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/data/ref_march_cubes.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/defines.h (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/doc/screenshot_lg.png (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/doc/screenshot_md.png (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/doc/screenshot_sm.png (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/marchingCubes.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/marchingCubes_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/marchingCubes/tables.h (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/bodysystem.h (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/bodysystemcpu.h (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/bodysystemcpu_impl.h (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/bodysystemcuda.cu (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/bodysystemcuda.h (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/bodysystemcuda_impl.h (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/doc/nbody_gems3_ch31.pdf (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/doc/screenshot_lg.jpg (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/doc/screenshot_md.jpg (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/doc/screenshot_sm.jpg (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/nbody.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/render_particles.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/render_particles.h (100%) rename {Samples => cpp}/5_Domain_Specific/nbody/tipsy.h (100%) rename {Samples => cpp}/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/p2pBandwidthLatencyTest/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/data/teapot_2.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/data/teapot_4.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/data/teapot_8.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/data/teapot_orig.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/doc/postProcessGL_lg.gif (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/doc/postProcessGL_md.gif (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/doc/postProcessGL_sm.gif (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/main.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/postProcessGL/postProcessGL.cu (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_common.h (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_common.h (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gold.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gpu.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/data/ref_10.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/data/ref_14.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/data/ref_18.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/data/ref_22.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/data/teapot512.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/recursiveGaussian.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/recursiveGaussian_cuda.cu (100%) rename {Samples => cpp}/5_Domain_Specific/recursiveGaussian/recursiveGaussian_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11/ShaderStructs.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11/data/ref_simpleD3D11.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11/simpleD3D11.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11/sinewave_cuda.cu (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11/sinewave_cuda.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11Texture/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11Texture/d3dx11effect/d3dx11effect.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11Texture/data/ref_simpleD3D11Texture.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11Texture/texture_2d.cu (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11Texture/texture_3d.cu (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D11Texture/texture_cube.cu (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/DX12CudaSample.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/DX12CudaSample.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/DXSampleHelper.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/Main.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/ShaderStructs.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/Win32Application.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/Win32Application.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/d3dx12.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/shaders.hlsl (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/simpleD3D12.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/simpleD3D12.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/sinewave_cuda.cu (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/stdafx.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleD3D12/stdafx.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleGL/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/simpleGL/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/simpleGL/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/simpleGL/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/simpleGL/data/ref_simpleGL.bin (100%) rename {Samples => cpp}/5_Domain_Specific/simpleGL/simpleGL.cu (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/Build_instructions.txt (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/SineWaveSimulation.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/frag.spv (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/linmath.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/main.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/sinewave.frag (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/sinewave.vert (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkan/vert.spv (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/Build_instructions.txt (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/VulkanCudaInterop.h (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/frag.spv (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/main.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/montecarlo.frag (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/montecarlo.vert (100%) rename {Samples => cpp}/5_Domain_Specific/simpleVulkanMMAP/vert.spv (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/GLSLProgram.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/GLSLProgram.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/GpuArray.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/ParticleSystem.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/ParticleSystem.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/ParticleSystem.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/SmokeRenderer.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/SmokeRenderer.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/SmokeShaders.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/SmokeShaders.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/data/floortile.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/data/ref_smokePart_pos.bin (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/data/ref_smokePart_vel.bin (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/doc/screenshot_lg.png (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/doc/screenshot_md.png (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/doc/screenshot_sm.png (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/doc/smokeParticles.doc (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/doc/smokeParticles.pdf (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/framebufferObject.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/framebufferObject.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/nvMath.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/nvMatrix.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/nvQuaternion.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/nvVector.h (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/particleDemo.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/particles_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/renderbuffer.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/smokeParticles/renderbuffer.h (100%) rename {Samples => cpp}/5_Domain_Specific/stereoDisparity/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/stereoDisparity/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/stereoDisparity/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/stereoDisparity/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/stereoDisparity/data/stereo.im0.640x533.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/stereoDisparity/data/stereo.im1.640x533.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/stereoDisparity/stereoDisparity.cu (100%) rename {Samples => cpp}/5_Domain_Specific/stereoDisparity/stereoDisparity_kernel.cuh (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/data/Bucky.raw (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/data/ref_volumefilter.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/doc/sshot_lg.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/doc/sshot_md.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/doc/sshot_sm.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/volume.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/volume.h (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/volumeFilter.h (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/volumeFilter_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/volumeFiltering.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/volumeRender.h (100%) rename {Samples => cpp}/5_Domain_Specific/volumeFiltering/volumeRender_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/data/Bucky.raw (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/data/ref_volume.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/doc/sshot_lg.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/doc/sshot_md.jpg (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/doc/sshot_sm.JPG (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/volume.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/volumeRender.cpp (100%) rename {Samples => cpp}/5_Domain_Specific/volumeRender/volumeRender_kernel.cu (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/.vscode/extensions.json (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/Build_instructions.txt (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/README.md (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/frag.spv (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/linmath.h (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/shader.frag (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/shader.vert (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/teapot1024.ppm (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/vert.spv (100%) rename {Samples => cpp}/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu (100%) rename {Samples => cpp}/6_Performance/CMakeLists.txt (100%) rename {Samples => cpp}/6_Performance/LargeKernelParameter/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/6_Performance/LargeKernelParameter/.vscode/extensions.json (100%) rename {Samples => cpp}/6_Performance/LargeKernelParameter/CMakeLists.txt (100%) rename {Samples => cpp}/6_Performance/LargeKernelParameter/LargeKernelParameter.cu (100%) rename {Samples => cpp}/6_Performance/LargeKernelParameter/README.md (100%) rename {Samples => cpp}/6_Performance/README.md (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/.vscode/extensions.json (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/CMakeLists.txt (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/README.md (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/commonDefs.hpp (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/commonKernels.cu (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/commonKernels.hpp (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/helperFunctions.cpp (100%) rename {Samples => cpp}/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu (100%) rename {Samples => cpp}/6_Performance/alignedTypes/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/6_Performance/alignedTypes/.vscode/extensions.json (100%) rename {Samples => cpp}/6_Performance/alignedTypes/CMakeLists.txt (100%) rename {Samples => cpp}/6_Performance/alignedTypes/README.md (100%) rename {Samples => cpp}/6_Performance/alignedTypes/alignedTypes.cu (100%) rename {Samples => cpp}/6_Performance/alignedTypes/doc/alignedTypes.txt (100%) rename {Samples => cpp}/6_Performance/cudaGraphsPerfScaling/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/6_Performance/cudaGraphsPerfScaling/.vscode/extensions.json (100%) rename {Samples => cpp}/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt (100%) rename {Samples => cpp}/6_Performance/cudaGraphsPerfScaling/README.md (100%) rename {Samples => cpp}/6_Performance/cudaGraphsPerfScaling/cudaGraphPerfScaling.cu (100%) rename {Samples => cpp}/6_Performance/cudaGraphsPerfScaling/dataCollection.bash (100%) rename {Samples => cpp}/6_Performance/transpose/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/6_Performance/transpose/.vscode/extensions.json (100%) rename {Samples => cpp}/6_Performance/transpose/CMakeLists.txt (100%) rename {Samples => cpp}/6_Performance/transpose/README.md (100%) rename {Samples => cpp}/6_Performance/transpose/doc/MatrixTranspose.docx (100%) rename {Samples => cpp}/6_Performance/transpose/doc/MatrixTranspose.pdf (100%) rename {Samples => cpp}/6_Performance/transpose/transpose.cu (100%) rename {Samples => cpp}/7_libNVVM/CMakeLists.txt (100%) rename {Samples => cpp}/7_libNVVM/README.md (100%) rename {Samples => cpp}/7_libNVVM/common/include/DDSWriter.h (100%) rename {Samples => cpp}/7_libNVVM/cuda-c-linking/CMakeLists.txt (100%) rename {Samples => cpp}/7_libNVVM/cuda-c-linking/README.md (100%) rename {Samples => cpp}/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp (100%) rename {Samples => cpp}/7_libNVVM/cuda-c-linking/math-funcs.cu (100%) rename {Samples => cpp}/7_libNVVM/cuda-shared-memory/CMakeLists.txt (100%) rename {Samples => cpp}/7_libNVVM/cuda-shared-memory/extern_shared_memory.ll (100%) rename {Samples => cpp}/7_libNVVM/cuda-shared-memory/shared_memory.ll (100%) rename {Samples => cpp}/7_libNVVM/device-side-launch/CMakeLists.txt (100%) rename {Samples => cpp}/7_libNVVM/device-side-launch/README.md (100%) rename {Samples => cpp}/7_libNVVM/device-side-launch/dsl-gpu64.ll (100%) rename {Samples => cpp}/7_libNVVM/device-side-launch/dsl.c (100%) rename {Samples => cpp}/7_libNVVM/ptxgen/CMakeLists.txt (100%) rename {Samples => cpp}/7_libNVVM/ptxgen/README.md (100%) rename {Samples => cpp}/7_libNVVM/ptxgen/ptxgen.c (100%) rename {Samples => cpp}/7_libNVVM/ptxgen/test.ll (100%) rename {Samples => cpp}/7_libNVVM/simple/CMakeLists.txt (100%) rename {Samples => cpp}/7_libNVVM/simple/README.md (100%) rename {Samples => cpp}/7_libNVVM/simple/simple-gpu64.ll (100%) rename {Samples => cpp}/7_libNVVM/simple/simple.c (100%) rename {Samples => cpp}/7_libNVVM/syscalls/CMakeLists.txt (100%) rename {Samples => cpp}/7_libNVVM/syscalls/malloc-free.ll (100%) rename {Samples => cpp}/7_libNVVM/syscalls/vprintf.ll (100%) rename {Samples => cpp}/7_libNVVM/utils/build.bat (100%) rename {Samples => cpp}/7_libNVVM/utils/build.sh (100%) rename {Samples => cpp}/7_libNVVM/uvmlite/CMakeLists.txt (100%) rename {Samples => cpp}/7_libNVVM/uvmlite/README.md (100%) rename {Samples => cpp}/7_libNVVM/uvmlite/uvmlite.c (100%) rename {Samples => cpp}/7_libNVVM/uvmlite/uvmlite64.ll (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/egl_common.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/graphics_interface.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAErrorReporting/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAErrorReporting/main.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAHybridMode/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAHybridMode/main.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/main.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/main.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAStandaloneMode/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cuDLAStandaloneMode/main.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cudaNvSciNvMedia_Readme.pdf (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/main.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/sample.cfg (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/cudaNvSciNvMedia/teapot.rgba (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/data/ref_fluidsGLES.ppm (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/defines.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cuh (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/graphics_interface.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/mesh.frag.glsl (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/fluidsGLES/mesh.vert.glsl (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/bodysystem.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu_impl.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda_impl.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/galaxy_20K.bin (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/nbody_opengles.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/render_particles.cpp (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/render_particles.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/nbody_opengles/tipsy.h (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/data/ref_simpleGL.bin (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/graphics_interface.c (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/mesh.frag.glsl (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/mesh.vert.glsl (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES/simpleGLES.cu (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/c_cpp_properties.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/extensions.json (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/README.md (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/data/ref_simpleGLES_EGLOutput.bin (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/graphics_interface_egloutput_via_egl.c (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.frag.glsl (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.vert.glsl (100%) rename {Samples => cpp}/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/simpleGLES_EGLOutput.cu (100%) rename {Samples => cpp}/CMakeLists.txt (100%) create mode 100644 pyproject.toml create mode 100644 python/.gitignore create mode 100644 python/1_GettingStarted/blurImageUnifiedMemory/README.md create mode 100644 python/1_GettingStarted/blurImageUnifiedMemory/blurImageUnifiedMemory.py create mode 100644 python/1_GettingStarted/blurImageUnifiedMemory/requirements.txt create mode 100644 python/1_GettingStarted/copyImageArraytoGPU/README.md create mode 100644 python/1_GettingStarted/copyImageArraytoGPU/copyImageArraytoGPU.py create mode 100644 python/1_GettingStarted/copyImageArraytoGPU/requirements.txt create mode 100644 python/1_GettingStarted/deviceQuery/README.md create mode 100755 python/1_GettingStarted/deviceQuery/deviceQuery.py create mode 100644 python/1_GettingStarted/deviceQuery/requirements.txt create mode 100644 python/1_GettingStarted/kernelNsysProfile/README.md create mode 100644 python/1_GettingStarted/kernelNsysProfile/kernelNsysProfile.py create mode 100644 python/1_GettingStarted/kernelNsysProfile/requirements.txt create mode 100644 python/1_GettingStarted/numpyVsCupy/README.md create mode 100644 python/1_GettingStarted/numpyVsCupy/numpyVsCupy.py create mode 100644 python/1_GettingStarted/numpyVsCupy/requirements.txt create mode 100644 python/1_GettingStarted/simplePrint/README.md create mode 100644 python/1_GettingStarted/simplePrint/requirements.txt create mode 100644 python/1_GettingStarted/simplePrint/simplePrint.py create mode 100644 python/1_GettingStarted/systemInfo/README.md create mode 100644 python/1_GettingStarted/systemInfo/requirements.txt create mode 100644 python/1_GettingStarted/systemInfo/systemInfo.py create mode 100644 python/1_GettingStarted/vectorAdd/README.md create mode 100644 python/1_GettingStarted/vectorAdd/requirements.txt create mode 100755 python/1_GettingStarted/vectorAdd/vectorAdd.py create mode 100644 python/2_CoreConcepts/blockwiseSum/README.md create mode 100644 python/2_CoreConcepts/blockwiseSum/blockwiseSum.py create mode 100644 python/2_CoreConcepts/blockwiseSum/requirements.txt create mode 100644 python/2_CoreConcepts/cudaGraphs/README.md create mode 100644 python/2_CoreConcepts/cudaGraphs/cudaGraphs.py create mode 100644 python/2_CoreConcepts/cudaGraphs/requirements.txt create mode 100644 python/2_CoreConcepts/fftSignalAnalysis/README.md create mode 100644 python/2_CoreConcepts/fftSignalAnalysis/fftSignalAnalysis.py create mode 100644 python/2_CoreConcepts/fftSignalAnalysis/requirements.txt create mode 100644 python/2_CoreConcepts/greenContext/README.md create mode 100644 python/2_CoreConcepts/greenContext/greenContext.py create mode 100644 python/2_CoreConcepts/greenContext/requirements.txt create mode 100644 python/2_CoreConcepts/jitLtoLinking/README.md create mode 100644 python/2_CoreConcepts/jitLtoLinking/jitLtoLinking.py create mode 100644 python/2_CoreConcepts/jitLtoLinking/requirements.txt create mode 100644 python/2_CoreConcepts/launchConfigTuning/README.md create mode 100644 python/2_CoreConcepts/launchConfigTuning/launchConfigTuning.py create mode 100644 python/2_CoreConcepts/launchConfigTuning/requirements.txt create mode 100644 python/2_CoreConcepts/matrixMulSharedMem/README.md create mode 100644 python/2_CoreConcepts/matrixMulSharedMem/matrixMulSharedMem.py create mode 100644 python/2_CoreConcepts/matrixMulSharedMem/requirements.txt create mode 100644 python/2_CoreConcepts/memoryResources/README.md create mode 100644 python/2_CoreConcepts/memoryResources/memoryResources.py create mode 100644 python/2_CoreConcepts/memoryResources/requirements.txt create mode 100644 python/2_CoreConcepts/pageRank/README.md create mode 100644 python/2_CoreConcepts/pageRank/pageRank.py create mode 100644 python/2_CoreConcepts/pageRank/requirements.txt create mode 100644 python/2_CoreConcepts/parallelHistogram/README.md create mode 100644 python/2_CoreConcepts/parallelHistogram/parallelHistogram.py create mode 100644 python/2_CoreConcepts/parallelHistogram/requirements.txt create mode 100644 python/2_CoreConcepts/parallelReduction/README.md create mode 100644 python/2_CoreConcepts/parallelReduction/parallelReduction.py create mode 100644 python/2_CoreConcepts/parallelReduction/requirements.txt create mode 100644 python/2_CoreConcepts/prefixSum/README.md create mode 100644 python/2_CoreConcepts/prefixSum/prefixSum.py create mode 100644 python/2_CoreConcepts/prefixSum/requirements.txt create mode 100644 python/2_CoreConcepts/processCheckpoint/README.md create mode 100644 python/2_CoreConcepts/processCheckpoint/processCheckpoint.py create mode 100644 python/2_CoreConcepts/processCheckpoint/requirements.txt create mode 100644 python/2_CoreConcepts/reduction/README.md create mode 100644 python/2_CoreConcepts/reduction/reduction.py create mode 100644 python/2_CoreConcepts/reduction/requirements.txt create mode 100644 python/2_CoreConcepts/reductionMultiBlockCG/README.md create mode 100644 python/2_CoreConcepts/reductionMultiBlockCG/reductionMultiBlockCG.py create mode 100644 python/2_CoreConcepts/reductionMultiBlockCG/requirements.txt create mode 100644 python/2_CoreConcepts/simpleZeroCopy/README.md create mode 100644 python/2_CoreConcepts/simpleZeroCopy/requirements.txt create mode 100644 python/2_CoreConcepts/simpleZeroCopy/simpleZeroCopy.py create mode 100644 python/2_CoreConcepts/streamingCopyComputeOverlap/README.md create mode 100644 python/2_CoreConcepts/streamingCopyComputeOverlap/requirements.txt create mode 100644 python/2_CoreConcepts/streamingCopyComputeOverlap/streamingCopyComputeOverlap.py create mode 100644 python/2_CoreConcepts/tmaTensorMap/README.md create mode 100644 python/2_CoreConcepts/tmaTensorMap/requirements.txt create mode 100644 python/2_CoreConcepts/tmaTensorMap/tmaTensorMap.py create mode 100644 python/3_FrameworkInterop/customPyTorchKernel/README.md create mode 100644 python/3_FrameworkInterop/customPyTorchKernel/customPyTorchKernel.py create mode 100644 python/3_FrameworkInterop/customPyTorchKernel/requirements.txt create mode 100644 python/3_FrameworkInterop/customTensorFlowKernel/README.md create mode 100644 python/3_FrameworkInterop/customTensorFlowKernel/customTensorFlowKernel.py create mode 100644 python/3_FrameworkInterop/customTensorFlowKernel/requirements.txt create mode 100644 python/4_DistributedComputing/ipcMemoryPool/README.md create mode 100644 python/4_DistributedComputing/ipcMemoryPool/ipcMemoryPool.py create mode 100644 python/4_DistributedComputing/ipcMemoryPool/requirements.txt create mode 100644 python/4_DistributedComputing/multiGPUGradientAverage/README.md create mode 100644 python/4_DistributedComputing/multiGPUGradientAverage/multiGPUGradientAverage.py create mode 100644 python/4_DistributedComputing/multiGPUGradientAverage/requirements.txt create mode 100644 python/4_DistributedComputing/simpleP2P/README.md create mode 100644 python/4_DistributedComputing/simpleP2P/requirements.txt create mode 100644 python/4_DistributedComputing/simpleP2P/simpleP2P.py create mode 100644 python/Utilities/README.md create mode 100644 python/Utilities/__init__.py create mode 100644 python/Utilities/cuda_samples_utils.py create mode 100644 python/requirements.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8c83f2a7..ed8a9d62 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -104,3 +104,13 @@ repos: Common/.* ) args: ["-fallback-style=none", "-style=file", "-i"] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.4 + hooks: + - id: ruff + name: ruff lint + args: [--fix] + files: ^python/.*\.py$ + - id: ruff-format + name: ruff format + files: ^python/.*\.py$ diff --git a/CHANGELOG.md b/CHANGELOG.md index 784a15a1..5e604cef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## Changelog +### CUDA 13.2 (update) +* Added **CUDA Python samples** under `python/`. These scripts use [CUDA Python](https://nvidia.github.io/cuda-python/) (including `cuda.core`) and are organized like the C++ tree: `1_GettingStarted`, `2_CoreConcepts`, `3_FrameworkInterop`, and `4_DistributedComputing`, plus shared helpers in `python/Utilities`. Each sample includes a `README.md` and `requirements.txt`. They are **not** built by the root CMake project; install dependencies with `pip install -r requirements.txt` in the sample directory, then run the corresponding `.py` file as documented in that sample’s README. +* Renamed top-level `Samples` directory to `cpp` to accommodate Python samples alongside existing C++ samples; updated path references in `CMakeLists.txt`, `README.md`, and `Common` headers accordingly. + ### CUDA 13.2 * Added the MSVC compile flag `-Xcompiler=/Zc:preprocessor` in CMakeLists.txt to comply with CUDA13.2 CCCL. Previously, using the traditional preprocessor triggered the warning “MSVC/cl.exe with traditional preprocessor is used…”, which now leads to a build error. diff --git a/CMakeLists.txt b/CMakeLists.txt index 49d69f9c..bf040754 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,4 +30,4 @@ endif() # Include installation configuration before processing samples include(cmake/InstallSamples.cmake) -add_subdirectory(Samples) +add_subdirectory(cpp) diff --git a/Common/helper_nvJPEG.hxx b/Common/helper_nvJPEG.hxx index 0293d8bc..9e25a6b4 100644 --- a/Common/helper_nvJPEG.hxx +++ b/Common/helper_nvJPEG.hxx @@ -400,9 +400,9 @@ int getInputDir(std::string &input_dir, const char *executable_path) { std::string pathname = ""; const char *searchPath[] = { "./images", - "../../../../Samples/4_CUDA_Libraries//images", - "../../../Samples/4_CUDA_Libraries//images", - "../../Samples/4_CUDA_Libraries//images"}; + "../../../../cpp/4_CUDA_Libraries//images", + "../../../cpp/4_CUDA_Libraries//images", + "../../cpp/4_CUDA_Libraries//images"}; for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) { std::string pathname(searchPath[i]); diff --git a/Common/helper_string.h b/Common/helper_string.h index 39a1b380..89a3fce0 100644 --- a/Common/helper_string.h +++ b/Common/helper_string.h @@ -288,69 +288,69 @@ inline char *sdkFindFilePath(const char *filename, "./", // same dir "./data/", // same dir - "../../../../Samples//", // up 4 in tree - "../../../Samples//", // up 3 in tree - "../../Samples//", // up 2 in tree + "../../../../cpp//", // up 4 in tree + "../../../cpp//", // up 3 in tree + "../../cpp//", // up 2 in tree - "../../../../Samples//data/", // up 4 in tree - "../../../Samples//data/", // up 3 in tree - "../../Samples//data/", // up 2 in tree + "../../../../cpp//data/", // up 4 in tree + "../../../cpp//data/", // up 3 in tree + "../../cpp//data/", // up 2 in tree - "../../../../Samples/0_Introduction//", // up 4 in tree - "../../../Samples/0_Introduction//", // up 3 in tree - "../../Samples/0_Introduction//", // up 2 in tree + "../../../../cpp/0_Introduction//", // up 4 in tree + "../../../cpp/0_Introduction//", // up 3 in tree + "../../cpp/0_Introduction//", // up 2 in tree - "../../../../Samples/1_Utilities//", // up 4 in tree - "../../../Samples/1_Utilities//", // up 3 in tree - "../../Samples/1_Utilities//", // up 2 in tree + "../../../../cpp/1_Utilities//", // up 4 in tree + "../../../cpp/1_Utilities//", // up 3 in tree + "../../cpp/1_Utilities//", // up 2 in tree - "../../../../Samples/2_Concepts_and_Techniques//", // up 4 in tree - "../../../Samples/2_Concepts_and_Techniques//", // up 3 in tree - "../../Samples/2_Concepts_and_Techniques//", // up 2 in tree + "../../../../cpp/2_Concepts_and_Techniques//", // up 4 in tree + "../../../cpp/2_Concepts_and_Techniques//", // up 3 in tree + "../../cpp/2_Concepts_and_Techniques//", // up 2 in tree - "../../../../Samples/3_CUDA_Features//", // up 4 in tree - "../../../Samples/3_CUDA_Features//", // up 3 in tree - "../../Samples/3_CUDA_Features//", // up 2 in tree + "../../../../cpp/3_CUDA_Features//", // up 4 in tree + "../../../cpp/3_CUDA_Features//", // up 3 in tree + "../../cpp/3_CUDA_Features//", // up 2 in tree - "../../../../Samples/4_CUDA_Libraries//", // up 4 in tree - "../../../Samples/4_CUDA_Libraries//", // up 3 in tree - "../../Samples/4_CUDA_Libraries//", // up 2 in tree + "../../../../cpp/4_CUDA_Libraries//", // up 4 in tree + "../../../cpp/4_CUDA_Libraries//", // up 3 in tree + "../../cpp/4_CUDA_Libraries//", // up 2 in tree - "../../../../Samples/5_Domain_Specific//", // up 4 in tree - "../../../Samples/5_Domain_Specific//", // up 3 in tree - "../../Samples/5_Domain_Specific//", // up 2 in tree + "../../../../cpp/5_Domain_Specific//", // up 4 in tree + "../../../cpp/5_Domain_Specific//", // up 3 in tree + "../../cpp/5_Domain_Specific//", // up 2 in tree - "../../../../Samples/6_Performance//", // up 4 in tree - "../../../Samples/6_Performance//", // up 3 in tree - "../../Samples/6_Performance//", // up 2 in tree + "../../../../cpp/6_Performance//", // up 4 in tree + "../../../cpp/6_Performance//", // up 3 in tree + "../../cpp/6_Performance//", // up 2 in tree - "../../../../Samples/0_Introduction//data/", // up 4 in tree - "../../../Samples/0_Introduction//data/", // up 3 in tree - "../../Samples/0_Introduction//data/", // up 2 in tree + "../../../../cpp/0_Introduction//data/", // up 4 in tree + "../../../cpp/0_Introduction//data/", // up 3 in tree + "../../cpp/0_Introduction//data/", // up 2 in tree - "../../../../Samples/1_Utilities//data/", // up 4 in tree - "../../../Samples/1_Utilities//data/", // up 3 in tree - "../../Samples/1_Utilities//data/", // up 2 in tree + "../../../../cpp/1_Utilities//data/", // up 4 in tree + "../../../cpp/1_Utilities//data/", // up 3 in tree + "../../cpp/1_Utilities//data/", // up 2 in tree - "../../../../Samples/2_Concepts_and_Techniques//data/", // up 4 in tree - "../../../Samples/2_Concepts_and_Techniques//data/", // up 3 in tree - "../../Samples/2_Concepts_and_Techniques//data/", // up 2 in tree + "../../../../cpp/2_Concepts_and_Techniques//data/", // up 4 in tree + "../../../cpp/2_Concepts_and_Techniques//data/", // up 3 in tree + "../../cpp/2_Concepts_and_Techniques//data/", // up 2 in tree - "../../../../Samples/3_CUDA_Features//data/", // up 4 in tree - "../../../Samples/3_CUDA_Features//data/", // up 3 in tree - "../../Samples/3_CUDA_Features//data/", // up 2 in tree + "../../../../cpp/3_CUDA_Features//data/", // up 4 in tree + "../../../cpp/3_CUDA_Features//data/", // up 3 in tree + "../../cpp/3_CUDA_Features//data/", // up 2 in tree - "../../../../Samples/4_CUDA_Libraries//data/", // up 4 in tree - "../../../Samples/4_CUDA_Libraries//data/", // up 3 in tree - "../../Samples/4_CUDA_Libraries//data/", // up 2 in tree + "../../../../cpp/4_CUDA_Libraries//data/", // up 4 in tree + "../../../cpp/4_CUDA_Libraries//data/", // up 3 in tree + "../../cpp/4_CUDA_Libraries//data/", // up 2 in tree - "../../../../Samples/5_Domain_Specific//data/", // up 4 in tree - "../../../Samples/5_Domain_Specific//data/", // up 3 in tree - "../../Samples/5_Domain_Specific//data/", // up 2 in tree + "../../../../cpp/5_Domain_Specific//data/", // up 4 in tree + "../../../cpp/5_Domain_Specific//data/", // up 3 in tree + "../../cpp/5_Domain_Specific//data/", // up 2 in tree - "../../../../Samples/6_Performance//data/", // up 4 in tree - "../../../Samples/6_Performance//data/", // up 3 in tree - "../../Samples/6_Performance//data/", // up 2 in tree + "../../../../cpp/6_Performance//data/", // up 4 in tree + "../../../cpp/6_Performance//data/", // up 3 in tree + "../../cpp/6_Performance//data/", // up 2 in tree "../../../../Common/data/", // up 4 in tree "../../../Common/data/", // up 3 in tree diff --git a/README.md b/README.md index 056e7d5c..a0785858 100644 --- a/README.md +++ b/README.md @@ -159,12 +159,12 @@ $ make -j$(nproc) --ignore-errors # or --keep-going ``` ``` -# In Samples/5_Domain_Specific/CMakeList.txt +# In cpp/5_Domain_Specific/CMakeList.txt # add_subdirectory(simpleGL) # add_subdirectory(simpleVulkan) # add_subdirectory(simpleVulkanMMAP) -# In Samples/8_Platform_Specific/Tegra/CMakeList.txt +# In cpp/8_Platform_Specific/Tegra/CMakeList.txt # add_subdirectory(simpleGLES_EGLOutput) ``` @@ -195,6 +195,30 @@ To build samples with new CUDA Toolkit(CUDA 13.0 or later) and UMD(Version 580 o cmake -DCMAKE_PREFIX_PATH=/usr/local/cuda/lib64/stubs/ .. ``` +## CUDA Python samples + +The repository includes **Python** examples under the [`python/`](./python) directory. **These samples are cuda.core–focused:** they use [CUDA Python](https://nvidia.github.io/cuda-python/), with [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) for devices, programs, launches, and memory, alongside NumPy, CuPy, or framework interop where each sample notes. + +**Layout (same themes as the C++ samples):** + +| Directory | Contents | +|-----------|----------| +| `python/1_GettingStarted/` | Introductory scripts (e.g. `vectorAdd`, `deviceQuery`, `systemInfo`, image blur with unified memory, NumPy vs CuPy). | +| `python/2_CoreConcepts/` | Algorithms and techniques (e.g. reductions, histograms, FFT, stream overlap, `memoryResources`, `cudaGraphs`, `jitLtoLinking`, `tmaTensorMap`). | +| `python/3_FrameworkInterop/` | Integration with PyTorch and TensorFlow. | +| `python/4_DistributedComputing/` | Multi-GPU, peer-to-peer, and IPC patterns (`ipcMemoryPool`). | +| `python/Utilities/` | Shared helpers imported by some samples. | + +**How to run:** The top-level CMake build does **not** compile these samples. For each sample, use a Python 3.10+ environment with a matching [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads) (the samples target CUDA 13.x and document exact package pins in `requirements.txt`): + +```bash +cd python// +pip install -r requirements.txt +python .py +``` + +Use each sample’s `README.md` for prerequisites, CLI options, and expected output. + ## Install Samples ### Installation Path Structure @@ -272,7 +296,7 @@ the following command line arguments: | Switch | Purpose | Example | | ---------- | -------------------------------------------------------------------------------------------------------------- | ----------------------- | -| --dir | Specify the root directory to search for executables (recursively) | --dir ./build/Samples | +| --dir | Specify the root directory to search for executables (recursively) | --dir ./build/cpp | | --config | JSON configuration file for executable arguments | --config test_args.json | | --output | Output directory for test results (stdout saved to .txt files - directory will be created if it doesn't exist) | --output ./test | | --args | Global arguments to pass to all executables (not currently used) | --args arg_1 arg_2 ... | @@ -397,7 +421,7 @@ make -j$(nproc) Now, return to the samples root directory and run the test script: ```bash cd .. -python3 run_tests.py --output ./test --dir ./build/Samples --config test_args.json +python3 run_tests.py --output ./test --dir ./build/cpp --config test_args.json ``` If all applications run successfully, you will see something similar to this (the specific number of samples will depend on your build type @@ -425,31 +449,31 @@ incorrectly on your system. ## Samples list -### [0. Introduction](./Samples/0_Introduction/README.md) +### [0. Introduction](./cpp/0_Introduction/README.md) Basic CUDA samples for beginners that illustrate key concepts with using CUDA and CUDA runtime APIs. -### [1. Utilities](./Samples/1_Utilities/README.md) +### [1. Utilities](./cpp/1_Utilities/README.md) Utility samples that demonstrate how to query device capabilities and measure GPU/CPU bandwidth. -### [2. Concepts and Techniques](./Samples/2_Concepts_and_Techniques/README.md) +### [2. Concepts and Techniques](./cpp/2_Concepts_and_Techniques/README.md) Samples that demonstrate CUDA related concepts and common problem solving techniques. -### [3. CUDA Features](./Samples/3_CUDA_Features/README.md) +### [3. CUDA Features](./cpp/3_CUDA_Features/README.md) Samples that demonstrate CUDA Features (Cooperative Groups, CUDA Dynamic Parallelism, CUDA Graphs etc). -### [4. CUDA Libraries](./Samples/4_CUDA_Libraries/README.md) +### [4. CUDA Libraries](./cpp/4_CUDA_Libraries/README.md) Samples that demonstrate how to use CUDA platform libraries (NPP, NVJPEG, NVGRAPH cuBLAS, cuFFT, cuSPARSE, cuSOLVER and cuRAND). -### [5. Domain Specific](./Samples/5_Domain_Specific/README.md) +### [5. Domain Specific](./cpp/5_Domain_Specific/README.md) Samples that are specific to domain (Graphics, Finance, Image Processing). -### [6. Performance](./Samples/6_Performance/README.md) +### [6. Performance](./cpp/6_Performance/README.md) Samples that demonstrate performance optimization. -### [7. libNVVM](./Samples/7_libNVVM/README.md) +### [7. libNVVM](./cpp/7_libNVVM/README.md) Samples that demonstrate the use of libNVVVM and NVVM IR. -### [8. Platform Specific](./Samples/8_Platform_Specific/Tegra/README.md) +### [8. Platform Specific](./cpp/8_Platform_Specific/Tegra/README.md) Samples that are specific to certain platforms (Tegra, cuDLA, NvMedia, NvSci, OpenGL ES). ## Dependencies @@ -514,7 +538,7 @@ To set up GLFW on a Windows system, Download the pre-built binaries from [GLFW w #### OpenMP -OpenMP is an API for multiprocessing programming. OpenMP can be installed using your Linux distribution's package manager system. It usually comes preinstalled with GCC. It can also be found at the [OpenMP website](http://openmp.org/). For compilers such as clang, `libomp.so` and other components for LLVM must be installed separated. You will also need to set additional flags in your CMake configuration files, such as: `-DOpenMP_CXX_FLAGS="-fopenmp=libomp" -DOpenMP_CXX_LIB_NAMES="omp" -DOpenMP_omp_LIBRARY="/path/to/libomp.so"`. +OpenMP is an API for multiprocessing programming. OpenMP can be installed using your Linux distribution's package manager system. It usually comes preinstalled with GCC. It can also be found at the [OpenMP website](http://openmp.org/). For compilers such as clang, make sure OpenMP is enabled when building LLVM by including `openmp` in `LLVM_ENABLE_PROJECTS`. If you use clang (from an installed prefix or directly from an LLVM build tree) with OpenMP enabled, set CMAKE_CXX_COMPILER and CMAKE_CUDA_HOST_COMPILER to that clang++ and let CMake detect OpenMP; extra OpenMP_* CMake variables are usually not needed. When using clang++ directly from an LLVM build tree, you may need to copy the generated `omp.h` into the `include/` directory under the path reported by `clang++ --print-resource-dir` before building the samples. When using an installed clang with OpenMP, if you see libomp.so: cannot open shared object file at runtime, add the directory that contains libomp.so to LD_LIBRARY_PATH (or configure it via ld.so.conf.d) so the dynamic linker can locate the OpenMP runtime. #### Screen diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/.vscode/launch.json b/Samples/0_Introduction/UnifiedMemoryStreams/.vscode/launch.json deleted file mode 100644 index 8506146f..00000000 --- a/Samples/0_Introduction/UnifiedMemoryStreams/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/UnifiedMemoryStreams" - } - ] -} diff --git a/Samples/0_Introduction/asyncAPI/.vscode/launch.json b/Samples/0_Introduction/asyncAPI/.vscode/launch.json deleted file mode 100644 index 6247c443..00000000 --- a/Samples/0_Introduction/asyncAPI/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/asyncAPI" - } - ] -} diff --git a/Samples/0_Introduction/clock/.vscode/launch.json b/Samples/0_Introduction/clock/.vscode/launch.json deleted file mode 100644 index 4c411a61..00000000 --- a/Samples/0_Introduction/clock/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/clock" - } - ] -} diff --git a/Samples/0_Introduction/clock_nvrtc/.vscode/launch.json b/Samples/0_Introduction/clock_nvrtc/.vscode/launch.json deleted file mode 100644 index 3238ade8..00000000 --- a/Samples/0_Introduction/clock_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/clock_nvrtc" - } - ] -} diff --git a/Samples/0_Introduction/cudaOpenMP/.vscode/launch.json b/Samples/0_Introduction/cudaOpenMP/.vscode/launch.json deleted file mode 100644 index a0c757bd..00000000 --- a/Samples/0_Introduction/cudaOpenMP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cudaOpenMP" - } - ] -} diff --git a/Samples/0_Introduction/fp16ScalarProduct/.vscode/launch.json b/Samples/0_Introduction/fp16ScalarProduct/.vscode/launch.json deleted file mode 100644 index 91fa4dfc..00000000 --- a/Samples/0_Introduction/fp16ScalarProduct/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/fp16ScalarProduct" - } - ] -} diff --git a/Samples/0_Introduction/matrixMul/.vscode/launch.json b/Samples/0_Introduction/matrixMul/.vscode/launch.json deleted file mode 100644 index 5e283ebd..00000000 --- a/Samples/0_Introduction/matrixMul/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/matrixMul" - } - ] -} diff --git a/Samples/0_Introduction/matrixMulDrv/.vscode/launch.json b/Samples/0_Introduction/matrixMulDrv/.vscode/launch.json deleted file mode 100644 index d984da1f..00000000 --- a/Samples/0_Introduction/matrixMulDrv/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/matrixMulDrv" - } - ] -} diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/.vscode/launch.json b/Samples/0_Introduction/matrixMulDynlinkJIT/.vscode/launch.json deleted file mode 100644 index da7c74d3..00000000 --- a/Samples/0_Introduction/matrixMulDynlinkJIT/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/matrixMulDynlinkJIT" - } - ] -} diff --git a/Samples/0_Introduction/matrixMul_nvrtc/.vscode/launch.json b/Samples/0_Introduction/matrixMul_nvrtc/.vscode/launch.json deleted file mode 100644 index 89475444..00000000 --- a/Samples/0_Introduction/matrixMul_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/matrixMul_nvrtc" - } - ] -} diff --git a/Samples/0_Introduction/mergeSort/.vscode/launch.json b/Samples/0_Introduction/mergeSort/.vscode/launch.json deleted file mode 100644 index 45eff2b1..00000000 --- a/Samples/0_Introduction/mergeSort/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/mergeSort" - } - ] -} diff --git a/Samples/0_Introduction/simpleAWBarrier/.vscode/launch.json b/Samples/0_Introduction/simpleAWBarrier/.vscode/launch.json deleted file mode 100644 index b0fff2d8..00000000 --- a/Samples/0_Introduction/simpleAWBarrier/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleAWBarrier" - } - ] -} diff --git a/Samples/0_Introduction/simpleAssert/.vscode/launch.json b/Samples/0_Introduction/simpleAssert/.vscode/launch.json deleted file mode 100644 index 3da1ad8c..00000000 --- a/Samples/0_Introduction/simpleAssert/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleAssert" - } - ] -} diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/.vscode/launch.json b/Samples/0_Introduction/simpleAssert_nvrtc/.vscode/launch.json deleted file mode 100644 index b23ab0ee..00000000 --- a/Samples/0_Introduction/simpleAssert_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleAssert_nvrtc" - } - ] -} diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/.vscode/launch.json b/Samples/0_Introduction/simpleAtomicIntrinsics/.vscode/launch.json deleted file mode 100644 index fcf14128..00000000 --- a/Samples/0_Introduction/simpleAtomicIntrinsics/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleAtomicIntrinsics" - } - ] -} diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/launch.json b/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/launch.json deleted file mode 100644 index f857c6fa..00000000 --- a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleAtomicIntrinsics_nvrtc" - } - ] -} diff --git a/Samples/0_Introduction/simpleAttributes/.vscode/launch.json b/Samples/0_Introduction/simpleAttributes/.vscode/launch.json deleted file mode 100644 index 4fbad2e8..00000000 --- a/Samples/0_Introduction/simpleAttributes/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleAttributes" - } - ] -} diff --git a/Samples/0_Introduction/simpleCUDA2GL/.vscode/launch.json b/Samples/0_Introduction/simpleCUDA2GL/.vscode/launch.json deleted file mode 100644 index 56ff7492..00000000 --- a/Samples/0_Introduction/simpleCUDA2GL/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCUDA2GL" - } - ] -} diff --git a/Samples/0_Introduction/simpleCallback/.vscode/launch.json b/Samples/0_Introduction/simpleCallback/.vscode/launch.json deleted file mode 100644 index 37a31bdc..00000000 --- a/Samples/0_Introduction/simpleCallback/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCallback" - } - ] -} diff --git a/Samples/0_Introduction/simpleCooperativeGroups/.vscode/launch.json b/Samples/0_Introduction/simpleCooperativeGroups/.vscode/launch.json deleted file mode 100644 index 9bfab4c8..00000000 --- a/Samples/0_Introduction/simpleCooperativeGroups/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCooperativeGroups" - } - ] -} diff --git a/Samples/0_Introduction/simpleCubemapTexture/.vscode/launch.json b/Samples/0_Introduction/simpleCubemapTexture/.vscode/launch.json deleted file mode 100644 index 7ac2e611..00000000 --- a/Samples/0_Introduction/simpleCubemapTexture/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCubemapTexture" - } - ] -} diff --git a/Samples/0_Introduction/simpleDrvRuntime/.vscode/launch.json b/Samples/0_Introduction/simpleDrvRuntime/.vscode/launch.json deleted file mode 100644 index 867f41a9..00000000 --- a/Samples/0_Introduction/simpleDrvRuntime/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleDrvRuntime" - } - ] -} diff --git a/Samples/0_Introduction/simpleHyperQ/.vscode/launch.json b/Samples/0_Introduction/simpleHyperQ/.vscode/launch.json deleted file mode 100644 index 9acb61f2..00000000 --- a/Samples/0_Introduction/simpleHyperQ/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleHyperQ" - } - ] -} diff --git a/Samples/0_Introduction/simpleIPC/.vscode/launch.json b/Samples/0_Introduction/simpleIPC/.vscode/launch.json deleted file mode 100644 index a4016a09..00000000 --- a/Samples/0_Introduction/simpleIPC/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleIPC" - } - ] -} diff --git a/Samples/0_Introduction/simpleLayeredTexture/.vscode/launch.json b/Samples/0_Introduction/simpleLayeredTexture/.vscode/launch.json deleted file mode 100644 index 79c23877..00000000 --- a/Samples/0_Introduction/simpleLayeredTexture/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleLayeredTexture" - } - ] -} diff --git a/Samples/0_Introduction/simpleMPI/.vscode/launch.json b/Samples/0_Introduction/simpleMPI/.vscode/launch.json deleted file mode 100644 index 7c6bcb22..00000000 --- a/Samples/0_Introduction/simpleMPI/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleMPI" - } - ] -} diff --git a/Samples/0_Introduction/simpleMultiCopy/.vscode/launch.json b/Samples/0_Introduction/simpleMultiCopy/.vscode/launch.json deleted file mode 100644 index 378eeded..00000000 --- a/Samples/0_Introduction/simpleMultiCopy/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleMultiCopy" - } - ] -} diff --git a/Samples/0_Introduction/simpleMultiGPU/.vscode/launch.json b/Samples/0_Introduction/simpleMultiGPU/.vscode/launch.json deleted file mode 100644 index dc9ffcf9..00000000 --- a/Samples/0_Introduction/simpleMultiGPU/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleMultiGPU" - } - ] -} diff --git a/Samples/0_Introduction/simpleOccupancy/.vscode/launch.json b/Samples/0_Introduction/simpleOccupancy/.vscode/launch.json deleted file mode 100644 index 5fd6d29c..00000000 --- a/Samples/0_Introduction/simpleOccupancy/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleOccupancy" - } - ] -} diff --git a/Samples/0_Introduction/simpleP2P/.vscode/launch.json b/Samples/0_Introduction/simpleP2P/.vscode/launch.json deleted file mode 100644 index a839e506..00000000 --- a/Samples/0_Introduction/simpleP2P/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleP2P" - } - ] -} diff --git a/Samples/0_Introduction/simplePitchLinearTexture/.vscode/launch.json b/Samples/0_Introduction/simplePitchLinearTexture/.vscode/launch.json deleted file mode 100644 index 7519a8f5..00000000 --- a/Samples/0_Introduction/simplePitchLinearTexture/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simplePitchLinearTexture" - } - ] -} diff --git a/Samples/0_Introduction/simplePrintf/.vscode/launch.json b/Samples/0_Introduction/simplePrintf/.vscode/launch.json deleted file mode 100644 index 995d7b35..00000000 --- a/Samples/0_Introduction/simplePrintf/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simplePrintf" - } - ] -} diff --git a/Samples/0_Introduction/simpleStreams/.vscode/launch.json b/Samples/0_Introduction/simpleStreams/.vscode/launch.json deleted file mode 100644 index 602b02ca..00000000 --- a/Samples/0_Introduction/simpleStreams/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleStreams" - } - ] -} diff --git a/Samples/0_Introduction/simpleSurfaceWrite/.vscode/launch.json b/Samples/0_Introduction/simpleSurfaceWrite/.vscode/launch.json deleted file mode 100644 index 264df266..00000000 --- a/Samples/0_Introduction/simpleSurfaceWrite/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleSurfaceWrite" - } - ] -} diff --git a/Samples/0_Introduction/simpleTemplates/.vscode/launch.json b/Samples/0_Introduction/simpleTemplates/.vscode/launch.json deleted file mode 100644 index aa187401..00000000 --- a/Samples/0_Introduction/simpleTemplates/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleTemplates" - } - ] -} diff --git a/Samples/0_Introduction/simpleTexture/.vscode/launch.json b/Samples/0_Introduction/simpleTexture/.vscode/launch.json deleted file mode 100644 index 16332eca..00000000 --- a/Samples/0_Introduction/simpleTexture/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleTexture" - } - ] -} diff --git a/Samples/0_Introduction/simpleTexture3D/.vscode/launch.json b/Samples/0_Introduction/simpleTexture3D/.vscode/launch.json deleted file mode 100644 index 9d93585e..00000000 --- a/Samples/0_Introduction/simpleTexture3D/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleTexture3D" - } - ] -} diff --git a/Samples/0_Introduction/simpleTextureDrv/.vscode/launch.json b/Samples/0_Introduction/simpleTextureDrv/.vscode/launch.json deleted file mode 100644 index 3ac21125..00000000 --- a/Samples/0_Introduction/simpleTextureDrv/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleTextureDrv" - } - ] -} diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/.vscode/launch.json b/Samples/0_Introduction/simpleVoteIntrinsics/.vscode/launch.json deleted file mode 100644 index fbc5a778..00000000 --- a/Samples/0_Introduction/simpleVoteIntrinsics/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleVoteIntrinsics" - } - ] -} diff --git a/Samples/0_Introduction/simpleZeroCopy/.vscode/launch.json b/Samples/0_Introduction/simpleZeroCopy/.vscode/launch.json deleted file mode 100644 index 03d5ce8c..00000000 --- a/Samples/0_Introduction/simpleZeroCopy/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleZeroCopy" - } - ] -} diff --git a/Samples/0_Introduction/systemWideAtomics/.vscode/launch.json b/Samples/0_Introduction/systemWideAtomics/.vscode/launch.json deleted file mode 100644 index 86856a92..00000000 --- a/Samples/0_Introduction/systemWideAtomics/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/systemWideAtomics" - } - ] -} diff --git a/Samples/0_Introduction/template/.vscode/launch.json b/Samples/0_Introduction/template/.vscode/launch.json deleted file mode 100644 index 72e9c3de..00000000 --- a/Samples/0_Introduction/template/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/template" - } - ] -} diff --git a/Samples/0_Introduction/vectorAdd/.vscode/launch.json b/Samples/0_Introduction/vectorAdd/.vscode/launch.json deleted file mode 100644 index ae4353a5..00000000 --- a/Samples/0_Introduction/vectorAdd/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/vectorAdd" - } - ] -} diff --git a/Samples/0_Introduction/vectorAddDrv/.vscode/launch.json b/Samples/0_Introduction/vectorAddDrv/.vscode/launch.json deleted file mode 100644 index b65cb510..00000000 --- a/Samples/0_Introduction/vectorAddDrv/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/vectorAddDrv" - } - ] -} diff --git a/Samples/0_Introduction/vectorAddMMAP/.vscode/launch.json b/Samples/0_Introduction/vectorAddMMAP/.vscode/launch.json deleted file mode 100644 index 4d82e199..00000000 --- a/Samples/0_Introduction/vectorAddMMAP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/vectorAddMMAP" - } - ] -} diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/.vscode/launch.json b/Samples/0_Introduction/vectorAdd_nvrtc/.vscode/launch.json deleted file mode 100644 index a329c097..00000000 --- a/Samples/0_Introduction/vectorAdd_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/vectorAdd_nvrtc" - } - ] -} diff --git a/Samples/1_Utilities/deviceQuery/.vscode/launch.json b/Samples/1_Utilities/deviceQuery/.vscode/launch.json deleted file mode 100644 index e8e28a04..00000000 --- a/Samples/1_Utilities/deviceQuery/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/deviceQuery" - } - ] -} diff --git a/Samples/1_Utilities/deviceQueryDrv/.vscode/launch.json b/Samples/1_Utilities/deviceQueryDrv/.vscode/launch.json deleted file mode 100644 index 55a2c4a0..00000000 --- a/Samples/1_Utilities/deviceQueryDrv/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/deviceQueryDrv" - } - ] -} diff --git a/Samples/1_Utilities/topologyQuery/.vscode/launch.json b/Samples/1_Utilities/topologyQuery/.vscode/launch.json deleted file mode 100644 index fc0f8859..00000000 --- a/Samples/1_Utilities/topologyQuery/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/topologyQuery" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/launch.json deleted file mode 100644 index 994fedea..00000000 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/EGLStream_CUDA_CrossGPU" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/launch.json deleted file mode 100644 index e6ea4a94..00000000 --- a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/EGLStream_CUDA_Interop" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/FunctionPointers/.vscode/launch.json deleted file mode 100644 index 70f1eae7..00000000 --- a/Samples/2_Concepts_and_Techniques/FunctionPointers/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/FunctionPointers" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/launch.json deleted file mode 100644 index b928343f..00000000 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/MC_EstimatePiInlineP" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/launch.json deleted file mode 100644 index bbb33858..00000000 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/MC_EstimatePiInlineQ" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/launch.json deleted file mode 100644 index 6173da38..00000000 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/MC_EstimatePiP" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/launch.json deleted file mode 100644 index caa47e4e..00000000 --- a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/MC_EstimatePiQ" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/launch.json deleted file mode 100644 index dc01990e..00000000 --- a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/MC_SingleAsianOptionP" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/boxFilter/.vscode/launch.json deleted file mode 100644 index 10da1ef6..00000000 --- a/Samples/2_Concepts_and_Techniques/boxFilter/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/boxFilter" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/convolutionSeparable/.vscode/launch.json deleted file mode 100644 index c19ab1c9..00000000 --- a/Samples/2_Concepts_and_Techniques/convolutionSeparable/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/convolutionSeparable" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/convolutionTexture/.vscode/launch.json deleted file mode 100644 index b3ddfe16..00000000 --- a/Samples/2_Concepts_and_Techniques/convolutionTexture/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/convolutionTexture" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/dct8x8/.vscode/launch.json deleted file mode 100644 index 2938a13d..00000000 --- a/Samples/2_Concepts_and_Techniques/dct8x8/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/dct8x8" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/eigenvalues/.vscode/launch.json deleted file mode 100644 index aecf3fa9..00000000 --- a/Samples/2_Concepts_and_Techniques/eigenvalues/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/eigenvalues" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/histogram/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/histogram/.vscode/launch.json deleted file mode 100644 index 46f498d6..00000000 --- a/Samples/2_Concepts_and_Techniques/histogram/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/histogram" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/imageDenoising/.vscode/launch.json deleted file mode 100644 index e8c0cb8e..00000000 --- a/Samples/2_Concepts_and_Techniques/imageDenoising/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/imageDenoising" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/inlinePTX/.vscode/launch.json deleted file mode 100644 index 287be359..00000000 --- a/Samples/2_Concepts_and_Techniques/inlinePTX/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/inlinePTX" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/launch.json deleted file mode 100644 index 65bb00cd..00000000 --- a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/inlinePTX_nvrtc" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/interval/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/interval/.vscode/launch.json deleted file mode 100644 index 3db38a9a..00000000 --- a/Samples/2_Concepts_and_Techniques/interval/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/interval" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/particles/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/particles/.vscode/launch.json deleted file mode 100644 index 6fb36a44..00000000 --- a/Samples/2_Concepts_and_Techniques/particles/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/particles" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/radixSortThrust/.vscode/launch.json deleted file mode 100644 index 8104c752..00000000 --- a/Samples/2_Concepts_and_Techniques/radixSortThrust/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/radixSortThrust" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/reduction/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/reduction/.vscode/launch.json deleted file mode 100644 index 839aba3e..00000000 --- a/Samples/2_Concepts_and_Techniques/reduction/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/reduction" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/launch.json deleted file mode 100644 index 1c6fe4d8..00000000 --- a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/reductionMultiBlockCG" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/scalarProd/.vscode/launch.json deleted file mode 100644 index 0b384843..00000000 --- a/Samples/2_Concepts_and_Techniques/scalarProd/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/scalarProd" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/scan/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/scan/.vscode/launch.json deleted file mode 100644 index 7096b1d3..00000000 --- a/Samples/2_Concepts_and_Techniques/scan/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/scan" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/launch.json deleted file mode 100644 index c7f4974e..00000000 --- a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/segmentationTreeThrust" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/shfl_scan/.vscode/launch.json deleted file mode 100644 index 02498832..00000000 --- a/Samples/2_Concepts_and_Techniques/shfl_scan/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/shfl_scan" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/sortingNetworks/.vscode/launch.json deleted file mode 100644 index 5fc52568..00000000 --- a/Samples/2_Concepts_and_Techniques/sortingNetworks/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/sortingNetworks" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/launch.json deleted file mode 100644 index 430cded3..00000000 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/streamOrderedAllocation" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/launch.json deleted file mode 100644 index 7d62cb1f..00000000 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/streamOrderedAllocationIPC" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/launch.json deleted file mode 100644 index 11e7f592..00000000 --- a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/streamOrderedAllocationP2P" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/threadFenceReduction/.vscode/launch.json deleted file mode 100644 index 2215ba6c..00000000 --- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/threadFenceReduction" - } - ] -} diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/.vscode/launch.json b/Samples/2_Concepts_and_Techniques/threadMigration/.vscode/launch.json deleted file mode 100644 index ee4a29d1..00000000 --- a/Samples/2_Concepts_and_Techniques/threadMigration/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/threadMigration" - } - ] -} diff --git a/Samples/3_CUDA_Features/StreamPriorities/.vscode/launch.json b/Samples/3_CUDA_Features/StreamPriorities/.vscode/launch.json deleted file mode 100644 index 49dd476f..00000000 --- a/Samples/3_CUDA_Features/StreamPriorities/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/StreamPriorities" - } - ] -} diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/.vscode/launch.json b/Samples/3_CUDA_Features/bf16TensorCoreGemm/.vscode/launch.json deleted file mode 100644 index 8b826879..00000000 --- a/Samples/3_CUDA_Features/bf16TensorCoreGemm/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/bf16TensorCoreGemm" - } - ] -} diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/.vscode/launch.json b/Samples/3_CUDA_Features/binaryPartitionCG/.vscode/launch.json deleted file mode 100644 index 0997257c..00000000 --- a/Samples/3_CUDA_Features/binaryPartitionCG/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/binaryPartitionCG" - } - ] -} diff --git a/Samples/3_CUDA_Features/bindlessTexture/.vscode/launch.json b/Samples/3_CUDA_Features/bindlessTexture/.vscode/launch.json deleted file mode 100644 index e3bd915a..00000000 --- a/Samples/3_CUDA_Features/bindlessTexture/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/bindlessTexture" - } - ] -} diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/launch.json b/Samples/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/launch.json deleted file mode 100644 index f3fadb13..00000000 --- a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cdpAdvancedQuicksort" - } - ] -} diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/.vscode/launch.json b/Samples/3_CUDA_Features/cdpBezierTessellation/.vscode/launch.json deleted file mode 100644 index 68bd046a..00000000 --- a/Samples/3_CUDA_Features/cdpBezierTessellation/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cdpBezierTessellation" - } - ] -} diff --git a/Samples/3_CUDA_Features/cdpQuadtree/.vscode/launch.json b/Samples/3_CUDA_Features/cdpQuadtree/.vscode/launch.json deleted file mode 100644 index 40de8deb..00000000 --- a/Samples/3_CUDA_Features/cdpQuadtree/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cdpQuadtree" - } - ] -} diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/.vscode/launch.json b/Samples/3_CUDA_Features/cdpSimplePrint/.vscode/launch.json deleted file mode 100644 index 43c1e3a3..00000000 --- a/Samples/3_CUDA_Features/cdpSimplePrint/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cdpSimplePrint" - } - ] -} diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/.vscode/launch.json b/Samples/3_CUDA_Features/cdpSimpleQuicksort/.vscode/launch.json deleted file mode 100644 index 121be5b8..00000000 --- a/Samples/3_CUDA_Features/cdpSimpleQuicksort/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cdpSimpleQuicksort" - } - ] -} diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/.vscode/launch.json b/Samples/3_CUDA_Features/cudaCompressibleMemory/.vscode/launch.json deleted file mode 100644 index 0b5dac55..00000000 --- a/Samples/3_CUDA_Features/cudaCompressibleMemory/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cudaCompressibleMemory" - } - ] -} diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/.vscode/launch.json b/Samples/3_CUDA_Features/cudaTensorCoreGemm/.vscode/launch.json deleted file mode 100644 index b47ed8bd..00000000 --- a/Samples/3_CUDA_Features/cudaTensorCoreGemm/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cudaTensorCoreGemm" - } - ] -} diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/launch.json b/Samples/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/launch.json deleted file mode 100644 index 1848df00..00000000 --- a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/dmmaTensorCoreGemm" - } - ] -} diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/launch.json b/Samples/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/launch.json deleted file mode 100644 index 1d224d71..00000000 --- a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/globalToShmemAsyncCopy" - } - ] -} diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/launch.json b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/launch.json deleted file mode 100644 index 0c68d127..00000000 --- a/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/graphConditionalNodes" - } - ] -} diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/.vscode/launch.json b/Samples/3_CUDA_Features/graphMemoryFootprint/.vscode/launch.json deleted file mode 100644 index 63b2bb0d..00000000 --- a/Samples/3_CUDA_Features/graphMemoryFootprint/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/graphMemoryFootprint" - } - ] -} diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/.vscode/launch.json b/Samples/3_CUDA_Features/graphMemoryNodes/.vscode/launch.json deleted file mode 100644 index 7133b29e..00000000 --- a/Samples/3_CUDA_Features/graphMemoryNodes/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/graphMemoryNodes" - } - ] -} diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/.vscode/launch.json b/Samples/3_CUDA_Features/immaTensorCoreGemm/.vscode/launch.json deleted file mode 100644 index 0a76fdae..00000000 --- a/Samples/3_CUDA_Features/immaTensorCoreGemm/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/immaTensorCoreGemm" - } - ] -} diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/.vscode/launch.json b/Samples/3_CUDA_Features/jacobiCudaGraphs/.vscode/launch.json deleted file mode 100644 index 9341425d..00000000 --- a/Samples/3_CUDA_Features/jacobiCudaGraphs/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/jacobiCudaGraphs" - } - ] -} diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/.vscode/launch.json b/Samples/3_CUDA_Features/memMapIPCDrv/.vscode/launch.json deleted file mode 100644 index 4a1053e0..00000000 --- a/Samples/3_CUDA_Features/memMapIPCDrv/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/memMapIPCDrv" - } - ] -} diff --git a/Samples/3_CUDA_Features/newdelete/.vscode/launch.json b/Samples/3_CUDA_Features/newdelete/.vscode/launch.json deleted file mode 100644 index bd5c69d9..00000000 --- a/Samples/3_CUDA_Features/newdelete/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/newdelete" - } - ] -} diff --git a/Samples/3_CUDA_Features/ptxjit/.vscode/launch.json b/Samples/3_CUDA_Features/ptxjit/.vscode/launch.json deleted file mode 100644 index a3e72006..00000000 --- a/Samples/3_CUDA_Features/ptxjit/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/ptxjit" - } - ] -} diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/.vscode/launch.json b/Samples/3_CUDA_Features/simpleCudaGraphs/.vscode/launch.json deleted file mode 100644 index 8c2d64d7..00000000 --- a/Samples/3_CUDA_Features/simpleCudaGraphs/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCudaGraphs" - } - ] -} diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/.vscode/launch.json b/Samples/3_CUDA_Features/tf32TensorCoreGemm/.vscode/launch.json deleted file mode 100644 index 7935e090..00000000 --- a/Samples/3_CUDA_Features/tf32TensorCoreGemm/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/tf32TensorCoreGemm" - } - ] -} diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/launch.json b/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/launch.json deleted file mode 100644 index 3518b99c..00000000 --- a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/warpAggregatedAtomicsCG" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/launch.json deleted file mode 100644 index de8ab657..00000000 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/FilterBorderControlNPP" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/launch.json b/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/launch.json deleted file mode 100644 index 31f6bd3a..00000000 --- a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/MersenneTwisterGP11213" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/.vscode/launch.json b/Samples/4_CUDA_Libraries/batchCUBLAS/.vscode/launch.json deleted file mode 100644 index bec36710..00000000 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/batchCUBLAS" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/boxFilterNPP/.vscode/launch.json deleted file mode 100644 index 43f70dcd..00000000 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/boxFilterNPP" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/launch.json deleted file mode 100644 index 934330cc..00000000 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cannyEdgeDetectorNPP" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/.vscode/launch.json b/Samples/4_CUDA_Libraries/conjugateGradient/.vscode/launch.json deleted file mode 100644 index 34d7f66a..00000000 --- a/Samples/4_CUDA_Libraries/conjugateGradient/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/conjugateGradient" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/launch.json b/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/launch.json deleted file mode 100644 index 318a22f3..00000000 --- a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/conjugateGradientCudaGraphs" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/launch.json b/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/launch.json deleted file mode 100644 index a040ddcc..00000000 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/conjugateGradientMultiBlockCG" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/launch.json b/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/launch.json deleted file mode 100644 index fbf0c3aa..00000000 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/conjugateGradientMultiDeviceCG" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/launch.json b/Samples/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/launch.json deleted file mode 100644 index ca7fb30f..00000000 --- a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/conjugateGradientPrecond" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/.vscode/launch.json b/Samples/4_CUDA_Libraries/conjugateGradientUM/.vscode/launch.json deleted file mode 100644 index 718489e7..00000000 --- a/Samples/4_CUDA_Libraries/conjugateGradientUM/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/conjugateGradientUM" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/launch.json b/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/launch.json deleted file mode 100644 index 74eecd7f..00000000 --- a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuSolverDn_LinearSolver" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/.vscode/launch.json b/Samples/4_CUDA_Libraries/cuSolverRf/.vscode/launch.json deleted file mode 100644 index 6f265f26..00000000 --- a/Samples/4_CUDA_Libraries/cuSolverRf/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuSolverRf" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/launch.json b/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/launch.json deleted file mode 100644 index 07a7f23f..00000000 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuSolverSp_LinearSolver" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/launch.json b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/launch.json deleted file mode 100644 index c0f600c7..00000000 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuSolverSp_LowlevelCholesky" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/launch.json b/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/launch.json deleted file mode 100644 index 530166e0..00000000 --- a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuSolverSp_LowlevelQR" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/.vscode/launch.json b/Samples/4_CUDA_Libraries/cudaNvSci/.vscode/launch.json deleted file mode 100644 index 19773a1b..00000000 --- a/Samples/4_CUDA_Libraries/cudaNvSci/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cudaNvSci" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/freeImageInteropNPP/.vscode/launch.json deleted file mode 100644 index 34d1f97b..00000000 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/freeImageInteropNPP" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/histEqualizationNPP/.vscode/launch.json deleted file mode 100644 index e1f0ffc6..00000000 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/histEqualizationNPP" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/jitLto/.vscode/launch.json b/Samples/4_CUDA_Libraries/jitLto/.vscode/launch.json deleted file mode 100644 index 4c106115..00000000 --- a/Samples/4_CUDA_Libraries/jitLto/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/jitLto" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/lineOfSight/.vscode/launch.json b/Samples/4_CUDA_Libraries/lineOfSight/.vscode/launch.json deleted file mode 100644 index e70888f4..00000000 --- a/Samples/4_CUDA_Libraries/lineOfSight/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/lineOfSight" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/launch.json b/Samples/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/launch.json deleted file mode 100644 index 86c59d2b..00000000 --- a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/matrixMulCUBLAS" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/nvJPEG/.vscode/launch.json b/Samples/4_CUDA_Libraries/nvJPEG/.vscode/launch.json deleted file mode 100644 index 0970a982..00000000 --- a/Samples/4_CUDA_Libraries/nvJPEG/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/nvJPEG" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/.vscode/launch.json b/Samples/4_CUDA_Libraries/nvJPEG_encoder/.vscode/launch.json deleted file mode 100644 index 35222222..00000000 --- a/Samples/4_CUDA_Libraries/nvJPEG_encoder/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/nvJPEG_encoder" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/oceanFFT/.vscode/launch.json b/Samples/4_CUDA_Libraries/oceanFFT/.vscode/launch.json deleted file mode 100644 index 1f950f7a..00000000 --- a/Samples/4_CUDA_Libraries/oceanFFT/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/oceanFFT" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/randomFog/.vscode/launch.json b/Samples/4_CUDA_Libraries/randomFog/.vscode/launch.json deleted file mode 100644 index 780f0d30..00000000 --- a/Samples/4_CUDA_Libraries/randomFog/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/randomFog" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/.vscode/launch.json b/Samples/4_CUDA_Libraries/simpleCUBLAS/.vscode/launch.json deleted file mode 100644 index b465962f..00000000 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCUBLAS" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/.vscode/launch.json b/Samples/4_CUDA_Libraries/simpleCUBLASXT/.vscode/launch.json deleted file mode 100644 index 5ae18260..00000000 --- a/Samples/4_CUDA_Libraries/simpleCUBLASXT/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCUBLASXT" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/launch.json b/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/launch.json deleted file mode 100644 index 34ba1759..00000000 --- a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCUBLAS_LU" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/.vscode/launch.json b/Samples/4_CUDA_Libraries/simpleCUFFT/.vscode/launch.json deleted file mode 100644 index 13bd62c1..00000000 --- a/Samples/4_CUDA_Libraries/simpleCUFFT/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCUFFT" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/launch.json b/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/launch.json deleted file mode 100644 index dd843fd4..00000000 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCUFFT_2d_MGPU" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/launch.json b/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/launch.json deleted file mode 100644 index 508a51e1..00000000 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCUFFT_MGPU" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/launch.json b/Samples/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/launch.json deleted file mode 100644 index 8ca8b6a0..00000000 --- a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleCUFFT_callback" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/launch.json deleted file mode 100644 index 3032fa0f..00000000 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/watershedSegmentationNPP" - } - ] -} diff --git a/Samples/5_Domain_Specific/BlackScholes/.vscode/launch.json b/Samples/5_Domain_Specific/BlackScholes/.vscode/launch.json deleted file mode 100644 index 668b27d1..00000000 --- a/Samples/5_Domain_Specific/BlackScholes/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/BlackScholes" - } - ] -} diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/.vscode/launch.json b/Samples/5_Domain_Specific/BlackScholes_nvrtc/.vscode/launch.json deleted file mode 100644 index 2cec2b60..00000000 --- a/Samples/5_Domain_Specific/BlackScholes_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/BlackScholes_nvrtc" - } - ] -} diff --git a/Samples/5_Domain_Specific/FDTD3d/.vscode/launch.json b/Samples/5_Domain_Specific/FDTD3d/.vscode/launch.json deleted file mode 100644 index 7c90e9eb..00000000 --- a/Samples/5_Domain_Specific/FDTD3d/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/FDTD3d" - } - ] -} diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/.vscode/launch.json b/Samples/5_Domain_Specific/HSOpticalFlow/.vscode/launch.json deleted file mode 100644 index bd943ed6..00000000 --- a/Samples/5_Domain_Specific/HSOpticalFlow/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/HSOpticalFlow" - } - ] -} diff --git a/Samples/5_Domain_Specific/Mandelbrot/.vscode/launch.json b/Samples/5_Domain_Specific/Mandelbrot/.vscode/launch.json deleted file mode 100644 index 847274ce..00000000 --- a/Samples/5_Domain_Specific/Mandelbrot/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/Mandelbrot" - } - ] -} diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/.vscode/launch.json b/Samples/5_Domain_Specific/MonteCarloMultiGPU/.vscode/launch.json deleted file mode 100644 index f3bd0316..00000000 --- a/Samples/5_Domain_Specific/MonteCarloMultiGPU/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/MonteCarloMultiGPU" - } - ] -} diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/.vscode/launch.json b/Samples/5_Domain_Specific/NV12toBGRandResize/.vscode/launch.json deleted file mode 100644 index dca8c30b..00000000 --- a/Samples/5_Domain_Specific/NV12toBGRandResize/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/NV12toBGRandResize" - } - ] -} diff --git a/Samples/5_Domain_Specific/SobelFilter/.vscode/launch.json b/Samples/5_Domain_Specific/SobelFilter/.vscode/launch.json deleted file mode 100644 index e66123e1..00000000 --- a/Samples/5_Domain_Specific/SobelFilter/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/SobelFilter" - } - ] -} diff --git a/Samples/5_Domain_Specific/SobolQRNG/.vscode/launch.json b/Samples/5_Domain_Specific/SobolQRNG/.vscode/launch.json deleted file mode 100644 index 81474401..00000000 --- a/Samples/5_Domain_Specific/SobolQRNG/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/SobolQRNG" - } - ] -} diff --git a/Samples/5_Domain_Specific/bicubicTexture/.vscode/launch.json b/Samples/5_Domain_Specific/bicubicTexture/.vscode/launch.json deleted file mode 100644 index 9cd49ea3..00000000 --- a/Samples/5_Domain_Specific/bicubicTexture/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/bicubicTexture" - } - ] -} diff --git a/Samples/5_Domain_Specific/bilateralFilter/.vscode/launch.json b/Samples/5_Domain_Specific/bilateralFilter/.vscode/launch.json deleted file mode 100644 index 005565ce..00000000 --- a/Samples/5_Domain_Specific/bilateralFilter/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/bilateralFilter" - } - ] -} diff --git a/Samples/5_Domain_Specific/binomialOptions/.vscode/launch.json b/Samples/5_Domain_Specific/binomialOptions/.vscode/launch.json deleted file mode 100644 index b3f41c41..00000000 --- a/Samples/5_Domain_Specific/binomialOptions/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/binomialOptions" - } - ] -} diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/.vscode/launch.json b/Samples/5_Domain_Specific/binomialOptions_nvrtc/.vscode/launch.json deleted file mode 100644 index 9ff8920f..00000000 --- a/Samples/5_Domain_Specific/binomialOptions_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/binomialOptions_nvrtc" - } - ] -} diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/.vscode/launch.json b/Samples/5_Domain_Specific/convolutionFFT2D/.vscode/launch.json deleted file mode 100644 index 70b752b1..00000000 --- a/Samples/5_Domain_Specific/convolutionFFT2D/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/convolutionFFT2D" - } - ] -} diff --git a/Samples/5_Domain_Specific/dwtHaar1D/.vscode/launch.json b/Samples/5_Domain_Specific/dwtHaar1D/.vscode/launch.json deleted file mode 100644 index 85dadde0..00000000 --- a/Samples/5_Domain_Specific/dwtHaar1D/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/dwtHaar1D" - } - ] -} diff --git a/Samples/5_Domain_Specific/dxtc/.vscode/launch.json b/Samples/5_Domain_Specific/dxtc/.vscode/launch.json deleted file mode 100644 index 12854dd9..00000000 --- a/Samples/5_Domain_Specific/dxtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/dxtc" - } - ] -} diff --git a/Samples/5_Domain_Specific/fastWalshTransform/.vscode/launch.json b/Samples/5_Domain_Specific/fastWalshTransform/.vscode/launch.json deleted file mode 100644 index b1164606..00000000 --- a/Samples/5_Domain_Specific/fastWalshTransform/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/fastWalshTransform" - } - ] -} diff --git a/Samples/5_Domain_Specific/fluidsGL/.vscode/launch.json b/Samples/5_Domain_Specific/fluidsGL/.vscode/launch.json deleted file mode 100644 index 5b5929a1..00000000 --- a/Samples/5_Domain_Specific/fluidsGL/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/fluidsGL" - } - ] -} diff --git a/Samples/5_Domain_Specific/marchingCubes/.vscode/launch.json b/Samples/5_Domain_Specific/marchingCubes/.vscode/launch.json deleted file mode 100644 index a1dcc63c..00000000 --- a/Samples/5_Domain_Specific/marchingCubes/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/marchingCubes" - } - ] -} diff --git a/Samples/5_Domain_Specific/nbody/.vscode/launch.json b/Samples/5_Domain_Specific/nbody/.vscode/launch.json deleted file mode 100644 index 34af8c09..00000000 --- a/Samples/5_Domain_Specific/nbody/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/nbody" - } - ] -} diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/launch.json b/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/launch.json deleted file mode 100644 index db93963a..00000000 --- a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/p2pBandwidthLatencyTest" - } - ] -} diff --git a/Samples/5_Domain_Specific/postProcessGL/.vscode/launch.json b/Samples/5_Domain_Specific/postProcessGL/.vscode/launch.json deleted file mode 100644 index b4fae8b2..00000000 --- a/Samples/5_Domain_Specific/postProcessGL/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/postProcessGL" - } - ] -} diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/.vscode/launch.json b/Samples/5_Domain_Specific/quasirandomGenerator/.vscode/launch.json deleted file mode 100644 index c28abb78..00000000 --- a/Samples/5_Domain_Specific/quasirandomGenerator/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/quasirandomGenerator" - } - ] -} diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/launch.json b/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/launch.json deleted file mode 100644 index e1b3c4b2..00000000 --- a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/quasirandomGenerator_nvrtc" - } - ] -} diff --git a/Samples/5_Domain_Specific/recursiveGaussian/.vscode/launch.json b/Samples/5_Domain_Specific/recursiveGaussian/.vscode/launch.json deleted file mode 100644 index 831df4dc..00000000 --- a/Samples/5_Domain_Specific/recursiveGaussian/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/recursiveGaussian" - } - ] -} diff --git a/Samples/5_Domain_Specific/simpleGL/.vscode/launch.json b/Samples/5_Domain_Specific/simpleGL/.vscode/launch.json deleted file mode 100644 index ab9f31f3..00000000 --- a/Samples/5_Domain_Specific/simpleGL/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleGL" - } - ] -} diff --git a/Samples/5_Domain_Specific/simpleVulkan/.vscode/launch.json b/Samples/5_Domain_Specific/simpleVulkan/.vscode/launch.json deleted file mode 100644 index c04b5a40..00000000 --- a/Samples/5_Domain_Specific/simpleVulkan/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleVulkan" - } - ] -} diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/.vscode/launch.json b/Samples/5_Domain_Specific/simpleVulkanMMAP/.vscode/launch.json deleted file mode 100644 index 65cd3004..00000000 --- a/Samples/5_Domain_Specific/simpleVulkanMMAP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleVulkanMMAP" - } - ] -} diff --git a/Samples/5_Domain_Specific/smokeParticles/.vscode/launch.json b/Samples/5_Domain_Specific/smokeParticles/.vscode/launch.json deleted file mode 100644 index 2c9df4d9..00000000 --- a/Samples/5_Domain_Specific/smokeParticles/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/smokeParticles" - } - ] -} diff --git a/Samples/5_Domain_Specific/stereoDisparity/.vscode/launch.json b/Samples/5_Domain_Specific/stereoDisparity/.vscode/launch.json deleted file mode 100644 index f233a4da..00000000 --- a/Samples/5_Domain_Specific/stereoDisparity/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/stereoDisparity" - } - ] -} diff --git a/Samples/5_Domain_Specific/volumeFiltering/.vscode/launch.json b/Samples/5_Domain_Specific/volumeFiltering/.vscode/launch.json deleted file mode 100644 index f330529d..00000000 --- a/Samples/5_Domain_Specific/volumeFiltering/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/volumeFiltering" - } - ] -} diff --git a/Samples/5_Domain_Specific/volumeRender/.vscode/launch.json b/Samples/5_Domain_Specific/volumeRender/.vscode/launch.json deleted file mode 100644 index 021c985d..00000000 --- a/Samples/5_Domain_Specific/volumeRender/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/volumeRender" - } - ] -} diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/.vscode/launch.json b/Samples/5_Domain_Specific/vulkanImageCUDA/.vscode/launch.json deleted file mode 100644 index 33d6476b..00000000 --- a/Samples/5_Domain_Specific/vulkanImageCUDA/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/vulkanImageCUDA" - } - ] -} diff --git a/Samples/6_Performance/LargeKernelParameter/.vscode/launch.json b/Samples/6_Performance/LargeKernelParameter/.vscode/launch.json deleted file mode 100644 index aa5fdb50..00000000 --- a/Samples/6_Performance/LargeKernelParameter/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/LargeKernelParameter" - } - ] -} diff --git a/Samples/6_Performance/UnifiedMemoryPerf/.vscode/launch.json b/Samples/6_Performance/UnifiedMemoryPerf/.vscode/launch.json deleted file mode 100644 index d4c47b27..00000000 --- a/Samples/6_Performance/UnifiedMemoryPerf/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/UnifiedMemoryPerf" - } - ] -} diff --git a/Samples/6_Performance/alignedTypes/.vscode/launch.json b/Samples/6_Performance/alignedTypes/.vscode/launch.json deleted file mode 100644 index aaace890..00000000 --- a/Samples/6_Performance/alignedTypes/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/alignedTypes" - } - ] -} diff --git a/Samples/6_Performance/cudaGraphsPerfScaling/.vscode/launch.json b/Samples/6_Performance/cudaGraphsPerfScaling/.vscode/launch.json deleted file mode 100644 index c7dfe643..00000000 --- a/Samples/6_Performance/cudaGraphsPerfScaling/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cudaGraphsPerfScaling" - } - ] -} diff --git a/Samples/6_Performance/transpose/.vscode/launch.json b/Samples/6_Performance/transpose/.vscode/launch.json deleted file mode 100644 index afa465d6..00000000 --- a/Samples/6_Performance/transpose/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/transpose" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/launch.json deleted file mode 100644 index a451a843..00000000 --- a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/EGLSync_CUDAEvent_Interop" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/launch.json deleted file mode 100644 index 9b034145..00000000 --- a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuDLAErrorReporting" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/launch.json deleted file mode 100644 index 49e50944..00000000 --- a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuDLAHybridMode" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/launch.json deleted file mode 100644 index 65a931b5..00000000 --- a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuDLALayerwiseStatsHybrid" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/launch.json deleted file mode 100644 index a0aa79e8..00000000 --- a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuDLALayerwiseStatsStandalone" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/launch.json deleted file mode 100644 index 227c3d54..00000000 --- a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cuDLAStandaloneMode" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json deleted file mode 100644 index df6af9c7..00000000 --- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cudaNvSciBufMultiplanar" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/launch.json deleted file mode 100644 index 6c84ba13..00000000 --- a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/cudaNvSciNvMedia" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/fluidsGLES/.vscode/launch.json deleted file mode 100644 index 1a29c27d..00000000 --- a/Samples/8_Platform_Specific/Tegra/fluidsGLES/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/fluidsGLES" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/nbody_opengles/.vscode/launch.json deleted file mode 100644 index db2777da..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_opengles/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/nbody_opengles" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/simpleGLES/.vscode/launch.json deleted file mode 100644 index 7952a060..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleGLES" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/launch.json deleted file mode 100644 index f0a1bbe3..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleGLES_EGLOutput" - } - ] -} diff --git a/cmake/toolchains/toolchain-aarch64-linux.cmake b/cmake/toolchains/toolchain-aarch64-linux.cmake index ffe08512..35602c17 100644 --- a/cmake/toolchains/toolchain-aarch64-linux.cmake +++ b/cmake/toolchains/toolchain-aarch64-linux.cmake @@ -50,8 +50,11 @@ if(DEFINED TARGET_FS) NO_CMAKE_PATH ) if(TARGET_CUDA_NVCC_PATH) - # Get the real path of CUDA installation on TARGET_FS + # Get the real path of CUDA installation on TARGET_FS. + # Relative symlinks resolve with the host mount prefix (TARGET_FS) included; + # strip it so the resulting path is valid on the target device at runtime. get_filename_component(TARGET_CUDA_PATH "${TARGET_CUDA_NVCC_PATH}" REALPATH) + string(REPLACE "${TARGET_FS}" "" TARGET_CUDA_PATH "${TARGET_CUDA_PATH}") get_filename_component(TARGET_CUDA_ROOT "${TARGET_CUDA_PATH}" DIRECTORY) get_filename_component(TARGET_CUDA_ROOT "${TARGET_CUDA_ROOT}" DIRECTORY) endif() diff --git a/Samples/0_Introduction/CMakeLists.txt b/cpp/0_Introduction/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/CMakeLists.txt rename to cpp/0_Introduction/CMakeLists.txt diff --git a/Samples/0_Introduction/README.md b/cpp/0_Introduction/README.md similarity index 100% rename from Samples/0_Introduction/README.md rename to cpp/0_Introduction/README.md diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/.vscode/c_cpp_properties.json b/cpp/0_Introduction/UnifiedMemoryStreams/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/UnifiedMemoryStreams/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/UnifiedMemoryStreams/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/.vscode/extensions.json b/cpp/0_Introduction/UnifiedMemoryStreams/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/UnifiedMemoryStreams/.vscode/extensions.json rename to cpp/0_Introduction/UnifiedMemoryStreams/.vscode/extensions.json diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt b/cpp/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt similarity index 82% rename from Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt rename to cpp/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt index b86e9cb1..2fd880d0 100644 --- a/Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt +++ b/cpp/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt @@ -25,14 +25,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL "QNX") return() endif() -# Source file -if(CMAKE_GENERATOR MATCHES "Visual Studio") - find_package(OpenMP REQUIRED C CXX) -else() - find_package(OpenMP REQUIRED) -endif() +# FindOpenMP: request COMPONENTS CXX only. From CMake 3.31 on, a bare find_package(OpenMP) also +# probes OpenMP for CUDA when the project enables CUDA; that check can fail even when C++ OpenMP +# works, and this sample only needs the C++ OpenMP package. Link OpenMP::OpenMP_CXX for libs/headers. +find_package(OpenMP COMPONENTS CXX) -if(${OpenMP_FOUND}) +if(OpenMP_CXX_FOUND) # Add target for UnifiedMemoryStreams add_executable(UnifiedMemoryStreams UnifiedMemoryStreams.cu) diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/README.md b/cpp/0_Introduction/UnifiedMemoryStreams/README.md similarity index 100% rename from Samples/0_Introduction/UnifiedMemoryStreams/README.md rename to cpp/0_Introduction/UnifiedMemoryStreams/README.md diff --git a/Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu b/cpp/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu similarity index 100% rename from Samples/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu rename to cpp/0_Introduction/UnifiedMemoryStreams/UnifiedMemoryStreams.cu diff --git a/Samples/0_Introduction/asyncAPI/.vscode/c_cpp_properties.json b/cpp/0_Introduction/asyncAPI/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/asyncAPI/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/asyncAPI/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/asyncAPI/.vscode/extensions.json b/cpp/0_Introduction/asyncAPI/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/asyncAPI/.vscode/extensions.json rename to cpp/0_Introduction/asyncAPI/.vscode/extensions.json diff --git a/Samples/0_Introduction/asyncAPI/CMakeLists.txt b/cpp/0_Introduction/asyncAPI/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/asyncAPI/CMakeLists.txt rename to cpp/0_Introduction/asyncAPI/CMakeLists.txt diff --git a/Samples/0_Introduction/asyncAPI/README.md b/cpp/0_Introduction/asyncAPI/README.md similarity index 100% rename from Samples/0_Introduction/asyncAPI/README.md rename to cpp/0_Introduction/asyncAPI/README.md diff --git a/Samples/0_Introduction/asyncAPI/asyncAPI.cu b/cpp/0_Introduction/asyncAPI/asyncAPI.cu similarity index 100% rename from Samples/0_Introduction/asyncAPI/asyncAPI.cu rename to cpp/0_Introduction/asyncAPI/asyncAPI.cu diff --git a/Samples/0_Introduction/clock/.vscode/c_cpp_properties.json b/cpp/0_Introduction/clock/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/clock/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/clock/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/clock/.vscode/extensions.json b/cpp/0_Introduction/clock/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/clock/.vscode/extensions.json rename to cpp/0_Introduction/clock/.vscode/extensions.json diff --git a/Samples/0_Introduction/clock/CMakeLists.txt b/cpp/0_Introduction/clock/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/clock/CMakeLists.txt rename to cpp/0_Introduction/clock/CMakeLists.txt diff --git a/Samples/0_Introduction/clock/README.md b/cpp/0_Introduction/clock/README.md similarity index 100% rename from Samples/0_Introduction/clock/README.md rename to cpp/0_Introduction/clock/README.md diff --git a/Samples/0_Introduction/clock/clock.cu b/cpp/0_Introduction/clock/clock.cu similarity index 100% rename from Samples/0_Introduction/clock/clock.cu rename to cpp/0_Introduction/clock/clock.cu diff --git a/Samples/0_Introduction/clock_nvrtc/.vscode/c_cpp_properties.json b/cpp/0_Introduction/clock_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/clock_nvrtc/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/clock_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/clock_nvrtc/.vscode/extensions.json b/cpp/0_Introduction/clock_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/clock_nvrtc/.vscode/extensions.json rename to cpp/0_Introduction/clock_nvrtc/.vscode/extensions.json diff --git a/Samples/0_Introduction/clock_nvrtc/CMakeLists.txt b/cpp/0_Introduction/clock_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/clock_nvrtc/CMakeLists.txt rename to cpp/0_Introduction/clock_nvrtc/CMakeLists.txt diff --git a/Samples/0_Introduction/clock_nvrtc/README.md b/cpp/0_Introduction/clock_nvrtc/README.md similarity index 100% rename from Samples/0_Introduction/clock_nvrtc/README.md rename to cpp/0_Introduction/clock_nvrtc/README.md diff --git a/Samples/0_Introduction/clock_nvrtc/clock.cpp b/cpp/0_Introduction/clock_nvrtc/clock.cpp similarity index 100% rename from Samples/0_Introduction/clock_nvrtc/clock.cpp rename to cpp/0_Introduction/clock_nvrtc/clock.cpp diff --git a/Samples/0_Introduction/clock_nvrtc/clock_kernel.cu b/cpp/0_Introduction/clock_nvrtc/clock_kernel.cu similarity index 100% rename from Samples/0_Introduction/clock_nvrtc/clock_kernel.cu rename to cpp/0_Introduction/clock_nvrtc/clock_kernel.cu diff --git a/Samples/0_Introduction/cudaOpenMP/.vscode/c_cpp_properties.json b/cpp/0_Introduction/cudaOpenMP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/cudaOpenMP/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/cudaOpenMP/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/cudaOpenMP/.vscode/extensions.json b/cpp/0_Introduction/cudaOpenMP/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/cudaOpenMP/.vscode/extensions.json rename to cpp/0_Introduction/cudaOpenMP/.vscode/extensions.json diff --git a/Samples/0_Introduction/cudaOpenMP/CMakeLists.txt b/cpp/0_Introduction/cudaOpenMP/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/cudaOpenMP/CMakeLists.txt rename to cpp/0_Introduction/cudaOpenMP/CMakeLists.txt diff --git a/Samples/0_Introduction/cudaOpenMP/README.md b/cpp/0_Introduction/cudaOpenMP/README.md similarity index 100% rename from Samples/0_Introduction/cudaOpenMP/README.md rename to cpp/0_Introduction/cudaOpenMP/README.md diff --git a/Samples/0_Introduction/cudaOpenMP/cudaOpenMP.cu b/cpp/0_Introduction/cudaOpenMP/cudaOpenMP.cu similarity index 100% rename from Samples/0_Introduction/cudaOpenMP/cudaOpenMP.cu rename to cpp/0_Introduction/cudaOpenMP/cudaOpenMP.cu diff --git a/Samples/0_Introduction/fp16ScalarProduct/.vscode/c_cpp_properties.json b/cpp/0_Introduction/fp16ScalarProduct/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/fp16ScalarProduct/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/fp16ScalarProduct/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/fp16ScalarProduct/.vscode/extensions.json b/cpp/0_Introduction/fp16ScalarProduct/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/fp16ScalarProduct/.vscode/extensions.json rename to cpp/0_Introduction/fp16ScalarProduct/.vscode/extensions.json diff --git a/Samples/0_Introduction/fp16ScalarProduct/CMakeLists.txt b/cpp/0_Introduction/fp16ScalarProduct/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/fp16ScalarProduct/CMakeLists.txt rename to cpp/0_Introduction/fp16ScalarProduct/CMakeLists.txt diff --git a/Samples/0_Introduction/fp16ScalarProduct/README.md b/cpp/0_Introduction/fp16ScalarProduct/README.md similarity index 100% rename from Samples/0_Introduction/fp16ScalarProduct/README.md rename to cpp/0_Introduction/fp16ScalarProduct/README.md diff --git a/Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu b/cpp/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu similarity index 100% rename from Samples/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu rename to cpp/0_Introduction/fp16ScalarProduct/fp16ScalarProduct.cu diff --git a/Samples/0_Introduction/matrixMul/.vscode/c_cpp_properties.json b/cpp/0_Introduction/matrixMul/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/matrixMul/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/matrixMul/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/matrixMul/.vscode/extensions.json b/cpp/0_Introduction/matrixMul/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/matrixMul/.vscode/extensions.json rename to cpp/0_Introduction/matrixMul/.vscode/extensions.json diff --git a/Samples/0_Introduction/matrixMul/CMakeLists.txt b/cpp/0_Introduction/matrixMul/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/matrixMul/CMakeLists.txt rename to cpp/0_Introduction/matrixMul/CMakeLists.txt diff --git a/Samples/0_Introduction/matrixMul/README.md b/cpp/0_Introduction/matrixMul/README.md similarity index 100% rename from Samples/0_Introduction/matrixMul/README.md rename to cpp/0_Introduction/matrixMul/README.md diff --git a/Samples/0_Introduction/matrixMul/matrixMul.cu b/cpp/0_Introduction/matrixMul/matrixMul.cu similarity index 100% rename from Samples/0_Introduction/matrixMul/matrixMul.cu rename to cpp/0_Introduction/matrixMul/matrixMul.cu diff --git a/Samples/0_Introduction/matrixMulDrv/.vscode/c_cpp_properties.json b/cpp/0_Introduction/matrixMulDrv/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/matrixMulDrv/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/matrixMulDrv/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/matrixMulDrv/.vscode/extensions.json b/cpp/0_Introduction/matrixMulDrv/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/matrixMulDrv/.vscode/extensions.json rename to cpp/0_Introduction/matrixMulDrv/.vscode/extensions.json diff --git a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt b/cpp/0_Introduction/matrixMulDrv/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/matrixMulDrv/CMakeLists.txt rename to cpp/0_Introduction/matrixMulDrv/CMakeLists.txt diff --git a/Samples/0_Introduction/matrixMulDrv/README.md b/cpp/0_Introduction/matrixMulDrv/README.md similarity index 100% rename from Samples/0_Introduction/matrixMulDrv/README.md rename to cpp/0_Introduction/matrixMulDrv/README.md diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMul.h b/cpp/0_Introduction/matrixMulDrv/matrixMul.h similarity index 100% rename from Samples/0_Introduction/matrixMulDrv/matrixMul.h rename to cpp/0_Introduction/matrixMulDrv/matrixMul.h diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMulDrv.cpp b/cpp/0_Introduction/matrixMulDrv/matrixMulDrv.cpp similarity index 100% rename from Samples/0_Introduction/matrixMulDrv/matrixMulDrv.cpp rename to cpp/0_Introduction/matrixMulDrv/matrixMulDrv.cpp diff --git a/Samples/0_Introduction/matrixMulDrv/matrixMul_kernel.cu b/cpp/0_Introduction/matrixMulDrv/matrixMul_kernel.cu similarity index 100% rename from Samples/0_Introduction/matrixMulDrv/matrixMul_kernel.cu rename to cpp/0_Introduction/matrixMulDrv/matrixMul_kernel.cu diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/.vscode/c_cpp_properties.json b/cpp/0_Introduction/matrixMulDynlinkJIT/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/matrixMulDynlinkJIT/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/.vscode/extensions.json b/cpp/0_Introduction/matrixMulDynlinkJIT/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/.vscode/extensions.json rename to cpp/0_Introduction/matrixMulDynlinkJIT/.vscode/extensions.json diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt b/cpp/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt rename to cpp/0_Introduction/matrixMulDynlinkJIT/CMakeLists.txt diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/README.md b/cpp/0_Introduction/matrixMulDynlinkJIT/README.md similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/README.md rename to cpp/0_Introduction/matrixMulDynlinkJIT/README.md diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c b/cpp/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c rename to cpp/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.c diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.h b/cpp/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.h similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.h rename to cpp/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink.h diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink_cuda.h b/cpp/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink_cuda.h similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink_cuda.h rename to cpp/0_Introduction/matrixMulDynlinkJIT/cuda_drvapi_dynlink_cuda.h diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/extras/README.TXT b/cpp/0_Introduction/matrixMulDynlinkJIT/extras/README.TXT similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/extras/README.TXT rename to cpp/0_Introduction/matrixMulDynlinkJIT/extras/README.TXT diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_32.ptx b/cpp/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_32.ptx similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_32.ptx rename to cpp/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_32.ptx diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_64.ptx b/cpp/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_64.ptx similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_64.ptx rename to cpp/0_Introduction/matrixMulDynlinkJIT/extras/matrixMul_kernel_64.ptx diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/extras/ptx2c.py b/cpp/0_Introduction/matrixMulDynlinkJIT/extras/ptx2c.py similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/extras/ptx2c.py rename to cpp/0_Introduction/matrixMulDynlinkJIT/extras/ptx2c.py diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h b/cpp/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h rename to cpp/0_Introduction/matrixMulDynlinkJIT/helper_cuda_drvapi.h diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul.h b/cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul.h similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul.h rename to cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul.h diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT.cpp b/cpp/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT.cpp similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT.cpp rename to cpp/0_Introduction/matrixMulDynlinkJIT/matrixMulDynlinkJIT.cpp diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_gold.cpp b/cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_gold.cpp similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_gold.cpp rename to cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_gold.cpp diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.c b/cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.c similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.c rename to cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.c diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.h b/cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.h similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.h rename to cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_32_ptxdump.h diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.c b/cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.c similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.c rename to cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.c diff --git a/Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.h b/cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.h similarity index 100% rename from Samples/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.h rename to cpp/0_Introduction/matrixMulDynlinkJIT/matrixMul_kernel_64_ptxdump.h diff --git a/Samples/0_Introduction/matrixMul_nvrtc/.vscode/c_cpp_properties.json b/cpp/0_Introduction/matrixMul_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/matrixMul_nvrtc/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/matrixMul_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/matrixMul_nvrtc/.vscode/extensions.json b/cpp/0_Introduction/matrixMul_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/matrixMul_nvrtc/.vscode/extensions.json rename to cpp/0_Introduction/matrixMul_nvrtc/.vscode/extensions.json diff --git a/Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt b/cpp/0_Introduction/matrixMul_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/matrixMul_nvrtc/CMakeLists.txt rename to cpp/0_Introduction/matrixMul_nvrtc/CMakeLists.txt diff --git a/Samples/0_Introduction/matrixMul_nvrtc/README.md b/cpp/0_Introduction/matrixMul_nvrtc/README.md similarity index 100% rename from Samples/0_Introduction/matrixMul_nvrtc/README.md rename to cpp/0_Introduction/matrixMul_nvrtc/README.md diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul.cpp b/cpp/0_Introduction/matrixMul_nvrtc/matrixMul.cpp similarity index 100% rename from Samples/0_Introduction/matrixMul_nvrtc/matrixMul.cpp rename to cpp/0_Introduction/matrixMul_nvrtc/matrixMul.cpp diff --git a/Samples/0_Introduction/matrixMul_nvrtc/matrixMul_kernel.cu b/cpp/0_Introduction/matrixMul_nvrtc/matrixMul_kernel.cu similarity index 100% rename from Samples/0_Introduction/matrixMul_nvrtc/matrixMul_kernel.cu rename to cpp/0_Introduction/matrixMul_nvrtc/matrixMul_kernel.cu diff --git a/Samples/0_Introduction/mergeSort/.vscode/c_cpp_properties.json b/cpp/0_Introduction/mergeSort/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/mergeSort/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/mergeSort/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/mergeSort/.vscode/extensions.json b/cpp/0_Introduction/mergeSort/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/mergeSort/.vscode/extensions.json rename to cpp/0_Introduction/mergeSort/.vscode/extensions.json diff --git a/Samples/0_Introduction/mergeSort/CMakeLists.txt b/cpp/0_Introduction/mergeSort/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/mergeSort/CMakeLists.txt rename to cpp/0_Introduction/mergeSort/CMakeLists.txt diff --git a/Samples/0_Introduction/mergeSort/README.md b/cpp/0_Introduction/mergeSort/README.md similarity index 100% rename from Samples/0_Introduction/mergeSort/README.md rename to cpp/0_Introduction/mergeSort/README.md diff --git a/Samples/0_Introduction/mergeSort/bitonic.cu b/cpp/0_Introduction/mergeSort/bitonic.cu similarity index 100% rename from Samples/0_Introduction/mergeSort/bitonic.cu rename to cpp/0_Introduction/mergeSort/bitonic.cu diff --git a/Samples/0_Introduction/mergeSort/main.cpp b/cpp/0_Introduction/mergeSort/main.cpp similarity index 100% rename from Samples/0_Introduction/mergeSort/main.cpp rename to cpp/0_Introduction/mergeSort/main.cpp diff --git a/Samples/0_Introduction/mergeSort/mergeSort.cu b/cpp/0_Introduction/mergeSort/mergeSort.cu similarity index 100% rename from Samples/0_Introduction/mergeSort/mergeSort.cu rename to cpp/0_Introduction/mergeSort/mergeSort.cu diff --git a/Samples/0_Introduction/mergeSort/mergeSort_common.h b/cpp/0_Introduction/mergeSort/mergeSort_common.h similarity index 100% rename from Samples/0_Introduction/mergeSort/mergeSort_common.h rename to cpp/0_Introduction/mergeSort/mergeSort_common.h diff --git a/Samples/0_Introduction/mergeSort/mergeSort_host.cpp b/cpp/0_Introduction/mergeSort/mergeSort_host.cpp similarity index 100% rename from Samples/0_Introduction/mergeSort/mergeSort_host.cpp rename to cpp/0_Introduction/mergeSort/mergeSort_host.cpp diff --git a/Samples/0_Introduction/mergeSort/mergeSort_validate.cpp b/cpp/0_Introduction/mergeSort/mergeSort_validate.cpp similarity index 100% rename from Samples/0_Introduction/mergeSort/mergeSort_validate.cpp rename to cpp/0_Introduction/mergeSort/mergeSort_validate.cpp diff --git a/Samples/0_Introduction/simpleAWBarrier/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleAWBarrier/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleAWBarrier/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleAWBarrier/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleAWBarrier/.vscode/extensions.json b/cpp/0_Introduction/simpleAWBarrier/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleAWBarrier/.vscode/extensions.json rename to cpp/0_Introduction/simpleAWBarrier/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt b/cpp/0_Introduction/simpleAWBarrier/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleAWBarrier/CMakeLists.txt rename to cpp/0_Introduction/simpleAWBarrier/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleAWBarrier/README.md b/cpp/0_Introduction/simpleAWBarrier/README.md similarity index 100% rename from Samples/0_Introduction/simpleAWBarrier/README.md rename to cpp/0_Introduction/simpleAWBarrier/README.md diff --git a/Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu b/cpp/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu similarity index 100% rename from Samples/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu rename to cpp/0_Introduction/simpleAWBarrier/simpleAWBarrier.cu diff --git a/Samples/0_Introduction/simpleAssert/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleAssert/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleAssert/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleAssert/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleAssert/.vscode/extensions.json b/cpp/0_Introduction/simpleAssert/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleAssert/.vscode/extensions.json rename to cpp/0_Introduction/simpleAssert/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleAssert/CMakeLists.txt b/cpp/0_Introduction/simpleAssert/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleAssert/CMakeLists.txt rename to cpp/0_Introduction/simpleAssert/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleAssert/README.md b/cpp/0_Introduction/simpleAssert/README.md similarity index 100% rename from Samples/0_Introduction/simpleAssert/README.md rename to cpp/0_Introduction/simpleAssert/README.md diff --git a/Samples/0_Introduction/simpleAssert/simpleAssert.cu b/cpp/0_Introduction/simpleAssert/simpleAssert.cu similarity index 100% rename from Samples/0_Introduction/simpleAssert/simpleAssert.cu rename to cpp/0_Introduction/simpleAssert/simpleAssert.cu diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleAssert_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleAssert_nvrtc/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleAssert_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/.vscode/extensions.json b/cpp/0_Introduction/simpleAssert_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleAssert_nvrtc/.vscode/extensions.json rename to cpp/0_Introduction/simpleAssert_nvrtc/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt b/cpp/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt rename to cpp/0_Introduction/simpleAssert_nvrtc/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/README.md b/cpp/0_Introduction/simpleAssert_nvrtc/README.md similarity index 100% rename from Samples/0_Introduction/simpleAssert_nvrtc/README.md rename to cpp/0_Introduction/simpleAssert_nvrtc/README.md diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert.cpp b/cpp/0_Introduction/simpleAssert_nvrtc/simpleAssert.cpp similarity index 100% rename from Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert.cpp rename to cpp/0_Introduction/simpleAssert_nvrtc/simpleAssert.cpp diff --git a/Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_kernel.cu b/cpp/0_Introduction/simpleAssert_nvrtc/simpleAssert_kernel.cu similarity index 100% rename from Samples/0_Introduction/simpleAssert_nvrtc/simpleAssert_kernel.cu rename to cpp/0_Introduction/simpleAssert_nvrtc/simpleAssert_kernel.cu diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleAtomicIntrinsics/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleAtomicIntrinsics/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/.vscode/extensions.json b/cpp/0_Introduction/simpleAtomicIntrinsics/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics/.vscode/extensions.json rename to cpp/0_Introduction/simpleAtomicIntrinsics/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt b/cpp/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt rename to cpp/0_Introduction/simpleAtomicIntrinsics/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/README.md b/cpp/0_Introduction/simpleAtomicIntrinsics/README.md similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics/README.md rename to cpp/0_Introduction/simpleAtomicIntrinsics/README.md diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics.cu b/cpp/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics.cu similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics.cu rename to cpp/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics.cu diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_cpu.cpp b/cpp/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_cpu.cpp similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_cpu.cpp rename to cpp/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_cpu.cpp diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_kernel.cuh b/cpp/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_kernel.cuh similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_kernel.cuh rename to cpp/0_Introduction/simpleAtomicIntrinsics/simpleAtomicIntrinsics_kernel.cuh diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/extensions.json b/cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/extensions.json rename to cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt b/cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt rename to cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md b/cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md rename to cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/README.md diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics.cpp b/cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics.cpp similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics.cpp rename to cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics.cpp diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_cpu.cpp b/cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_cpu.cpp similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_cpu.cpp rename to cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_cpu.cpp diff --git a/Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_kernel.cuh b/cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_kernel.cuh similarity index 100% rename from Samples/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_kernel.cuh rename to cpp/0_Introduction/simpleAtomicIntrinsics_nvrtc/simpleAtomicIntrinsics_kernel.cuh diff --git a/Samples/0_Introduction/simpleAttributes/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleAttributes/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleAttributes/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleAttributes/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleAttributes/.vscode/extensions.json b/cpp/0_Introduction/simpleAttributes/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleAttributes/.vscode/extensions.json rename to cpp/0_Introduction/simpleAttributes/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleAttributes/CMakeLists.txt b/cpp/0_Introduction/simpleAttributes/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleAttributes/CMakeLists.txt rename to cpp/0_Introduction/simpleAttributes/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleAttributes/README.md b/cpp/0_Introduction/simpleAttributes/README.md similarity index 100% rename from Samples/0_Introduction/simpleAttributes/README.md rename to cpp/0_Introduction/simpleAttributes/README.md diff --git a/Samples/0_Introduction/simpleAttributes/simpleAttributes.cu b/cpp/0_Introduction/simpleAttributes/simpleAttributes.cu similarity index 100% rename from Samples/0_Introduction/simpleAttributes/simpleAttributes.cu rename to cpp/0_Introduction/simpleAttributes/simpleAttributes.cu diff --git a/Samples/0_Introduction/simpleCUDA2GL/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleCUDA2GL/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleCUDA2GL/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleCUDA2GL/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleCUDA2GL/.vscode/extensions.json b/cpp/0_Introduction/simpleCUDA2GL/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleCUDA2GL/.vscode/extensions.json rename to cpp/0_Introduction/simpleCUDA2GL/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleCUDA2GL/CMakeLists.txt b/cpp/0_Introduction/simpleCUDA2GL/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleCUDA2GL/CMakeLists.txt rename to cpp/0_Introduction/simpleCUDA2GL/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleCUDA2GL/README.md b/cpp/0_Introduction/simpleCUDA2GL/README.md similarity index 100% rename from Samples/0_Introduction/simpleCUDA2GL/README.md rename to cpp/0_Introduction/simpleCUDA2GL/README.md diff --git a/Samples/0_Introduction/simpleCUDA2GL/data/ref_simpleCUDA2GL.ppm b/cpp/0_Introduction/simpleCUDA2GL/data/ref_simpleCUDA2GL.ppm similarity index 100% rename from Samples/0_Introduction/simpleCUDA2GL/data/ref_simpleCUDA2GL.ppm rename to cpp/0_Introduction/simpleCUDA2GL/data/ref_simpleCUDA2GL.ppm diff --git a/Samples/0_Introduction/simpleCUDA2GL/main.cpp b/cpp/0_Introduction/simpleCUDA2GL/main.cpp similarity index 100% rename from Samples/0_Introduction/simpleCUDA2GL/main.cpp rename to cpp/0_Introduction/simpleCUDA2GL/main.cpp diff --git a/Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL.cu b/cpp/0_Introduction/simpleCUDA2GL/simpleCUDA2GL.cu similarity index 100% rename from Samples/0_Introduction/simpleCUDA2GL/simpleCUDA2GL.cu rename to cpp/0_Introduction/simpleCUDA2GL/simpleCUDA2GL.cu diff --git a/Samples/0_Introduction/simpleCallback/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleCallback/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleCallback/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleCallback/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleCallback/.vscode/extensions.json b/cpp/0_Introduction/simpleCallback/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleCallback/.vscode/extensions.json rename to cpp/0_Introduction/simpleCallback/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleCallback/CMakeLists.txt b/cpp/0_Introduction/simpleCallback/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleCallback/CMakeLists.txt rename to cpp/0_Introduction/simpleCallback/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleCallback/README.md b/cpp/0_Introduction/simpleCallback/README.md similarity index 100% rename from Samples/0_Introduction/simpleCallback/README.md rename to cpp/0_Introduction/simpleCallback/README.md diff --git a/Samples/0_Introduction/simpleCallback/multithreading.cpp b/cpp/0_Introduction/simpleCallback/multithreading.cpp similarity index 100% rename from Samples/0_Introduction/simpleCallback/multithreading.cpp rename to cpp/0_Introduction/simpleCallback/multithreading.cpp diff --git a/Samples/0_Introduction/simpleCallback/multithreading.h b/cpp/0_Introduction/simpleCallback/multithreading.h similarity index 100% rename from Samples/0_Introduction/simpleCallback/multithreading.h rename to cpp/0_Introduction/simpleCallback/multithreading.h diff --git a/Samples/0_Introduction/simpleCallback/simpleCallback.cu b/cpp/0_Introduction/simpleCallback/simpleCallback.cu similarity index 100% rename from Samples/0_Introduction/simpleCallback/simpleCallback.cu rename to cpp/0_Introduction/simpleCallback/simpleCallback.cu diff --git a/Samples/0_Introduction/simpleCooperativeGroups/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleCooperativeGroups/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleCooperativeGroups/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleCooperativeGroups/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleCooperativeGroups/.vscode/extensions.json b/cpp/0_Introduction/simpleCooperativeGroups/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleCooperativeGroups/.vscode/extensions.json rename to cpp/0_Introduction/simpleCooperativeGroups/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleCooperativeGroups/CMakeLists.txt b/cpp/0_Introduction/simpleCooperativeGroups/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleCooperativeGroups/CMakeLists.txt rename to cpp/0_Introduction/simpleCooperativeGroups/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleCooperativeGroups/README.md b/cpp/0_Introduction/simpleCooperativeGroups/README.md similarity index 100% rename from Samples/0_Introduction/simpleCooperativeGroups/README.md rename to cpp/0_Introduction/simpleCooperativeGroups/README.md diff --git a/Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups.cu b/cpp/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups.cu similarity index 100% rename from Samples/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups.cu rename to cpp/0_Introduction/simpleCooperativeGroups/simpleCooperativeGroups.cu diff --git a/Samples/0_Introduction/simpleCubemapTexture/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleCubemapTexture/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleCubemapTexture/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleCubemapTexture/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleCubemapTexture/.vscode/extensions.json b/cpp/0_Introduction/simpleCubemapTexture/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleCubemapTexture/.vscode/extensions.json rename to cpp/0_Introduction/simpleCubemapTexture/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleCubemapTexture/CMakeLists.txt b/cpp/0_Introduction/simpleCubemapTexture/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleCubemapTexture/CMakeLists.txt rename to cpp/0_Introduction/simpleCubemapTexture/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleCubemapTexture/README.md b/cpp/0_Introduction/simpleCubemapTexture/README.md similarity index 100% rename from Samples/0_Introduction/simpleCubemapTexture/README.md rename to cpp/0_Introduction/simpleCubemapTexture/README.md diff --git a/Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture.cu b/cpp/0_Introduction/simpleCubemapTexture/simpleCubemapTexture.cu similarity index 100% rename from Samples/0_Introduction/simpleCubemapTexture/simpleCubemapTexture.cu rename to cpp/0_Introduction/simpleCubemapTexture/simpleCubemapTexture.cu diff --git a/Samples/0_Introduction/simpleDrvRuntime/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleDrvRuntime/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleDrvRuntime/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleDrvRuntime/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleDrvRuntime/.vscode/extensions.json b/cpp/0_Introduction/simpleDrvRuntime/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleDrvRuntime/.vscode/extensions.json rename to cpp/0_Introduction/simpleDrvRuntime/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt b/cpp/0_Introduction/simpleDrvRuntime/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt rename to cpp/0_Introduction/simpleDrvRuntime/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleDrvRuntime/README.md b/cpp/0_Introduction/simpleDrvRuntime/README.md similarity index 100% rename from Samples/0_Introduction/simpleDrvRuntime/README.md rename to cpp/0_Introduction/simpleDrvRuntime/README.md diff --git a/Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp b/cpp/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp similarity index 100% rename from Samples/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp rename to cpp/0_Introduction/simpleDrvRuntime/simpleDrvRuntime.cpp diff --git a/Samples/0_Introduction/simpleDrvRuntime/vectorAdd_kernel.cu b/cpp/0_Introduction/simpleDrvRuntime/vectorAdd_kernel.cu similarity index 100% rename from Samples/0_Introduction/simpleDrvRuntime/vectorAdd_kernel.cu rename to cpp/0_Introduction/simpleDrvRuntime/vectorAdd_kernel.cu diff --git a/Samples/0_Introduction/simpleHyperQ/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleHyperQ/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleHyperQ/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleHyperQ/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleHyperQ/.vscode/extensions.json b/cpp/0_Introduction/simpleHyperQ/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleHyperQ/.vscode/extensions.json rename to cpp/0_Introduction/simpleHyperQ/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleHyperQ/CMakeLists.txt b/cpp/0_Introduction/simpleHyperQ/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleHyperQ/CMakeLists.txt rename to cpp/0_Introduction/simpleHyperQ/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleHyperQ/README.md b/cpp/0_Introduction/simpleHyperQ/README.md similarity index 100% rename from Samples/0_Introduction/simpleHyperQ/README.md rename to cpp/0_Introduction/simpleHyperQ/README.md diff --git a/Samples/0_Introduction/simpleHyperQ/doc/HyperQ.docx b/cpp/0_Introduction/simpleHyperQ/doc/HyperQ.docx similarity index 100% rename from Samples/0_Introduction/simpleHyperQ/doc/HyperQ.docx rename to cpp/0_Introduction/simpleHyperQ/doc/HyperQ.docx diff --git a/Samples/0_Introduction/simpleHyperQ/doc/HyperQ.pdf b/cpp/0_Introduction/simpleHyperQ/doc/HyperQ.pdf similarity index 100% rename from Samples/0_Introduction/simpleHyperQ/doc/HyperQ.pdf rename to cpp/0_Introduction/simpleHyperQ/doc/HyperQ.pdf diff --git a/Samples/0_Introduction/simpleHyperQ/simpleHyperQ.cu b/cpp/0_Introduction/simpleHyperQ/simpleHyperQ.cu similarity index 100% rename from Samples/0_Introduction/simpleHyperQ/simpleHyperQ.cu rename to cpp/0_Introduction/simpleHyperQ/simpleHyperQ.cu diff --git a/Samples/0_Introduction/simpleIPC/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleIPC/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleIPC/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleIPC/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleIPC/.vscode/extensions.json b/cpp/0_Introduction/simpleIPC/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleIPC/.vscode/extensions.json rename to cpp/0_Introduction/simpleIPC/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleIPC/CMakeLists.txt b/cpp/0_Introduction/simpleIPC/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleIPC/CMakeLists.txt rename to cpp/0_Introduction/simpleIPC/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleIPC/README.md b/cpp/0_Introduction/simpleIPC/README.md similarity index 100% rename from Samples/0_Introduction/simpleIPC/README.md rename to cpp/0_Introduction/simpleIPC/README.md diff --git a/Samples/0_Introduction/simpleIPC/simpleIPC.cu b/cpp/0_Introduction/simpleIPC/simpleIPC.cu similarity index 100% rename from Samples/0_Introduction/simpleIPC/simpleIPC.cu rename to cpp/0_Introduction/simpleIPC/simpleIPC.cu diff --git a/Samples/0_Introduction/simpleLayeredTexture/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleLayeredTexture/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleLayeredTexture/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleLayeredTexture/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleLayeredTexture/.vscode/extensions.json b/cpp/0_Introduction/simpleLayeredTexture/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleLayeredTexture/.vscode/extensions.json rename to cpp/0_Introduction/simpleLayeredTexture/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleLayeredTexture/CMakeLists.txt b/cpp/0_Introduction/simpleLayeredTexture/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleLayeredTexture/CMakeLists.txt rename to cpp/0_Introduction/simpleLayeredTexture/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleLayeredTexture/README.md b/cpp/0_Introduction/simpleLayeredTexture/README.md similarity index 100% rename from Samples/0_Introduction/simpleLayeredTexture/README.md rename to cpp/0_Introduction/simpleLayeredTexture/README.md diff --git a/Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture.cu b/cpp/0_Introduction/simpleLayeredTexture/simpleLayeredTexture.cu similarity index 100% rename from Samples/0_Introduction/simpleLayeredTexture/simpleLayeredTexture.cu rename to cpp/0_Introduction/simpleLayeredTexture/simpleLayeredTexture.cu diff --git a/Samples/0_Introduction/simpleMPI/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleMPI/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleMPI/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleMPI/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleMPI/.vscode/extensions.json b/cpp/0_Introduction/simpleMPI/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleMPI/.vscode/extensions.json rename to cpp/0_Introduction/simpleMPI/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleMPI/CMakeLists.txt b/cpp/0_Introduction/simpleMPI/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleMPI/CMakeLists.txt rename to cpp/0_Introduction/simpleMPI/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleMPI/README.md b/cpp/0_Introduction/simpleMPI/README.md similarity index 100% rename from Samples/0_Introduction/simpleMPI/README.md rename to cpp/0_Introduction/simpleMPI/README.md diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI.cpp b/cpp/0_Introduction/simpleMPI/simpleMPI.cpp similarity index 100% rename from Samples/0_Introduction/simpleMPI/simpleMPI.cpp rename to cpp/0_Introduction/simpleMPI/simpleMPI.cpp diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI.cu b/cpp/0_Introduction/simpleMPI/simpleMPI.cu similarity index 100% rename from Samples/0_Introduction/simpleMPI/simpleMPI.cu rename to cpp/0_Introduction/simpleMPI/simpleMPI.cu diff --git a/Samples/0_Introduction/simpleMPI/simpleMPI.h b/cpp/0_Introduction/simpleMPI/simpleMPI.h similarity index 100% rename from Samples/0_Introduction/simpleMPI/simpleMPI.h rename to cpp/0_Introduction/simpleMPI/simpleMPI.h diff --git a/Samples/0_Introduction/simpleMultiCopy/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleMultiCopy/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleMultiCopy/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleMultiCopy/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleMultiCopy/.vscode/extensions.json b/cpp/0_Introduction/simpleMultiCopy/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleMultiCopy/.vscode/extensions.json rename to cpp/0_Introduction/simpleMultiCopy/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleMultiCopy/CMakeLists.txt b/cpp/0_Introduction/simpleMultiCopy/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleMultiCopy/CMakeLists.txt rename to cpp/0_Introduction/simpleMultiCopy/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleMultiCopy/README.md b/cpp/0_Introduction/simpleMultiCopy/README.md similarity index 100% rename from Samples/0_Introduction/simpleMultiCopy/README.md rename to cpp/0_Introduction/simpleMultiCopy/README.md diff --git a/Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu b/cpp/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu similarity index 100% rename from Samples/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu rename to cpp/0_Introduction/simpleMultiCopy/simpleMultiCopy.cu diff --git a/Samples/0_Introduction/simpleMultiGPU/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleMultiGPU/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleMultiGPU/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleMultiGPU/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleMultiGPU/.vscode/extensions.json b/cpp/0_Introduction/simpleMultiGPU/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleMultiGPU/.vscode/extensions.json rename to cpp/0_Introduction/simpleMultiGPU/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleMultiGPU/CMakeLists.txt b/cpp/0_Introduction/simpleMultiGPU/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleMultiGPU/CMakeLists.txt rename to cpp/0_Introduction/simpleMultiGPU/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleMultiGPU/README.md b/cpp/0_Introduction/simpleMultiGPU/README.md similarity index 100% rename from Samples/0_Introduction/simpleMultiGPU/README.md rename to cpp/0_Introduction/simpleMultiGPU/README.md diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU.cu b/cpp/0_Introduction/simpleMultiGPU/simpleMultiGPU.cu similarity index 100% rename from Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU.cu rename to cpp/0_Introduction/simpleMultiGPU/simpleMultiGPU.cu diff --git a/Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU.h b/cpp/0_Introduction/simpleMultiGPU/simpleMultiGPU.h similarity index 100% rename from Samples/0_Introduction/simpleMultiGPU/simpleMultiGPU.h rename to cpp/0_Introduction/simpleMultiGPU/simpleMultiGPU.h diff --git a/Samples/0_Introduction/simpleOccupancy/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleOccupancy/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleOccupancy/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleOccupancy/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleOccupancy/.vscode/extensions.json b/cpp/0_Introduction/simpleOccupancy/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleOccupancy/.vscode/extensions.json rename to cpp/0_Introduction/simpleOccupancy/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleOccupancy/CMakeLists.txt b/cpp/0_Introduction/simpleOccupancy/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleOccupancy/CMakeLists.txt rename to cpp/0_Introduction/simpleOccupancy/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleOccupancy/README.md b/cpp/0_Introduction/simpleOccupancy/README.md similarity index 100% rename from Samples/0_Introduction/simpleOccupancy/README.md rename to cpp/0_Introduction/simpleOccupancy/README.md diff --git a/Samples/0_Introduction/simpleOccupancy/simpleOccupancy.cu b/cpp/0_Introduction/simpleOccupancy/simpleOccupancy.cu similarity index 100% rename from Samples/0_Introduction/simpleOccupancy/simpleOccupancy.cu rename to cpp/0_Introduction/simpleOccupancy/simpleOccupancy.cu diff --git a/Samples/0_Introduction/simpleP2P/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleP2P/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleP2P/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleP2P/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleP2P/.vscode/extensions.json b/cpp/0_Introduction/simpleP2P/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleP2P/.vscode/extensions.json rename to cpp/0_Introduction/simpleP2P/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleP2P/CMakeLists.txt b/cpp/0_Introduction/simpleP2P/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleP2P/CMakeLists.txt rename to cpp/0_Introduction/simpleP2P/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleP2P/README.md b/cpp/0_Introduction/simpleP2P/README.md similarity index 100% rename from Samples/0_Introduction/simpleP2P/README.md rename to cpp/0_Introduction/simpleP2P/README.md diff --git a/Samples/0_Introduction/simpleP2P/simpleP2P.cu b/cpp/0_Introduction/simpleP2P/simpleP2P.cu similarity index 100% rename from Samples/0_Introduction/simpleP2P/simpleP2P.cu rename to cpp/0_Introduction/simpleP2P/simpleP2P.cu diff --git a/Samples/0_Introduction/simplePitchLinearTexture/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simplePitchLinearTexture/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simplePitchLinearTexture/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simplePitchLinearTexture/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simplePitchLinearTexture/.vscode/extensions.json b/cpp/0_Introduction/simplePitchLinearTexture/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simplePitchLinearTexture/.vscode/extensions.json rename to cpp/0_Introduction/simplePitchLinearTexture/.vscode/extensions.json diff --git a/Samples/0_Introduction/simplePitchLinearTexture/CMakeLists.txt b/cpp/0_Introduction/simplePitchLinearTexture/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simplePitchLinearTexture/CMakeLists.txt rename to cpp/0_Introduction/simplePitchLinearTexture/CMakeLists.txt diff --git a/Samples/0_Introduction/simplePitchLinearTexture/README.md b/cpp/0_Introduction/simplePitchLinearTexture/README.md similarity index 100% rename from Samples/0_Introduction/simplePitchLinearTexture/README.md rename to cpp/0_Introduction/simplePitchLinearTexture/README.md diff --git a/Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu b/cpp/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu similarity index 100% rename from Samples/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu rename to cpp/0_Introduction/simplePitchLinearTexture/simplePitchLinearTexture.cu diff --git a/Samples/0_Introduction/simplePrintf/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simplePrintf/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simplePrintf/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simplePrintf/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simplePrintf/.vscode/extensions.json b/cpp/0_Introduction/simplePrintf/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simplePrintf/.vscode/extensions.json rename to cpp/0_Introduction/simplePrintf/.vscode/extensions.json diff --git a/Samples/0_Introduction/simplePrintf/CMakeLists.txt b/cpp/0_Introduction/simplePrintf/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simplePrintf/CMakeLists.txt rename to cpp/0_Introduction/simplePrintf/CMakeLists.txt diff --git a/Samples/0_Introduction/simplePrintf/README.md b/cpp/0_Introduction/simplePrintf/README.md similarity index 100% rename from Samples/0_Introduction/simplePrintf/README.md rename to cpp/0_Introduction/simplePrintf/README.md diff --git a/Samples/0_Introduction/simplePrintf/simplePrintf.cu b/cpp/0_Introduction/simplePrintf/simplePrintf.cu similarity index 100% rename from Samples/0_Introduction/simplePrintf/simplePrintf.cu rename to cpp/0_Introduction/simplePrintf/simplePrintf.cu diff --git a/Samples/0_Introduction/simpleStreams/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleStreams/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleStreams/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleStreams/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleStreams/.vscode/extensions.json b/cpp/0_Introduction/simpleStreams/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleStreams/.vscode/extensions.json rename to cpp/0_Introduction/simpleStreams/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleStreams/CMakeLists.txt b/cpp/0_Introduction/simpleStreams/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleStreams/CMakeLists.txt rename to cpp/0_Introduction/simpleStreams/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleStreams/README.md b/cpp/0_Introduction/simpleStreams/README.md similarity index 100% rename from Samples/0_Introduction/simpleStreams/README.md rename to cpp/0_Introduction/simpleStreams/README.md diff --git a/Samples/0_Introduction/simpleStreams/simpleStreams.cu b/cpp/0_Introduction/simpleStreams/simpleStreams.cu similarity index 100% rename from Samples/0_Introduction/simpleStreams/simpleStreams.cu rename to cpp/0_Introduction/simpleStreams/simpleStreams.cu diff --git a/Samples/0_Introduction/simpleSurfaceWrite/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleSurfaceWrite/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleSurfaceWrite/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleSurfaceWrite/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleSurfaceWrite/.vscode/extensions.json b/cpp/0_Introduction/simpleSurfaceWrite/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleSurfaceWrite/.vscode/extensions.json rename to cpp/0_Introduction/simpleSurfaceWrite/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleSurfaceWrite/CMakeLists.txt b/cpp/0_Introduction/simpleSurfaceWrite/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleSurfaceWrite/CMakeLists.txt rename to cpp/0_Introduction/simpleSurfaceWrite/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleSurfaceWrite/README.md b/cpp/0_Introduction/simpleSurfaceWrite/README.md similarity index 100% rename from Samples/0_Introduction/simpleSurfaceWrite/README.md rename to cpp/0_Introduction/simpleSurfaceWrite/README.md diff --git a/Samples/0_Introduction/simpleSurfaceWrite/data/ref_rotated.pgm b/cpp/0_Introduction/simpleSurfaceWrite/data/ref_rotated.pgm similarity index 100% rename from Samples/0_Introduction/simpleSurfaceWrite/data/ref_rotated.pgm rename to cpp/0_Introduction/simpleSurfaceWrite/data/ref_rotated.pgm diff --git a/Samples/0_Introduction/simpleSurfaceWrite/data/teapot512.pgm b/cpp/0_Introduction/simpleSurfaceWrite/data/teapot512.pgm similarity index 100% rename from Samples/0_Introduction/simpleSurfaceWrite/data/teapot512.pgm rename to cpp/0_Introduction/simpleSurfaceWrite/data/teapot512.pgm diff --git a/Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite.cu b/cpp/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite.cu similarity index 100% rename from Samples/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite.cu rename to cpp/0_Introduction/simpleSurfaceWrite/simpleSurfaceWrite.cu diff --git a/Samples/0_Introduction/simpleTemplates/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleTemplates/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleTemplates/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleTemplates/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleTemplates/.vscode/extensions.json b/cpp/0_Introduction/simpleTemplates/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleTemplates/.vscode/extensions.json rename to cpp/0_Introduction/simpleTemplates/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleTemplates/CMakeLists.txt b/cpp/0_Introduction/simpleTemplates/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleTemplates/CMakeLists.txt rename to cpp/0_Introduction/simpleTemplates/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleTemplates/README.md b/cpp/0_Introduction/simpleTemplates/README.md similarity index 100% rename from Samples/0_Introduction/simpleTemplates/README.md rename to cpp/0_Introduction/simpleTemplates/README.md diff --git a/Samples/0_Introduction/simpleTemplates/sharedmem.cuh b/cpp/0_Introduction/simpleTemplates/sharedmem.cuh similarity index 100% rename from Samples/0_Introduction/simpleTemplates/sharedmem.cuh rename to cpp/0_Introduction/simpleTemplates/sharedmem.cuh diff --git a/Samples/0_Introduction/simpleTemplates/simpleTemplates.cu b/cpp/0_Introduction/simpleTemplates/simpleTemplates.cu similarity index 100% rename from Samples/0_Introduction/simpleTemplates/simpleTemplates.cu rename to cpp/0_Introduction/simpleTemplates/simpleTemplates.cu diff --git a/Samples/0_Introduction/simpleTexture/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleTexture/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleTexture/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleTexture/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleTexture/.vscode/extensions.json b/cpp/0_Introduction/simpleTexture/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleTexture/.vscode/extensions.json rename to cpp/0_Introduction/simpleTexture/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleTexture/CMakeLists.txt b/cpp/0_Introduction/simpleTexture/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleTexture/CMakeLists.txt rename to cpp/0_Introduction/simpleTexture/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleTexture/README.md b/cpp/0_Introduction/simpleTexture/README.md similarity index 100% rename from Samples/0_Introduction/simpleTexture/README.md rename to cpp/0_Introduction/simpleTexture/README.md diff --git a/Samples/0_Introduction/simpleTexture/data/ref_rotated.pgm b/cpp/0_Introduction/simpleTexture/data/ref_rotated.pgm similarity index 100% rename from Samples/0_Introduction/simpleTexture/data/ref_rotated.pgm rename to cpp/0_Introduction/simpleTexture/data/ref_rotated.pgm diff --git a/Samples/0_Introduction/simpleTexture/data/teapot512.pgm b/cpp/0_Introduction/simpleTexture/data/teapot512.pgm similarity index 100% rename from Samples/0_Introduction/simpleTexture/data/teapot512.pgm rename to cpp/0_Introduction/simpleTexture/data/teapot512.pgm diff --git a/Samples/0_Introduction/simpleTexture/data/teapot512_out.pgm b/cpp/0_Introduction/simpleTexture/data/teapot512_out.pgm similarity index 100% rename from Samples/0_Introduction/simpleTexture/data/teapot512_out.pgm rename to cpp/0_Introduction/simpleTexture/data/teapot512_out.pgm diff --git a/Samples/0_Introduction/simpleTexture/simpleTexture.cu b/cpp/0_Introduction/simpleTexture/simpleTexture.cu similarity index 100% rename from Samples/0_Introduction/simpleTexture/simpleTexture.cu rename to cpp/0_Introduction/simpleTexture/simpleTexture.cu diff --git a/Samples/0_Introduction/simpleTexture3D/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleTexture3D/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleTexture3D/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleTexture3D/.vscode/extensions.json b/cpp/0_Introduction/simpleTexture3D/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/.vscode/extensions.json rename to cpp/0_Introduction/simpleTexture3D/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleTexture3D/CMakeLists.txt b/cpp/0_Introduction/simpleTexture3D/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/CMakeLists.txt rename to cpp/0_Introduction/simpleTexture3D/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleTexture3D/README.md b/cpp/0_Introduction/simpleTexture3D/README.md similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/README.md rename to cpp/0_Introduction/simpleTexture3D/README.md diff --git a/Samples/0_Introduction/simpleTexture3D/data/Bucky.raw b/cpp/0_Introduction/simpleTexture3D/data/Bucky.raw similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/data/Bucky.raw rename to cpp/0_Introduction/simpleTexture3D/data/Bucky.raw diff --git a/Samples/0_Introduction/simpleTexture3D/data/ref_texture3D.bin b/cpp/0_Introduction/simpleTexture3D/data/ref_texture3D.bin similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/data/ref_texture3D.bin rename to cpp/0_Introduction/simpleTexture3D/data/ref_texture3D.bin diff --git a/Samples/0_Introduction/simpleTexture3D/doc/sshot_lg.JPG b/cpp/0_Introduction/simpleTexture3D/doc/sshot_lg.JPG similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/doc/sshot_lg.JPG rename to cpp/0_Introduction/simpleTexture3D/doc/sshot_lg.JPG diff --git a/Samples/0_Introduction/simpleTexture3D/doc/sshot_md.JPG b/cpp/0_Introduction/simpleTexture3D/doc/sshot_md.JPG similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/doc/sshot_md.JPG rename to cpp/0_Introduction/simpleTexture3D/doc/sshot_md.JPG diff --git a/Samples/0_Introduction/simpleTexture3D/doc/sshot_sm.JPG b/cpp/0_Introduction/simpleTexture3D/doc/sshot_sm.JPG similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/doc/sshot_sm.JPG rename to cpp/0_Introduction/simpleTexture3D/doc/sshot_sm.JPG diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D.cpp b/cpp/0_Introduction/simpleTexture3D/simpleTexture3D.cpp similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/simpleTexture3D.cpp rename to cpp/0_Introduction/simpleTexture3D/simpleTexture3D.cpp diff --git a/Samples/0_Introduction/simpleTexture3D/simpleTexture3D_kernel.cu b/cpp/0_Introduction/simpleTexture3D/simpleTexture3D_kernel.cu similarity index 100% rename from Samples/0_Introduction/simpleTexture3D/simpleTexture3D_kernel.cu rename to cpp/0_Introduction/simpleTexture3D/simpleTexture3D_kernel.cu diff --git a/Samples/0_Introduction/simpleTextureDrv/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleTextureDrv/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleTextureDrv/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleTextureDrv/.vscode/extensions.json b/cpp/0_Introduction/simpleTextureDrv/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/.vscode/extensions.json rename to cpp/0_Introduction/simpleTextureDrv/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt b/cpp/0_Introduction/simpleTextureDrv/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt rename to cpp/0_Introduction/simpleTextureDrv/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleTextureDrv/README.md b/cpp/0_Introduction/simpleTextureDrv/README.md similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/README.md rename to cpp/0_Introduction/simpleTextureDrv/README.md diff --git a/Samples/0_Introduction/simpleTextureDrv/data/ref_rotated.pgm b/cpp/0_Introduction/simpleTextureDrv/data/ref_rotated.pgm similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/data/ref_rotated.pgm rename to cpp/0_Introduction/simpleTextureDrv/data/ref_rotated.pgm diff --git a/Samples/0_Introduction/simpleTextureDrv/data/teapot512.pgm b/cpp/0_Introduction/simpleTextureDrv/data/teapot512.pgm similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/data/teapot512.pgm rename to cpp/0_Introduction/simpleTextureDrv/data/teapot512.pgm diff --git a/Samples/0_Introduction/simpleTextureDrv/data/teapot512_out.pgm b/cpp/0_Introduction/simpleTextureDrv/data/teapot512_out.pgm similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/data/teapot512_out.pgm rename to cpp/0_Introduction/simpleTextureDrv/data/teapot512_out.pgm diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp b/cpp/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp rename to cpp/0_Introduction/simpleTextureDrv/simpleTextureDrv.cpp diff --git a/Samples/0_Introduction/simpleTextureDrv/simpleTexture_kernel.cu b/cpp/0_Introduction/simpleTextureDrv/simpleTexture_kernel.cu similarity index 100% rename from Samples/0_Introduction/simpleTextureDrv/simpleTexture_kernel.cu rename to cpp/0_Introduction/simpleTextureDrv/simpleTexture_kernel.cu diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleVoteIntrinsics/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleVoteIntrinsics/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleVoteIntrinsics/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/.vscode/extensions.json b/cpp/0_Introduction/simpleVoteIntrinsics/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleVoteIntrinsics/.vscode/extensions.json rename to cpp/0_Introduction/simpleVoteIntrinsics/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt b/cpp/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt rename to cpp/0_Introduction/simpleVoteIntrinsics/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/README.md b/cpp/0_Introduction/simpleVoteIntrinsics/README.md similarity index 100% rename from Samples/0_Introduction/simpleVoteIntrinsics/README.md rename to cpp/0_Introduction/simpleVoteIntrinsics/README.md diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics.cu b/cpp/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics.cu similarity index 100% rename from Samples/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics.cu rename to cpp/0_Introduction/simpleVoteIntrinsics/simpleVoteIntrinsics.cu diff --git a/Samples/0_Introduction/simpleVoteIntrinsics/simpleVote_kernel.cuh b/cpp/0_Introduction/simpleVoteIntrinsics/simpleVote_kernel.cuh similarity index 100% rename from Samples/0_Introduction/simpleVoteIntrinsics/simpleVote_kernel.cuh rename to cpp/0_Introduction/simpleVoteIntrinsics/simpleVote_kernel.cuh diff --git a/Samples/0_Introduction/simpleZeroCopy/.vscode/c_cpp_properties.json b/cpp/0_Introduction/simpleZeroCopy/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/simpleZeroCopy/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/simpleZeroCopy/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/simpleZeroCopy/.vscode/extensions.json b/cpp/0_Introduction/simpleZeroCopy/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/simpleZeroCopy/.vscode/extensions.json rename to cpp/0_Introduction/simpleZeroCopy/.vscode/extensions.json diff --git a/Samples/0_Introduction/simpleZeroCopy/CMakeLists.txt b/cpp/0_Introduction/simpleZeroCopy/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/simpleZeroCopy/CMakeLists.txt rename to cpp/0_Introduction/simpleZeroCopy/CMakeLists.txt diff --git a/Samples/0_Introduction/simpleZeroCopy/README.md b/cpp/0_Introduction/simpleZeroCopy/README.md similarity index 100% rename from Samples/0_Introduction/simpleZeroCopy/README.md rename to cpp/0_Introduction/simpleZeroCopy/README.md diff --git a/Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy.cu b/cpp/0_Introduction/simpleZeroCopy/simpleZeroCopy.cu similarity index 100% rename from Samples/0_Introduction/simpleZeroCopy/simpleZeroCopy.cu rename to cpp/0_Introduction/simpleZeroCopy/simpleZeroCopy.cu diff --git a/Samples/0_Introduction/systemWideAtomics/.vscode/c_cpp_properties.json b/cpp/0_Introduction/systemWideAtomics/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/systemWideAtomics/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/systemWideAtomics/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/systemWideAtomics/.vscode/extensions.json b/cpp/0_Introduction/systemWideAtomics/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/systemWideAtomics/.vscode/extensions.json rename to cpp/0_Introduction/systemWideAtomics/.vscode/extensions.json diff --git a/Samples/0_Introduction/systemWideAtomics/CMakeLists.txt b/cpp/0_Introduction/systemWideAtomics/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/systemWideAtomics/CMakeLists.txt rename to cpp/0_Introduction/systemWideAtomics/CMakeLists.txt diff --git a/Samples/0_Introduction/systemWideAtomics/README.md b/cpp/0_Introduction/systemWideAtomics/README.md similarity index 100% rename from Samples/0_Introduction/systemWideAtomics/README.md rename to cpp/0_Introduction/systemWideAtomics/README.md diff --git a/Samples/0_Introduction/systemWideAtomics/systemWideAtomics.cu b/cpp/0_Introduction/systemWideAtomics/systemWideAtomics.cu similarity index 100% rename from Samples/0_Introduction/systemWideAtomics/systemWideAtomics.cu rename to cpp/0_Introduction/systemWideAtomics/systemWideAtomics.cu diff --git a/Samples/0_Introduction/template/.vscode/c_cpp_properties.json b/cpp/0_Introduction/template/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/template/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/template/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/template/.vscode/extensions.json b/cpp/0_Introduction/template/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/template/.vscode/extensions.json rename to cpp/0_Introduction/template/.vscode/extensions.json diff --git a/Samples/0_Introduction/template/CMakeLists.txt b/cpp/0_Introduction/template/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/template/CMakeLists.txt rename to cpp/0_Introduction/template/CMakeLists.txt diff --git a/Samples/0_Introduction/template/README.md b/cpp/0_Introduction/template/README.md similarity index 100% rename from Samples/0_Introduction/template/README.md rename to cpp/0_Introduction/template/README.md diff --git a/Samples/0_Introduction/template/template.cu b/cpp/0_Introduction/template/template.cu similarity index 100% rename from Samples/0_Introduction/template/template.cu rename to cpp/0_Introduction/template/template.cu diff --git a/Samples/0_Introduction/template/template_cpu.cpp b/cpp/0_Introduction/template/template_cpu.cpp similarity index 100% rename from Samples/0_Introduction/template/template_cpu.cpp rename to cpp/0_Introduction/template/template_cpu.cpp diff --git a/Samples/0_Introduction/vectorAdd/.vscode/c_cpp_properties.json b/cpp/0_Introduction/vectorAdd/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/vectorAdd/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/vectorAdd/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/vectorAdd/.vscode/extensions.json b/cpp/0_Introduction/vectorAdd/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/vectorAdd/.vscode/extensions.json rename to cpp/0_Introduction/vectorAdd/.vscode/extensions.json diff --git a/Samples/0_Introduction/vectorAdd/CMakeLists.txt b/cpp/0_Introduction/vectorAdd/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/vectorAdd/CMakeLists.txt rename to cpp/0_Introduction/vectorAdd/CMakeLists.txt diff --git a/Samples/0_Introduction/vectorAdd/README.md b/cpp/0_Introduction/vectorAdd/README.md similarity index 100% rename from Samples/0_Introduction/vectorAdd/README.md rename to cpp/0_Introduction/vectorAdd/README.md diff --git a/Samples/0_Introduction/vectorAdd/vectorAdd.cu b/cpp/0_Introduction/vectorAdd/vectorAdd.cu similarity index 100% rename from Samples/0_Introduction/vectorAdd/vectorAdd.cu rename to cpp/0_Introduction/vectorAdd/vectorAdd.cu diff --git a/Samples/0_Introduction/vectorAddDrv/.vscode/c_cpp_properties.json b/cpp/0_Introduction/vectorAddDrv/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/vectorAddDrv/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/vectorAddDrv/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/vectorAddDrv/.vscode/extensions.json b/cpp/0_Introduction/vectorAddDrv/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/vectorAddDrv/.vscode/extensions.json rename to cpp/0_Introduction/vectorAddDrv/.vscode/extensions.json diff --git a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt b/cpp/0_Introduction/vectorAddDrv/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/vectorAddDrv/CMakeLists.txt rename to cpp/0_Introduction/vectorAddDrv/CMakeLists.txt diff --git a/Samples/0_Introduction/vectorAddDrv/README.md b/cpp/0_Introduction/vectorAddDrv/README.md similarity index 100% rename from Samples/0_Introduction/vectorAddDrv/README.md rename to cpp/0_Introduction/vectorAddDrv/README.md diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAddDrv.cpp b/cpp/0_Introduction/vectorAddDrv/vectorAddDrv.cpp similarity index 100% rename from Samples/0_Introduction/vectorAddDrv/vectorAddDrv.cpp rename to cpp/0_Introduction/vectorAddDrv/vectorAddDrv.cpp diff --git a/Samples/0_Introduction/vectorAddDrv/vectorAdd_kernel.cu b/cpp/0_Introduction/vectorAddDrv/vectorAdd_kernel.cu similarity index 100% rename from Samples/0_Introduction/vectorAddDrv/vectorAdd_kernel.cu rename to cpp/0_Introduction/vectorAddDrv/vectorAdd_kernel.cu diff --git a/Samples/0_Introduction/vectorAddMMAP/.vscode/c_cpp_properties.json b/cpp/0_Introduction/vectorAddMMAP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/vectorAddMMAP/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/vectorAddMMAP/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/vectorAddMMAP/.vscode/extensions.json b/cpp/0_Introduction/vectorAddMMAP/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/vectorAddMMAP/.vscode/extensions.json rename to cpp/0_Introduction/vectorAddMMAP/.vscode/extensions.json diff --git a/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt b/cpp/0_Introduction/vectorAddMMAP/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt rename to cpp/0_Introduction/vectorAddMMAP/CMakeLists.txt diff --git a/Samples/0_Introduction/vectorAddMMAP/README.md b/cpp/0_Introduction/vectorAddMMAP/README.md similarity index 100% rename from Samples/0_Introduction/vectorAddMMAP/README.md rename to cpp/0_Introduction/vectorAddMMAP/README.md diff --git a/Samples/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.cpp b/cpp/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.cpp similarity index 100% rename from Samples/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.cpp rename to cpp/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.cpp diff --git a/Samples/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.hpp b/cpp/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.hpp similarity index 100% rename from Samples/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.hpp rename to cpp/0_Introduction/vectorAddMMAP/multidevicealloc_memmap.hpp diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp b/cpp/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp similarity index 100% rename from Samples/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp rename to cpp/0_Introduction/vectorAddMMAP/vectorAddMMAP.cpp diff --git a/Samples/0_Introduction/vectorAddMMAP/vectorAdd_kernel.cu b/cpp/0_Introduction/vectorAddMMAP/vectorAdd_kernel.cu similarity index 100% rename from Samples/0_Introduction/vectorAddMMAP/vectorAdd_kernel.cu rename to cpp/0_Introduction/vectorAddMMAP/vectorAdd_kernel.cu diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/.vscode/c_cpp_properties.json b/cpp/0_Introduction/vectorAdd_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/0_Introduction/vectorAdd_nvrtc/.vscode/c_cpp_properties.json rename to cpp/0_Introduction/vectorAdd_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/.vscode/extensions.json b/cpp/0_Introduction/vectorAdd_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/0_Introduction/vectorAdd_nvrtc/.vscode/extensions.json rename to cpp/0_Introduction/vectorAdd_nvrtc/.vscode/extensions.json diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt b/cpp/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt rename to cpp/0_Introduction/vectorAdd_nvrtc/CMakeLists.txt diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/README.md b/cpp/0_Introduction/vectorAdd_nvrtc/README.md similarity index 100% rename from Samples/0_Introduction/vectorAdd_nvrtc/README.md rename to cpp/0_Introduction/vectorAdd_nvrtc/README.md diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd.cpp b/cpp/0_Introduction/vectorAdd_nvrtc/vectorAdd.cpp similarity index 100% rename from Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd.cpp rename to cpp/0_Introduction/vectorAdd_nvrtc/vectorAdd.cpp diff --git a/Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_kernel.cu b/cpp/0_Introduction/vectorAdd_nvrtc/vectorAdd_kernel.cu similarity index 100% rename from Samples/0_Introduction/vectorAdd_nvrtc/vectorAdd_kernel.cu rename to cpp/0_Introduction/vectorAdd_nvrtc/vectorAdd_kernel.cu diff --git a/Samples/1_Utilities/CMakeLists.txt b/cpp/1_Utilities/CMakeLists.txt similarity index 100% rename from Samples/1_Utilities/CMakeLists.txt rename to cpp/1_Utilities/CMakeLists.txt diff --git a/Samples/1_Utilities/README.md b/cpp/1_Utilities/README.md similarity index 100% rename from Samples/1_Utilities/README.md rename to cpp/1_Utilities/README.md diff --git a/Samples/1_Utilities/deviceQuery/.vscode/c_cpp_properties.json b/cpp/1_Utilities/deviceQuery/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/1_Utilities/deviceQuery/.vscode/c_cpp_properties.json rename to cpp/1_Utilities/deviceQuery/.vscode/c_cpp_properties.json diff --git a/Samples/1_Utilities/deviceQuery/.vscode/extensions.json b/cpp/1_Utilities/deviceQuery/.vscode/extensions.json similarity index 100% rename from Samples/1_Utilities/deviceQuery/.vscode/extensions.json rename to cpp/1_Utilities/deviceQuery/.vscode/extensions.json diff --git a/Samples/1_Utilities/deviceQuery/CMakeLists.txt b/cpp/1_Utilities/deviceQuery/CMakeLists.txt similarity index 100% rename from Samples/1_Utilities/deviceQuery/CMakeLists.txt rename to cpp/1_Utilities/deviceQuery/CMakeLists.txt diff --git a/Samples/1_Utilities/deviceQuery/README.md b/cpp/1_Utilities/deviceQuery/README.md similarity index 100% rename from Samples/1_Utilities/deviceQuery/README.md rename to cpp/1_Utilities/deviceQuery/README.md diff --git a/Samples/1_Utilities/deviceQuery/deviceQuery.cpp b/cpp/1_Utilities/deviceQuery/deviceQuery.cpp similarity index 100% rename from Samples/1_Utilities/deviceQuery/deviceQuery.cpp rename to cpp/1_Utilities/deviceQuery/deviceQuery.cpp diff --git a/Samples/1_Utilities/deviceQueryDrv/.vscode/c_cpp_properties.json b/cpp/1_Utilities/deviceQueryDrv/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/1_Utilities/deviceQueryDrv/.vscode/c_cpp_properties.json rename to cpp/1_Utilities/deviceQueryDrv/.vscode/c_cpp_properties.json diff --git a/Samples/1_Utilities/deviceQueryDrv/.vscode/extensions.json b/cpp/1_Utilities/deviceQueryDrv/.vscode/extensions.json similarity index 100% rename from Samples/1_Utilities/deviceQueryDrv/.vscode/extensions.json rename to cpp/1_Utilities/deviceQueryDrv/.vscode/extensions.json diff --git a/Samples/1_Utilities/deviceQueryDrv/CMakeLists.txt b/cpp/1_Utilities/deviceQueryDrv/CMakeLists.txt similarity index 100% rename from Samples/1_Utilities/deviceQueryDrv/CMakeLists.txt rename to cpp/1_Utilities/deviceQueryDrv/CMakeLists.txt diff --git a/Samples/1_Utilities/deviceQueryDrv/README.md b/cpp/1_Utilities/deviceQueryDrv/README.md similarity index 100% rename from Samples/1_Utilities/deviceQueryDrv/README.md rename to cpp/1_Utilities/deviceQueryDrv/README.md diff --git a/Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv.cpp b/cpp/1_Utilities/deviceQueryDrv/deviceQueryDrv.cpp similarity index 100% rename from Samples/1_Utilities/deviceQueryDrv/deviceQueryDrv.cpp rename to cpp/1_Utilities/deviceQueryDrv/deviceQueryDrv.cpp diff --git a/Samples/1_Utilities/topologyQuery/.vscode/c_cpp_properties.json b/cpp/1_Utilities/topologyQuery/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/1_Utilities/topologyQuery/.vscode/c_cpp_properties.json rename to cpp/1_Utilities/topologyQuery/.vscode/c_cpp_properties.json diff --git a/Samples/1_Utilities/topologyQuery/.vscode/extensions.json b/cpp/1_Utilities/topologyQuery/.vscode/extensions.json similarity index 100% rename from Samples/1_Utilities/topologyQuery/.vscode/extensions.json rename to cpp/1_Utilities/topologyQuery/.vscode/extensions.json diff --git a/Samples/1_Utilities/topologyQuery/CMakeLists.txt b/cpp/1_Utilities/topologyQuery/CMakeLists.txt similarity index 100% rename from Samples/1_Utilities/topologyQuery/CMakeLists.txt rename to cpp/1_Utilities/topologyQuery/CMakeLists.txt diff --git a/Samples/1_Utilities/topologyQuery/README.md b/cpp/1_Utilities/topologyQuery/README.md similarity index 100% rename from Samples/1_Utilities/topologyQuery/README.md rename to cpp/1_Utilities/topologyQuery/README.md diff --git a/Samples/1_Utilities/topologyQuery/topologyQuery.cu b/cpp/1_Utilities/topologyQuery/topologyQuery.cu similarity index 100% rename from Samples/1_Utilities/topologyQuery/topologyQuery.cu rename to cpp/1_Utilities/topologyQuery/topologyQuery.cu diff --git a/Samples/2_Concepts_and_Techniques/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/README.md diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.h b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.h rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_consumer.h diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.cpp diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.h b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.h rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/cuda_producer.h diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.cpp b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.cpp rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.cpp diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.h b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.h rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/eglstrm_common.h diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/helper.h b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/helper.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/helper.h rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/helper.h diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/kernel.cu b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/kernel.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/kernel.cu rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/kernel.cu diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/main.cpp b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/main.cpp rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_CrossGPU/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/README.md diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.cpp diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.h b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.h rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_consumer.h diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_1.yuv b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_1.yuv similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_1.yuv rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_1.yuv diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_2.yuv b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_2.yuv similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_2.yuv rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_f_2.yuv diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.cpp diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.h b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.h rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_producer.h diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_1.yuv b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_1.yuv similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_1.yuv rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_1.yuv diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_2.yuv b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_2.yuv similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_2.yuv rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/cuda_yuv_f_2.yuv diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.cpp b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.cpp rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.cpp diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.h b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.h rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/eglstrm_common.h diff --git a/Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/main.cpp b/cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/main.cpp rename to cpp/2_Concepts_and_Techniques/EGLStream_CUDA_Interop/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/FunctionPointers/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/FunctionPointers/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/FunctionPointers/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/FunctionPointers/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/FunctionPointers/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers.cpp b/cpp/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers.cpp rename to cpp/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers.cpp diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.cu b/cpp/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.cu rename to cpp/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.cu diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.h b/cpp/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.h rename to cpp/2_Concepts_and_Techniques/FunctionPointers/FunctionPointers_kernels.h diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/README.md b/cpp/2_Concepts_and_Techniques/FunctionPointers/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/README.md rename to cpp/2_Concepts_and_Techniques/FunctionPointers/README.md diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/data/ref_orig.pgm b/cpp/2_Concepts_and_Techniques/FunctionPointers/data/ref_orig.pgm similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/data/ref_orig.pgm rename to cpp/2_Concepts_and_Techniques/FunctionPointers/data/ref_orig.pgm diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/data/ref_shared.pgm b/cpp/2_Concepts_and_Techniques/FunctionPointers/data/ref_shared.pgm similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/data/ref_shared.pgm rename to cpp/2_Concepts_and_Techniques/FunctionPointers/data/ref_shared.pgm diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/data/ref_tex.pgm b/cpp/2_Concepts_and_Techniques/FunctionPointers/data/ref_tex.pgm similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/data/ref_tex.pgm rename to cpp/2_Concepts_and_Techniques/FunctionPointers/data/ref_tex.pgm diff --git a/Samples/2_Concepts_and_Techniques/FunctionPointers/data/teapot512.pgm b/cpp/2_Concepts_and_Techniques/FunctionPointers/data/teapot512.pgm similarity index 100% rename from Samples/2_Concepts_and_Techniques/FunctionPointers/data/teapot512.pgm rename to cpp/2_Concepts_and_Techniques/FunctionPointers/data/teapot512.pgm diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/README.md diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/cudasharedmem.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/cudasharedmem.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/cudasharedmem.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/cudasharedmem.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/piestimator.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/piestimator.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/piestimator.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/piestimator.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/test.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/test.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/test.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/inc/test.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/main.cpp b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/main.cpp rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/piestimator.cu b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/piestimator.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/piestimator.cu rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/piestimator.cu diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/test.cpp b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/test.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/test.cpp rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineP/src/test.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/README.md diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/cudasharedmem.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/cudasharedmem.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/cudasharedmem.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/cudasharedmem.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/piestimator.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/piestimator.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/piestimator.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/piestimator.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/test.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/test.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/test.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/inc/test.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/main.cpp b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/main.cpp rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/piestimator.cu b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/piestimator.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/piestimator.cu rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/piestimator.cu diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/test.cpp b/cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/test.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/test.cpp rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiInlineQ/src/test.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/README.md rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/README.md diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/inc/cudasharedmem.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/inc/cudasharedmem.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/inc/cudasharedmem.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/inc/cudasharedmem.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/inc/piestimator.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/inc/piestimator.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/inc/piestimator.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/inc/piestimator.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/inc/test.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/inc/test.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/inc/test.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/inc/test.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/src/main.cpp b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/src/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/src/main.cpp rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/src/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/src/piestimator.cu b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/src/piestimator.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/src/piestimator.cu rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/src/piestimator.cu diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiP/src/test.cpp b/cpp/2_Concepts_and_Techniques/MC_EstimatePiP/src/test.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiP/src/test.cpp rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiP/src/test.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/README.md diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/cudasharedmem.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/cudasharedmem.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/cudasharedmem.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/cudasharedmem.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/piestimator.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/piestimator.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/piestimator.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/piestimator.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/test.h b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/test.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/test.h rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/inc/test.h diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/src/main.cpp b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/src/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/src/main.cpp rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/src/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/src/piestimator.cu b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/src/piestimator.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/src/piestimator.cu rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/src/piestimator.cu diff --git a/Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/src/test.cpp b/cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/src/test.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_EstimatePiQ/src/test.cpp rename to cpp/2_Concepts_and_Techniques/MC_EstimatePiQ/src/test.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/README.md diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/asianoption.h b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/asianoption.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/asianoption.h rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/asianoption.h diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/cudasharedmem.h b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/cudasharedmem.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/cudasharedmem.h rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/cudasharedmem.h diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/pricingengine.h b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/pricingengine.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/pricingengine.h rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/pricingengine.h diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/test.h b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/test.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/test.h rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/inc/test.h diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/main.cpp b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/main.cpp rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/pricingengine.cu b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/pricingengine.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/pricingengine.cu rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/pricingengine.cu diff --git a/Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/test.cpp b/cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/test.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/test.cpp rename to cpp/2_Concepts_and_Techniques/MC_SingleAsianOptionP/src/test.cpp diff --git a/Samples/2_Concepts_and_Techniques/README.md b/cpp/2_Concepts_and_Techniques/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/README.md rename to cpp/2_Concepts_and_Techniques/README.md diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/boxFilter/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/boxFilter/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/boxFilter/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/boxFilter/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/boxFilter/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/README.md b/cpp/2_Concepts_and_Techniques/boxFilter/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/README.md rename to cpp/2_Concepts_and_Techniques/boxFilter/README.md diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter.cpp b/cpp/2_Concepts_and_Techniques/boxFilter/boxFilter.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/boxFilter.cpp rename to cpp/2_Concepts_and_Techniques/boxFilter/boxFilter.cpp diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_cpu.cpp b/cpp/2_Concepts_and_Techniques/boxFilter/boxFilter_cpu.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_cpu.cpp rename to cpp/2_Concepts_and_Techniques/boxFilter/boxFilter_cpu.cpp diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_kernel.cu b/cpp/2_Concepts_and_Techniques/boxFilter/boxFilter_kernel.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/boxFilter_kernel.cu rename to cpp/2_Concepts_and_Techniques/boxFilter/boxFilter_kernel.cu diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/data/ref_14.ppm b/cpp/2_Concepts_and_Techniques/boxFilter/data/ref_14.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/data/ref_14.ppm rename to cpp/2_Concepts_and_Techniques/boxFilter/data/ref_14.ppm diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/data/ref_22.ppm b/cpp/2_Concepts_and_Techniques/boxFilter/data/ref_22.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/data/ref_22.ppm rename to cpp/2_Concepts_and_Techniques/boxFilter/data/ref_22.ppm diff --git a/Samples/2_Concepts_and_Techniques/boxFilter/data/teapot1024.ppm b/cpp/2_Concepts_and_Techniques/boxFilter/data/teapot1024.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/boxFilter/data/teapot1024.ppm rename to cpp/2_Concepts_and_Techniques/boxFilter/data/teapot1024.ppm diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/convolutionSeparable/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/convolutionSeparable/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md b/cpp/2_Concepts_and_Techniques/convolutionSeparable/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/README.md rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/README.md diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable.cu b/cpp/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable.cu rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable.cu diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_common.h b/cpp/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_common.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_common.h rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_common.h diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_gold.cpp b/cpp/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_gold.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_gold.cpp rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/convolutionSeparable_gold.cpp diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.doc b/cpp/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.doc similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.doc rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.doc diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.pdf b/cpp/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.pdf similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.pdf rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.pdf diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.vsd b/cpp/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.vsd similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.vsd rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/doc/convolutionSeparable.vsd diff --git a/Samples/2_Concepts_and_Techniques/convolutionSeparable/main.cpp b/cpp/2_Concepts_and_Techniques/convolutionSeparable/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionSeparable/main.cpp rename to cpp/2_Concepts_and_Techniques/convolutionSeparable/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/convolutionTexture/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/convolutionTexture/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/convolutionTexture/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/convolutionTexture/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/convolutionTexture/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/README.md b/cpp/2_Concepts_and_Techniques/convolutionTexture/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/README.md rename to cpp/2_Concepts_and_Techniques/convolutionTexture/README.md diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture.cu b/cpp/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture.cu rename to cpp/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture.cu diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_common.h b/cpp/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_common.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_common.h rename to cpp/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_common.h diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_gold.cpp b/cpp/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_gold.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_gold.cpp rename to cpp/2_Concepts_and_Techniques/convolutionTexture/convolutionTexture_gold.cpp diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/doc/Performance.xls b/cpp/2_Concepts_and_Techniques/convolutionTexture/doc/Performance.xls similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/doc/Performance.xls rename to cpp/2_Concepts_and_Techniques/convolutionTexture/doc/Performance.xls diff --git a/Samples/2_Concepts_and_Techniques/convolutionTexture/main.cpp b/cpp/2_Concepts_and_Techniques/convolutionTexture/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/convolutionTexture/main.cpp rename to cpp/2_Concepts_and_Techniques/convolutionTexture/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/dct8x8/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/dct8x8/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/dct8x8/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/dct8x8/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp b/cpp/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp rename to cpp/2_Concepts_and_Techniques/dct8x8/BmpUtil.cpp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/BmpUtil.h b/cpp/2_Concepts_and_Techniques/dct8x8/BmpUtil.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/BmpUtil.h rename to cpp/2_Concepts_and_Techniques/dct8x8/BmpUtil.h diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/dct8x8/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/Common.h b/cpp/2_Concepts_and_Techniques/dct8x8/Common.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/Common.h rename to cpp/2_Concepts_and_Techniques/dct8x8/Common.h diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.cpp b/cpp/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.cpp rename to cpp/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.cpp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.h b/cpp/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.h rename to cpp/2_Concepts_and_Techniques/dct8x8/DCT8x8_Gold.h diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/README.md b/cpp/2_Concepts_and_Techniques/dct8x8/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/README.md rename to cpp/2_Concepts_and_Techniques/dct8x8/README.md diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/data/teapot512.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/data/teapot512.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/data/teapot512.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/data/teapot512.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/data/teapot512.ppm b/cpp/2_Concepts_and_Techniques/dct8x8/data/teapot512.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/data/teapot512.ppm rename to cpp/2_Concepts_and_Techniques/dct8x8/data/teapot512.ppm diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8.cu b/cpp/2_Concepts_and_Techniques/dct8x8/dct8x8.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/dct8x8.cu rename to cpp/2_Concepts_and_Techniques/dct8x8/dct8x8.cu diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel1.cuh b/cpp/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel1.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel1.cuh rename to cpp/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel1.cuh diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel2.cuh b/cpp/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel2.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel2.cuh rename to cpp/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel2.cuh diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_quantization.cuh b/cpp/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_quantization.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_quantization.cuh rename to cpp/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_quantization.cuh diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_short.cuh b/cpp/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_short.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_short.cuh rename to cpp/2_Concepts_and_Techniques/dct8x8/dct8x8_kernel_short.cuh diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks1.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks1.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks1.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks1.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks2.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks2.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks2.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks2.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks3.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks3.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks3.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/BarbaraBlocks3.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/CosineBasis.png b/cpp/2_Concepts_and_Techniques/dct8x8/doc/CosineBasis.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/CosineBasis.png rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/CosineBasis.png diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/Cosines.xls b/cpp/2_Concepts_and_Techniques/dct8x8/doc/Cosines.xls similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/Cosines.xls rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/Cosines.xls diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/DctJpeg.png b/cpp/2_Concepts_and_Techniques/dct8x8/doc/DctJpeg.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/DctJpeg.png rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/DctJpeg.png diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/barbara.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/doc/barbara.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/barbara.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/barbara.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/barbara_lg.png b/cpp/2_Concepts_and_Techniques/dct8x8/doc/barbara_lg.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/barbara_lg.png rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/barbara_lg.png diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/barbara_md.png b/cpp/2_Concepts_and_Techniques/dct8x8/doc/barbara_md.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/barbara_md.png rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/barbara_md.png diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/barbara_sm.png b/cpp/2_Concepts_and_Techniques/dct8x8/doc/barbara_sm.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/barbara_sm.png rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/barbara_sm.png diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.doc b/cpp/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.doc similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.doc rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.doc diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.pdf b/cpp/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.pdf similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.pdf rename to cpp/2_Concepts_and_Techniques/dct8x8/doc/dct8x8.pdf diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/teapot512_cuda1.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/teapot512_cuda1.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/teapot512_cuda1.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/teapot512_cuda1.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/teapot512_cuda2.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/teapot512_cuda2.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/teapot512_cuda2.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/teapot512_cuda2.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/teapot512_cuda_short.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/teapot512_cuda_short.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/teapot512_cuda_short.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/teapot512_cuda_short.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/teapot512_gold1.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/teapot512_gold1.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/teapot512_gold1.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/teapot512_gold1.bmp diff --git a/Samples/2_Concepts_and_Techniques/dct8x8/teapot512_gold2.bmp b/cpp/2_Concepts_and_Techniques/dct8x8/teapot512_gold2.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/dct8x8/teapot512_gold2.bmp rename to cpp/2_Concepts_and_Techniques/dct8x8/teapot512_gold2.bmp diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/eigenvalues/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/eigenvalues/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/eigenvalues/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/eigenvalues/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/eigenvalues/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/README.md b/cpp/2_Concepts_and_Techniques/eigenvalues/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/README.md rename to cpp/2_Concepts_and_Techniques/eigenvalues/README.md diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large.cuh b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large.cuh rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large.cuh diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_multi.cuh b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_multi.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_multi.cuh rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_multi.cuh diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_onei.cuh b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_onei.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_onei.cuh rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_large_onei.cuh diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_small.cuh b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_small.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_small.cuh rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_kernel_small.cuh diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_large.cu b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_large.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_large.cu rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_large.cu diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_large.cuh b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_large.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_large.cuh rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_large.cuh diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_small.cu b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_small.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_small.cu rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_small.cu diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_small.cuh b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_small.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_small.cuh rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_small.cuh diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/bisect_util.cu b/cpp/2_Concepts_and_Techniques/eigenvalues/bisect_util.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/bisect_util.cu rename to cpp/2_Concepts_and_Techniques/eigenvalues/bisect_util.cu diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/config.h b/cpp/2_Concepts_and_Techniques/eigenvalues/config.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/config.h rename to cpp/2_Concepts_and_Techniques/eigenvalues/config.h diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/data/diagonal.dat b/cpp/2_Concepts_and_Techniques/eigenvalues/data/diagonal.dat similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/data/diagonal.dat rename to cpp/2_Concepts_and_Techniques/eigenvalues/data/diagonal.dat diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/data/reference.dat b/cpp/2_Concepts_and_Techniques/eigenvalues/data/reference.dat similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/data/reference.dat rename to cpp/2_Concepts_and_Techniques/eigenvalues/data/reference.dat diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/data/superdiagonal.dat b/cpp/2_Concepts_and_Techniques/eigenvalues/data/superdiagonal.dat similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/data/superdiagonal.dat rename to cpp/2_Concepts_and_Techniques/eigenvalues/data/superdiagonal.dat diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.doc b/cpp/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.doc similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.doc rename to cpp/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.doc diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.pdf b/cpp/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.pdf similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.pdf rename to cpp/2_Concepts_and_Techniques/eigenvalues/doc/eigenvalues.pdf diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/gerschgorin.cpp b/cpp/2_Concepts_and_Techniques/eigenvalues/gerschgorin.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/gerschgorin.cpp rename to cpp/2_Concepts_and_Techniques/eigenvalues/gerschgorin.cpp diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/gerschgorin.h b/cpp/2_Concepts_and_Techniques/eigenvalues/gerschgorin.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/gerschgorin.h rename to cpp/2_Concepts_and_Techniques/eigenvalues/gerschgorin.h diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/main.cu b/cpp/2_Concepts_and_Techniques/eigenvalues/main.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/main.cu rename to cpp/2_Concepts_and_Techniques/eigenvalues/main.cu diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/matlab.cpp b/cpp/2_Concepts_and_Techniques/eigenvalues/matlab.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/matlab.cpp rename to cpp/2_Concepts_and_Techniques/eigenvalues/matlab.cpp diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/matlab.h b/cpp/2_Concepts_and_Techniques/eigenvalues/matlab.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/matlab.h rename to cpp/2_Concepts_and_Techniques/eigenvalues/matlab.h diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/structs.h b/cpp/2_Concepts_and_Techniques/eigenvalues/structs.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/structs.h rename to cpp/2_Concepts_and_Techniques/eigenvalues/structs.h diff --git a/Samples/2_Concepts_and_Techniques/eigenvalues/util.h b/cpp/2_Concepts_and_Techniques/eigenvalues/util.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/eigenvalues/util.h rename to cpp/2_Concepts_and_Techniques/eigenvalues/util.h diff --git a/Samples/2_Concepts_and_Techniques/histogram/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/histogram/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/histogram/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/histogram/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/histogram/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/histogram/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/histogram/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/histogram/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/histogram/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/histogram/README.md b/cpp/2_Concepts_and_Techniques/histogram/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/README.md rename to cpp/2_Concepts_and_Techniques/histogram/README.md diff --git a/Samples/2_Concepts_and_Techniques/histogram/doc/histogram.doc b/cpp/2_Concepts_and_Techniques/histogram/doc/histogram.doc similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/doc/histogram.doc rename to cpp/2_Concepts_and_Techniques/histogram/doc/histogram.doc diff --git a/Samples/2_Concepts_and_Techniques/histogram/doc/histogram.pdf b/cpp/2_Concepts_and_Techniques/histogram/doc/histogram.pdf similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/doc/histogram.pdf rename to cpp/2_Concepts_and_Techniques/histogram/doc/histogram.pdf diff --git a/Samples/2_Concepts_and_Techniques/histogram/doc/histogram.vsd b/cpp/2_Concepts_and_Techniques/histogram/doc/histogram.vsd similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/doc/histogram.vsd rename to cpp/2_Concepts_and_Techniques/histogram/doc/histogram.vsd diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram256.cu b/cpp/2_Concepts_and_Techniques/histogram/histogram256.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/histogram256.cu rename to cpp/2_Concepts_and_Techniques/histogram/histogram256.cu diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram64.cu b/cpp/2_Concepts_and_Techniques/histogram/histogram64.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/histogram64.cu rename to cpp/2_Concepts_and_Techniques/histogram/histogram64.cu diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_common.h b/cpp/2_Concepts_and_Techniques/histogram/histogram_common.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/histogram_common.h rename to cpp/2_Concepts_and_Techniques/histogram/histogram_common.h diff --git a/Samples/2_Concepts_and_Techniques/histogram/histogram_gold.cpp b/cpp/2_Concepts_and_Techniques/histogram/histogram_gold.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/histogram_gold.cpp rename to cpp/2_Concepts_and_Techniques/histogram/histogram_gold.cpp diff --git a/Samples/2_Concepts_and_Techniques/histogram/main.cpp b/cpp/2_Concepts_and_Techniques/histogram/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/histogram/main.cpp rename to cpp/2_Concepts_and_Techniques/histogram/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/imageDenoising/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/imageDenoising/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/imageDenoising/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/imageDenoising/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/imageDenoising/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/README.md b/cpp/2_Concepts_and_Techniques/imageDenoising/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/README.md rename to cpp/2_Concepts_and_Techniques/imageDenoising/README.md diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/bmploader.cpp b/cpp/2_Concepts_and_Techniques/imageDenoising/bmploader.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/bmploader.cpp rename to cpp/2_Concepts_and_Techniques/imageDenoising/bmploader.cpp diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/data/portrait_noise.bmp b/cpp/2_Concepts_and_Techniques/imageDenoising/data/portrait_noise.bmp similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/data/portrait_noise.bmp rename to cpp/2_Concepts_and_Techniques/imageDenoising/data/portrait_noise.bmp diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/data/ref_knn.ppm b/cpp/2_Concepts_and_Techniques/imageDenoising/data/ref_knn.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/data/ref_knn.ppm rename to cpp/2_Concepts_and_Techniques/imageDenoising/data/ref_knn.ppm diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm.ppm b/cpp/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm.ppm rename to cpp/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm.ppm diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm2.ppm b/cpp/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm2.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm2.ppm rename to cpp/2_Concepts_and_Techniques/imageDenoising/data/ref_nlm2.ppm diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/data/ref_passthru.ppm b/cpp/2_Concepts_and_Techniques/imageDenoising/data/ref_passthru.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/data/ref_passthru.ppm rename to cpp/2_Concepts_and_Techniques/imageDenoising/data/ref_passthru.ppm diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/doc/NLM_lg.png b/cpp/2_Concepts_and_Techniques/imageDenoising/doc/NLM_lg.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/doc/NLM_lg.png rename to cpp/2_Concepts_and_Techniques/imageDenoising/doc/NLM_lg.png diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/doc/NLM_md.png b/cpp/2_Concepts_and_Techniques/imageDenoising/doc/NLM_md.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/doc/NLM_md.png rename to cpp/2_Concepts_and_Techniques/imageDenoising/doc/NLM_md.png diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/doc/NLM_sm.png b/cpp/2_Concepts_and_Techniques/imageDenoising/doc/NLM_sm.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/doc/NLM_sm.png rename to cpp/2_Concepts_and_Techniques/imageDenoising/doc/NLM_sm.png diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.doc b/cpp/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.doc similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.doc rename to cpp/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.doc diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.pdf b/cpp/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.pdf similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.pdf rename to cpp/2_Concepts_and_Techniques/imageDenoising/doc/imageDenoising.pdf diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising.cu b/cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising.cu rename to cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising.cu diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising.h b/cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising.h rename to cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising.h diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoisingGL.cpp b/cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoisingGL.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoisingGL.cpp rename to cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoisingGL.cpp diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_copy_kernel.cuh b/cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising_copy_kernel.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_copy_kernel.cuh rename to cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising_copy_kernel.cuh diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_knn_kernel.cuh b/cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising_knn_kernel.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_knn_kernel.cuh rename to cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising_knn_kernel.cuh diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm2_kernel.cuh b/cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm2_kernel.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm2_kernel.cuh rename to cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm2_kernel.cuh diff --git a/Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm_kernel.cuh b/cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm_kernel.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm_kernel.cuh rename to cpp/2_Concepts_and_Techniques/imageDenoising/imageDenoising_nlm_kernel.cuh diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/inlinePTX/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/inlinePTX/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/inlinePTX/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/inlinePTX/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/inlinePTX/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/README.md b/cpp/2_Concepts_and_Techniques/inlinePTX/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX/README.md rename to cpp/2_Concepts_and_Techniques/inlinePTX/README.md diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX.cu b/cpp/2_Concepts_and_Techniques/inlinePTX/inlinePTX.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX/inlinePTX.cu rename to cpp/2_Concepts_and_Techniques/inlinePTX/inlinePTX.cu diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md b/cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md rename to cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/README.md diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX.cpp b/cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX.cpp rename to cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX.cpp diff --git a/Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_kernel.cu b/cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_kernel.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_kernel.cu rename to cpp/2_Concepts_and_Techniques/inlinePTX_nvrtc/inlinePTX_kernel.cu diff --git a/Samples/2_Concepts_and_Techniques/interval/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/interval/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/interval/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/interval/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/interval/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/interval/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/interval/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/interval/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/interval/README.md b/cpp/2_Concepts_and_Techniques/interval/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/README.md rename to cpp/2_Concepts_and_Techniques/interval/README.md diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/abi/borland_prefix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/abi/borland_prefix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/abi/borland_prefix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/abi/borland_prefix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/abi/borland_suffix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/abi/borland_suffix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/abi/borland_suffix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/abi/borland_suffix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_prefix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_prefix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_prefix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_prefix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_suffix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_suffix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_suffix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/abi/msvc_suffix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/abi_prefix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/abi_prefix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/abi_prefix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/abi_prefix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/abi_suffix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/abi_suffix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/abi_suffix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/abi_suffix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/auto_link.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/auto_link.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/auto_link.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/auto_link.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/borland.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/borland.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/borland.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/borland.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/codegear.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/codegear.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/codegear.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/codegear.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/comeau.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/comeau.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/comeau.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/comeau.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/common_edg.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/common_edg.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/common_edg.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/common_edg.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/compaq_cxx.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/compaq_cxx.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/compaq_cxx.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/compaq_cxx.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/digitalmars.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/digitalmars.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/digitalmars.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/digitalmars.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc_xml.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc_xml.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc_xml.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/gcc_xml.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/greenhills.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/greenhills.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/greenhills.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/greenhills.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/hp_acc.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/hp_acc.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/hp_acc.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/hp_acc.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/intel.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/intel.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/intel.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/intel.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/kai.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/kai.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/kai.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/kai.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/metrowerks.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/metrowerks.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/metrowerks.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/metrowerks.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/mpw.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/mpw.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/mpw.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/mpw.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/pgi.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/pgi.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/pgi.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/pgi.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/sgi_mipspro.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/sgi_mipspro.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/sgi_mipspro.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/sgi_mipspro.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/sunpro_cc.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/sunpro_cc.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/sunpro_cc.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/sunpro_cc.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/vacpp.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/vacpp.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/vacpp.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/vacpp.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/visualc.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/visualc.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/compiler/visualc.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/compiler/visualc.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/cmath.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/cmath.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/cmath.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/cmath.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/complex.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/complex.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/complex.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/complex.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/functional.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/functional.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/functional.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/functional.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/memory.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/memory.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/memory.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/memory.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/utility.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/utility.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/no_tr1/utility.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/no_tr1/utility.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/aix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/aix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/aix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/aix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/amigaos.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/amigaos.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/amigaos.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/amigaos.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/beos.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/beos.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/beos.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/beos.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/bsd.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/bsd.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/bsd.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/bsd.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/cygwin.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/cygwin.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/cygwin.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/cygwin.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/hpux.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/hpux.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/hpux.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/hpux.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/irix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/irix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/irix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/irix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/linux.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/linux.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/linux.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/linux.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/macos.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/macos.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/macos.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/macos.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/qnxnto.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/qnxnto.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/qnxnto.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/qnxnto.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/solaris.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/solaris.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/solaris.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/solaris.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/vxworks.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/vxworks.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/vxworks.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/vxworks.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/platform/win32.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/platform/win32.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/platform/win32.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/platform/win32.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/posix_features.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/posix_features.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/posix_features.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/posix_features.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/requires_threads.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/requires_threads.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/requires_threads.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/requires_threads.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/select_compiler_config.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/select_compiler_config.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/select_compiler_config.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/select_compiler_config.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/select_platform_config.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/select_platform_config.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/select_platform_config.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/select_platform_config.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/select_stdlib_config.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/select_stdlib_config.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/select_stdlib_config.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/select_stdlib_config.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/dinkumware.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/dinkumware.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/dinkumware.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/dinkumware.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/libcomo.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/libcomo.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/libcomo.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/libcomo.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/libstdcpp3.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/libstdcpp3.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/libstdcpp3.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/libstdcpp3.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/modena.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/modena.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/modena.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/modena.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/msl.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/msl.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/msl.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/msl.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/roguewave.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/roguewave.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/roguewave.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/roguewave.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/sgi.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/sgi.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/sgi.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/sgi.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/stlport.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/stlport.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/stlport.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/stlport.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/vacpp.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/vacpp.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/stdlib/vacpp.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/stdlib/vacpp.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/suffix.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/suffix.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/suffix.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/suffix.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/user.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/user.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/user.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/user.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/config/warning_disable.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/config/warning_disable.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/config/warning_disable.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/config/warning_disable.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/limits.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/limits.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/limits.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/limits.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith2.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith2.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith2.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith2.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith3.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith3.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith3.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/arith3.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/checking.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/checking.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/checking.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/checking.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/certain.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/certain.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/certain.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/certain.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/explicit.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/explicit.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/explicit.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/explicit.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/lexicographic.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/lexicographic.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/lexicographic.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/lexicographic.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/possible.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/possible.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/possible.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/possible.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/set.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/set.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/set.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/set.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/tribool.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/tribool.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/tribool.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/compare/tribool.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/constants.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/constants.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/constants.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/constants.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/alpha_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/alpha_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/alpha_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/alpha_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bcc_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bcc_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bcc_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bcc_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bugs.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bugs.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bugs.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/bugs.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99sub_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99sub_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99sub_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/c99sub_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/division.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/division.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/division.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/division.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ia64_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ia64_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ia64_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ia64_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/interval_prototype.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/interval_prototype.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/interval_prototype.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/interval_prototype.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/msvc_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/msvc_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/msvc_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/msvc_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ppc_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ppc_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ppc_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/ppc_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/sparc_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/sparc_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/sparc_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/sparc_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/test_input.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/test_input.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/test_input.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/test_input.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86gcc_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86gcc_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86gcc_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/detail/x86gcc_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/integer.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/integer.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/integer.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/integer.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/x86_fast_rounding_control.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/x86_fast_rounding_control.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/x86_fast_rounding_control.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/ext/x86_fast_rounding_control.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/hw_rounding.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/hw_rounding.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/hw_rounding.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/hw_rounding.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/interval.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/interval.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/interval.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/interval.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/io.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/io.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/io.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/io.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/limits.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/limits.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/limits.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/limits.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/policies.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/policies.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/policies.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/policies.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_arith.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_arith.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_arith.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_arith.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_transc.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_transc.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_transc.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounded_transc.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounding.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounding.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounding.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/rounding.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/transc.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/transc.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/transc.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/transc.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/utility.hpp b/cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/utility.hpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/boost/numeric/interval/utility.hpp rename to cpp/2_Concepts_and_Techniques/interval/boost/numeric/interval/utility.hpp diff --git a/Samples/2_Concepts_and_Techniques/interval/cpu_interval.h b/cpp/2_Concepts_and_Techniques/interval/cpu_interval.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/cpu_interval.h rename to cpp/2_Concepts_and_Techniques/interval/cpu_interval.h diff --git a/Samples/2_Concepts_and_Techniques/interval/cuda_interval.h b/cpp/2_Concepts_and_Techniques/interval/cuda_interval.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/cuda_interval.h rename to cpp/2_Concepts_and_Techniques/interval/cuda_interval.h diff --git a/Samples/2_Concepts_and_Techniques/interval/cuda_interval_lib.h b/cpp/2_Concepts_and_Techniques/interval/cuda_interval_lib.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/cuda_interval_lib.h rename to cpp/2_Concepts_and_Techniques/interval/cuda_interval_lib.h diff --git a/Samples/2_Concepts_and_Techniques/interval/cuda_interval_rounded_arith.h b/cpp/2_Concepts_and_Techniques/interval/cuda_interval_rounded_arith.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/cuda_interval_rounded_arith.h rename to cpp/2_Concepts_and_Techniques/interval/cuda_interval_rounded_arith.h diff --git a/Samples/2_Concepts_and_Techniques/interval/interval.cu b/cpp/2_Concepts_and_Techniques/interval/interval.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/interval.cu rename to cpp/2_Concepts_and_Techniques/interval/interval.cu diff --git a/Samples/2_Concepts_and_Techniques/interval/interval.h b/cpp/2_Concepts_and_Techniques/interval/interval.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/interval/interval.h rename to cpp/2_Concepts_and_Techniques/interval/interval.h diff --git a/Samples/2_Concepts_and_Techniques/particles/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/particles/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/particles/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/particles/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/particles/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/particles/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/particles/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/particles/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/particles/README.md b/cpp/2_Concepts_and_Techniques/particles/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/README.md rename to cpp/2_Concepts_and_Techniques/particles/README.md diff --git a/Samples/2_Concepts_and_Techniques/particles/data/ref_particles.bin b/cpp/2_Concepts_and_Techniques/particles/data/ref_particles.bin similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/data/ref_particles.bin rename to cpp/2_Concepts_and_Techniques/particles/data/ref_particles.bin diff --git a/Samples/2_Concepts_and_Techniques/particles/doc/particles.doc b/cpp/2_Concepts_and_Techniques/particles/doc/particles.doc similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/doc/particles.doc rename to cpp/2_Concepts_and_Techniques/particles/doc/particles.doc diff --git a/Samples/2_Concepts_and_Techniques/particles/doc/particles.pdf b/cpp/2_Concepts_and_Techniques/particles/doc/particles.pdf similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/doc/particles.pdf rename to cpp/2_Concepts_and_Techniques/particles/doc/particles.pdf diff --git a/Samples/2_Concepts_and_Techniques/particles/doc/screenshot_lg.png b/cpp/2_Concepts_and_Techniques/particles/doc/screenshot_lg.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/doc/screenshot_lg.png rename to cpp/2_Concepts_and_Techniques/particles/doc/screenshot_lg.png diff --git a/Samples/2_Concepts_and_Techniques/particles/doc/screenshot_md.png b/cpp/2_Concepts_and_Techniques/particles/doc/screenshot_md.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/doc/screenshot_md.png rename to cpp/2_Concepts_and_Techniques/particles/doc/screenshot_md.png diff --git a/Samples/2_Concepts_and_Techniques/particles/doc/screenshot_sm.png b/cpp/2_Concepts_and_Techniques/particles/doc/screenshot_sm.png similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/doc/screenshot_sm.png rename to cpp/2_Concepts_and_Techniques/particles/doc/screenshot_sm.png diff --git a/Samples/2_Concepts_and_Techniques/particles/particleSystem.cpp b/cpp/2_Concepts_and_Techniques/particles/particleSystem.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/particleSystem.cpp rename to cpp/2_Concepts_and_Techniques/particles/particleSystem.cpp diff --git a/Samples/2_Concepts_and_Techniques/particles/particleSystem.cuh b/cpp/2_Concepts_and_Techniques/particles/particleSystem.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/particleSystem.cuh rename to cpp/2_Concepts_and_Techniques/particles/particleSystem.cuh diff --git a/Samples/2_Concepts_and_Techniques/particles/particleSystem.h b/cpp/2_Concepts_and_Techniques/particles/particleSystem.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/particleSystem.h rename to cpp/2_Concepts_and_Techniques/particles/particleSystem.h diff --git a/Samples/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu b/cpp/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu rename to cpp/2_Concepts_and_Techniques/particles/particleSystem_cuda.cu diff --git a/Samples/2_Concepts_and_Techniques/particles/particles.cpp b/cpp/2_Concepts_and_Techniques/particles/particles.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/particles.cpp rename to cpp/2_Concepts_and_Techniques/particles/particles.cpp diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_kernel.cuh b/cpp/2_Concepts_and_Techniques/particles/particles_kernel.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/particles_kernel.cuh rename to cpp/2_Concepts_and_Techniques/particles/particles_kernel.cuh diff --git a/Samples/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh b/cpp/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh rename to cpp/2_Concepts_and_Techniques/particles/particles_kernel_impl.cuh diff --git a/Samples/2_Concepts_and_Techniques/particles/render_particles.cpp b/cpp/2_Concepts_and_Techniques/particles/render_particles.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/render_particles.cpp rename to cpp/2_Concepts_and_Techniques/particles/render_particles.cpp diff --git a/Samples/2_Concepts_and_Techniques/particles/render_particles.h b/cpp/2_Concepts_and_Techniques/particles/render_particles.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/render_particles.h rename to cpp/2_Concepts_and_Techniques/particles/render_particles.h diff --git a/Samples/2_Concepts_and_Techniques/particles/shaders.cpp b/cpp/2_Concepts_and_Techniques/particles/shaders.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/shaders.cpp rename to cpp/2_Concepts_and_Techniques/particles/shaders.cpp diff --git a/Samples/2_Concepts_and_Techniques/particles/shaders.h b/cpp/2_Concepts_and_Techniques/particles/shaders.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/particles/shaders.h rename to cpp/2_Concepts_and_Techniques/particles/shaders.h diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/radixSortThrust/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/radixSortThrust/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/radixSortThrust/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/radixSortThrust/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/radixSortThrust/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/radixSortThrust/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/radixSortThrust/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/README.md b/cpp/2_Concepts_and_Techniques/radixSortThrust/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/radixSortThrust/README.md rename to cpp/2_Concepts_and_Techniques/radixSortThrust/README.md diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/doc/readme.txt b/cpp/2_Concepts_and_Techniques/radixSortThrust/doc/readme.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/radixSortThrust/doc/readme.txt rename to cpp/2_Concepts_and_Techniques/radixSortThrust/doc/readme.txt diff --git a/Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust.cu b/cpp/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust.cu rename to cpp/2_Concepts_and_Techniques/radixSortThrust/radixSortThrust.cu diff --git a/Samples/2_Concepts_and_Techniques/reduction/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/reduction/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/reduction/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/reduction/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/reduction/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/reduction/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/reduction/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/reduction/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/reduction/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/reduction/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/reduction/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/reduction/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/reduction/README.md b/cpp/2_Concepts_and_Techniques/reduction/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/reduction/README.md rename to cpp/2_Concepts_and_Techniques/reduction/README.md diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction.cpp b/cpp/2_Concepts_and_Techniques/reduction/reduction.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/reduction/reduction.cpp rename to cpp/2_Concepts_and_Techniques/reduction/reduction.cpp diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction.h b/cpp/2_Concepts_and_Techniques/reduction/reduction.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/reduction/reduction.h rename to cpp/2_Concepts_and_Techniques/reduction/reduction.h diff --git a/Samples/2_Concepts_and_Techniques/reduction/reduction_kernel.cu b/cpp/2_Concepts_and_Techniques/reduction/reduction_kernel.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/reduction/reduction_kernel.cu rename to cpp/2_Concepts_and_Techniques/reduction/reduction_kernel.cu diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md b/cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md rename to cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/README.md diff --git a/Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG.cu b/cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG.cu rename to cpp/2_Concepts_and_Techniques/reductionMultiBlockCG/reductionMultiBlockCG.cu diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/scalarProd/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/scalarProd/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/scalarProd/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/scalarProd/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/scalarProd/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/scalarProd/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/scalarProd/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/README.md b/cpp/2_Concepts_and_Techniques/scalarProd/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/scalarProd/README.md rename to cpp/2_Concepts_and_Techniques/scalarProd/README.md diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd.cu b/cpp/2_Concepts_and_Techniques/scalarProd/scalarProd.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/scalarProd/scalarProd.cu rename to cpp/2_Concepts_and_Techniques/scalarProd/scalarProd.cu diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_cpu.cpp b/cpp/2_Concepts_and_Techniques/scalarProd/scalarProd_cpu.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_cpu.cpp rename to cpp/2_Concepts_and_Techniques/scalarProd/scalarProd_cpu.cpp diff --git a/Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_kernel.cuh b/cpp/2_Concepts_and_Techniques/scalarProd/scalarProd_kernel.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/scalarProd/scalarProd_kernel.cuh rename to cpp/2_Concepts_and_Techniques/scalarProd/scalarProd_kernel.cuh diff --git a/Samples/2_Concepts_and_Techniques/scan/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/scan/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/scan/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/scan/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/scan/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/scan/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/scan/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/scan/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/scan/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/scan/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/scan/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/scan/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/scan/README.md b/cpp/2_Concepts_and_Techniques/scan/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/scan/README.md rename to cpp/2_Concepts_and_Techniques/scan/README.md diff --git a/Samples/2_Concepts_and_Techniques/scan/main.cpp b/cpp/2_Concepts_and_Techniques/scan/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/scan/main.cpp rename to cpp/2_Concepts_and_Techniques/scan/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/scan/scan.cu b/cpp/2_Concepts_and_Techniques/scan/scan.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/scan/scan.cu rename to cpp/2_Concepts_and_Techniques/scan/scan.cu diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_common.h b/cpp/2_Concepts_and_Techniques/scan/scan_common.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/scan/scan_common.h rename to cpp/2_Concepts_and_Techniques/scan/scan_common.h diff --git a/Samples/2_Concepts_and_Techniques/scan/scan_gold.cpp b/cpp/2_Concepts_and_Techniques/scan/scan_gold.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/scan/scan_gold.cpp rename to cpp/2_Concepts_and_Techniques/scan/scan_gold.cpp diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/README.md rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/README.md diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/common.cuh b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/common.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/common.cuh rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/common.cuh diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_00.ppm b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_00.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_00.ppm rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_00.ppm diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_09.ppm b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_09.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_09.ppm rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/data/ref_09.ppm diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/data/test.ppm b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/data/test.ppm similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/data/test.ppm rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/data/test.ppm diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/kernels.cuh b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/kernels.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/kernels.cuh rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/kernels.cuh diff --git a/Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu b/cpp/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu rename to cpp/2_Concepts_and_Techniques/segmentationTreeThrust/segmentationTree.cu diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/shfl_scan/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/shfl_scan/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/shfl_scan/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/shfl_scan/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/shfl_scan/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/shfl_scan/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/shfl_scan/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/README.md b/cpp/2_Concepts_and_Techniques/shfl_scan/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/shfl_scan/README.md rename to cpp/2_Concepts_and_Techniques/shfl_scan/README.md diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_integral_image.cuh b/cpp/2_Concepts_and_Techniques/shfl_scan/shfl_integral_image.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/shfl_scan/shfl_integral_image.cuh rename to cpp/2_Concepts_and_Techniques/shfl_scan/shfl_integral_image.cuh diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu b/cpp/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu rename to cpp/2_Concepts_and_Techniques/shfl_scan/shfl_scan.cu diff --git a/Samples/2_Concepts_and_Techniques/shfl_scan/util.h b/cpp/2_Concepts_and_Techniques/shfl_scan/util.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/shfl_scan/util.h rename to cpp/2_Concepts_and_Techniques/shfl_scan/util.h diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/sortingNetworks/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/sortingNetworks/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/sortingNetworks/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/sortingNetworks/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/sortingNetworks/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/README.md b/cpp/2_Concepts_and_Techniques/sortingNetworks/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/README.md rename to cpp/2_Concepts_and_Techniques/sortingNetworks/README.md diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/bitonicSort.cu b/cpp/2_Concepts_and_Techniques/sortingNetworks/bitonicSort.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/bitonicSort.cu rename to cpp/2_Concepts_and_Techniques/sortingNetworks/bitonicSort.cu diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/main.cpp b/cpp/2_Concepts_and_Techniques/sortingNetworks/main.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/main.cpp rename to cpp/2_Concepts_and_Techniques/sortingNetworks/main.cpp diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/oddEvenMergeSort.cu b/cpp/2_Concepts_and_Techniques/sortingNetworks/oddEvenMergeSort.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/oddEvenMergeSort.cu rename to cpp/2_Concepts_and_Techniques/sortingNetworks/oddEvenMergeSort.cu diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.cuh b/cpp/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.cuh rename to cpp/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.cuh diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.h b/cpp/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.h rename to cpp/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_common.h diff --git a/Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_validate.cpp b/cpp/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_validate.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_validate.cpp rename to cpp/2_Concepts_and_Techniques/sortingNetworks/sortingNetworks_validate.cpp diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocation/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocation/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md b/cpp/2_Concepts_and_Techniques/streamOrderedAllocation/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocation/README.md rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocation/README.md diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation.cu b/cpp/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation.cu rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocation/streamOrderedAllocation.cu diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/README.md diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationIPC/streamOrderedAllocationIPC.cu diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/README.md diff --git a/Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P.cu b/cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P.cu rename to cpp/2_Concepts_and_Techniques/streamOrderedAllocationP2P/streamOrderedAllocationP2P.cu diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/threadFenceReduction/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadFenceReduction/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/threadFenceReduction/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/threadFenceReduction/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadFenceReduction/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/threadFenceReduction/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/threadFenceReduction/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md b/cpp/2_Concepts_and_Techniques/threadFenceReduction/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadFenceReduction/README.md rename to cpp/2_Concepts_and_Techniques/threadFenceReduction/README.md diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.cu b/cpp/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.cu rename to cpp/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.cu diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.h b/cpp/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.h similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.h rename to cpp/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction.h diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_kernel.cuh b/cpp/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_kernel.cuh similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_kernel.cuh rename to cpp/2_Concepts_and_Techniques/threadFenceReduction/threadFenceReduction_kernel.cuh diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/.vscode/c_cpp_properties.json b/cpp/2_Concepts_and_Techniques/threadMigration/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadMigration/.vscode/c_cpp_properties.json rename to cpp/2_Concepts_and_Techniques/threadMigration/.vscode/c_cpp_properties.json diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/.vscode/extensions.json b/cpp/2_Concepts_and_Techniques/threadMigration/.vscode/extensions.json similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadMigration/.vscode/extensions.json rename to cpp/2_Concepts_and_Techniques/threadMigration/.vscode/extensions.json diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt b/cpp/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt rename to cpp/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/README.md b/cpp/2_Concepts_and_Techniques/threadMigration/README.md similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadMigration/README.md rename to cpp/2_Concepts_and_Techniques/threadMigration/README.md diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp b/cpp/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp rename to cpp/2_Concepts_and_Techniques/threadMigration/threadMigration.cpp diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_kernel.cu b/cpp/2_Concepts_and_Techniques/threadMigration/threadMigration_kernel.cu similarity index 100% rename from Samples/2_Concepts_and_Techniques/threadMigration/threadMigration_kernel.cu rename to cpp/2_Concepts_and_Techniques/threadMigration/threadMigration_kernel.cu diff --git a/Samples/3_CUDA_Features/CMakeLists.txt b/cpp/3_CUDA_Features/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/CMakeLists.txt rename to cpp/3_CUDA_Features/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/README.md b/cpp/3_CUDA_Features/README.md similarity index 100% rename from Samples/3_CUDA_Features/README.md rename to cpp/3_CUDA_Features/README.md diff --git a/Samples/3_CUDA_Features/StreamPriorities/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/StreamPriorities/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/StreamPriorities/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/StreamPriorities/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/StreamPriorities/.vscode/extensions.json b/cpp/3_CUDA_Features/StreamPriorities/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/StreamPriorities/.vscode/extensions.json rename to cpp/3_CUDA_Features/StreamPriorities/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/StreamPriorities/CMakeLists.txt b/cpp/3_CUDA_Features/StreamPriorities/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/StreamPriorities/CMakeLists.txt rename to cpp/3_CUDA_Features/StreamPriorities/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/StreamPriorities/README.md b/cpp/3_CUDA_Features/StreamPriorities/README.md similarity index 100% rename from Samples/3_CUDA_Features/StreamPriorities/README.md rename to cpp/3_CUDA_Features/StreamPriorities/README.md diff --git a/Samples/3_CUDA_Features/StreamPriorities/StreamPriorities.cu b/cpp/3_CUDA_Features/StreamPriorities/StreamPriorities.cu similarity index 100% rename from Samples/3_CUDA_Features/StreamPriorities/StreamPriorities.cu rename to cpp/3_CUDA_Features/StreamPriorities/StreamPriorities.cu diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/bf16TensorCoreGemm/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/bf16TensorCoreGemm/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/bf16TensorCoreGemm/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/.vscode/extensions.json b/cpp/3_CUDA_Features/bf16TensorCoreGemm/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/bf16TensorCoreGemm/.vscode/extensions.json rename to cpp/3_CUDA_Features/bf16TensorCoreGemm/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt b/cpp/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt rename to cpp/3_CUDA_Features/bf16TensorCoreGemm/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md b/cpp/3_CUDA_Features/bf16TensorCoreGemm/README.md similarity index 100% rename from Samples/3_CUDA_Features/bf16TensorCoreGemm/README.md rename to cpp/3_CUDA_Features/bf16TensorCoreGemm/README.md diff --git a/Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu b/cpp/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu similarity index 100% rename from Samples/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu rename to cpp/3_CUDA_Features/bf16TensorCoreGemm/bf16TensorCoreGemm.cu diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/binaryPartitionCG/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/binaryPartitionCG/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/binaryPartitionCG/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/.vscode/extensions.json b/cpp/3_CUDA_Features/binaryPartitionCG/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/binaryPartitionCG/.vscode/extensions.json rename to cpp/3_CUDA_Features/binaryPartitionCG/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt b/cpp/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt rename to cpp/3_CUDA_Features/binaryPartitionCG/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/README.md b/cpp/3_CUDA_Features/binaryPartitionCG/README.md similarity index 100% rename from Samples/3_CUDA_Features/binaryPartitionCG/README.md rename to cpp/3_CUDA_Features/binaryPartitionCG/README.md diff --git a/Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG.cu b/cpp/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG.cu similarity index 100% rename from Samples/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG.cu rename to cpp/3_CUDA_Features/binaryPartitionCG/binaryPartitionCG.cu diff --git a/Samples/3_CUDA_Features/bindlessTexture/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/bindlessTexture/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/bindlessTexture/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/bindlessTexture/.vscode/extensions.json b/cpp/3_CUDA_Features/bindlessTexture/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/.vscode/extensions.json rename to cpp/3_CUDA_Features/bindlessTexture/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/bindlessTexture/CMakeLists.txt b/cpp/3_CUDA_Features/bindlessTexture/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/CMakeLists.txt rename to cpp/3_CUDA_Features/bindlessTexture/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/bindlessTexture/README.md b/cpp/3_CUDA_Features/bindlessTexture/README.md similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/README.md rename to cpp/3_CUDA_Features/bindlessTexture/README.md diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture.cpp b/cpp/3_CUDA_Features/bindlessTexture/bindlessTexture.cpp similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/bindlessTexture.cpp rename to cpp/3_CUDA_Features/bindlessTexture/bindlessTexture.cpp diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture.h b/cpp/3_CUDA_Features/bindlessTexture/bindlessTexture.h similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/bindlessTexture.h rename to cpp/3_CUDA_Features/bindlessTexture/bindlessTexture.h diff --git a/Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_kernel.cu b/cpp/3_CUDA_Features/bindlessTexture/bindlessTexture_kernel.cu similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/bindlessTexture_kernel.cu rename to cpp/3_CUDA_Features/bindlessTexture/bindlessTexture_kernel.cu diff --git a/Samples/3_CUDA_Features/bindlessTexture/data/flower.ppm b/cpp/3_CUDA_Features/bindlessTexture/data/flower.ppm similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/data/flower.ppm rename to cpp/3_CUDA_Features/bindlessTexture/data/flower.ppm diff --git a/Samples/3_CUDA_Features/bindlessTexture/data/person.ppm b/cpp/3_CUDA_Features/bindlessTexture/data/person.ppm similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/data/person.ppm rename to cpp/3_CUDA_Features/bindlessTexture/data/person.ppm diff --git a/Samples/3_CUDA_Features/bindlessTexture/data/ref_bindlessTexture.bin b/cpp/3_CUDA_Features/bindlessTexture/data/ref_bindlessTexture.bin similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/data/ref_bindlessTexture.bin rename to cpp/3_CUDA_Features/bindlessTexture/data/ref_bindlessTexture.bin diff --git a/Samples/3_CUDA_Features/bindlessTexture/data/sponge.ppm b/cpp/3_CUDA_Features/bindlessTexture/data/sponge.ppm similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/data/sponge.ppm rename to cpp/3_CUDA_Features/bindlessTexture/data/sponge.ppm diff --git a/Samples/3_CUDA_Features/bindlessTexture/doc/sshot_lg.JPG b/cpp/3_CUDA_Features/bindlessTexture/doc/sshot_lg.JPG similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/doc/sshot_lg.JPG rename to cpp/3_CUDA_Features/bindlessTexture/doc/sshot_lg.JPG diff --git a/Samples/3_CUDA_Features/bindlessTexture/doc/sshot_md.JPG b/cpp/3_CUDA_Features/bindlessTexture/doc/sshot_md.JPG similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/doc/sshot_md.JPG rename to cpp/3_CUDA_Features/bindlessTexture/doc/sshot_md.JPG diff --git a/Samples/3_CUDA_Features/bindlessTexture/doc/sshot_sm.JPG b/cpp/3_CUDA_Features/bindlessTexture/doc/sshot_sm.JPG similarity index 100% rename from Samples/3_CUDA_Features/bindlessTexture/doc/sshot_sm.JPG rename to cpp/3_CUDA_Features/bindlessTexture/doc/sshot_sm.JPG diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/extensions.json b/cpp/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/extensions.json rename to cpp/3_CUDA_Features/cdpAdvancedQuicksort/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt b/cpp/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt rename to cpp/3_CUDA_Features/cdpAdvancedQuicksort/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md b/cpp/3_CUDA_Features/cdpAdvancedQuicksort/README.md similarity index 100% rename from Samples/3_CUDA_Features/cdpAdvancedQuicksort/README.md rename to cpp/3_CUDA_Features/cdpAdvancedQuicksort/README.md diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort.cu b/cpp/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort.cu similarity index 100% rename from Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort.cu rename to cpp/3_CUDA_Features/cdpAdvancedQuicksort/cdpAdvancedQuicksort.cu diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpBitonicSort.cu b/cpp/3_CUDA_Features/cdpAdvancedQuicksort/cdpBitonicSort.cu similarity index 100% rename from Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpBitonicSort.cu rename to cpp/3_CUDA_Features/cdpAdvancedQuicksort/cdpBitonicSort.cu diff --git a/Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpQuicksort.h b/cpp/3_CUDA_Features/cdpAdvancedQuicksort/cdpQuicksort.h similarity index 100% rename from Samples/3_CUDA_Features/cdpAdvancedQuicksort/cdpQuicksort.h rename to cpp/3_CUDA_Features/cdpAdvancedQuicksort/cdpQuicksort.h diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/cdpBezierTessellation/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/cdpBezierTessellation/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/cdpBezierTessellation/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/.vscode/extensions.json b/cpp/3_CUDA_Features/cdpBezierTessellation/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/cdpBezierTessellation/.vscode/extensions.json rename to cpp/3_CUDA_Features/cdpBezierTessellation/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/BezierLineCDP.cu b/cpp/3_CUDA_Features/cdpBezierTessellation/BezierLineCDP.cu similarity index 100% rename from Samples/3_CUDA_Features/cdpBezierTessellation/BezierLineCDP.cu rename to cpp/3_CUDA_Features/cdpBezierTessellation/BezierLineCDP.cu diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt b/cpp/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt rename to cpp/3_CUDA_Features/cdpBezierTessellation/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/cdpBezierTessellation/README.md b/cpp/3_CUDA_Features/cdpBezierTessellation/README.md similarity index 100% rename from Samples/3_CUDA_Features/cdpBezierTessellation/README.md rename to cpp/3_CUDA_Features/cdpBezierTessellation/README.md diff --git a/Samples/3_CUDA_Features/cdpQuadtree/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/cdpQuadtree/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/cdpQuadtree/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/cdpQuadtree/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/cdpQuadtree/.vscode/extensions.json b/cpp/3_CUDA_Features/cdpQuadtree/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/cdpQuadtree/.vscode/extensions.json rename to cpp/3_CUDA_Features/cdpQuadtree/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt b/cpp/3_CUDA_Features/cdpQuadtree/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/cdpQuadtree/CMakeLists.txt rename to cpp/3_CUDA_Features/cdpQuadtree/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/cdpQuadtree/README.md b/cpp/3_CUDA_Features/cdpQuadtree/README.md similarity index 100% rename from Samples/3_CUDA_Features/cdpQuadtree/README.md rename to cpp/3_CUDA_Features/cdpQuadtree/README.md diff --git a/Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu b/cpp/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu similarity index 100% rename from Samples/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu rename to cpp/3_CUDA_Features/cdpQuadtree/cdpQuadtree.cu diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/cdpSimplePrint/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/cdpSimplePrint/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/cdpSimplePrint/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/.vscode/extensions.json b/cpp/3_CUDA_Features/cdpSimplePrint/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/cdpSimplePrint/.vscode/extensions.json rename to cpp/3_CUDA_Features/cdpSimplePrint/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt b/cpp/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt rename to cpp/3_CUDA_Features/cdpSimplePrint/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/README.md b/cpp/3_CUDA_Features/cdpSimplePrint/README.md similarity index 100% rename from Samples/3_CUDA_Features/cdpSimplePrint/README.md rename to cpp/3_CUDA_Features/cdpSimplePrint/README.md diff --git a/Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint.cu b/cpp/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint.cu similarity index 100% rename from Samples/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint.cu rename to cpp/3_CUDA_Features/cdpSimplePrint/cdpSimplePrint.cu diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/cdpSimpleQuicksort/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/cdpSimpleQuicksort/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/cdpSimpleQuicksort/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/.vscode/extensions.json b/cpp/3_CUDA_Features/cdpSimpleQuicksort/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/cdpSimpleQuicksort/.vscode/extensions.json rename to cpp/3_CUDA_Features/cdpSimpleQuicksort/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt b/cpp/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt rename to cpp/3_CUDA_Features/cdpSimpleQuicksort/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md b/cpp/3_CUDA_Features/cdpSimpleQuicksort/README.md similarity index 100% rename from Samples/3_CUDA_Features/cdpSimpleQuicksort/README.md rename to cpp/3_CUDA_Features/cdpSimpleQuicksort/README.md diff --git a/Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort.cu b/cpp/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort.cu similarity index 100% rename from Samples/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort.cu rename to cpp/3_CUDA_Features/cdpSimpleQuicksort/cdpSimpleQuicksort.cu diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/cudaCompressibleMemory/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/cudaCompressibleMemory/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/cudaCompressibleMemory/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/.vscode/extensions.json b/cpp/3_CUDA_Features/cudaCompressibleMemory/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/cudaCompressibleMemory/.vscode/extensions.json rename to cpp/3_CUDA_Features/cudaCompressibleMemory/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt b/cpp/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt rename to cpp/3_CUDA_Features/cudaCompressibleMemory/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/README.md b/cpp/3_CUDA_Features/cudaCompressibleMemory/README.md similarity index 100% rename from Samples/3_CUDA_Features/cudaCompressibleMemory/README.md rename to cpp/3_CUDA_Features/cudaCompressibleMemory/README.md diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/compMalloc.cpp b/cpp/3_CUDA_Features/cudaCompressibleMemory/compMalloc.cpp similarity index 100% rename from Samples/3_CUDA_Features/cudaCompressibleMemory/compMalloc.cpp rename to cpp/3_CUDA_Features/cudaCompressibleMemory/compMalloc.cpp diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/compMalloc.h b/cpp/3_CUDA_Features/cudaCompressibleMemory/compMalloc.h similarity index 100% rename from Samples/3_CUDA_Features/cudaCompressibleMemory/compMalloc.h rename to cpp/3_CUDA_Features/cudaCompressibleMemory/compMalloc.h diff --git a/Samples/3_CUDA_Features/cudaCompressibleMemory/saxpy.cu b/cpp/3_CUDA_Features/cudaCompressibleMemory/saxpy.cu similarity index 100% rename from Samples/3_CUDA_Features/cudaCompressibleMemory/saxpy.cu rename to cpp/3_CUDA_Features/cudaCompressibleMemory/saxpy.cu diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/cudaTensorCoreGemm/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/cudaTensorCoreGemm/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/cudaTensorCoreGemm/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/.vscode/extensions.json b/cpp/3_CUDA_Features/cudaTensorCoreGemm/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/cudaTensorCoreGemm/.vscode/extensions.json rename to cpp/3_CUDA_Features/cudaTensorCoreGemm/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt b/cpp/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt rename to cpp/3_CUDA_Features/cudaTensorCoreGemm/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md b/cpp/3_CUDA_Features/cudaTensorCoreGemm/README.md similarity index 100% rename from Samples/3_CUDA_Features/cudaTensorCoreGemm/README.md rename to cpp/3_CUDA_Features/cudaTensorCoreGemm/README.md diff --git a/Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu b/cpp/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu similarity index 100% rename from Samples/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu rename to cpp/3_CUDA_Features/cudaTensorCoreGemm/cudaTensorCoreGemm.cu diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/extensions.json b/cpp/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/extensions.json rename to cpp/3_CUDA_Features/dmmaTensorCoreGemm/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt b/cpp/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt rename to cpp/3_CUDA_Features/dmmaTensorCoreGemm/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md b/cpp/3_CUDA_Features/dmmaTensorCoreGemm/README.md similarity index 100% rename from Samples/3_CUDA_Features/dmmaTensorCoreGemm/README.md rename to cpp/3_CUDA_Features/dmmaTensorCoreGemm/README.md diff --git a/Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm.cu b/cpp/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm.cu similarity index 100% rename from Samples/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm.cu rename to cpp/3_CUDA_Features/dmmaTensorCoreGemm/dmmaTensorCoreGemm.cu diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/extensions.json b/cpp/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/extensions.json rename to cpp/3_CUDA_Features/globalToShmemAsyncCopy/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt b/cpp/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt rename to cpp/3_CUDA_Features/globalToShmemAsyncCopy/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md b/cpp/3_CUDA_Features/globalToShmemAsyncCopy/README.md similarity index 100% rename from Samples/3_CUDA_Features/globalToShmemAsyncCopy/README.md rename to cpp/3_CUDA_Features/globalToShmemAsyncCopy/README.md diff --git a/Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu b/cpp/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu similarity index 100% rename from Samples/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu rename to cpp/3_CUDA_Features/globalToShmemAsyncCopy/globalToShmemAsyncCopy.cu diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/graphConditionalNodes/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/graphConditionalNodes/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/graphConditionalNodes/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/extensions.json b/cpp/3_CUDA_Features/graphConditionalNodes/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/graphConditionalNodes/.vscode/extensions.json rename to cpp/3_CUDA_Features/graphConditionalNodes/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt b/cpp/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt rename to cpp/3_CUDA_Features/graphConditionalNodes/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/README.md b/cpp/3_CUDA_Features/graphConditionalNodes/README.md similarity index 100% rename from Samples/3_CUDA_Features/graphConditionalNodes/README.md rename to cpp/3_CUDA_Features/graphConditionalNodes/README.md diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu b/cpp/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu similarity index 100% rename from Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu rename to cpp/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/graphMemoryFootprint/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryFootprint/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/graphMemoryFootprint/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/.vscode/extensions.json b/cpp/3_CUDA_Features/graphMemoryFootprint/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryFootprint/.vscode/extensions.json rename to cpp/3_CUDA_Features/graphMemoryFootprint/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt b/cpp/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt rename to cpp/3_CUDA_Features/graphMemoryFootprint/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/README.md b/cpp/3_CUDA_Features/graphMemoryFootprint/README.md similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryFootprint/README.md rename to cpp/3_CUDA_Features/graphMemoryFootprint/README.md diff --git a/Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu b/cpp/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu rename to cpp/3_CUDA_Features/graphMemoryFootprint/graphMemoryFootprint.cu diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/graphMemoryNodes/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryNodes/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/graphMemoryNodes/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/.vscode/extensions.json b/cpp/3_CUDA_Features/graphMemoryNodes/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryNodes/.vscode/extensions.json rename to cpp/3_CUDA_Features/graphMemoryNodes/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt b/cpp/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt rename to cpp/3_CUDA_Features/graphMemoryNodes/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/README.md b/cpp/3_CUDA_Features/graphMemoryNodes/README.md similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryNodes/README.md rename to cpp/3_CUDA_Features/graphMemoryNodes/README.md diff --git a/Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes.cu b/cpp/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes.cu similarity index 100% rename from Samples/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes.cu rename to cpp/3_CUDA_Features/graphMemoryNodes/graphMemoryNodes.cu diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/immaTensorCoreGemm/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/immaTensorCoreGemm/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/immaTensorCoreGemm/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/.vscode/extensions.json b/cpp/3_CUDA_Features/immaTensorCoreGemm/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/immaTensorCoreGemm/.vscode/extensions.json rename to cpp/3_CUDA_Features/immaTensorCoreGemm/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt b/cpp/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt rename to cpp/3_CUDA_Features/immaTensorCoreGemm/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/README.md b/cpp/3_CUDA_Features/immaTensorCoreGemm/README.md similarity index 100% rename from Samples/3_CUDA_Features/immaTensorCoreGemm/README.md rename to cpp/3_CUDA_Features/immaTensorCoreGemm/README.md diff --git a/Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm.cu b/cpp/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm.cu similarity index 100% rename from Samples/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm.cu rename to cpp/3_CUDA_Features/immaTensorCoreGemm/immaTensorCoreGemm.cu diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/jacobiCudaGraphs/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/jacobiCudaGraphs/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/jacobiCudaGraphs/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/.vscode/extensions.json b/cpp/3_CUDA_Features/jacobiCudaGraphs/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/jacobiCudaGraphs/.vscode/extensions.json rename to cpp/3_CUDA_Features/jacobiCudaGraphs/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt b/cpp/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt rename to cpp/3_CUDA_Features/jacobiCudaGraphs/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/README.md b/cpp/3_CUDA_Features/jacobiCudaGraphs/README.md similarity index 100% rename from Samples/3_CUDA_Features/jacobiCudaGraphs/README.md rename to cpp/3_CUDA_Features/jacobiCudaGraphs/README.md diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobi.cu b/cpp/3_CUDA_Features/jacobiCudaGraphs/jacobi.cu similarity index 100% rename from Samples/3_CUDA_Features/jacobiCudaGraphs/jacobi.cu rename to cpp/3_CUDA_Features/jacobiCudaGraphs/jacobi.cu diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/jacobi.h b/cpp/3_CUDA_Features/jacobiCudaGraphs/jacobi.h similarity index 100% rename from Samples/3_CUDA_Features/jacobiCudaGraphs/jacobi.h rename to cpp/3_CUDA_Features/jacobiCudaGraphs/jacobi.h diff --git a/Samples/3_CUDA_Features/jacobiCudaGraphs/main.cpp b/cpp/3_CUDA_Features/jacobiCudaGraphs/main.cpp similarity index 100% rename from Samples/3_CUDA_Features/jacobiCudaGraphs/main.cpp rename to cpp/3_CUDA_Features/jacobiCudaGraphs/main.cpp diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/memMapIPCDrv/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/memMapIPCDrv/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/memMapIPCDrv/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/.vscode/extensions.json b/cpp/3_CUDA_Features/memMapIPCDrv/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/memMapIPCDrv/.vscode/extensions.json rename to cpp/3_CUDA_Features/memMapIPCDrv/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt b/cpp/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt rename to cpp/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/README.md b/cpp/3_CUDA_Features/memMapIPCDrv/README.md similarity index 100% rename from Samples/3_CUDA_Features/memMapIPCDrv/README.md rename to cpp/3_CUDA_Features/memMapIPCDrv/README.md diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp b/cpp/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp similarity index 100% rename from Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp rename to cpp/3_CUDA_Features/memMapIPCDrv/memMapIpc.cpp diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc_kernel.cu b/cpp/3_CUDA_Features/memMapIPCDrv/memMapIpc_kernel.cu similarity index 100% rename from Samples/3_CUDA_Features/memMapIPCDrv/memMapIpc_kernel.cu rename to cpp/3_CUDA_Features/memMapIPCDrv/memMapIpc_kernel.cu diff --git a/Samples/3_CUDA_Features/newdelete/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/newdelete/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/newdelete/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/newdelete/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/newdelete/.vscode/extensions.json b/cpp/3_CUDA_Features/newdelete/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/newdelete/.vscode/extensions.json rename to cpp/3_CUDA_Features/newdelete/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/newdelete/CMakeLists.txt b/cpp/3_CUDA_Features/newdelete/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/newdelete/CMakeLists.txt rename to cpp/3_CUDA_Features/newdelete/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/newdelete/README.md b/cpp/3_CUDA_Features/newdelete/README.md similarity index 100% rename from Samples/3_CUDA_Features/newdelete/README.md rename to cpp/3_CUDA_Features/newdelete/README.md diff --git a/Samples/3_CUDA_Features/newdelete/container.hpp b/cpp/3_CUDA_Features/newdelete/container.hpp similarity index 100% rename from Samples/3_CUDA_Features/newdelete/container.hpp rename to cpp/3_CUDA_Features/newdelete/container.hpp diff --git a/Samples/3_CUDA_Features/newdelete/newdelete.cu b/cpp/3_CUDA_Features/newdelete/newdelete.cu similarity index 100% rename from Samples/3_CUDA_Features/newdelete/newdelete.cu rename to cpp/3_CUDA_Features/newdelete/newdelete.cu diff --git a/Samples/3_CUDA_Features/ptxjit/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/ptxjit/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/ptxjit/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/ptxjit/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/ptxjit/.vscode/extensions.json b/cpp/3_CUDA_Features/ptxjit/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/ptxjit/.vscode/extensions.json rename to cpp/3_CUDA_Features/ptxjit/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt b/cpp/3_CUDA_Features/ptxjit/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/ptxjit/CMakeLists.txt rename to cpp/3_CUDA_Features/ptxjit/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/ptxjit/README.md b/cpp/3_CUDA_Features/ptxjit/README.md similarity index 100% rename from Samples/3_CUDA_Features/ptxjit/README.md rename to cpp/3_CUDA_Features/ptxjit/README.md diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit.cpp b/cpp/3_CUDA_Features/ptxjit/ptxjit.cpp similarity index 100% rename from Samples/3_CUDA_Features/ptxjit/ptxjit.cpp rename to cpp/3_CUDA_Features/ptxjit/ptxjit.cpp diff --git a/Samples/3_CUDA_Features/ptxjit/ptxjit_kernel.cu b/cpp/3_CUDA_Features/ptxjit/ptxjit_kernel.cu similarity index 100% rename from Samples/3_CUDA_Features/ptxjit/ptxjit_kernel.cu rename to cpp/3_CUDA_Features/ptxjit/ptxjit_kernel.cu diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/simpleCudaGraphs/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/simpleCudaGraphs/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/simpleCudaGraphs/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/.vscode/extensions.json b/cpp/3_CUDA_Features/simpleCudaGraphs/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/simpleCudaGraphs/.vscode/extensions.json rename to cpp/3_CUDA_Features/simpleCudaGraphs/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt b/cpp/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt rename to cpp/3_CUDA_Features/simpleCudaGraphs/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/README.md b/cpp/3_CUDA_Features/simpleCudaGraphs/README.md similarity index 100% rename from Samples/3_CUDA_Features/simpleCudaGraphs/README.md rename to cpp/3_CUDA_Features/simpleCudaGraphs/README.md diff --git a/Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs.cu b/cpp/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs.cu similarity index 100% rename from Samples/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs.cu rename to cpp/3_CUDA_Features/simpleCudaGraphs/simpleCudaGraphs.cu diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/tf32TensorCoreGemm/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/tf32TensorCoreGemm/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/tf32TensorCoreGemm/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/.vscode/extensions.json b/cpp/3_CUDA_Features/tf32TensorCoreGemm/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/tf32TensorCoreGemm/.vscode/extensions.json rename to cpp/3_CUDA_Features/tf32TensorCoreGemm/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt b/cpp/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt rename to cpp/3_CUDA_Features/tf32TensorCoreGemm/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md b/cpp/3_CUDA_Features/tf32TensorCoreGemm/README.md similarity index 100% rename from Samples/3_CUDA_Features/tf32TensorCoreGemm/README.md rename to cpp/3_CUDA_Features/tf32TensorCoreGemm/README.md diff --git a/Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm.cu b/cpp/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm.cu similarity index 100% rename from Samples/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm.cu rename to cpp/3_CUDA_Features/tf32TensorCoreGemm/tf32TensorCoreGemm.cu diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/c_cpp_properties.json b/cpp/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/c_cpp_properties.json rename to cpp/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/c_cpp_properties.json diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/extensions.json b/cpp/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/extensions.json similarity index 100% rename from Samples/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/extensions.json rename to cpp/3_CUDA_Features/warpAggregatedAtomicsCG/.vscode/extensions.json diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt b/cpp/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt similarity index 100% rename from Samples/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt rename to cpp/3_CUDA_Features/warpAggregatedAtomicsCG/CMakeLists.txt diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md b/cpp/3_CUDA_Features/warpAggregatedAtomicsCG/README.md similarity index 100% rename from Samples/3_CUDA_Features/warpAggregatedAtomicsCG/README.md rename to cpp/3_CUDA_Features/warpAggregatedAtomicsCG/README.md diff --git a/Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG.cu b/cpp/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG.cu similarity index 100% rename from Samples/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG.cu rename to cpp/3_CUDA_Features/warpAggregatedAtomicsCG/warpAggregatedAtomicsCG.cu diff --git a/Samples/4_CUDA_Libraries/CMakeLists.txt b/cpp/4_CUDA_Libraries/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/CMakeLists.txt rename to cpp/4_CUDA_Libraries/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/extensions.json b/cpp/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/FilterBorderControlNPP/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt b/cpp/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt rename to cpp/4_CUDA_Libraries/FilterBorderControlNPP/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp b/cpp/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp rename to cpp/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md b/cpp/4_CUDA_Libraries/FilterBorderControlNPP/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/FilterBorderControlNPP/README.md rename to cpp/4_CUDA_Libraries/FilterBorderControlNPP/README.md diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512.pgm b/cpp/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512.pgm similarity index 100% rename from Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512.pgm rename to cpp/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512.pgm diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/extensions.json b/cpp/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/MersenneTwisterGP11213/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt b/cpp/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt rename to cpp/4_CUDA_Libraries/MersenneTwisterGP11213/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwister.cpp b/cpp/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwister.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwister.cpp rename to cpp/4_CUDA_Libraries/MersenneTwisterGP11213/MersenneTwister.cpp diff --git a/Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md b/cpp/4_CUDA_Libraries/MersenneTwisterGP11213/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/MersenneTwisterGP11213/README.md rename to cpp/4_CUDA_Libraries/MersenneTwisterGP11213/README.md diff --git a/Samples/4_CUDA_Libraries/README.md b/cpp/4_CUDA_Libraries/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/README.md rename to cpp/4_CUDA_Libraries/README.md diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/batchCUBLAS/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/batchCUBLAS/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/batchCUBLAS/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/.vscode/extensions.json b/cpp/4_CUDA_Libraries/batchCUBLAS/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/batchCUBLAS/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/batchCUBLAS/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt b/cpp/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt rename to cpp/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/README.md b/cpp/4_CUDA_Libraries/batchCUBLAS/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/batchCUBLAS/README.md rename to cpp/4_CUDA_Libraries/batchCUBLAS/README.md diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.cpp b/cpp/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.cpp rename to cpp/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.cpp diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.h b/cpp/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.h similarity index 100% rename from Samples/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.h rename to cpp/4_CUDA_Libraries/batchCUBLAS/batchCUBLAS.h diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/boxFilterNPP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/boxFilterNPP/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/boxFilterNPP/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/.vscode/extensions.json b/cpp/4_CUDA_Libraries/boxFilterNPP/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/boxFilterNPP/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/boxFilterNPP/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt b/cpp/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt rename to cpp/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/README.md b/cpp/4_CUDA_Libraries/boxFilterNPP/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/boxFilterNPP/README.md rename to cpp/4_CUDA_Libraries/boxFilterNPP/README.md diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp b/cpp/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp rename to cpp/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/teapot512.pgm b/cpp/4_CUDA_Libraries/boxFilterNPP/teapot512.pgm similarity index 100% rename from Samples/4_CUDA_Libraries/boxFilterNPP/teapot512.pgm rename to cpp/4_CUDA_Libraries/boxFilterNPP/teapot512.pgm diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/extensions.json b/cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt b/cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt rename to cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md b/cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md rename to cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/README.md diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp b/cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp rename to cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/teapot512.pgm b/cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/teapot512.pgm similarity index 100% rename from Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/teapot512.pgm rename to cpp/4_CUDA_Libraries/cannyEdgeDetectorNPP/teapot512.pgm diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/conjugateGradient/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradient/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/conjugateGradient/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/.vscode/extensions.json b/cpp/4_CUDA_Libraries/conjugateGradient/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradient/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/conjugateGradient/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt b/cpp/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt rename to cpp/4_CUDA_Libraries/conjugateGradient/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/README.md b/cpp/4_CUDA_Libraries/conjugateGradient/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradient/README.md rename to cpp/4_CUDA_Libraries/conjugateGradient/README.md diff --git a/Samples/4_CUDA_Libraries/conjugateGradient/main.cpp b/cpp/4_CUDA_Libraries/conjugateGradient/main.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradient/main.cpp rename to cpp/4_CUDA_Libraries/conjugateGradient/main.cpp diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/extensions.json b/cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt b/cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt rename to cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md b/cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md rename to cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/README.md diff --git a/Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs.cu b/cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs.cu similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs.cu rename to cpp/4_CUDA_Libraries/conjugateGradientCudaGraphs/conjugateGradientCudaGraphs.cu diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/extensions.json b/cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt b/cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt rename to cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md b/cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md rename to cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/README.md diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG.cu b/cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG.cu similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG.cu rename to cpp/4_CUDA_Libraries/conjugateGradientMultiBlockCG/conjugateGradientMultiBlockCG.cu diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/extensions.json b/cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt b/cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt rename to cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md b/cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md rename to cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/README.md diff --git a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu b/cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu similarity index 98% rename from Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu rename to cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu index 6344d5e7..621536bc 100644 --- a/Samples/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu +++ b/cpp/4_CUDA_Libraries/conjugateGradientMultiDeviceCG/conjugateGradientMultiDeviceCG.cu @@ -746,12 +746,8 @@ int main(int argc, char **argv) deviceId++; } - // Use cudaMemLocationTypeHost for optimal host memory location - cudaMemLocation hostLoc; - hostLoc.type = cudaMemLocationTypeHost; - checkCudaErrors(cudaMemPrefetchAsync(x, sizeof(float) * N, hostLoc, 0)); - checkCudaErrors(cudaMemPrefetchAsync(dot_result, sizeof(double), hostLoc, 0)); - + // Synchronize all kernel streams across all devices before prefetching to host. + // This ensures GPU writes are fully visible before page migration begins. deviceId = bestFitDeviceIds.begin(); device_count = 0; while (deviceId != bestFitDeviceIds.end()) { @@ -760,6 +756,14 @@ int main(int argc, char **argv) deviceId++; } + cudaMemLocation hostLoc; + hostLoc.type = cudaMemLocationTypeHost; + checkCudaErrors(cudaMemPrefetchAsync(x, sizeof(float) * N, hostLoc, 0)); + checkCudaErrors(cudaMemPrefetchAsync(dot_result, sizeof(double), hostLoc, 0)); + // Ensure the async prefetch of x/dot_result on the null stream completes before + // CPU access. cudaStreamSynchronize only covers the kernel streams, not the null stream. + checkCudaErrors(cudaDeviceSynchronize()); + r1 = (float)*dot_result; printf("GPU Final, residual = %e \n ", sqrt(r1)); diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/extensions.json b/cpp/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/conjugateGradientPrecond/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt b/cpp/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt rename to cpp/4_CUDA_Libraries/conjugateGradientPrecond/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md b/cpp/4_CUDA_Libraries/conjugateGradientPrecond/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientPrecond/README.md rename to cpp/4_CUDA_Libraries/conjugateGradientPrecond/README.md diff --git a/Samples/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp b/cpp/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp rename to cpp/4_CUDA_Libraries/conjugateGradientPrecond/main.cpp diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/conjugateGradientUM/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientUM/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/conjugateGradientUM/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/.vscode/extensions.json b/cpp/4_CUDA_Libraries/conjugateGradientUM/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientUM/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/conjugateGradientUM/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt b/cpp/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt rename to cpp/4_CUDA_Libraries/conjugateGradientUM/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/README.md b/cpp/4_CUDA_Libraries/conjugateGradientUM/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientUM/README.md rename to cpp/4_CUDA_Libraries/conjugateGradientUM/README.md diff --git a/Samples/4_CUDA_Libraries/conjugateGradientUM/main.cpp b/cpp/4_CUDA_Libraries/conjugateGradientUM/main.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/conjugateGradientUM/main.cpp rename to cpp/4_CUDA_Libraries/conjugateGradientUM/main.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/extensions.json b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/README.md diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver.cpp b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver.cpp rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/cuSolverDn_LinearSolver.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/gr_900_900_crg.mtx b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/gr_900_900_crg.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/gr_900_900_crg.mtx rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/gr_900_900_crg.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/lap3D_7pt_n20.mtx b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/lap3D_7pt_n20.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/lap3D_7pt_n20.mtx rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/lap3D_7pt_n20.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.c b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.c similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.c rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.c diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.h b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.h similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.h rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio.h diff --git a/Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio_wrapper.cpp b/cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio_wrapper.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio_wrapper.cpp rename to cpp/4_CUDA_Libraries/cuSolverDn_LinearSolver/mmio_wrapper.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/cuSolverRf/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/cuSolverRf/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/.vscode/extensions.json b/cpp/4_CUDA_Libraries/cuSolverRf/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/cuSolverRf/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt b/cpp/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt rename to cpp/4_CUDA_Libraries/cuSolverRf/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/README.md b/cpp/4_CUDA_Libraries/cuSolverRf/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/README.md rename to cpp/4_CUDA_Libraries/cuSolverRf/README.md diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp b/cpp/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp rename to cpp/4_CUDA_Libraries/cuSolverRf/cuSolverRf.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/lap2D_5pt_n100.mtx b/cpp/4_CUDA_Libraries/cuSolverRf/lap2D_5pt_n100.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/lap2D_5pt_n100.mtx rename to cpp/4_CUDA_Libraries/cuSolverRf/lap2D_5pt_n100.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/lap3D_7pt_n20.mtx b/cpp/4_CUDA_Libraries/cuSolverRf/lap3D_7pt_n20.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/lap3D_7pt_n20.mtx rename to cpp/4_CUDA_Libraries/cuSolverRf/lap3D_7pt_n20.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/mmio.c b/cpp/4_CUDA_Libraries/cuSolverRf/mmio.c similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/mmio.c rename to cpp/4_CUDA_Libraries/cuSolverRf/mmio.c diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/mmio.h b/cpp/4_CUDA_Libraries/cuSolverRf/mmio.h similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/mmio.h rename to cpp/4_CUDA_Libraries/cuSolverRf/mmio.h diff --git a/Samples/4_CUDA_Libraries/cuSolverRf/mmio_wrapper.cpp b/cpp/4_CUDA_Libraries/cuSolverRf/mmio_wrapper.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverRf/mmio_wrapper.cpp rename to cpp/4_CUDA_Libraries/cuSolverRf/mmio_wrapper.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/extensions.json b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/README.md diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap2D_5pt_n100.mtx b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap2D_5pt_n100.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap2D_5pt_n100.mtx rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap2D_5pt_n100.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap3D_7pt_n20.mtx b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap3D_7pt_n20.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap3D_7pt_n20.mtx rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/lap3D_7pt_n20.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.c b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.c similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.c rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.c diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.h b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.h similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.h rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio.h diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio_wrapper.cpp b/cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio_wrapper.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio_wrapper.cpp rename to cpp/4_CUDA_Libraries/cuSolverSp_LinearSolver/mmio_wrapper.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/extensions.json b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/README.md diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/cuSolverSp_LowlevelCholesky.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap2D_5pt_n100.mtx b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap2D_5pt_n100.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap2D_5pt_n100.mtx rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap2D_5pt_n100.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap3D_7pt_n20.mtx b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap3D_7pt_n20.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap3D_7pt_n20.mtx rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/lap3D_7pt_n20.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.c b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.c similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.c rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.c diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.h b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.h similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.h rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio.h diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio_wrapper.cpp b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio_wrapper.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio_wrapper.cpp rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelCholesky/mmio_wrapper.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/extensions.json b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/README.md diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/cuSolverSp_LowlevelQR.cpp diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n100.mtx b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n100.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n100.mtx rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n100.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n32.mtx b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n32.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n32.mtx rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap2D_5pt_n32.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap3D_7pt_n20.mtx b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap3D_7pt_n20.mtx similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap3D_7pt_n20.mtx rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/lap3D_7pt_n20.mtx diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.c b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.c similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.c rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.c diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.h b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.h similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.h rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio.h diff --git a/Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio_wrapper.cpp b/cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio_wrapper.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio_wrapper.cpp rename to cpp/4_CUDA_Libraries/cuSolverSp_LowlevelQR/mmio_wrapper.cpp diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/cudaNvSci/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/cudaNvSci/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/.vscode/extensions.json b/cpp/4_CUDA_Libraries/cudaNvSci/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/cudaNvSci/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt b/cpp/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt rename to cpp/4_CUDA_Libraries/cudaNvSci/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/README.md b/cpp/4_CUDA_Libraries/cudaNvSci/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/README.md rename to cpp/4_CUDA_Libraries/cudaNvSci/README.md diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/cudaNvSci.cpp b/cpp/4_CUDA_Libraries/cudaNvSci/cudaNvSci.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/cudaNvSci.cpp rename to cpp/4_CUDA_Libraries/cudaNvSci/cudaNvSci.cpp diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/cudaNvSci.h b/cpp/4_CUDA_Libraries/cudaNvSci/cudaNvSci.h similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/cudaNvSci.h rename to cpp/4_CUDA_Libraries/cudaNvSci/cudaNvSci.h diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/imageKernels.cu b/cpp/4_CUDA_Libraries/cudaNvSci/imageKernels.cu similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/imageKernels.cu rename to cpp/4_CUDA_Libraries/cudaNvSci/imageKernels.cu diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/main.cpp b/cpp/4_CUDA_Libraries/cudaNvSci/main.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/main.cpp rename to cpp/4_CUDA_Libraries/cudaNvSci/main.cpp diff --git a/Samples/4_CUDA_Libraries/cudaNvSci/teapot1024.ppm b/cpp/4_CUDA_Libraries/cudaNvSci/teapot1024.ppm similarity index 100% rename from Samples/4_CUDA_Libraries/cudaNvSci/teapot1024.ppm rename to cpp/4_CUDA_Libraries/cudaNvSci/teapot1024.ppm diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/freeImageInteropNPP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/freeImageInteropNPP/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/freeImageInteropNPP/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/.vscode/extensions.json b/cpp/4_CUDA_Libraries/freeImageInteropNPP/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/freeImageInteropNPP/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/freeImageInteropNPP/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt b/cpp/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt rename to cpp/4_CUDA_Libraries/freeImageInteropNPP/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md b/cpp/4_CUDA_Libraries/freeImageInteropNPP/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/freeImageInteropNPP/README.md rename to cpp/4_CUDA_Libraries/freeImageInteropNPP/README.md diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp b/cpp/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp rename to cpp/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/histEqualizationNPP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/histEqualizationNPP/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/histEqualizationNPP/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/.vscode/extensions.json b/cpp/4_CUDA_Libraries/histEqualizationNPP/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/histEqualizationNPP/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/histEqualizationNPP/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt b/cpp/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt rename to cpp/4_CUDA_Libraries/histEqualizationNPP/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/README.md b/cpp/4_CUDA_Libraries/histEqualizationNPP/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/histEqualizationNPP/README.md rename to cpp/4_CUDA_Libraries/histEqualizationNPP/README.md diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp b/cpp/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp rename to cpp/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp diff --git a/Samples/4_CUDA_Libraries/jitLto/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/jitLto/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/jitLto/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/jitLto/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/jitLto/.vscode/extensions.json b/cpp/4_CUDA_Libraries/jitLto/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/jitLto/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/jitLto/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt b/cpp/4_CUDA_Libraries/jitLto/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/jitLto/CMakeLists.txt rename to cpp/4_CUDA_Libraries/jitLto/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/jitLto/README.md b/cpp/4_CUDA_Libraries/jitLto/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/jitLto/README.md rename to cpp/4_CUDA_Libraries/jitLto/README.md diff --git a/Samples/4_CUDA_Libraries/jitLto/jitLto.cpp b/cpp/4_CUDA_Libraries/jitLto/jitLto.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/jitLto/jitLto.cpp rename to cpp/4_CUDA_Libraries/jitLto/jitLto.cpp diff --git a/Samples/4_CUDA_Libraries/lineOfSight/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/lineOfSight/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/lineOfSight/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/lineOfSight/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/lineOfSight/.vscode/extensions.json b/cpp/4_CUDA_Libraries/lineOfSight/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/lineOfSight/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/lineOfSight/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/lineOfSight/CMakeLists.txt b/cpp/4_CUDA_Libraries/lineOfSight/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/lineOfSight/CMakeLists.txt rename to cpp/4_CUDA_Libraries/lineOfSight/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/lineOfSight/README.md b/cpp/4_CUDA_Libraries/lineOfSight/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/lineOfSight/README.md rename to cpp/4_CUDA_Libraries/lineOfSight/README.md diff --git a/Samples/4_CUDA_Libraries/lineOfSight/lineOfSight.cu b/cpp/4_CUDA_Libraries/lineOfSight/lineOfSight.cu similarity index 100% rename from Samples/4_CUDA_Libraries/lineOfSight/lineOfSight.cu rename to cpp/4_CUDA_Libraries/lineOfSight/lineOfSight.cu diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/extensions.json b/cpp/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/matrixMulCUBLAS/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt b/cpp/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt rename to cpp/4_CUDA_Libraries/matrixMulCUBLAS/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md b/cpp/4_CUDA_Libraries/matrixMulCUBLAS/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/matrixMulCUBLAS/README.md rename to cpp/4_CUDA_Libraries/matrixMulCUBLAS/README.md diff --git a/Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS.cpp b/cpp/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS.cpp rename to cpp/4_CUDA_Libraries/matrixMulCUBLAS/matrixMulCUBLAS.cpp diff --git a/Samples/4_CUDA_Libraries/nvJPEG/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/nvJPEG/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/nvJPEG/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/nvJPEG/.vscode/extensions.json b/cpp/4_CUDA_Libraries/nvJPEG/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/nvJPEG/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/nvJPEG/CMakeLists.txt b/cpp/4_CUDA_Libraries/nvJPEG/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/CMakeLists.txt rename to cpp/4_CUDA_Libraries/nvJPEG/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/nvJPEG/README.md b/cpp/4_CUDA_Libraries/nvJPEG/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/README.md rename to cpp/4_CUDA_Libraries/nvJPEG/README.md diff --git a/Samples/4_CUDA_Libraries/nvJPEG/images/img1.jpg b/cpp/4_CUDA_Libraries/nvJPEG/images/img1.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/images/img1.jpg rename to cpp/4_CUDA_Libraries/nvJPEG/images/img1.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG/images/img2.jpg b/cpp/4_CUDA_Libraries/nvJPEG/images/img2.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/images/img2.jpg rename to cpp/4_CUDA_Libraries/nvJPEG/images/img2.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG/images/img3.jpg b/cpp/4_CUDA_Libraries/nvJPEG/images/img3.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/images/img3.jpg rename to cpp/4_CUDA_Libraries/nvJPEG/images/img3.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG/images/img4.jpg b/cpp/4_CUDA_Libraries/nvJPEG/images/img4.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/images/img4.jpg rename to cpp/4_CUDA_Libraries/nvJPEG/images/img4.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG/images/img5.jpg b/cpp/4_CUDA_Libraries/nvJPEG/images/img5.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/images/img5.jpg rename to cpp/4_CUDA_Libraries/nvJPEG/images/img5.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG/images/img6.jpg b/cpp/4_CUDA_Libraries/nvJPEG/images/img6.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/images/img6.jpg rename to cpp/4_CUDA_Libraries/nvJPEG/images/img6.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG/images/img7.jpg b/cpp/4_CUDA_Libraries/nvJPEG/images/img7.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/images/img7.jpg rename to cpp/4_CUDA_Libraries/nvJPEG/images/img7.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG/images/img8.jpg b/cpp/4_CUDA_Libraries/nvJPEG/images/img8.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/images/img8.jpg rename to cpp/4_CUDA_Libraries/nvJPEG/images/img8.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG/nvJPEG.cpp b/cpp/4_CUDA_Libraries/nvJPEG/nvJPEG.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG/nvJPEG.cpp rename to cpp/4_CUDA_Libraries/nvJPEG/nvJPEG.cpp diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/nvJPEG_encoder/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/.vscode/extensions.json b/cpp/4_CUDA_Libraries/nvJPEG_encoder/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt b/cpp/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md b/cpp/4_CUDA_Libraries/nvJPEG_encoder/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/README.md rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/README.md diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img1.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img1.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img1.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img1.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img2.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img2.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img2.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img2.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img3.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img3.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img3.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img3.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img4.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img4.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img4.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img4.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img5.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img5.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img5.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img5.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img6.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img6.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img6.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img6.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img7.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img7.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img7.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img7.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img8.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img8.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img8.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/encode_output/img8.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img1.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img1.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img1.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img1.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img2.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img2.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img2.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img2.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img3.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img3.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img3.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img3.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img4.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img4.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img4.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img4.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img5.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img5.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img5.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img5.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img6.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img6.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img6.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img6.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img7.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img7.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img7.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img7.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img8.jpg b/cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img8.jpg similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/images/img8.jpg rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/images/img8.jpg diff --git a/Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder.cpp b/cpp/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder.cpp rename to cpp/4_CUDA_Libraries/nvJPEG_encoder/nvJPEG_encoder.cpp diff --git a/Samples/4_CUDA_Libraries/oceanFFT/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/oceanFFT/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/oceanFFT/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/oceanFFT/.vscode/extensions.json b/cpp/4_CUDA_Libraries/oceanFFT/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/oceanFFT/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/oceanFFT/CMakeLists.txt b/cpp/4_CUDA_Libraries/oceanFFT/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/CMakeLists.txt rename to cpp/4_CUDA_Libraries/oceanFFT/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/oceanFFT/README.md b/cpp/4_CUDA_Libraries/oceanFFT/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/README.md rename to cpp/4_CUDA_Libraries/oceanFFT/README.md diff --git a/Samples/4_CUDA_Libraries/oceanFFT/data/ocean.frag b/cpp/4_CUDA_Libraries/oceanFFT/data/ocean.frag similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/data/ocean.frag rename to cpp/4_CUDA_Libraries/oceanFFT/data/ocean.frag diff --git a/Samples/4_CUDA_Libraries/oceanFFT/data/ocean.vert b/cpp/4_CUDA_Libraries/oceanFFT/data/ocean.vert similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/data/ocean.vert rename to cpp/4_CUDA_Libraries/oceanFFT/data/ocean.vert diff --git a/Samples/4_CUDA_Libraries/oceanFFT/data/ref_slopeShading.bin b/cpp/4_CUDA_Libraries/oceanFFT/data/ref_slopeShading.bin similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/data/ref_slopeShading.bin rename to cpp/4_CUDA_Libraries/oceanFFT/data/ref_slopeShading.bin diff --git a/Samples/4_CUDA_Libraries/oceanFFT/data/ref_spatialDomain.bin b/cpp/4_CUDA_Libraries/oceanFFT/data/ref_spatialDomain.bin similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/data/ref_spatialDomain.bin rename to cpp/4_CUDA_Libraries/oceanFFT/data/ref_spatialDomain.bin diff --git a/Samples/4_CUDA_Libraries/oceanFFT/data/reference.ppm b/cpp/4_CUDA_Libraries/oceanFFT/data/reference.ppm similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/data/reference.ppm rename to cpp/4_CUDA_Libraries/oceanFFT/data/reference.ppm diff --git a/Samples/4_CUDA_Libraries/oceanFFT/doc/sshot_lg.png b/cpp/4_CUDA_Libraries/oceanFFT/doc/sshot_lg.png similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/doc/sshot_lg.png rename to cpp/4_CUDA_Libraries/oceanFFT/doc/sshot_lg.png diff --git a/Samples/4_CUDA_Libraries/oceanFFT/doc/sshot_md.png b/cpp/4_CUDA_Libraries/oceanFFT/doc/sshot_md.png similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/doc/sshot_md.png rename to cpp/4_CUDA_Libraries/oceanFFT/doc/sshot_md.png diff --git a/Samples/4_CUDA_Libraries/oceanFFT/doc/sshot_sm.png b/cpp/4_CUDA_Libraries/oceanFFT/doc/sshot_sm.png similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/doc/sshot_sm.png rename to cpp/4_CUDA_Libraries/oceanFFT/doc/sshot_sm.png diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT.cpp b/cpp/4_CUDA_Libraries/oceanFFT/oceanFFT.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/oceanFFT.cpp rename to cpp/4_CUDA_Libraries/oceanFFT/oceanFFT.cpp diff --git a/Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_kernel.cu b/cpp/4_CUDA_Libraries/oceanFFT/oceanFFT_kernel.cu similarity index 100% rename from Samples/4_CUDA_Libraries/oceanFFT/oceanFFT_kernel.cu rename to cpp/4_CUDA_Libraries/oceanFFT/oceanFFT_kernel.cu diff --git a/Samples/4_CUDA_Libraries/randomFog/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/randomFog/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/randomFog/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/randomFog/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/randomFog/.vscode/extensions.json b/cpp/4_CUDA_Libraries/randomFog/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/randomFog/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/randomFog/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/randomFog/CMakeLists.txt b/cpp/4_CUDA_Libraries/randomFog/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/randomFog/CMakeLists.txt rename to cpp/4_CUDA_Libraries/randomFog/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/randomFog/README.md b/cpp/4_CUDA_Libraries/randomFog/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/randomFog/README.md rename to cpp/4_CUDA_Libraries/randomFog/README.md diff --git a/Samples/4_CUDA_Libraries/randomFog/data/ref_randomFog.bin b/cpp/4_CUDA_Libraries/randomFog/data/ref_randomFog.bin similarity index 100% rename from Samples/4_CUDA_Libraries/randomFog/data/ref_randomFog.bin rename to cpp/4_CUDA_Libraries/randomFog/data/ref_randomFog.bin diff --git a/Samples/4_CUDA_Libraries/randomFog/randomFog.cpp b/cpp/4_CUDA_Libraries/randomFog/randomFog.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/randomFog/randomFog.cpp rename to cpp/4_CUDA_Libraries/randomFog/randomFog.cpp diff --git a/Samples/4_CUDA_Libraries/randomFog/rng.cpp b/cpp/4_CUDA_Libraries/randomFog/rng.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/randomFog/rng.cpp rename to cpp/4_CUDA_Libraries/randomFog/rng.cpp diff --git a/Samples/4_CUDA_Libraries/randomFog/rng.h b/cpp/4_CUDA_Libraries/randomFog/rng.h similarity index 100% rename from Samples/4_CUDA_Libraries/randomFog/rng.h rename to cpp/4_CUDA_Libraries/randomFog/rng.h diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/simpleCUBLAS/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/simpleCUBLAS/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/.vscode/extensions.json b/cpp/4_CUDA_Libraries/simpleCUBLAS/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/simpleCUBLAS/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt b/cpp/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt rename to cpp/4_CUDA_Libraries/simpleCUBLAS/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/README.md b/cpp/4_CUDA_Libraries/simpleCUBLAS/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS/README.md rename to cpp/4_CUDA_Libraries/simpleCUBLAS/README.md diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS.cpp b/cpp/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS.cpp rename to cpp/4_CUDA_Libraries/simpleCUBLAS/simpleCUBLAS.cpp diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/simpleCUBLASXT/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLASXT/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/simpleCUBLASXT/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/.vscode/extensions.json b/cpp/4_CUDA_Libraries/simpleCUBLASXT/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLASXT/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/simpleCUBLASXT/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt b/cpp/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt rename to cpp/4_CUDA_Libraries/simpleCUBLASXT/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md b/cpp/4_CUDA_Libraries/simpleCUBLASXT/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLASXT/README.md rename to cpp/4_CUDA_Libraries/simpleCUBLASXT/README.md diff --git a/Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp b/cpp/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp rename to cpp/4_CUDA_Libraries/simpleCUBLASXT/simpleCUBLASXT.cpp diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/extensions.json b/cpp/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/simpleCUBLAS_LU/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt b/cpp/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt rename to cpp/4_CUDA_Libraries/simpleCUBLAS_LU/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md b/cpp/4_CUDA_Libraries/simpleCUBLAS_LU/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS_LU/README.md rename to cpp/4_CUDA_Libraries/simpleCUBLAS_LU/README.md diff --git a/Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU.cpp b/cpp/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU.cpp rename to cpp/4_CUDA_Libraries/simpleCUBLAS_LU/simpleCUBLAS_LU.cpp diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/simpleCUFFT/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/simpleCUFFT/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/.vscode/extensions.json b/cpp/4_CUDA_Libraries/simpleCUFFT/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/simpleCUFFT/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt b/cpp/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt rename to cpp/4_CUDA_Libraries/simpleCUFFT/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/README.md b/cpp/4_CUDA_Libraries/simpleCUFFT/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT/README.md rename to cpp/4_CUDA_Libraries/simpleCUFFT/README.md diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT.cu b/cpp/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT.cu similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT.cu rename to cpp/4_CUDA_Libraries/simpleCUFFT/simpleCUFFT.cu diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/extensions.json b/cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt b/cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt rename to cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md b/cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md rename to cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/README.md diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU.cu b/cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU.cu similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU.cu rename to cpp/4_CUDA_Libraries/simpleCUFFT_2d_MGPU/simpleCUFFT_2d_MGPU.cu diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/extensions.json b/cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt b/cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt rename to cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md b/cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md rename to cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/README.md diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU.cu b/cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU.cu similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU.cu rename to cpp/4_CUDA_Libraries/simpleCUFFT_MGPU/simpleCUFFT_MGPU.cu diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/extensions.json b/cpp/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/simpleCUFFT_callback/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt b/cpp/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt rename to cpp/4_CUDA_Libraries/simpleCUFFT_callback/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md b/cpp/4_CUDA_Libraries/simpleCUFFT_callback/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_callback/README.md rename to cpp/4_CUDA_Libraries/simpleCUFFT_callback/README.md diff --git a/Samples/4_CUDA_Libraries/simpleCUFFT_callback/simpleCUFFT_callback.cu b/cpp/4_CUDA_Libraries/simpleCUFFT_callback/simpleCUFFT_callback.cu similarity index 100% rename from Samples/4_CUDA_Libraries/simpleCUFFT_callback/simpleCUFFT_callback.cu rename to cpp/4_CUDA_Libraries/simpleCUFFT_callback/simpleCUFFT_callback.cu diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/c_cpp_properties.json b/cpp/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/c_cpp_properties.json rename to cpp/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/c_cpp_properties.json diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/extensions.json b/cpp/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/extensions.json similarity index 100% rename from Samples/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/extensions.json rename to cpp/4_CUDA_Libraries/watershedSegmentationNPP/.vscode/extensions.json diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt b/cpp/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt similarity index 100% rename from Samples/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt rename to cpp/4_CUDA_Libraries/watershedSegmentationNPP/CMakeLists.txt diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md b/cpp/4_CUDA_Libraries/watershedSegmentationNPP/README.md similarity index 100% rename from Samples/4_CUDA_Libraries/watershedSegmentationNPP/README.md rename to cpp/4_CUDA_Libraries/watershedSegmentationNPP/README.md diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp b/cpp/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp similarity index 100% rename from Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp rename to cpp/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp diff --git a/Samples/5_Domain_Specific/BlackScholes/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/BlackScholes/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/BlackScholes/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/BlackScholes/.vscode/extensions.json b/cpp/5_Domain_Specific/BlackScholes/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/.vscode/extensions.json rename to cpp/5_Domain_Specific/BlackScholes/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes.cu b/cpp/5_Domain_Specific/BlackScholes/BlackScholes.cu similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/BlackScholes.cu rename to cpp/5_Domain_Specific/BlackScholes/BlackScholes.cu diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_gold.cpp b/cpp/5_Domain_Specific/BlackScholes/BlackScholes_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/BlackScholes_gold.cpp rename to cpp/5_Domain_Specific/BlackScholes/BlackScholes_gold.cpp diff --git a/Samples/5_Domain_Specific/BlackScholes/BlackScholes_kernel.cuh b/cpp/5_Domain_Specific/BlackScholes/BlackScholes_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/BlackScholes_kernel.cuh rename to cpp/5_Domain_Specific/BlackScholes/BlackScholes_kernel.cuh diff --git a/Samples/5_Domain_Specific/BlackScholes/CMakeLists.txt b/cpp/5_Domain_Specific/BlackScholes/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/CMakeLists.txt rename to cpp/5_Domain_Specific/BlackScholes/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/BlackScholes/README.md b/cpp/5_Domain_Specific/BlackScholes/README.md similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/README.md rename to cpp/5_Domain_Specific/BlackScholes/README.md diff --git a/Samples/5_Domain_Specific/BlackScholes/doc/BlackScholes.doc b/cpp/5_Domain_Specific/BlackScholes/doc/BlackScholes.doc similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/doc/BlackScholes.doc rename to cpp/5_Domain_Specific/BlackScholes/doc/BlackScholes.doc diff --git a/Samples/5_Domain_Specific/BlackScholes/doc/BlackScholes.pdf b/cpp/5_Domain_Specific/BlackScholes/doc/BlackScholes.pdf similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes/doc/BlackScholes.pdf rename to cpp/5_Domain_Specific/BlackScholes/doc/BlackScholes.pdf diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/BlackScholes_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes_nvrtc/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/BlackScholes_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/.vscode/extensions.json b/cpp/5_Domain_Specific/BlackScholes_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes_nvrtc/.vscode/extensions.json rename to cpp/5_Domain_Specific/BlackScholes_nvrtc/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes.cpp b/cpp/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes.cpp similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes.cpp rename to cpp/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes.cpp diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_gold.cpp b/cpp/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_gold.cpp rename to cpp/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_gold.cpp diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_kernel.cuh b/cpp/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_kernel.cuh rename to cpp/5_Domain_Specific/BlackScholes_nvrtc/BlackScholes_kernel.cuh diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt b/cpp/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt rename to cpp/5_Domain_Specific/BlackScholes_nvrtc/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md b/cpp/5_Domain_Specific/BlackScholes_nvrtc/README.md similarity index 100% rename from Samples/5_Domain_Specific/BlackScholes_nvrtc/README.md rename to cpp/5_Domain_Specific/BlackScholes_nvrtc/README.md diff --git a/Samples/5_Domain_Specific/CMakeLists.txt b/cpp/5_Domain_Specific/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/CMakeLists.txt rename to cpp/5_Domain_Specific/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/FDTD3d/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/FDTD3d/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/FDTD3d/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/FDTD3d/.vscode/extensions.json b/cpp/5_Domain_Specific/FDTD3d/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/.vscode/extensions.json rename to cpp/5_Domain_Specific/FDTD3d/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/FDTD3d/CMakeLists.txt b/cpp/5_Domain_Specific/FDTD3d/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/CMakeLists.txt rename to cpp/5_Domain_Specific/FDTD3d/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/FDTD3d/README.md b/cpp/5_Domain_Specific/FDTD3d/README.md similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/README.md rename to cpp/5_Domain_Specific/FDTD3d/README.md diff --git a/Samples/5_Domain_Specific/FDTD3d/inc/FDTD3d.h b/cpp/5_Domain_Specific/FDTD3d/inc/FDTD3d.h similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/inc/FDTD3d.h rename to cpp/5_Domain_Specific/FDTD3d/inc/FDTD3d.h diff --git a/Samples/5_Domain_Specific/FDTD3d/inc/FDTD3dGPU.h b/cpp/5_Domain_Specific/FDTD3d/inc/FDTD3dGPU.h similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/inc/FDTD3dGPU.h rename to cpp/5_Domain_Specific/FDTD3d/inc/FDTD3dGPU.h diff --git a/Samples/5_Domain_Specific/FDTD3d/inc/FDTD3dGPUKernel.cuh b/cpp/5_Domain_Specific/FDTD3d/inc/FDTD3dGPUKernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/inc/FDTD3dGPUKernel.cuh rename to cpp/5_Domain_Specific/FDTD3d/inc/FDTD3dGPUKernel.cuh diff --git a/Samples/5_Domain_Specific/FDTD3d/inc/FDTD3dReference.h b/cpp/5_Domain_Specific/FDTD3d/inc/FDTD3dReference.h similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/inc/FDTD3dReference.h rename to cpp/5_Domain_Specific/FDTD3d/inc/FDTD3dReference.h diff --git a/Samples/5_Domain_Specific/FDTD3d/src/FDTD3d.cpp b/cpp/5_Domain_Specific/FDTD3d/src/FDTD3d.cpp similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/src/FDTD3d.cpp rename to cpp/5_Domain_Specific/FDTD3d/src/FDTD3d.cpp diff --git a/Samples/5_Domain_Specific/FDTD3d/src/FDTD3dGPU.cu b/cpp/5_Domain_Specific/FDTD3d/src/FDTD3dGPU.cu similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/src/FDTD3dGPU.cu rename to cpp/5_Domain_Specific/FDTD3d/src/FDTD3dGPU.cu diff --git a/Samples/5_Domain_Specific/FDTD3d/src/FDTD3dReference.cpp b/cpp/5_Domain_Specific/FDTD3d/src/FDTD3dReference.cpp similarity index 100% rename from Samples/5_Domain_Specific/FDTD3d/src/FDTD3dReference.cpp rename to cpp/5_Domain_Specific/FDTD3d/src/FDTD3dReference.cpp diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/HSOpticalFlow/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/HSOpticalFlow/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/.vscode/extensions.json b/cpp/5_Domain_Specific/HSOpticalFlow/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/.vscode/extensions.json rename to cpp/5_Domain_Specific/HSOpticalFlow/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt b/cpp/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt rename to cpp/5_Domain_Specific/HSOpticalFlow/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo b/cpp/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo rename to cpp/5_Domain_Specific/HSOpticalFlow/FlowCPU.flo diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo b/cpp/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo rename to cpp/5_Domain_Specific/HSOpticalFlow/FlowGPU.flo diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/README.md b/cpp/5_Domain_Specific/HSOpticalFlow/README.md similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/README.md rename to cpp/5_Domain_Specific/HSOpticalFlow/README.md diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/addKernel.cuh b/cpp/5_Domain_Specific/HSOpticalFlow/addKernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/addKernel.cuh rename to cpp/5_Domain_Specific/HSOpticalFlow/addKernel.cuh diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/common.h b/cpp/5_Domain_Specific/HSOpticalFlow/common.h similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/common.h rename to cpp/5_Domain_Specific/HSOpticalFlow/common.h diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/data/frame10.ppm b/cpp/5_Domain_Specific/HSOpticalFlow/data/frame10.ppm similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/data/frame10.ppm rename to cpp/5_Domain_Specific/HSOpticalFlow/data/frame10.ppm diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/data/frame11.ppm b/cpp/5_Domain_Specific/HSOpticalFlow/data/frame11.ppm similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/data/frame11.ppm rename to cpp/5_Domain_Specific/HSOpticalFlow/data/frame11.ppm diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/derivativesKernel.cuh b/cpp/5_Domain_Specific/HSOpticalFlow/derivativesKernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/derivativesKernel.cuh rename to cpp/5_Domain_Specific/HSOpticalFlow/derivativesKernel.cuh diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.docx b/cpp/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.docx similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.docx rename to cpp/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.docx diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.pdf b/cpp/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.pdf similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.pdf rename to cpp/5_Domain_Specific/HSOpticalFlow/doc/OpticalFlow.pdf diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/downscaleKernel.cuh b/cpp/5_Domain_Specific/HSOpticalFlow/downscaleKernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/downscaleKernel.cuh rename to cpp/5_Domain_Specific/HSOpticalFlow/downscaleKernel.cuh diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/flowCUDA.cu b/cpp/5_Domain_Specific/HSOpticalFlow/flowCUDA.cu similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/flowCUDA.cu rename to cpp/5_Domain_Specific/HSOpticalFlow/flowCUDA.cu diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/flowCUDA.h b/cpp/5_Domain_Specific/HSOpticalFlow/flowCUDA.h similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/flowCUDA.h rename to cpp/5_Domain_Specific/HSOpticalFlow/flowCUDA.h diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/flowGold.cpp b/cpp/5_Domain_Specific/HSOpticalFlow/flowGold.cpp similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/flowGold.cpp rename to cpp/5_Domain_Specific/HSOpticalFlow/flowGold.cpp diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/flowGold.h b/cpp/5_Domain_Specific/HSOpticalFlow/flowGold.h similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/flowGold.h rename to cpp/5_Domain_Specific/HSOpticalFlow/flowGold.h diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/main.cpp b/cpp/5_Domain_Specific/HSOpticalFlow/main.cpp similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/main.cpp rename to cpp/5_Domain_Specific/HSOpticalFlow/main.cpp diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/solverKernel.cuh b/cpp/5_Domain_Specific/HSOpticalFlow/solverKernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/solverKernel.cuh rename to cpp/5_Domain_Specific/HSOpticalFlow/solverKernel.cuh diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/upscaleKernel.cuh b/cpp/5_Domain_Specific/HSOpticalFlow/upscaleKernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/upscaleKernel.cuh rename to cpp/5_Domain_Specific/HSOpticalFlow/upscaleKernel.cuh diff --git a/Samples/5_Domain_Specific/HSOpticalFlow/warpingKernel.cuh b/cpp/5_Domain_Specific/HSOpticalFlow/warpingKernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/HSOpticalFlow/warpingKernel.cuh rename to cpp/5_Domain_Specific/HSOpticalFlow/warpingKernel.cuh diff --git a/Samples/5_Domain_Specific/Mandelbrot/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/Mandelbrot/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/Mandelbrot/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/Mandelbrot/.vscode/extensions.json b/cpp/5_Domain_Specific/Mandelbrot/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/.vscode/extensions.json rename to cpp/5_Domain_Specific/Mandelbrot/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/Mandelbrot/CMakeLists.txt b/cpp/5_Domain_Specific/Mandelbrot/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/CMakeLists.txt rename to cpp/5_Domain_Specific/Mandelbrot/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot.cpp b/cpp/5_Domain_Specific/Mandelbrot/Mandelbrot.cpp similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/Mandelbrot.cpp rename to cpp/5_Domain_Specific/Mandelbrot/Mandelbrot.cpp diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_cuda.cu b/cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_cuda.cu similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_cuda.cu rename to cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_cuda.cu diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.cpp b/cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.cpp rename to cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.cpp diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.h b/cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.h similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.h rename to cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_gold.h diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.cuh b/cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.cuh rename to cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.cuh diff --git a/Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.h b/cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.h similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.h rename to cpp/5_Domain_Specific/Mandelbrot/Mandelbrot_kernel.h diff --git a/Samples/5_Domain_Specific/Mandelbrot/README.md b/cpp/5_Domain_Specific/Mandelbrot/README.md similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/README.md rename to cpp/5_Domain_Specific/Mandelbrot/README.md diff --git a/Samples/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp32.ppm b/cpp/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp32.ppm similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp32.ppm rename to cpp/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp32.ppm diff --git a/Samples/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp64.ppm b/cpp/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp64.ppm similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp64.ppm rename to cpp/5_Domain_Specific/Mandelbrot/data/Mandelbrot_fp64.ppm diff --git a/Samples/5_Domain_Specific/Mandelbrot/data/params.txt b/cpp/5_Domain_Specific/Mandelbrot/data/params.txt similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/data/params.txt rename to cpp/5_Domain_Specific/Mandelbrot/data/params.txt diff --git a/Samples/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp32.ppm b/cpp/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp32.ppm similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp32.ppm rename to cpp/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp32.ppm diff --git a/Samples/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp64.ppm b/cpp/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp64.ppm similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp64.ppm rename to cpp/5_Domain_Specific/Mandelbrot/data/referenceJulia_fp64.ppm diff --git a/Samples/5_Domain_Specific/Mandelbrot/doc/sshot_lg.JPG b/cpp/5_Domain_Specific/Mandelbrot/doc/sshot_lg.JPG similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/doc/sshot_lg.JPG rename to cpp/5_Domain_Specific/Mandelbrot/doc/sshot_lg.JPG diff --git a/Samples/5_Domain_Specific/Mandelbrot/doc/sshot_md.JPG b/cpp/5_Domain_Specific/Mandelbrot/doc/sshot_md.JPG similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/doc/sshot_md.JPG rename to cpp/5_Domain_Specific/Mandelbrot/doc/sshot_md.JPG diff --git a/Samples/5_Domain_Specific/Mandelbrot/doc/sshot_sm.JPG b/cpp/5_Domain_Specific/Mandelbrot/doc/sshot_sm.JPG similarity index 100% rename from Samples/5_Domain_Specific/Mandelbrot/doc/sshot_sm.JPG rename to cpp/5_Domain_Specific/Mandelbrot/doc/sshot_sm.JPG diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/MonteCarloMultiGPU/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/.vscode/extensions.json b/cpp/5_Domain_Specific/MonteCarloMultiGPU/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/.vscode/extensions.json rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt b/cpp/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU.cpp b/cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU.cpp similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU.cpp rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarloMultiGPU.cpp diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_common.h b/cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_common.h similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_common.h rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_common.h diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_gold.cpp b/cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_gold.cpp rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_gold.cpp diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_kernel.cu b/cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_kernel.cu rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_kernel.cu diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_reduction.cuh b/cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_reduction.cuh similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_reduction.cuh rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/MonteCarlo_reduction.cuh diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md b/cpp/5_Domain_Specific/MonteCarloMultiGPU/README.md similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/README.md rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/README.md diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.doc b/cpp/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.doc similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.doc rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.doc diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.pdf b/cpp/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.pdf similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.pdf rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/doc/MonteCarlo.pdf diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/multithreading.cpp b/cpp/5_Domain_Specific/MonteCarloMultiGPU/multithreading.cpp similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/multithreading.cpp rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/multithreading.cpp diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/multithreading.h b/cpp/5_Domain_Specific/MonteCarloMultiGPU/multithreading.h similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/multithreading.h rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/multithreading.h diff --git a/Samples/5_Domain_Specific/MonteCarloMultiGPU/realtype.h b/cpp/5_Domain_Specific/MonteCarloMultiGPU/realtype.h similarity index 100% rename from Samples/5_Domain_Specific/MonteCarloMultiGPU/realtype.h rename to cpp/5_Domain_Specific/MonteCarloMultiGPU/realtype.h diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/NV12toBGRandResize/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/NV12toBGRandResize/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/.vscode/extensions.json b/cpp/5_Domain_Specific/NV12toBGRandResize/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/.vscode/extensions.json rename to cpp/5_Domain_Specific/NV12toBGRandResize/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt b/cpp/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt rename to cpp/5_Domain_Specific/NV12toBGRandResize/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/README.md b/cpp/5_Domain_Specific/NV12toBGRandResize/README.md similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/README.md rename to cpp/5_Domain_Specific/NV12toBGRandResize/README.md diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/bgr_resize.cu b/cpp/5_Domain_Specific/NV12toBGRandResize/bgr_resize.cu similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/bgr_resize.cu rename to cpp/5_Domain_Specific/NV12toBGRandResize/bgr_resize.cu diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/data/test1280x720.nv12 b/cpp/5_Domain_Specific/NV12toBGRandResize/data/test1280x720.nv12 similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/data/test1280x720.nv12 rename to cpp/5_Domain_Specific/NV12toBGRandResize/data/test1280x720.nv12 diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/data/test1920x1080.nv12 b/cpp/5_Domain_Specific/NV12toBGRandResize/data/test1920x1080.nv12 similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/data/test1920x1080.nv12 rename to cpp/5_Domain_Specific/NV12toBGRandResize/data/test1920x1080.nv12 diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/data/test640x480.nv12 b/cpp/5_Domain_Specific/NV12toBGRandResize/data/test640x480.nv12 similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/data/test640x480.nv12 rename to cpp/5_Domain_Specific/NV12toBGRandResize/data/test640x480.nv12 diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/nv12_resize.cu b/cpp/5_Domain_Specific/NV12toBGRandResize/nv12_resize.cu similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/nv12_resize.cu rename to cpp/5_Domain_Specific/NV12toBGRandResize/nv12_resize.cu diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/nv12_to_bgr_planar.cu b/cpp/5_Domain_Specific/NV12toBGRandResize/nv12_to_bgr_planar.cu similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/nv12_to_bgr_planar.cu rename to cpp/5_Domain_Specific/NV12toBGRandResize/nv12_to_bgr_planar.cu diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/resize_convert.h b/cpp/5_Domain_Specific/NV12toBGRandResize/resize_convert.h similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/resize_convert.h rename to cpp/5_Domain_Specific/NV12toBGRandResize/resize_convert.h diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/resize_convert_main.cpp b/cpp/5_Domain_Specific/NV12toBGRandResize/resize_convert_main.cpp similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/resize_convert_main.cpp rename to cpp/5_Domain_Specific/NV12toBGRandResize/resize_convert_main.cpp diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/utils.cu b/cpp/5_Domain_Specific/NV12toBGRandResize/utils.cu similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/utils.cu rename to cpp/5_Domain_Specific/NV12toBGRandResize/utils.cu diff --git a/Samples/5_Domain_Specific/NV12toBGRandResize/utils.h b/cpp/5_Domain_Specific/NV12toBGRandResize/utils.h similarity index 100% rename from Samples/5_Domain_Specific/NV12toBGRandResize/utils.h rename to cpp/5_Domain_Specific/NV12toBGRandResize/utils.h diff --git a/Samples/5_Domain_Specific/README.md b/cpp/5_Domain_Specific/README.md similarity index 100% rename from Samples/5_Domain_Specific/README.md rename to cpp/5_Domain_Specific/README.md diff --git a/Samples/5_Domain_Specific/SobelFilter/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/SobelFilter/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/SobelFilter/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/SobelFilter/.vscode/extensions.json b/cpp/5_Domain_Specific/SobelFilter/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/.vscode/extensions.json rename to cpp/5_Domain_Specific/SobelFilter/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/SobelFilter/CMakeLists.txt b/cpp/5_Domain_Specific/SobelFilter/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/CMakeLists.txt rename to cpp/5_Domain_Specific/SobelFilter/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/SobelFilter/README.md b/cpp/5_Domain_Specific/SobelFilter/README.md similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/README.md rename to cpp/5_Domain_Specific/SobelFilter/README.md diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter.cpp b/cpp/5_Domain_Specific/SobelFilter/SobelFilter.cpp similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/SobelFilter.cpp rename to cpp/5_Domain_Specific/SobelFilter/SobelFilter.cpp diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_kernels.cu b/cpp/5_Domain_Specific/SobelFilter/SobelFilter_kernels.cu similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/SobelFilter_kernels.cu rename to cpp/5_Domain_Specific/SobelFilter/SobelFilter_kernels.cu diff --git a/Samples/5_Domain_Specific/SobelFilter/SobelFilter_kernels.h b/cpp/5_Domain_Specific/SobelFilter/SobelFilter_kernels.h similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/SobelFilter_kernels.h rename to cpp/5_Domain_Specific/SobelFilter/SobelFilter_kernels.h diff --git a/Samples/5_Domain_Specific/SobelFilter/data/ref_orig.pgm b/cpp/5_Domain_Specific/SobelFilter/data/ref_orig.pgm similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/data/ref_orig.pgm rename to cpp/5_Domain_Specific/SobelFilter/data/ref_orig.pgm diff --git a/Samples/5_Domain_Specific/SobelFilter/data/ref_shared.pgm b/cpp/5_Domain_Specific/SobelFilter/data/ref_shared.pgm similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/data/ref_shared.pgm rename to cpp/5_Domain_Specific/SobelFilter/data/ref_shared.pgm diff --git a/Samples/5_Domain_Specific/SobelFilter/data/ref_tex.pgm b/cpp/5_Domain_Specific/SobelFilter/data/ref_tex.pgm similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/data/ref_tex.pgm rename to cpp/5_Domain_Specific/SobelFilter/data/ref_tex.pgm diff --git a/Samples/5_Domain_Specific/SobelFilter/data/teapot.pgm b/cpp/5_Domain_Specific/SobelFilter/data/teapot.pgm similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/data/teapot.pgm rename to cpp/5_Domain_Specific/SobelFilter/data/teapot.pgm diff --git a/Samples/5_Domain_Specific/SobelFilter/doc/sshot_lg.JPG b/cpp/5_Domain_Specific/SobelFilter/doc/sshot_lg.JPG similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/doc/sshot_lg.JPG rename to cpp/5_Domain_Specific/SobelFilter/doc/sshot_lg.JPG diff --git a/Samples/5_Domain_Specific/SobelFilter/doc/sshot_md.JPG b/cpp/5_Domain_Specific/SobelFilter/doc/sshot_md.JPG similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/doc/sshot_md.JPG rename to cpp/5_Domain_Specific/SobelFilter/doc/sshot_md.JPG diff --git a/Samples/5_Domain_Specific/SobelFilter/doc/sshot_sm.JPG b/cpp/5_Domain_Specific/SobelFilter/doc/sshot_sm.JPG similarity index 100% rename from Samples/5_Domain_Specific/SobelFilter/doc/sshot_sm.JPG rename to cpp/5_Domain_Specific/SobelFilter/doc/sshot_sm.JPG diff --git a/Samples/5_Domain_Specific/SobolQRNG/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/SobolQRNG/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/SobolQRNG/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/SobolQRNG/.vscode/extensions.json b/cpp/5_Domain_Specific/SobolQRNG/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/.vscode/extensions.json rename to cpp/5_Domain_Specific/SobolQRNG/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/SobolQRNG/CMakeLists.txt b/cpp/5_Domain_Specific/SobolQRNG/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/CMakeLists.txt rename to cpp/5_Domain_Specific/SobolQRNG/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/SobolQRNG/README.md b/cpp/5_Domain_Specific/SobolQRNG/README.md similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/README.md rename to cpp/5_Domain_Specific/SobolQRNG/README.md diff --git a/Samples/5_Domain_Specific/SobolQRNG/sobol.cpp b/cpp/5_Domain_Specific/SobolQRNG/sobol.cpp similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/sobol.cpp rename to cpp/5_Domain_Specific/SobolQRNG/sobol.cpp diff --git a/Samples/5_Domain_Specific/SobolQRNG/sobol.h b/cpp/5_Domain_Specific/SobolQRNG/sobol.h similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/sobol.h rename to cpp/5_Domain_Specific/SobolQRNG/sobol.h diff --git a/Samples/5_Domain_Specific/SobolQRNG/sobol_gold.cpp b/cpp/5_Domain_Specific/SobolQRNG/sobol_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/sobol_gold.cpp rename to cpp/5_Domain_Specific/SobolQRNG/sobol_gold.cpp diff --git a/Samples/5_Domain_Specific/SobolQRNG/sobol_gold.h b/cpp/5_Domain_Specific/SobolQRNG/sobol_gold.h similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/sobol_gold.h rename to cpp/5_Domain_Specific/SobolQRNG/sobol_gold.h diff --git a/Samples/5_Domain_Specific/SobolQRNG/sobol_gpu.cu b/cpp/5_Domain_Specific/SobolQRNG/sobol_gpu.cu similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/sobol_gpu.cu rename to cpp/5_Domain_Specific/SobolQRNG/sobol_gpu.cu diff --git a/Samples/5_Domain_Specific/SobolQRNG/sobol_gpu.h b/cpp/5_Domain_Specific/SobolQRNG/sobol_gpu.h similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/sobol_gpu.h rename to cpp/5_Domain_Specific/SobolQRNG/sobol_gpu.h diff --git a/Samples/5_Domain_Specific/SobolQRNG/sobol_primitives.cpp b/cpp/5_Domain_Specific/SobolQRNG/sobol_primitives.cpp similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/sobol_primitives.cpp rename to cpp/5_Domain_Specific/SobolQRNG/sobol_primitives.cpp diff --git a/Samples/5_Domain_Specific/SobolQRNG/sobol_primitives.h b/cpp/5_Domain_Specific/SobolQRNG/sobol_primitives.h similarity index 100% rename from Samples/5_Domain_Specific/SobolQRNG/sobol_primitives.h rename to cpp/5_Domain_Specific/SobolQRNG/sobol_primitives.h diff --git a/Samples/5_Domain_Specific/bicubicTexture/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/bicubicTexture/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/bicubicTexture/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/bicubicTexture/.vscode/extensions.json b/cpp/5_Domain_Specific/bicubicTexture/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/.vscode/extensions.json rename to cpp/5_Domain_Specific/bicubicTexture/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/bicubicTexture/CMakeLists.txt b/cpp/5_Domain_Specific/bicubicTexture/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/CMakeLists.txt rename to cpp/5_Domain_Specific/bicubicTexture/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/bicubicTexture/README.md b/cpp/5_Domain_Specific/bicubicTexture/README.md similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/README.md rename to cpp/5_Domain_Specific/bicubicTexture/README.md diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture.cpp b/cpp/5_Domain_Specific/bicubicTexture/bicubicTexture.cpp similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/bicubicTexture.cpp rename to cpp/5_Domain_Specific/bicubicTexture/bicubicTexture.cpp diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_cuda.cu b/cpp/5_Domain_Specific/bicubicTexture/bicubicTexture_cuda.cu similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_cuda.cu rename to cpp/5_Domain_Specific/bicubicTexture/bicubicTexture_cuda.cu diff --git a/Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_kernel.cuh b/cpp/5_Domain_Specific/bicubicTexture/bicubicTexture_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/bicubicTexture_kernel.cuh rename to cpp/5_Domain_Specific/bicubicTexture/bicubicTexture_kernel.cuh diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/0_nearest.ppm b/cpp/5_Domain_Specific/bicubicTexture/data/0_nearest.ppm similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/data/0_nearest.ppm rename to cpp/5_Domain_Specific/bicubicTexture/data/0_nearest.ppm diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/1_bilinear.ppm b/cpp/5_Domain_Specific/bicubicTexture/data/1_bilinear.ppm similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/data/1_bilinear.ppm rename to cpp/5_Domain_Specific/bicubicTexture/data/1_bilinear.ppm diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/2_bicubic.ppm b/cpp/5_Domain_Specific/bicubicTexture/data/2_bicubic.ppm similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/data/2_bicubic.ppm rename to cpp/5_Domain_Specific/bicubicTexture/data/2_bicubic.ppm diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/3_fastbicubic.ppm b/cpp/5_Domain_Specific/bicubicTexture/data/3_fastbicubic.ppm similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/data/3_fastbicubic.ppm rename to cpp/5_Domain_Specific/bicubicTexture/data/3_fastbicubic.ppm diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/4_catmull-rom.ppm b/cpp/5_Domain_Specific/bicubicTexture/data/4_catmull-rom.ppm similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/data/4_catmull-rom.ppm rename to cpp/5_Domain_Specific/bicubicTexture/data/4_catmull-rom.ppm diff --git a/Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm b/cpp/5_Domain_Specific/bicubicTexture/data/teapot512.pgm similarity index 100% rename from Samples/5_Domain_Specific/bicubicTexture/data/teapot512.pgm rename to cpp/5_Domain_Specific/bicubicTexture/data/teapot512.pgm diff --git a/Samples/5_Domain_Specific/bilateralFilter/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/bilateralFilter/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/bilateralFilter/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/bilateralFilter/.vscode/extensions.json b/cpp/5_Domain_Specific/bilateralFilter/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/.vscode/extensions.json rename to cpp/5_Domain_Specific/bilateralFilter/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/bilateralFilter/CMakeLists.txt b/cpp/5_Domain_Specific/bilateralFilter/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/CMakeLists.txt rename to cpp/5_Domain_Specific/bilateralFilter/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/bilateralFilter/README.md b/cpp/5_Domain_Specific/bilateralFilter/README.md similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/README.md rename to cpp/5_Domain_Specific/bilateralFilter/README.md diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter.cpp b/cpp/5_Domain_Specific/bilateralFilter/bilateralFilter.cpp similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/bilateralFilter.cpp rename to cpp/5_Domain_Specific/bilateralFilter/bilateralFilter.cpp diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_cpu.cpp b/cpp/5_Domain_Specific/bilateralFilter/bilateralFilter_cpu.cpp similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/bilateralFilter_cpu.cpp rename to cpp/5_Domain_Specific/bilateralFilter/bilateralFilter_cpu.cpp diff --git a/Samples/5_Domain_Specific/bilateralFilter/bilateral_kernel.cu b/cpp/5_Domain_Specific/bilateralFilter/bilateral_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/bilateral_kernel.cu rename to cpp/5_Domain_Specific/bilateralFilter/bilateral_kernel.cu diff --git a/Samples/5_Domain_Specific/bilateralFilter/bmploader.cpp b/cpp/5_Domain_Specific/bilateralFilter/bmploader.cpp similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/bmploader.cpp rename to cpp/5_Domain_Specific/bilateralFilter/bmploader.cpp diff --git a/Samples/5_Domain_Specific/bilateralFilter/data/nature_monte.bmp b/cpp/5_Domain_Specific/bilateralFilter/data/nature_monte.bmp similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/data/nature_monte.bmp rename to cpp/5_Domain_Specific/bilateralFilter/data/nature_monte.bmp diff --git a/Samples/5_Domain_Specific/bilateralFilter/data/ref_05.ppm b/cpp/5_Domain_Specific/bilateralFilter/data/ref_05.ppm similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/data/ref_05.ppm rename to cpp/5_Domain_Specific/bilateralFilter/data/ref_05.ppm diff --git a/Samples/5_Domain_Specific/bilateralFilter/data/ref_06.ppm b/cpp/5_Domain_Specific/bilateralFilter/data/ref_06.ppm similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/data/ref_06.ppm rename to cpp/5_Domain_Specific/bilateralFilter/data/ref_06.ppm diff --git a/Samples/5_Domain_Specific/bilateralFilter/data/ref_07.ppm b/cpp/5_Domain_Specific/bilateralFilter/data/ref_07.ppm similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/data/ref_07.ppm rename to cpp/5_Domain_Specific/bilateralFilter/data/ref_07.ppm diff --git a/Samples/5_Domain_Specific/bilateralFilter/data/ref_08.ppm b/cpp/5_Domain_Specific/bilateralFilter/data/ref_08.ppm similarity index 100% rename from Samples/5_Domain_Specific/bilateralFilter/data/ref_08.ppm rename to cpp/5_Domain_Specific/bilateralFilter/data/ref_08.ppm diff --git a/Samples/5_Domain_Specific/binomialOptions/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/binomialOptions/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/binomialOptions/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/binomialOptions/.vscode/extensions.json b/cpp/5_Domain_Specific/binomialOptions/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/.vscode/extensions.json rename to cpp/5_Domain_Specific/binomialOptions/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/binomialOptions/CMakeLists.txt b/cpp/5_Domain_Specific/binomialOptions/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/CMakeLists.txt rename to cpp/5_Domain_Specific/binomialOptions/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/binomialOptions/README.md b/cpp/5_Domain_Specific/binomialOptions/README.md similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/README.md rename to cpp/5_Domain_Specific/binomialOptions/README.md diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions.cpp b/cpp/5_Domain_Specific/binomialOptions/binomialOptions.cpp similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/binomialOptions.cpp rename to cpp/5_Domain_Specific/binomialOptions/binomialOptions.cpp diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_common.h b/cpp/5_Domain_Specific/binomialOptions/binomialOptions_common.h similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/binomialOptions_common.h rename to cpp/5_Domain_Specific/binomialOptions/binomialOptions_common.h diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp b/cpp/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp rename to cpp/5_Domain_Specific/binomialOptions/binomialOptions_gold.cpp diff --git a/Samples/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu b/cpp/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu rename to cpp/5_Domain_Specific/binomialOptions/binomialOptions_kernel.cu diff --git a/Samples/5_Domain_Specific/binomialOptions/doc/binomialOptions.doc b/cpp/5_Domain_Specific/binomialOptions/doc/binomialOptions.doc similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/doc/binomialOptions.doc rename to cpp/5_Domain_Specific/binomialOptions/doc/binomialOptions.doc diff --git a/Samples/5_Domain_Specific/binomialOptions/doc/binomialOptions.pdf b/cpp/5_Domain_Specific/binomialOptions/doc/binomialOptions.pdf similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/doc/binomialOptions.pdf rename to cpp/5_Domain_Specific/binomialOptions/doc/binomialOptions.pdf diff --git a/Samples/5_Domain_Specific/binomialOptions/realtype.h b/cpp/5_Domain_Specific/binomialOptions/realtype.h similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions/realtype.h rename to cpp/5_Domain_Specific/binomialOptions/realtype.h diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/binomialOptions_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/.vscode/extensions.json b/cpp/5_Domain_Specific/binomialOptions_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/.vscode/extensions.json rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt b/cpp/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md b/cpp/5_Domain_Specific/binomialOptions_nvrtc/README.md similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/README.md rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/README.md diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions.cpp b/cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions.cpp similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions.cpp rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions.cpp diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_common.h b/cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_common.h similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_common.h rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_common.h diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gold.cpp b/cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gold.cpp rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gold.cpp diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gpu.cpp b/cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gpu.cpp similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gpu.cpp rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_gpu.cpp diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_kernel.cu b/cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_kernel.cu rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/binomialOptions_kernel.cu diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/common_gpu_header.h b/cpp/5_Domain_Specific/binomialOptions_nvrtc/common_gpu_header.h similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/common_gpu_header.h rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/common_gpu_header.h diff --git a/Samples/5_Domain_Specific/binomialOptions_nvrtc/realtype.h b/cpp/5_Domain_Specific/binomialOptions_nvrtc/realtype.h similarity index 100% rename from Samples/5_Domain_Specific/binomialOptions_nvrtc/realtype.h rename to cpp/5_Domain_Specific/binomialOptions_nvrtc/realtype.h diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/convolutionFFT2D/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/convolutionFFT2D/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/.vscode/extensions.json b/cpp/5_Domain_Specific/convolutionFFT2D/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/.vscode/extensions.json rename to cpp/5_Domain_Specific/convolutionFFT2D/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt b/cpp/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt rename to cpp/5_Domain_Specific/convolutionFFT2D/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/README.md b/cpp/5_Domain_Specific/convolutionFFT2D/README.md similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/README.md rename to cpp/5_Domain_Specific/convolutionFFT2D/README.md diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cu b/cpp/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cu similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cu rename to cpp/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cu diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cuh b/cpp/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cuh similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cuh rename to cpp/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D.cuh diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_common.h b/cpp/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_common.h similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_common.h rename to cpp/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_common.h diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_gold.cpp b/cpp/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_gold.cpp rename to cpp/5_Domain_Specific/convolutionFFT2D/convolutionFFT2D_gold.cpp diff --git a/Samples/5_Domain_Specific/convolutionFFT2D/main.cpp b/cpp/5_Domain_Specific/convolutionFFT2D/main.cpp similarity index 100% rename from Samples/5_Domain_Specific/convolutionFFT2D/main.cpp rename to cpp/5_Domain_Specific/convolutionFFT2D/main.cpp diff --git a/Samples/5_Domain_Specific/dwtHaar1D/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/dwtHaar1D/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/dwtHaar1D/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/dwtHaar1D/.vscode/extensions.json b/cpp/5_Domain_Specific/dwtHaar1D/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/.vscode/extensions.json rename to cpp/5_Domain_Specific/dwtHaar1D/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/dwtHaar1D/CMakeLists.txt b/cpp/5_Domain_Specific/dwtHaar1D/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/CMakeLists.txt rename to cpp/5_Domain_Specific/dwtHaar1D/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/dwtHaar1D/README.md b/cpp/5_Domain_Specific/dwtHaar1D/README.md similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/README.md rename to cpp/5_Domain_Specific/dwtHaar1D/README.md diff --git a/Samples/5_Domain_Specific/dwtHaar1D/data/regression.gold.dat b/cpp/5_Domain_Specific/dwtHaar1D/data/regression.gold.dat similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/data/regression.gold.dat rename to cpp/5_Domain_Specific/dwtHaar1D/data/regression.gold.dat diff --git a/Samples/5_Domain_Specific/dwtHaar1D/data/regression_2_14.gold.dat b/cpp/5_Domain_Specific/dwtHaar1D/data/regression_2_14.gold.dat similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/data/regression_2_14.gold.dat rename to cpp/5_Domain_Specific/dwtHaar1D/data/regression_2_14.gold.dat diff --git a/Samples/5_Domain_Specific/dwtHaar1D/data/regression_2_18.gold.dat b/cpp/5_Domain_Specific/dwtHaar1D/data/regression_2_18.gold.dat similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/data/regression_2_18.gold.dat rename to cpp/5_Domain_Specific/dwtHaar1D/data/regression_2_18.gold.dat diff --git a/Samples/5_Domain_Specific/dwtHaar1D/data/signal.dat b/cpp/5_Domain_Specific/dwtHaar1D/data/signal.dat similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/data/signal.dat rename to cpp/5_Domain_Specific/dwtHaar1D/data/signal.dat diff --git a/Samples/5_Domain_Specific/dwtHaar1D/data/signal_2_14.dat b/cpp/5_Domain_Specific/dwtHaar1D/data/signal_2_14.dat similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/data/signal_2_14.dat rename to cpp/5_Domain_Specific/dwtHaar1D/data/signal_2_14.dat diff --git a/Samples/5_Domain_Specific/dwtHaar1D/data/signal_2_18.dat b/cpp/5_Domain_Specific/dwtHaar1D/data/signal_2_18.dat similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/data/signal_2_18.dat rename to cpp/5_Domain_Specific/dwtHaar1D/data/signal_2_18.dat diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D.cu b/cpp/5_Domain_Specific/dwtHaar1D/dwtHaar1D.cu similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D.cu rename to cpp/5_Domain_Specific/dwtHaar1D/dwtHaar1D.cu diff --git a/Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_kernel.cuh b/cpp/5_Domain_Specific/dwtHaar1D/dwtHaar1D_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/dwtHaar1D/dwtHaar1D_kernel.cuh rename to cpp/5_Domain_Specific/dwtHaar1D/dwtHaar1D_kernel.cuh diff --git a/Samples/5_Domain_Specific/dxtc/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/dxtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/dxtc/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/dxtc/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/dxtc/.vscode/extensions.json b/cpp/5_Domain_Specific/dxtc/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/dxtc/.vscode/extensions.json rename to cpp/5_Domain_Specific/dxtc/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/dxtc/CMakeLists.txt b/cpp/5_Domain_Specific/dxtc/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/dxtc/CMakeLists.txt rename to cpp/5_Domain_Specific/dxtc/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/dxtc/CudaMath.h b/cpp/5_Domain_Specific/dxtc/CudaMath.h similarity index 100% rename from Samples/5_Domain_Specific/dxtc/CudaMath.h rename to cpp/5_Domain_Specific/dxtc/CudaMath.h diff --git a/Samples/5_Domain_Specific/dxtc/README.md b/cpp/5_Domain_Specific/dxtc/README.md similarity index 100% rename from Samples/5_Domain_Specific/dxtc/README.md rename to cpp/5_Domain_Specific/dxtc/README.md diff --git a/Samples/5_Domain_Specific/dxtc/data/teapot512_ref.dds b/cpp/5_Domain_Specific/dxtc/data/teapot512_ref.dds similarity index 100% rename from Samples/5_Domain_Specific/dxtc/data/teapot512_ref.dds rename to cpp/5_Domain_Specific/dxtc/data/teapot512_ref.dds diff --git a/Samples/5_Domain_Specific/dxtc/data/teapot512_std.dds b/cpp/5_Domain_Specific/dxtc/data/teapot512_std.dds similarity index 100% rename from Samples/5_Domain_Specific/dxtc/data/teapot512_std.dds rename to cpp/5_Domain_Specific/dxtc/data/teapot512_std.dds diff --git a/Samples/5_Domain_Specific/dxtc/data/teapot512_std.ppm b/cpp/5_Domain_Specific/dxtc/data/teapot512_std.ppm similarity index 100% rename from Samples/5_Domain_Specific/dxtc/data/teapot512_std.ppm rename to cpp/5_Domain_Specific/dxtc/data/teapot512_std.ppm diff --git a/Samples/5_Domain_Specific/dxtc/dds.h b/cpp/5_Domain_Specific/dxtc/dds.h similarity index 100% rename from Samples/5_Domain_Specific/dxtc/dds.h rename to cpp/5_Domain_Specific/dxtc/dds.h diff --git a/Samples/5_Domain_Specific/dxtc/doc/cuda_dxtc.doc b/cpp/5_Domain_Specific/dxtc/doc/cuda_dxtc.doc similarity index 100% rename from Samples/5_Domain_Specific/dxtc/doc/cuda_dxtc.doc rename to cpp/5_Domain_Specific/dxtc/doc/cuda_dxtc.doc diff --git a/Samples/5_Domain_Specific/dxtc/doc/cuda_dxtc.pdf b/cpp/5_Domain_Specific/dxtc/doc/cuda_dxtc.pdf similarity index 100% rename from Samples/5_Domain_Specific/dxtc/doc/cuda_dxtc.pdf rename to cpp/5_Domain_Specific/dxtc/doc/cuda_dxtc.pdf diff --git a/Samples/5_Domain_Specific/dxtc/dxtc.cu b/cpp/5_Domain_Specific/dxtc/dxtc.cu similarity index 100% rename from Samples/5_Domain_Specific/dxtc/dxtc.cu rename to cpp/5_Domain_Specific/dxtc/dxtc.cu diff --git a/Samples/5_Domain_Specific/dxtc/permutations.h b/cpp/5_Domain_Specific/dxtc/permutations.h similarity index 100% rename from Samples/5_Domain_Specific/dxtc/permutations.h rename to cpp/5_Domain_Specific/dxtc/permutations.h diff --git a/Samples/5_Domain_Specific/fastWalshTransform/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/fastWalshTransform/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/fastWalshTransform/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/fastWalshTransform/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/fastWalshTransform/.vscode/extensions.json b/cpp/5_Domain_Specific/fastWalshTransform/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/fastWalshTransform/.vscode/extensions.json rename to cpp/5_Domain_Specific/fastWalshTransform/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/fastWalshTransform/CMakeLists.txt b/cpp/5_Domain_Specific/fastWalshTransform/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/fastWalshTransform/CMakeLists.txt rename to cpp/5_Domain_Specific/fastWalshTransform/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/fastWalshTransform/README.md b/cpp/5_Domain_Specific/fastWalshTransform/README.md similarity index 100% rename from Samples/5_Domain_Specific/fastWalshTransform/README.md rename to cpp/5_Domain_Specific/fastWalshTransform/README.md diff --git a/Samples/5_Domain_Specific/fastWalshTransform/doc/FWT.doc b/cpp/5_Domain_Specific/fastWalshTransform/doc/FWT.doc similarity index 100% rename from Samples/5_Domain_Specific/fastWalshTransform/doc/FWT.doc rename to cpp/5_Domain_Specific/fastWalshTransform/doc/FWT.doc diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform.cu b/cpp/5_Domain_Specific/fastWalshTransform/fastWalshTransform.cu similarity index 100% rename from Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform.cu rename to cpp/5_Domain_Specific/fastWalshTransform/fastWalshTransform.cu diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_gold.cpp b/cpp/5_Domain_Specific/fastWalshTransform/fastWalshTransform_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_gold.cpp rename to cpp/5_Domain_Specific/fastWalshTransform/fastWalshTransform_gold.cpp diff --git a/Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_kernel.cuh b/cpp/5_Domain_Specific/fastWalshTransform/fastWalshTransform_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/fastWalshTransform/fastWalshTransform_kernel.cuh rename to cpp/5_Domain_Specific/fastWalshTransform/fastWalshTransform_kernel.cuh diff --git a/Samples/5_Domain_Specific/fluidsGL/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/fluidsGL/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/fluidsGL/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/fluidsGL/.vscode/extensions.json b/cpp/5_Domain_Specific/fluidsGL/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/.vscode/extensions.json rename to cpp/5_Domain_Specific/fluidsGL/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/fluidsGL/CMakeLists.txt b/cpp/5_Domain_Specific/fluidsGL/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/CMakeLists.txt rename to cpp/5_Domain_Specific/fluidsGL/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/fluidsGL/README.md b/cpp/5_Domain_Specific/fluidsGL/README.md similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/README.md rename to cpp/5_Domain_Specific/fluidsGL/README.md diff --git a/Samples/5_Domain_Specific/fluidsGL/data/ref_fluidsGL.ppm b/cpp/5_Domain_Specific/fluidsGL/data/ref_fluidsGL.ppm similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/data/ref_fluidsGL.ppm rename to cpp/5_Domain_Specific/fluidsGL/data/ref_fluidsGL.ppm diff --git a/Samples/5_Domain_Specific/fluidsGL/defines.h b/cpp/5_Domain_Specific/fluidsGL/defines.h similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/defines.h rename to cpp/5_Domain_Specific/fluidsGL/defines.h diff --git a/Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL.doc b/cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL.doc similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL.doc rename to cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL.doc diff --git a/Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL.pdf b/cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL.pdf similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL.pdf rename to cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL.pdf diff --git a/Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL_lg.gif b/cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL_lg.gif similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL_lg.gif rename to cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL_lg.gif diff --git a/Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL_md.gif b/cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL_md.gif similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL_md.gif rename to cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL_md.gif diff --git a/Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL_sm.gif b/cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL_sm.gif similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/doc/fluidsGL_sm.gif rename to cpp/5_Domain_Specific/fluidsGL/doc/fluidsGL_sm.gif diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL.cpp b/cpp/5_Domain_Specific/fluidsGL/fluidsGL.cpp similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/fluidsGL.cpp rename to cpp/5_Domain_Specific/fluidsGL/fluidsGL.cpp diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cu b/cpp/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cu similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cu rename to cpp/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cu diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cuh b/cpp/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cuh similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cuh rename to cpp/5_Domain_Specific/fluidsGL/fluidsGL_kernels.cuh diff --git a/Samples/5_Domain_Specific/fluidsGL/fluidsGL_kernels.h b/cpp/5_Domain_Specific/fluidsGL/fluidsGL_kernels.h similarity index 100% rename from Samples/5_Domain_Specific/fluidsGL/fluidsGL_kernels.h rename to cpp/5_Domain_Specific/fluidsGL/fluidsGL_kernels.h diff --git a/Samples/5_Domain_Specific/marchingCubes/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/marchingCubes/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/marchingCubes/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/marchingCubes/.vscode/extensions.json b/cpp/5_Domain_Specific/marchingCubes/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/.vscode/extensions.json rename to cpp/5_Domain_Specific/marchingCubes/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/marchingCubes/CMakeLists.txt b/cpp/5_Domain_Specific/marchingCubes/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/CMakeLists.txt rename to cpp/5_Domain_Specific/marchingCubes/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/marchingCubes/README.md b/cpp/5_Domain_Specific/marchingCubes/README.md similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/README.md rename to cpp/5_Domain_Specific/marchingCubes/README.md diff --git a/Samples/5_Domain_Specific/marchingCubes/data/Bucky.raw b/cpp/5_Domain_Specific/marchingCubes/data/Bucky.raw similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/data/Bucky.raw rename to cpp/5_Domain_Specific/marchingCubes/data/Bucky.raw diff --git a/Samples/5_Domain_Specific/marchingCubes/data/compVoxelArray.bin b/cpp/5_Domain_Specific/marchingCubes/data/compVoxelArray.bin similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/data/compVoxelArray.bin rename to cpp/5_Domain_Specific/marchingCubes/data/compVoxelArray.bin diff --git a/Samples/5_Domain_Specific/marchingCubes/data/normalArray.bin b/cpp/5_Domain_Specific/marchingCubes/data/normalArray.bin similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/data/normalArray.bin rename to cpp/5_Domain_Specific/marchingCubes/data/normalArray.bin diff --git a/Samples/5_Domain_Specific/marchingCubes/data/posArray.bin b/cpp/5_Domain_Specific/marchingCubes/data/posArray.bin similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/data/posArray.bin rename to cpp/5_Domain_Specific/marchingCubes/data/posArray.bin diff --git a/Samples/5_Domain_Specific/marchingCubes/data/ref_march_cubes.ppm b/cpp/5_Domain_Specific/marchingCubes/data/ref_march_cubes.ppm similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/data/ref_march_cubes.ppm rename to cpp/5_Domain_Specific/marchingCubes/data/ref_march_cubes.ppm diff --git a/Samples/5_Domain_Specific/marchingCubes/defines.h b/cpp/5_Domain_Specific/marchingCubes/defines.h similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/defines.h rename to cpp/5_Domain_Specific/marchingCubes/defines.h diff --git a/Samples/5_Domain_Specific/marchingCubes/doc/screenshot_lg.png b/cpp/5_Domain_Specific/marchingCubes/doc/screenshot_lg.png similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/doc/screenshot_lg.png rename to cpp/5_Domain_Specific/marchingCubes/doc/screenshot_lg.png diff --git a/Samples/5_Domain_Specific/marchingCubes/doc/screenshot_md.png b/cpp/5_Domain_Specific/marchingCubes/doc/screenshot_md.png similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/doc/screenshot_md.png rename to cpp/5_Domain_Specific/marchingCubes/doc/screenshot_md.png diff --git a/Samples/5_Domain_Specific/marchingCubes/doc/screenshot_sm.png b/cpp/5_Domain_Specific/marchingCubes/doc/screenshot_sm.png similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/doc/screenshot_sm.png rename to cpp/5_Domain_Specific/marchingCubes/doc/screenshot_sm.png diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes.cpp b/cpp/5_Domain_Specific/marchingCubes/marchingCubes.cpp similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/marchingCubes.cpp rename to cpp/5_Domain_Specific/marchingCubes/marchingCubes.cpp diff --git a/Samples/5_Domain_Specific/marchingCubes/marchingCubes_kernel.cu b/cpp/5_Domain_Specific/marchingCubes/marchingCubes_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/marchingCubes_kernel.cu rename to cpp/5_Domain_Specific/marchingCubes/marchingCubes_kernel.cu diff --git a/Samples/5_Domain_Specific/marchingCubes/tables.h b/cpp/5_Domain_Specific/marchingCubes/tables.h similarity index 100% rename from Samples/5_Domain_Specific/marchingCubes/tables.h rename to cpp/5_Domain_Specific/marchingCubes/tables.h diff --git a/Samples/5_Domain_Specific/nbody/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/nbody/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/nbody/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/nbody/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/nbody/.vscode/extensions.json b/cpp/5_Domain_Specific/nbody/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/nbody/.vscode/extensions.json rename to cpp/5_Domain_Specific/nbody/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/nbody/CMakeLists.txt b/cpp/5_Domain_Specific/nbody/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/nbody/CMakeLists.txt rename to cpp/5_Domain_Specific/nbody/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/nbody/README.md b/cpp/5_Domain_Specific/nbody/README.md similarity index 100% rename from Samples/5_Domain_Specific/nbody/README.md rename to cpp/5_Domain_Specific/nbody/README.md diff --git a/Samples/5_Domain_Specific/nbody/bodysystem.h b/cpp/5_Domain_Specific/nbody/bodysystem.h similarity index 100% rename from Samples/5_Domain_Specific/nbody/bodysystem.h rename to cpp/5_Domain_Specific/nbody/bodysystem.h diff --git a/Samples/5_Domain_Specific/nbody/bodysystemcpu.h b/cpp/5_Domain_Specific/nbody/bodysystemcpu.h similarity index 100% rename from Samples/5_Domain_Specific/nbody/bodysystemcpu.h rename to cpp/5_Domain_Specific/nbody/bodysystemcpu.h diff --git a/Samples/5_Domain_Specific/nbody/bodysystemcpu_impl.h b/cpp/5_Domain_Specific/nbody/bodysystemcpu_impl.h similarity index 100% rename from Samples/5_Domain_Specific/nbody/bodysystemcpu_impl.h rename to cpp/5_Domain_Specific/nbody/bodysystemcpu_impl.h diff --git a/Samples/5_Domain_Specific/nbody/bodysystemcuda.cu b/cpp/5_Domain_Specific/nbody/bodysystemcuda.cu similarity index 100% rename from Samples/5_Domain_Specific/nbody/bodysystemcuda.cu rename to cpp/5_Domain_Specific/nbody/bodysystemcuda.cu diff --git a/Samples/5_Domain_Specific/nbody/bodysystemcuda.h b/cpp/5_Domain_Specific/nbody/bodysystemcuda.h similarity index 100% rename from Samples/5_Domain_Specific/nbody/bodysystemcuda.h rename to cpp/5_Domain_Specific/nbody/bodysystemcuda.h diff --git a/Samples/5_Domain_Specific/nbody/bodysystemcuda_impl.h b/cpp/5_Domain_Specific/nbody/bodysystemcuda_impl.h similarity index 100% rename from Samples/5_Domain_Specific/nbody/bodysystemcuda_impl.h rename to cpp/5_Domain_Specific/nbody/bodysystemcuda_impl.h diff --git a/Samples/5_Domain_Specific/nbody/doc/nbody_gems3_ch31.pdf b/cpp/5_Domain_Specific/nbody/doc/nbody_gems3_ch31.pdf similarity index 100% rename from Samples/5_Domain_Specific/nbody/doc/nbody_gems3_ch31.pdf rename to cpp/5_Domain_Specific/nbody/doc/nbody_gems3_ch31.pdf diff --git a/Samples/5_Domain_Specific/nbody/doc/screenshot_lg.jpg b/cpp/5_Domain_Specific/nbody/doc/screenshot_lg.jpg similarity index 100% rename from Samples/5_Domain_Specific/nbody/doc/screenshot_lg.jpg rename to cpp/5_Domain_Specific/nbody/doc/screenshot_lg.jpg diff --git a/Samples/5_Domain_Specific/nbody/doc/screenshot_md.jpg b/cpp/5_Domain_Specific/nbody/doc/screenshot_md.jpg similarity index 100% rename from Samples/5_Domain_Specific/nbody/doc/screenshot_md.jpg rename to cpp/5_Domain_Specific/nbody/doc/screenshot_md.jpg diff --git a/Samples/5_Domain_Specific/nbody/doc/screenshot_sm.jpg b/cpp/5_Domain_Specific/nbody/doc/screenshot_sm.jpg similarity index 100% rename from Samples/5_Domain_Specific/nbody/doc/screenshot_sm.jpg rename to cpp/5_Domain_Specific/nbody/doc/screenshot_sm.jpg diff --git a/Samples/5_Domain_Specific/nbody/nbody.cpp b/cpp/5_Domain_Specific/nbody/nbody.cpp similarity index 100% rename from Samples/5_Domain_Specific/nbody/nbody.cpp rename to cpp/5_Domain_Specific/nbody/nbody.cpp diff --git a/Samples/5_Domain_Specific/nbody/render_particles.cpp b/cpp/5_Domain_Specific/nbody/render_particles.cpp similarity index 100% rename from Samples/5_Domain_Specific/nbody/render_particles.cpp rename to cpp/5_Domain_Specific/nbody/render_particles.cpp diff --git a/Samples/5_Domain_Specific/nbody/render_particles.h b/cpp/5_Domain_Specific/nbody/render_particles.h similarity index 100% rename from Samples/5_Domain_Specific/nbody/render_particles.h rename to cpp/5_Domain_Specific/nbody/render_particles.h diff --git a/Samples/5_Domain_Specific/nbody/tipsy.h b/cpp/5_Domain_Specific/nbody/tipsy.h similarity index 100% rename from Samples/5_Domain_Specific/nbody/tipsy.h rename to cpp/5_Domain_Specific/nbody/tipsy.h diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/extensions.json b/cpp/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/extensions.json rename to cpp/5_Domain_Specific/p2pBandwidthLatencyTest/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt b/cpp/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt rename to cpp/5_Domain_Specific/p2pBandwidthLatencyTest/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md b/cpp/5_Domain_Specific/p2pBandwidthLatencyTest/README.md similarity index 100% rename from Samples/5_Domain_Specific/p2pBandwidthLatencyTest/README.md rename to cpp/5_Domain_Specific/p2pBandwidthLatencyTest/README.md diff --git a/Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu b/cpp/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu similarity index 100% rename from Samples/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu rename to cpp/5_Domain_Specific/p2pBandwidthLatencyTest/p2pBandwidthLatencyTest.cu diff --git a/Samples/5_Domain_Specific/postProcessGL/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/postProcessGL/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/postProcessGL/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/postProcessGL/.vscode/extensions.json b/cpp/5_Domain_Specific/postProcessGL/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/.vscode/extensions.json rename to cpp/5_Domain_Specific/postProcessGL/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/postProcessGL/CMakeLists.txt b/cpp/5_Domain_Specific/postProcessGL/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/CMakeLists.txt rename to cpp/5_Domain_Specific/postProcessGL/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/postProcessGL/README.md b/cpp/5_Domain_Specific/postProcessGL/README.md similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/README.md rename to cpp/5_Domain_Specific/postProcessGL/README.md diff --git a/Samples/5_Domain_Specific/postProcessGL/data/teapot_2.ppm b/cpp/5_Domain_Specific/postProcessGL/data/teapot_2.ppm similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/data/teapot_2.ppm rename to cpp/5_Domain_Specific/postProcessGL/data/teapot_2.ppm diff --git a/Samples/5_Domain_Specific/postProcessGL/data/teapot_4.ppm b/cpp/5_Domain_Specific/postProcessGL/data/teapot_4.ppm similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/data/teapot_4.ppm rename to cpp/5_Domain_Specific/postProcessGL/data/teapot_4.ppm diff --git a/Samples/5_Domain_Specific/postProcessGL/data/teapot_8.ppm b/cpp/5_Domain_Specific/postProcessGL/data/teapot_8.ppm similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/data/teapot_8.ppm rename to cpp/5_Domain_Specific/postProcessGL/data/teapot_8.ppm diff --git a/Samples/5_Domain_Specific/postProcessGL/data/teapot_orig.ppm b/cpp/5_Domain_Specific/postProcessGL/data/teapot_orig.ppm similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/data/teapot_orig.ppm rename to cpp/5_Domain_Specific/postProcessGL/data/teapot_orig.ppm diff --git a/Samples/5_Domain_Specific/postProcessGL/doc/postProcessGL_lg.gif b/cpp/5_Domain_Specific/postProcessGL/doc/postProcessGL_lg.gif similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/doc/postProcessGL_lg.gif rename to cpp/5_Domain_Specific/postProcessGL/doc/postProcessGL_lg.gif diff --git a/Samples/5_Domain_Specific/postProcessGL/doc/postProcessGL_md.gif b/cpp/5_Domain_Specific/postProcessGL/doc/postProcessGL_md.gif similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/doc/postProcessGL_md.gif rename to cpp/5_Domain_Specific/postProcessGL/doc/postProcessGL_md.gif diff --git a/Samples/5_Domain_Specific/postProcessGL/doc/postProcessGL_sm.gif b/cpp/5_Domain_Specific/postProcessGL/doc/postProcessGL_sm.gif similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/doc/postProcessGL_sm.gif rename to cpp/5_Domain_Specific/postProcessGL/doc/postProcessGL_sm.gif diff --git a/Samples/5_Domain_Specific/postProcessGL/main.cpp b/cpp/5_Domain_Specific/postProcessGL/main.cpp similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/main.cpp rename to cpp/5_Domain_Specific/postProcessGL/main.cpp diff --git a/Samples/5_Domain_Specific/postProcessGL/postProcessGL.cu b/cpp/5_Domain_Specific/postProcessGL/postProcessGL.cu similarity index 100% rename from Samples/5_Domain_Specific/postProcessGL/postProcessGL.cu rename to cpp/5_Domain_Specific/postProcessGL/postProcessGL.cu diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/quasirandomGenerator/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/quasirandomGenerator/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/.vscode/extensions.json b/cpp/5_Domain_Specific/quasirandomGenerator/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator/.vscode/extensions.json rename to cpp/5_Domain_Specific/quasirandomGenerator/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt b/cpp/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt rename to cpp/5_Domain_Specific/quasirandomGenerator/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/README.md b/cpp/5_Domain_Specific/quasirandomGenerator/README.md similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator/README.md rename to cpp/5_Domain_Specific/quasirandomGenerator/README.md diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator.cpp b/cpp/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator.cpp similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator.cpp rename to cpp/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator.cpp diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_common.h b/cpp/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_common.h similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_common.h rename to cpp/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_common.h diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_gold.cpp b/cpp/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_gold.cpp rename to cpp/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_gold.cpp diff --git a/Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_kernel.cu b/cpp/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_kernel.cu rename to cpp/5_Domain_Specific/quasirandomGenerator/quasirandomGenerator_kernel.cu diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/extensions.json b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/extensions.json rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/README.md diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator.cpp b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator.cpp similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator.cpp rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator.cpp diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_common.h b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_common.h similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_common.h rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_common.h diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gold.cpp b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gold.cpp similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gold.cpp rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gold.cpp diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gpu.cuh b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gpu.cuh similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gpu.cuh rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_gpu.cuh diff --git a/Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_kernel.cu b/cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_kernel.cu rename to cpp/5_Domain_Specific/quasirandomGenerator_nvrtc/quasirandomGenerator_kernel.cu diff --git a/Samples/5_Domain_Specific/recursiveGaussian/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/recursiveGaussian/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/recursiveGaussian/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/recursiveGaussian/.vscode/extensions.json b/cpp/5_Domain_Specific/recursiveGaussian/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/.vscode/extensions.json rename to cpp/5_Domain_Specific/recursiveGaussian/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/recursiveGaussian/CMakeLists.txt b/cpp/5_Domain_Specific/recursiveGaussian/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/CMakeLists.txt rename to cpp/5_Domain_Specific/recursiveGaussian/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/recursiveGaussian/README.md b/cpp/5_Domain_Specific/recursiveGaussian/README.md similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/README.md rename to cpp/5_Domain_Specific/recursiveGaussian/README.md diff --git a/Samples/5_Domain_Specific/recursiveGaussian/data/ref_10.ppm b/cpp/5_Domain_Specific/recursiveGaussian/data/ref_10.ppm similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/data/ref_10.ppm rename to cpp/5_Domain_Specific/recursiveGaussian/data/ref_10.ppm diff --git a/Samples/5_Domain_Specific/recursiveGaussian/data/ref_14.ppm b/cpp/5_Domain_Specific/recursiveGaussian/data/ref_14.ppm similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/data/ref_14.ppm rename to cpp/5_Domain_Specific/recursiveGaussian/data/ref_14.ppm diff --git a/Samples/5_Domain_Specific/recursiveGaussian/data/ref_18.ppm b/cpp/5_Domain_Specific/recursiveGaussian/data/ref_18.ppm similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/data/ref_18.ppm rename to cpp/5_Domain_Specific/recursiveGaussian/data/ref_18.ppm diff --git a/Samples/5_Domain_Specific/recursiveGaussian/data/ref_22.ppm b/cpp/5_Domain_Specific/recursiveGaussian/data/ref_22.ppm similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/data/ref_22.ppm rename to cpp/5_Domain_Specific/recursiveGaussian/data/ref_22.ppm diff --git a/Samples/5_Domain_Specific/recursiveGaussian/data/teapot512.ppm b/cpp/5_Domain_Specific/recursiveGaussian/data/teapot512.ppm similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/data/teapot512.ppm rename to cpp/5_Domain_Specific/recursiveGaussian/data/teapot512.ppm diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian.cpp b/cpp/5_Domain_Specific/recursiveGaussian/recursiveGaussian.cpp similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian.cpp rename to cpp/5_Domain_Specific/recursiveGaussian/recursiveGaussian.cpp diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_cuda.cu b/cpp/5_Domain_Specific/recursiveGaussian/recursiveGaussian_cuda.cu similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_cuda.cu rename to cpp/5_Domain_Specific/recursiveGaussian/recursiveGaussian_cuda.cu diff --git a/Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_kernel.cuh b/cpp/5_Domain_Specific/recursiveGaussian/recursiveGaussian_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/recursiveGaussian/recursiveGaussian_kernel.cuh rename to cpp/5_Domain_Specific/recursiveGaussian/recursiveGaussian_kernel.cuh diff --git a/Samples/5_Domain_Specific/simpleD3D11/CMakeLists.txt b/cpp/5_Domain_Specific/simpleD3D11/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11/CMakeLists.txt rename to cpp/5_Domain_Specific/simpleD3D11/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/simpleD3D11/README.md b/cpp/5_Domain_Specific/simpleD3D11/README.md similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11/README.md rename to cpp/5_Domain_Specific/simpleD3D11/README.md diff --git a/Samples/5_Domain_Specific/simpleD3D11/ShaderStructs.h b/cpp/5_Domain_Specific/simpleD3D11/ShaderStructs.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11/ShaderStructs.h rename to cpp/5_Domain_Specific/simpleD3D11/ShaderStructs.h diff --git a/Samples/5_Domain_Specific/simpleD3D11/data/ref_simpleD3D11.ppm b/cpp/5_Domain_Specific/simpleD3D11/data/ref_simpleD3D11.ppm similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11/data/ref_simpleD3D11.ppm rename to cpp/5_Domain_Specific/simpleD3D11/data/ref_simpleD3D11.ppm diff --git a/Samples/5_Domain_Specific/simpleD3D11/simpleD3D11.cpp b/cpp/5_Domain_Specific/simpleD3D11/simpleD3D11.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11/simpleD3D11.cpp rename to cpp/5_Domain_Specific/simpleD3D11/simpleD3D11.cpp diff --git a/Samples/5_Domain_Specific/simpleD3D11/sinewave_cuda.cu b/cpp/5_Domain_Specific/simpleD3D11/sinewave_cuda.cu similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11/sinewave_cuda.cu rename to cpp/5_Domain_Specific/simpleD3D11/sinewave_cuda.cu diff --git a/Samples/5_Domain_Specific/simpleD3D11/sinewave_cuda.h b/cpp/5_Domain_Specific/simpleD3D11/sinewave_cuda.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11/sinewave_cuda.h rename to cpp/5_Domain_Specific/simpleD3D11/sinewave_cuda.h diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt b/cpp/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt rename to cpp/5_Domain_Specific/simpleD3D11Texture/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/README.md b/cpp/5_Domain_Specific/simpleD3D11Texture/README.md similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11Texture/README.md rename to cpp/5_Domain_Specific/simpleD3D11Texture/README.md diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/d3dx11effect/d3dx11effect.h b/cpp/5_Domain_Specific/simpleD3D11Texture/d3dx11effect/d3dx11effect.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11Texture/d3dx11effect/d3dx11effect.h rename to cpp/5_Domain_Specific/simpleD3D11Texture/d3dx11effect/d3dx11effect.h diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/data/ref_simpleD3D11Texture.ppm b/cpp/5_Domain_Specific/simpleD3D11Texture/data/ref_simpleD3D11Texture.ppm similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11Texture/data/ref_simpleD3D11Texture.ppm rename to cpp/5_Domain_Specific/simpleD3D11Texture/data/ref_simpleD3D11Texture.ppm diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture.cpp b/cpp/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture.cpp rename to cpp/5_Domain_Specific/simpleD3D11Texture/simpleD3D11Texture.cpp diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/texture_2d.cu b/cpp/5_Domain_Specific/simpleD3D11Texture/texture_2d.cu similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11Texture/texture_2d.cu rename to cpp/5_Domain_Specific/simpleD3D11Texture/texture_2d.cu diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/texture_3d.cu b/cpp/5_Domain_Specific/simpleD3D11Texture/texture_3d.cu similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11Texture/texture_3d.cu rename to cpp/5_Domain_Specific/simpleD3D11Texture/texture_3d.cu diff --git a/Samples/5_Domain_Specific/simpleD3D11Texture/texture_cube.cu b/cpp/5_Domain_Specific/simpleD3D11Texture/texture_cube.cu similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D11Texture/texture_cube.cu rename to cpp/5_Domain_Specific/simpleD3D11Texture/texture_cube.cu diff --git a/Samples/5_Domain_Specific/simpleD3D12/CMakeLists.txt b/cpp/5_Domain_Specific/simpleD3D12/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/CMakeLists.txt rename to cpp/5_Domain_Specific/simpleD3D12/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/simpleD3D12/DX12CudaSample.cpp b/cpp/5_Domain_Specific/simpleD3D12/DX12CudaSample.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/DX12CudaSample.cpp rename to cpp/5_Domain_Specific/simpleD3D12/DX12CudaSample.cpp diff --git a/Samples/5_Domain_Specific/simpleD3D12/DX12CudaSample.h b/cpp/5_Domain_Specific/simpleD3D12/DX12CudaSample.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/DX12CudaSample.h rename to cpp/5_Domain_Specific/simpleD3D12/DX12CudaSample.h diff --git a/Samples/5_Domain_Specific/simpleD3D12/DXSampleHelper.h b/cpp/5_Domain_Specific/simpleD3D12/DXSampleHelper.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/DXSampleHelper.h rename to cpp/5_Domain_Specific/simpleD3D12/DXSampleHelper.h diff --git a/Samples/5_Domain_Specific/simpleD3D12/Main.cpp b/cpp/5_Domain_Specific/simpleD3D12/Main.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/Main.cpp rename to cpp/5_Domain_Specific/simpleD3D12/Main.cpp diff --git a/Samples/5_Domain_Specific/simpleD3D12/README.md b/cpp/5_Domain_Specific/simpleD3D12/README.md similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/README.md rename to cpp/5_Domain_Specific/simpleD3D12/README.md diff --git a/Samples/5_Domain_Specific/simpleD3D12/ShaderStructs.h b/cpp/5_Domain_Specific/simpleD3D12/ShaderStructs.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/ShaderStructs.h rename to cpp/5_Domain_Specific/simpleD3D12/ShaderStructs.h diff --git a/Samples/5_Domain_Specific/simpleD3D12/Win32Application.cpp b/cpp/5_Domain_Specific/simpleD3D12/Win32Application.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/Win32Application.cpp rename to cpp/5_Domain_Specific/simpleD3D12/Win32Application.cpp diff --git a/Samples/5_Domain_Specific/simpleD3D12/Win32Application.h b/cpp/5_Domain_Specific/simpleD3D12/Win32Application.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/Win32Application.h rename to cpp/5_Domain_Specific/simpleD3D12/Win32Application.h diff --git a/Samples/5_Domain_Specific/simpleD3D12/d3dx12.h b/cpp/5_Domain_Specific/simpleD3D12/d3dx12.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/d3dx12.h rename to cpp/5_Domain_Specific/simpleD3D12/d3dx12.h diff --git a/Samples/5_Domain_Specific/simpleD3D12/shaders.hlsl b/cpp/5_Domain_Specific/simpleD3D12/shaders.hlsl similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/shaders.hlsl rename to cpp/5_Domain_Specific/simpleD3D12/shaders.hlsl diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12.cpp b/cpp/5_Domain_Specific/simpleD3D12/simpleD3D12.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/simpleD3D12.cpp rename to cpp/5_Domain_Specific/simpleD3D12/simpleD3D12.cpp diff --git a/Samples/5_Domain_Specific/simpleD3D12/simpleD3D12.h b/cpp/5_Domain_Specific/simpleD3D12/simpleD3D12.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/simpleD3D12.h rename to cpp/5_Domain_Specific/simpleD3D12/simpleD3D12.h diff --git a/Samples/5_Domain_Specific/simpleD3D12/sinewave_cuda.cu b/cpp/5_Domain_Specific/simpleD3D12/sinewave_cuda.cu similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/sinewave_cuda.cu rename to cpp/5_Domain_Specific/simpleD3D12/sinewave_cuda.cu diff --git a/Samples/5_Domain_Specific/simpleD3D12/stdafx.cpp b/cpp/5_Domain_Specific/simpleD3D12/stdafx.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/stdafx.cpp rename to cpp/5_Domain_Specific/simpleD3D12/stdafx.cpp diff --git a/Samples/5_Domain_Specific/simpleD3D12/stdafx.h b/cpp/5_Domain_Specific/simpleD3D12/stdafx.h similarity index 100% rename from Samples/5_Domain_Specific/simpleD3D12/stdafx.h rename to cpp/5_Domain_Specific/simpleD3D12/stdafx.h diff --git a/Samples/5_Domain_Specific/simpleGL/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/simpleGL/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/simpleGL/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/simpleGL/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/simpleGL/.vscode/extensions.json b/cpp/5_Domain_Specific/simpleGL/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/simpleGL/.vscode/extensions.json rename to cpp/5_Domain_Specific/simpleGL/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/simpleGL/CMakeLists.txt b/cpp/5_Domain_Specific/simpleGL/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/simpleGL/CMakeLists.txt rename to cpp/5_Domain_Specific/simpleGL/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/simpleGL/README.md b/cpp/5_Domain_Specific/simpleGL/README.md similarity index 100% rename from Samples/5_Domain_Specific/simpleGL/README.md rename to cpp/5_Domain_Specific/simpleGL/README.md diff --git a/Samples/5_Domain_Specific/simpleGL/data/ref_simpleGL.bin b/cpp/5_Domain_Specific/simpleGL/data/ref_simpleGL.bin similarity index 100% rename from Samples/5_Domain_Specific/simpleGL/data/ref_simpleGL.bin rename to cpp/5_Domain_Specific/simpleGL/data/ref_simpleGL.bin diff --git a/Samples/5_Domain_Specific/simpleGL/simpleGL.cu b/cpp/5_Domain_Specific/simpleGL/simpleGL.cu similarity index 100% rename from Samples/5_Domain_Specific/simpleGL/simpleGL.cu rename to cpp/5_Domain_Specific/simpleGL/simpleGL.cu diff --git a/Samples/5_Domain_Specific/simpleVulkan/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/simpleVulkan/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/simpleVulkan/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/simpleVulkan/.vscode/extensions.json b/cpp/5_Domain_Specific/simpleVulkan/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/.vscode/extensions.json rename to cpp/5_Domain_Specific/simpleVulkan/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/simpleVulkan/Build_instructions.txt b/cpp/5_Domain_Specific/simpleVulkan/Build_instructions.txt similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/Build_instructions.txt rename to cpp/5_Domain_Specific/simpleVulkan/Build_instructions.txt diff --git a/Samples/5_Domain_Specific/simpleVulkan/CMakeLists.txt b/cpp/5_Domain_Specific/simpleVulkan/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/CMakeLists.txt rename to cpp/5_Domain_Specific/simpleVulkan/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/simpleVulkan/README.md b/cpp/5_Domain_Specific/simpleVulkan/README.md similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/README.md rename to cpp/5_Domain_Specific/simpleVulkan/README.md diff --git a/Samples/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu b/cpp/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu rename to cpp/5_Domain_Specific/simpleVulkan/SineWaveSimulation.cu diff --git a/Samples/5_Domain_Specific/simpleVulkan/SineWaveSimulation.h b/cpp/5_Domain_Specific/simpleVulkan/SineWaveSimulation.h similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/SineWaveSimulation.h rename to cpp/5_Domain_Specific/simpleVulkan/SineWaveSimulation.h diff --git a/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp b/cpp/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp rename to cpp/5_Domain_Specific/simpleVulkan/VulkanBaseApp.cpp diff --git a/Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h b/cpp/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h rename to cpp/5_Domain_Specific/simpleVulkan/VulkanBaseApp.h diff --git a/Samples/5_Domain_Specific/simpleVulkan/frag.spv b/cpp/5_Domain_Specific/simpleVulkan/frag.spv similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/frag.spv rename to cpp/5_Domain_Specific/simpleVulkan/frag.spv diff --git a/Samples/5_Domain_Specific/simpleVulkan/linmath.h b/cpp/5_Domain_Specific/simpleVulkan/linmath.h similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/linmath.h rename to cpp/5_Domain_Specific/simpleVulkan/linmath.h diff --git a/Samples/5_Domain_Specific/simpleVulkan/main.cpp b/cpp/5_Domain_Specific/simpleVulkan/main.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/main.cpp rename to cpp/5_Domain_Specific/simpleVulkan/main.cpp diff --git a/Samples/5_Domain_Specific/simpleVulkan/sinewave.frag b/cpp/5_Domain_Specific/simpleVulkan/sinewave.frag similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/sinewave.frag rename to cpp/5_Domain_Specific/simpleVulkan/sinewave.frag diff --git a/Samples/5_Domain_Specific/simpleVulkan/sinewave.vert b/cpp/5_Domain_Specific/simpleVulkan/sinewave.vert similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/sinewave.vert rename to cpp/5_Domain_Specific/simpleVulkan/sinewave.vert diff --git a/Samples/5_Domain_Specific/simpleVulkan/vert.spv b/cpp/5_Domain_Specific/simpleVulkan/vert.spv similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkan/vert.spv rename to cpp/5_Domain_Specific/simpleVulkan/vert.spv diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/simpleVulkanMMAP/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/simpleVulkanMMAP/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/.vscode/extensions.json b/cpp/5_Domain_Specific/simpleVulkanMMAP/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/.vscode/extensions.json rename to cpp/5_Domain_Specific/simpleVulkanMMAP/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/Build_instructions.txt b/cpp/5_Domain_Specific/simpleVulkanMMAP/Build_instructions.txt similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/Build_instructions.txt rename to cpp/5_Domain_Specific/simpleVulkanMMAP/Build_instructions.txt diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt b/cpp/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt rename to cpp/5_Domain_Specific/simpleVulkanMMAP/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu b/cpp/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu rename to cpp/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.cu diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h b/cpp/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h rename to cpp/5_Domain_Specific/simpleVulkanMMAP/MonteCarloPi.h diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/README.md b/cpp/5_Domain_Specific/simpleVulkanMMAP/README.md similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/README.md rename to cpp/5_Domain_Specific/simpleVulkanMMAP/README.md diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.cpp b/cpp/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.cpp rename to cpp/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.cpp diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h b/cpp/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h rename to cpp/5_Domain_Specific/simpleVulkanMMAP/VulkanBaseApp.h diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanCudaInterop.h b/cpp/5_Domain_Specific/simpleVulkanMMAP/VulkanCudaInterop.h similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/VulkanCudaInterop.h rename to cpp/5_Domain_Specific/simpleVulkanMMAP/VulkanCudaInterop.h diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/frag.spv b/cpp/5_Domain_Specific/simpleVulkanMMAP/frag.spv similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/frag.spv rename to cpp/5_Domain_Specific/simpleVulkanMMAP/frag.spv diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp b/cpp/5_Domain_Specific/simpleVulkanMMAP/main.cpp similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/main.cpp rename to cpp/5_Domain_Specific/simpleVulkanMMAP/main.cpp diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/montecarlo.frag b/cpp/5_Domain_Specific/simpleVulkanMMAP/montecarlo.frag similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/montecarlo.frag rename to cpp/5_Domain_Specific/simpleVulkanMMAP/montecarlo.frag diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/montecarlo.vert b/cpp/5_Domain_Specific/simpleVulkanMMAP/montecarlo.vert similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/montecarlo.vert rename to cpp/5_Domain_Specific/simpleVulkanMMAP/montecarlo.vert diff --git a/Samples/5_Domain_Specific/simpleVulkanMMAP/vert.spv b/cpp/5_Domain_Specific/simpleVulkanMMAP/vert.spv similarity index 100% rename from Samples/5_Domain_Specific/simpleVulkanMMAP/vert.spv rename to cpp/5_Domain_Specific/simpleVulkanMMAP/vert.spv diff --git a/Samples/5_Domain_Specific/smokeParticles/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/smokeParticles/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/smokeParticles/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/smokeParticles/.vscode/extensions.json b/cpp/5_Domain_Specific/smokeParticles/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/.vscode/extensions.json rename to cpp/5_Domain_Specific/smokeParticles/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/smokeParticles/CMakeLists.txt b/cpp/5_Domain_Specific/smokeParticles/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/CMakeLists.txt rename to cpp/5_Domain_Specific/smokeParticles/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/smokeParticles/GLSLProgram.cpp b/cpp/5_Domain_Specific/smokeParticles/GLSLProgram.cpp similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/GLSLProgram.cpp rename to cpp/5_Domain_Specific/smokeParticles/GLSLProgram.cpp diff --git a/Samples/5_Domain_Specific/smokeParticles/GLSLProgram.h b/cpp/5_Domain_Specific/smokeParticles/GLSLProgram.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/GLSLProgram.h rename to cpp/5_Domain_Specific/smokeParticles/GLSLProgram.h diff --git a/Samples/5_Domain_Specific/smokeParticles/GpuArray.h b/cpp/5_Domain_Specific/smokeParticles/GpuArray.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/GpuArray.h rename to cpp/5_Domain_Specific/smokeParticles/GpuArray.h diff --git a/Samples/5_Domain_Specific/smokeParticles/ParticleSystem.cpp b/cpp/5_Domain_Specific/smokeParticles/ParticleSystem.cpp similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/ParticleSystem.cpp rename to cpp/5_Domain_Specific/smokeParticles/ParticleSystem.cpp diff --git a/Samples/5_Domain_Specific/smokeParticles/ParticleSystem.cuh b/cpp/5_Domain_Specific/smokeParticles/ParticleSystem.cuh similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/ParticleSystem.cuh rename to cpp/5_Domain_Specific/smokeParticles/ParticleSystem.cuh diff --git a/Samples/5_Domain_Specific/smokeParticles/ParticleSystem.h b/cpp/5_Domain_Specific/smokeParticles/ParticleSystem.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/ParticleSystem.h rename to cpp/5_Domain_Specific/smokeParticles/ParticleSystem.h diff --git a/Samples/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu b/cpp/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu rename to cpp/5_Domain_Specific/smokeParticles/ParticleSystem_cuda.cu diff --git a/Samples/5_Domain_Specific/smokeParticles/README.md b/cpp/5_Domain_Specific/smokeParticles/README.md similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/README.md rename to cpp/5_Domain_Specific/smokeParticles/README.md diff --git a/Samples/5_Domain_Specific/smokeParticles/SmokeRenderer.cpp b/cpp/5_Domain_Specific/smokeParticles/SmokeRenderer.cpp similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/SmokeRenderer.cpp rename to cpp/5_Domain_Specific/smokeParticles/SmokeRenderer.cpp diff --git a/Samples/5_Domain_Specific/smokeParticles/SmokeRenderer.h b/cpp/5_Domain_Specific/smokeParticles/SmokeRenderer.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/SmokeRenderer.h rename to cpp/5_Domain_Specific/smokeParticles/SmokeRenderer.h diff --git a/Samples/5_Domain_Specific/smokeParticles/SmokeShaders.cpp b/cpp/5_Domain_Specific/smokeParticles/SmokeShaders.cpp similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/SmokeShaders.cpp rename to cpp/5_Domain_Specific/smokeParticles/SmokeShaders.cpp diff --git a/Samples/5_Domain_Specific/smokeParticles/SmokeShaders.h b/cpp/5_Domain_Specific/smokeParticles/SmokeShaders.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/SmokeShaders.h rename to cpp/5_Domain_Specific/smokeParticles/SmokeShaders.h diff --git a/Samples/5_Domain_Specific/smokeParticles/data/floortile.ppm b/cpp/5_Domain_Specific/smokeParticles/data/floortile.ppm similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/data/floortile.ppm rename to cpp/5_Domain_Specific/smokeParticles/data/floortile.ppm diff --git a/Samples/5_Domain_Specific/smokeParticles/data/ref_smokePart_pos.bin b/cpp/5_Domain_Specific/smokeParticles/data/ref_smokePart_pos.bin similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/data/ref_smokePart_pos.bin rename to cpp/5_Domain_Specific/smokeParticles/data/ref_smokePart_pos.bin diff --git a/Samples/5_Domain_Specific/smokeParticles/data/ref_smokePart_vel.bin b/cpp/5_Domain_Specific/smokeParticles/data/ref_smokePart_vel.bin similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/data/ref_smokePart_vel.bin rename to cpp/5_Domain_Specific/smokeParticles/data/ref_smokePart_vel.bin diff --git a/Samples/5_Domain_Specific/smokeParticles/doc/screenshot_lg.png b/cpp/5_Domain_Specific/smokeParticles/doc/screenshot_lg.png similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/doc/screenshot_lg.png rename to cpp/5_Domain_Specific/smokeParticles/doc/screenshot_lg.png diff --git a/Samples/5_Domain_Specific/smokeParticles/doc/screenshot_md.png b/cpp/5_Domain_Specific/smokeParticles/doc/screenshot_md.png similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/doc/screenshot_md.png rename to cpp/5_Domain_Specific/smokeParticles/doc/screenshot_md.png diff --git a/Samples/5_Domain_Specific/smokeParticles/doc/screenshot_sm.png b/cpp/5_Domain_Specific/smokeParticles/doc/screenshot_sm.png similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/doc/screenshot_sm.png rename to cpp/5_Domain_Specific/smokeParticles/doc/screenshot_sm.png diff --git a/Samples/5_Domain_Specific/smokeParticles/doc/smokeParticles.doc b/cpp/5_Domain_Specific/smokeParticles/doc/smokeParticles.doc similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/doc/smokeParticles.doc rename to cpp/5_Domain_Specific/smokeParticles/doc/smokeParticles.doc diff --git a/Samples/5_Domain_Specific/smokeParticles/doc/smokeParticles.pdf b/cpp/5_Domain_Specific/smokeParticles/doc/smokeParticles.pdf similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/doc/smokeParticles.pdf rename to cpp/5_Domain_Specific/smokeParticles/doc/smokeParticles.pdf diff --git a/Samples/5_Domain_Specific/smokeParticles/framebufferObject.cpp b/cpp/5_Domain_Specific/smokeParticles/framebufferObject.cpp similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/framebufferObject.cpp rename to cpp/5_Domain_Specific/smokeParticles/framebufferObject.cpp diff --git a/Samples/5_Domain_Specific/smokeParticles/framebufferObject.h b/cpp/5_Domain_Specific/smokeParticles/framebufferObject.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/framebufferObject.h rename to cpp/5_Domain_Specific/smokeParticles/framebufferObject.h diff --git a/Samples/5_Domain_Specific/smokeParticles/nvMath.h b/cpp/5_Domain_Specific/smokeParticles/nvMath.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/nvMath.h rename to cpp/5_Domain_Specific/smokeParticles/nvMath.h diff --git a/Samples/5_Domain_Specific/smokeParticles/nvMatrix.h b/cpp/5_Domain_Specific/smokeParticles/nvMatrix.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/nvMatrix.h rename to cpp/5_Domain_Specific/smokeParticles/nvMatrix.h diff --git a/Samples/5_Domain_Specific/smokeParticles/nvQuaternion.h b/cpp/5_Domain_Specific/smokeParticles/nvQuaternion.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/nvQuaternion.h rename to cpp/5_Domain_Specific/smokeParticles/nvQuaternion.h diff --git a/Samples/5_Domain_Specific/smokeParticles/nvVector.h b/cpp/5_Domain_Specific/smokeParticles/nvVector.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/nvVector.h rename to cpp/5_Domain_Specific/smokeParticles/nvVector.h diff --git a/Samples/5_Domain_Specific/smokeParticles/particleDemo.cpp b/cpp/5_Domain_Specific/smokeParticles/particleDemo.cpp similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/particleDemo.cpp rename to cpp/5_Domain_Specific/smokeParticles/particleDemo.cpp diff --git a/Samples/5_Domain_Specific/smokeParticles/particles_kernel.cuh b/cpp/5_Domain_Specific/smokeParticles/particles_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/particles_kernel.cuh rename to cpp/5_Domain_Specific/smokeParticles/particles_kernel.cuh diff --git a/Samples/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh b/cpp/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh rename to cpp/5_Domain_Specific/smokeParticles/particles_kernel_device.cuh diff --git a/Samples/5_Domain_Specific/smokeParticles/renderbuffer.cpp b/cpp/5_Domain_Specific/smokeParticles/renderbuffer.cpp similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/renderbuffer.cpp rename to cpp/5_Domain_Specific/smokeParticles/renderbuffer.cpp diff --git a/Samples/5_Domain_Specific/smokeParticles/renderbuffer.h b/cpp/5_Domain_Specific/smokeParticles/renderbuffer.h similarity index 100% rename from Samples/5_Domain_Specific/smokeParticles/renderbuffer.h rename to cpp/5_Domain_Specific/smokeParticles/renderbuffer.h diff --git a/Samples/5_Domain_Specific/stereoDisparity/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/stereoDisparity/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/stereoDisparity/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/stereoDisparity/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/stereoDisparity/.vscode/extensions.json b/cpp/5_Domain_Specific/stereoDisparity/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/stereoDisparity/.vscode/extensions.json rename to cpp/5_Domain_Specific/stereoDisparity/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/stereoDisparity/CMakeLists.txt b/cpp/5_Domain_Specific/stereoDisparity/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/stereoDisparity/CMakeLists.txt rename to cpp/5_Domain_Specific/stereoDisparity/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/stereoDisparity/README.md b/cpp/5_Domain_Specific/stereoDisparity/README.md similarity index 100% rename from Samples/5_Domain_Specific/stereoDisparity/README.md rename to cpp/5_Domain_Specific/stereoDisparity/README.md diff --git a/Samples/5_Domain_Specific/stereoDisparity/data/stereo.im0.640x533.ppm b/cpp/5_Domain_Specific/stereoDisparity/data/stereo.im0.640x533.ppm similarity index 100% rename from Samples/5_Domain_Specific/stereoDisparity/data/stereo.im0.640x533.ppm rename to cpp/5_Domain_Specific/stereoDisparity/data/stereo.im0.640x533.ppm diff --git a/Samples/5_Domain_Specific/stereoDisparity/data/stereo.im1.640x533.ppm b/cpp/5_Domain_Specific/stereoDisparity/data/stereo.im1.640x533.ppm similarity index 100% rename from Samples/5_Domain_Specific/stereoDisparity/data/stereo.im1.640x533.ppm rename to cpp/5_Domain_Specific/stereoDisparity/data/stereo.im1.640x533.ppm diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity.cu b/cpp/5_Domain_Specific/stereoDisparity/stereoDisparity.cu similarity index 100% rename from Samples/5_Domain_Specific/stereoDisparity/stereoDisparity.cu rename to cpp/5_Domain_Specific/stereoDisparity/stereoDisparity.cu diff --git a/Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_kernel.cuh b/cpp/5_Domain_Specific/stereoDisparity/stereoDisparity_kernel.cuh similarity index 100% rename from Samples/5_Domain_Specific/stereoDisparity/stereoDisparity_kernel.cuh rename to cpp/5_Domain_Specific/stereoDisparity/stereoDisparity_kernel.cuh diff --git a/Samples/5_Domain_Specific/volumeFiltering/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/volumeFiltering/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/volumeFiltering/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/volumeFiltering/.vscode/extensions.json b/cpp/5_Domain_Specific/volumeFiltering/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/.vscode/extensions.json rename to cpp/5_Domain_Specific/volumeFiltering/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/volumeFiltering/CMakeLists.txt b/cpp/5_Domain_Specific/volumeFiltering/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/CMakeLists.txt rename to cpp/5_Domain_Specific/volumeFiltering/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/volumeFiltering/README.md b/cpp/5_Domain_Specific/volumeFiltering/README.md similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/README.md rename to cpp/5_Domain_Specific/volumeFiltering/README.md diff --git a/Samples/5_Domain_Specific/volumeFiltering/data/Bucky.raw b/cpp/5_Domain_Specific/volumeFiltering/data/Bucky.raw similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/data/Bucky.raw rename to cpp/5_Domain_Specific/volumeFiltering/data/Bucky.raw diff --git a/Samples/5_Domain_Specific/volumeFiltering/data/ref_volumefilter.ppm b/cpp/5_Domain_Specific/volumeFiltering/data/ref_volumefilter.ppm similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/data/ref_volumefilter.ppm rename to cpp/5_Domain_Specific/volumeFiltering/data/ref_volumefilter.ppm diff --git a/Samples/5_Domain_Specific/volumeFiltering/doc/sshot_lg.JPG b/cpp/5_Domain_Specific/volumeFiltering/doc/sshot_lg.JPG similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/doc/sshot_lg.JPG rename to cpp/5_Domain_Specific/volumeFiltering/doc/sshot_lg.JPG diff --git a/Samples/5_Domain_Specific/volumeFiltering/doc/sshot_md.JPG b/cpp/5_Domain_Specific/volumeFiltering/doc/sshot_md.JPG similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/doc/sshot_md.JPG rename to cpp/5_Domain_Specific/volumeFiltering/doc/sshot_md.JPG diff --git a/Samples/5_Domain_Specific/volumeFiltering/doc/sshot_sm.JPG b/cpp/5_Domain_Specific/volumeFiltering/doc/sshot_sm.JPG similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/doc/sshot_sm.JPG rename to cpp/5_Domain_Specific/volumeFiltering/doc/sshot_sm.JPG diff --git a/Samples/5_Domain_Specific/volumeFiltering/volume.cpp b/cpp/5_Domain_Specific/volumeFiltering/volume.cpp similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/volume.cpp rename to cpp/5_Domain_Specific/volumeFiltering/volume.cpp diff --git a/Samples/5_Domain_Specific/volumeFiltering/volume.h b/cpp/5_Domain_Specific/volumeFiltering/volume.h similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/volume.h rename to cpp/5_Domain_Specific/volumeFiltering/volume.h diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFilter.h b/cpp/5_Domain_Specific/volumeFiltering/volumeFilter.h similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/volumeFilter.h rename to cpp/5_Domain_Specific/volumeFiltering/volumeFilter.h diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFilter_kernel.cu b/cpp/5_Domain_Specific/volumeFiltering/volumeFilter_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/volumeFilter_kernel.cu rename to cpp/5_Domain_Specific/volumeFiltering/volumeFilter_kernel.cu diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeFiltering.cpp b/cpp/5_Domain_Specific/volumeFiltering/volumeFiltering.cpp similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/volumeFiltering.cpp rename to cpp/5_Domain_Specific/volumeFiltering/volumeFiltering.cpp diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeRender.h b/cpp/5_Domain_Specific/volumeFiltering/volumeRender.h similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/volumeRender.h rename to cpp/5_Domain_Specific/volumeFiltering/volumeRender.h diff --git a/Samples/5_Domain_Specific/volumeFiltering/volumeRender_kernel.cu b/cpp/5_Domain_Specific/volumeFiltering/volumeRender_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/volumeFiltering/volumeRender_kernel.cu rename to cpp/5_Domain_Specific/volumeFiltering/volumeRender_kernel.cu diff --git a/Samples/5_Domain_Specific/volumeRender/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/volumeRender/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/volumeRender/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/volumeRender/.vscode/extensions.json b/cpp/5_Domain_Specific/volumeRender/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/.vscode/extensions.json rename to cpp/5_Domain_Specific/volumeRender/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/volumeRender/CMakeLists.txt b/cpp/5_Domain_Specific/volumeRender/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/CMakeLists.txt rename to cpp/5_Domain_Specific/volumeRender/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/volumeRender/README.md b/cpp/5_Domain_Specific/volumeRender/README.md similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/README.md rename to cpp/5_Domain_Specific/volumeRender/README.md diff --git a/Samples/5_Domain_Specific/volumeRender/data/Bucky.raw b/cpp/5_Domain_Specific/volumeRender/data/Bucky.raw similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/data/Bucky.raw rename to cpp/5_Domain_Specific/volumeRender/data/Bucky.raw diff --git a/Samples/5_Domain_Specific/volumeRender/data/ref_volume.ppm b/cpp/5_Domain_Specific/volumeRender/data/ref_volume.ppm similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/data/ref_volume.ppm rename to cpp/5_Domain_Specific/volumeRender/data/ref_volume.ppm diff --git a/Samples/5_Domain_Specific/volumeRender/doc/sshot_lg.JPG b/cpp/5_Domain_Specific/volumeRender/doc/sshot_lg.JPG similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/doc/sshot_lg.JPG rename to cpp/5_Domain_Specific/volumeRender/doc/sshot_lg.JPG diff --git a/Samples/5_Domain_Specific/volumeRender/doc/sshot_md.jpg b/cpp/5_Domain_Specific/volumeRender/doc/sshot_md.jpg similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/doc/sshot_md.jpg rename to cpp/5_Domain_Specific/volumeRender/doc/sshot_md.jpg diff --git a/Samples/5_Domain_Specific/volumeRender/doc/sshot_sm.JPG b/cpp/5_Domain_Specific/volumeRender/doc/sshot_sm.JPG similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/doc/sshot_sm.JPG rename to cpp/5_Domain_Specific/volumeRender/doc/sshot_sm.JPG diff --git a/Samples/5_Domain_Specific/volumeRender/volume.ppm b/cpp/5_Domain_Specific/volumeRender/volume.ppm similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/volume.ppm rename to cpp/5_Domain_Specific/volumeRender/volume.ppm diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender.cpp b/cpp/5_Domain_Specific/volumeRender/volumeRender.cpp similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/volumeRender.cpp rename to cpp/5_Domain_Specific/volumeRender/volumeRender.cpp diff --git a/Samples/5_Domain_Specific/volumeRender/volumeRender_kernel.cu b/cpp/5_Domain_Specific/volumeRender/volumeRender_kernel.cu similarity index 100% rename from Samples/5_Domain_Specific/volumeRender/volumeRender_kernel.cu rename to cpp/5_Domain_Specific/volumeRender/volumeRender_kernel.cu diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/.vscode/c_cpp_properties.json b/cpp/5_Domain_Specific/vulkanImageCUDA/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/.vscode/c_cpp_properties.json rename to cpp/5_Domain_Specific/vulkanImageCUDA/.vscode/c_cpp_properties.json diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/.vscode/extensions.json b/cpp/5_Domain_Specific/vulkanImageCUDA/.vscode/extensions.json similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/.vscode/extensions.json rename to cpp/5_Domain_Specific/vulkanImageCUDA/.vscode/extensions.json diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/Build_instructions.txt b/cpp/5_Domain_Specific/vulkanImageCUDA/Build_instructions.txt similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/Build_instructions.txt rename to cpp/5_Domain_Specific/vulkanImageCUDA/Build_instructions.txt diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt b/cpp/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt rename to cpp/5_Domain_Specific/vulkanImageCUDA/CMakeLists.txt diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/README.md b/cpp/5_Domain_Specific/vulkanImageCUDA/README.md similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/README.md rename to cpp/5_Domain_Specific/vulkanImageCUDA/README.md diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/frag.spv b/cpp/5_Domain_Specific/vulkanImageCUDA/frag.spv similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/frag.spv rename to cpp/5_Domain_Specific/vulkanImageCUDA/frag.spv diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/linmath.h b/cpp/5_Domain_Specific/vulkanImageCUDA/linmath.h similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/linmath.h rename to cpp/5_Domain_Specific/vulkanImageCUDA/linmath.h diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/shader.frag b/cpp/5_Domain_Specific/vulkanImageCUDA/shader.frag similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/shader.frag rename to cpp/5_Domain_Specific/vulkanImageCUDA/shader.frag diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/shader.vert b/cpp/5_Domain_Specific/vulkanImageCUDA/shader.vert similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/shader.vert rename to cpp/5_Domain_Specific/vulkanImageCUDA/shader.vert diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/teapot1024.ppm b/cpp/5_Domain_Specific/vulkanImageCUDA/teapot1024.ppm similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/teapot1024.ppm rename to cpp/5_Domain_Specific/vulkanImageCUDA/teapot1024.ppm diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vert.spv b/cpp/5_Domain_Specific/vulkanImageCUDA/vert.spv similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/vert.spv rename to cpp/5_Domain_Specific/vulkanImageCUDA/vert.spv diff --git a/Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu b/cpp/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu similarity index 100% rename from Samples/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu rename to cpp/5_Domain_Specific/vulkanImageCUDA/vulkanImageCUDA.cu diff --git a/Samples/6_Performance/CMakeLists.txt b/cpp/6_Performance/CMakeLists.txt similarity index 100% rename from Samples/6_Performance/CMakeLists.txt rename to cpp/6_Performance/CMakeLists.txt diff --git a/Samples/6_Performance/LargeKernelParameter/.vscode/c_cpp_properties.json b/cpp/6_Performance/LargeKernelParameter/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/6_Performance/LargeKernelParameter/.vscode/c_cpp_properties.json rename to cpp/6_Performance/LargeKernelParameter/.vscode/c_cpp_properties.json diff --git a/Samples/6_Performance/LargeKernelParameter/.vscode/extensions.json b/cpp/6_Performance/LargeKernelParameter/.vscode/extensions.json similarity index 100% rename from Samples/6_Performance/LargeKernelParameter/.vscode/extensions.json rename to cpp/6_Performance/LargeKernelParameter/.vscode/extensions.json diff --git a/Samples/6_Performance/LargeKernelParameter/CMakeLists.txt b/cpp/6_Performance/LargeKernelParameter/CMakeLists.txt similarity index 100% rename from Samples/6_Performance/LargeKernelParameter/CMakeLists.txt rename to cpp/6_Performance/LargeKernelParameter/CMakeLists.txt diff --git a/Samples/6_Performance/LargeKernelParameter/LargeKernelParameter.cu b/cpp/6_Performance/LargeKernelParameter/LargeKernelParameter.cu similarity index 100% rename from Samples/6_Performance/LargeKernelParameter/LargeKernelParameter.cu rename to cpp/6_Performance/LargeKernelParameter/LargeKernelParameter.cu diff --git a/Samples/6_Performance/LargeKernelParameter/README.md b/cpp/6_Performance/LargeKernelParameter/README.md similarity index 100% rename from Samples/6_Performance/LargeKernelParameter/README.md rename to cpp/6_Performance/LargeKernelParameter/README.md diff --git a/Samples/6_Performance/README.md b/cpp/6_Performance/README.md similarity index 100% rename from Samples/6_Performance/README.md rename to cpp/6_Performance/README.md diff --git a/Samples/6_Performance/UnifiedMemoryPerf/.vscode/c_cpp_properties.json b/cpp/6_Performance/UnifiedMemoryPerf/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/.vscode/c_cpp_properties.json rename to cpp/6_Performance/UnifiedMemoryPerf/.vscode/c_cpp_properties.json diff --git a/Samples/6_Performance/UnifiedMemoryPerf/.vscode/extensions.json b/cpp/6_Performance/UnifiedMemoryPerf/.vscode/extensions.json similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/.vscode/extensions.json rename to cpp/6_Performance/UnifiedMemoryPerf/.vscode/extensions.json diff --git a/Samples/6_Performance/UnifiedMemoryPerf/CMakeLists.txt b/cpp/6_Performance/UnifiedMemoryPerf/CMakeLists.txt similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/CMakeLists.txt rename to cpp/6_Performance/UnifiedMemoryPerf/CMakeLists.txt diff --git a/Samples/6_Performance/UnifiedMemoryPerf/README.md b/cpp/6_Performance/UnifiedMemoryPerf/README.md similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/README.md rename to cpp/6_Performance/UnifiedMemoryPerf/README.md diff --git a/Samples/6_Performance/UnifiedMemoryPerf/commonDefs.hpp b/cpp/6_Performance/UnifiedMemoryPerf/commonDefs.hpp similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/commonDefs.hpp rename to cpp/6_Performance/UnifiedMemoryPerf/commonDefs.hpp diff --git a/Samples/6_Performance/UnifiedMemoryPerf/commonKernels.cu b/cpp/6_Performance/UnifiedMemoryPerf/commonKernels.cu similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/commonKernels.cu rename to cpp/6_Performance/UnifiedMemoryPerf/commonKernels.cu diff --git a/Samples/6_Performance/UnifiedMemoryPerf/commonKernels.hpp b/cpp/6_Performance/UnifiedMemoryPerf/commonKernels.hpp similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/commonKernels.hpp rename to cpp/6_Performance/UnifiedMemoryPerf/commonKernels.hpp diff --git a/Samples/6_Performance/UnifiedMemoryPerf/helperFunctions.cpp b/cpp/6_Performance/UnifiedMemoryPerf/helperFunctions.cpp similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/helperFunctions.cpp rename to cpp/6_Performance/UnifiedMemoryPerf/helperFunctions.cpp diff --git a/Samples/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu b/cpp/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu similarity index 100% rename from Samples/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu rename to cpp/6_Performance/UnifiedMemoryPerf/matrixMultiplyPerf.cu diff --git a/Samples/6_Performance/alignedTypes/.vscode/c_cpp_properties.json b/cpp/6_Performance/alignedTypes/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/6_Performance/alignedTypes/.vscode/c_cpp_properties.json rename to cpp/6_Performance/alignedTypes/.vscode/c_cpp_properties.json diff --git a/Samples/6_Performance/alignedTypes/.vscode/extensions.json b/cpp/6_Performance/alignedTypes/.vscode/extensions.json similarity index 100% rename from Samples/6_Performance/alignedTypes/.vscode/extensions.json rename to cpp/6_Performance/alignedTypes/.vscode/extensions.json diff --git a/Samples/6_Performance/alignedTypes/CMakeLists.txt b/cpp/6_Performance/alignedTypes/CMakeLists.txt similarity index 100% rename from Samples/6_Performance/alignedTypes/CMakeLists.txt rename to cpp/6_Performance/alignedTypes/CMakeLists.txt diff --git a/Samples/6_Performance/alignedTypes/README.md b/cpp/6_Performance/alignedTypes/README.md similarity index 100% rename from Samples/6_Performance/alignedTypes/README.md rename to cpp/6_Performance/alignedTypes/README.md diff --git a/Samples/6_Performance/alignedTypes/alignedTypes.cu b/cpp/6_Performance/alignedTypes/alignedTypes.cu similarity index 100% rename from Samples/6_Performance/alignedTypes/alignedTypes.cu rename to cpp/6_Performance/alignedTypes/alignedTypes.cu diff --git a/Samples/6_Performance/alignedTypes/doc/alignedTypes.txt b/cpp/6_Performance/alignedTypes/doc/alignedTypes.txt similarity index 100% rename from Samples/6_Performance/alignedTypes/doc/alignedTypes.txt rename to cpp/6_Performance/alignedTypes/doc/alignedTypes.txt diff --git a/Samples/6_Performance/cudaGraphsPerfScaling/.vscode/c_cpp_properties.json b/cpp/6_Performance/cudaGraphsPerfScaling/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/6_Performance/cudaGraphsPerfScaling/.vscode/c_cpp_properties.json rename to cpp/6_Performance/cudaGraphsPerfScaling/.vscode/c_cpp_properties.json diff --git a/Samples/6_Performance/cudaGraphsPerfScaling/.vscode/extensions.json b/cpp/6_Performance/cudaGraphsPerfScaling/.vscode/extensions.json similarity index 100% rename from Samples/6_Performance/cudaGraphsPerfScaling/.vscode/extensions.json rename to cpp/6_Performance/cudaGraphsPerfScaling/.vscode/extensions.json diff --git a/Samples/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt b/cpp/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt similarity index 100% rename from Samples/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt rename to cpp/6_Performance/cudaGraphsPerfScaling/CMakeLists.txt diff --git a/Samples/6_Performance/cudaGraphsPerfScaling/README.md b/cpp/6_Performance/cudaGraphsPerfScaling/README.md similarity index 100% rename from Samples/6_Performance/cudaGraphsPerfScaling/README.md rename to cpp/6_Performance/cudaGraphsPerfScaling/README.md diff --git a/Samples/6_Performance/cudaGraphsPerfScaling/cudaGraphPerfScaling.cu b/cpp/6_Performance/cudaGraphsPerfScaling/cudaGraphPerfScaling.cu similarity index 100% rename from Samples/6_Performance/cudaGraphsPerfScaling/cudaGraphPerfScaling.cu rename to cpp/6_Performance/cudaGraphsPerfScaling/cudaGraphPerfScaling.cu diff --git a/Samples/6_Performance/cudaGraphsPerfScaling/dataCollection.bash b/cpp/6_Performance/cudaGraphsPerfScaling/dataCollection.bash similarity index 100% rename from Samples/6_Performance/cudaGraphsPerfScaling/dataCollection.bash rename to cpp/6_Performance/cudaGraphsPerfScaling/dataCollection.bash diff --git a/Samples/6_Performance/transpose/.vscode/c_cpp_properties.json b/cpp/6_Performance/transpose/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/6_Performance/transpose/.vscode/c_cpp_properties.json rename to cpp/6_Performance/transpose/.vscode/c_cpp_properties.json diff --git a/Samples/6_Performance/transpose/.vscode/extensions.json b/cpp/6_Performance/transpose/.vscode/extensions.json similarity index 100% rename from Samples/6_Performance/transpose/.vscode/extensions.json rename to cpp/6_Performance/transpose/.vscode/extensions.json diff --git a/Samples/6_Performance/transpose/CMakeLists.txt b/cpp/6_Performance/transpose/CMakeLists.txt similarity index 100% rename from Samples/6_Performance/transpose/CMakeLists.txt rename to cpp/6_Performance/transpose/CMakeLists.txt diff --git a/Samples/6_Performance/transpose/README.md b/cpp/6_Performance/transpose/README.md similarity index 100% rename from Samples/6_Performance/transpose/README.md rename to cpp/6_Performance/transpose/README.md diff --git a/Samples/6_Performance/transpose/doc/MatrixTranspose.docx b/cpp/6_Performance/transpose/doc/MatrixTranspose.docx similarity index 100% rename from Samples/6_Performance/transpose/doc/MatrixTranspose.docx rename to cpp/6_Performance/transpose/doc/MatrixTranspose.docx diff --git a/Samples/6_Performance/transpose/doc/MatrixTranspose.pdf b/cpp/6_Performance/transpose/doc/MatrixTranspose.pdf similarity index 100% rename from Samples/6_Performance/transpose/doc/MatrixTranspose.pdf rename to cpp/6_Performance/transpose/doc/MatrixTranspose.pdf diff --git a/Samples/6_Performance/transpose/transpose.cu b/cpp/6_Performance/transpose/transpose.cu similarity index 100% rename from Samples/6_Performance/transpose/transpose.cu rename to cpp/6_Performance/transpose/transpose.cu diff --git a/Samples/7_libNVVM/CMakeLists.txt b/cpp/7_libNVVM/CMakeLists.txt similarity index 100% rename from Samples/7_libNVVM/CMakeLists.txt rename to cpp/7_libNVVM/CMakeLists.txt diff --git a/Samples/7_libNVVM/README.md b/cpp/7_libNVVM/README.md similarity index 100% rename from Samples/7_libNVVM/README.md rename to cpp/7_libNVVM/README.md diff --git a/Samples/7_libNVVM/common/include/DDSWriter.h b/cpp/7_libNVVM/common/include/DDSWriter.h similarity index 100% rename from Samples/7_libNVVM/common/include/DDSWriter.h rename to cpp/7_libNVVM/common/include/DDSWriter.h diff --git a/Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt b/cpp/7_libNVVM/cuda-c-linking/CMakeLists.txt similarity index 100% rename from Samples/7_libNVVM/cuda-c-linking/CMakeLists.txt rename to cpp/7_libNVVM/cuda-c-linking/CMakeLists.txt diff --git a/Samples/7_libNVVM/cuda-c-linking/README.md b/cpp/7_libNVVM/cuda-c-linking/README.md similarity index 100% rename from Samples/7_libNVVM/cuda-c-linking/README.md rename to cpp/7_libNVVM/cuda-c-linking/README.md diff --git a/Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp b/cpp/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp similarity index 100% rename from Samples/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp rename to cpp/7_libNVVM/cuda-c-linking/cuda-c-linking.cpp diff --git a/Samples/7_libNVVM/cuda-c-linking/math-funcs.cu b/cpp/7_libNVVM/cuda-c-linking/math-funcs.cu similarity index 100% rename from Samples/7_libNVVM/cuda-c-linking/math-funcs.cu rename to cpp/7_libNVVM/cuda-c-linking/math-funcs.cu diff --git a/Samples/7_libNVVM/cuda-shared-memory/CMakeLists.txt b/cpp/7_libNVVM/cuda-shared-memory/CMakeLists.txt similarity index 100% rename from Samples/7_libNVVM/cuda-shared-memory/CMakeLists.txt rename to cpp/7_libNVVM/cuda-shared-memory/CMakeLists.txt diff --git a/Samples/7_libNVVM/cuda-shared-memory/extern_shared_memory.ll b/cpp/7_libNVVM/cuda-shared-memory/extern_shared_memory.ll similarity index 100% rename from Samples/7_libNVVM/cuda-shared-memory/extern_shared_memory.ll rename to cpp/7_libNVVM/cuda-shared-memory/extern_shared_memory.ll diff --git a/Samples/7_libNVVM/cuda-shared-memory/shared_memory.ll b/cpp/7_libNVVM/cuda-shared-memory/shared_memory.ll similarity index 100% rename from Samples/7_libNVVM/cuda-shared-memory/shared_memory.ll rename to cpp/7_libNVVM/cuda-shared-memory/shared_memory.ll diff --git a/Samples/7_libNVVM/device-side-launch/CMakeLists.txt b/cpp/7_libNVVM/device-side-launch/CMakeLists.txt similarity index 100% rename from Samples/7_libNVVM/device-side-launch/CMakeLists.txt rename to cpp/7_libNVVM/device-side-launch/CMakeLists.txt diff --git a/Samples/7_libNVVM/device-side-launch/README.md b/cpp/7_libNVVM/device-side-launch/README.md similarity index 100% rename from Samples/7_libNVVM/device-side-launch/README.md rename to cpp/7_libNVVM/device-side-launch/README.md diff --git a/Samples/7_libNVVM/device-side-launch/dsl-gpu64.ll b/cpp/7_libNVVM/device-side-launch/dsl-gpu64.ll similarity index 100% rename from Samples/7_libNVVM/device-side-launch/dsl-gpu64.ll rename to cpp/7_libNVVM/device-side-launch/dsl-gpu64.ll diff --git a/Samples/7_libNVVM/device-side-launch/dsl.c b/cpp/7_libNVVM/device-side-launch/dsl.c similarity index 100% rename from Samples/7_libNVVM/device-side-launch/dsl.c rename to cpp/7_libNVVM/device-side-launch/dsl.c diff --git a/Samples/7_libNVVM/ptxgen/CMakeLists.txt b/cpp/7_libNVVM/ptxgen/CMakeLists.txt similarity index 100% rename from Samples/7_libNVVM/ptxgen/CMakeLists.txt rename to cpp/7_libNVVM/ptxgen/CMakeLists.txt diff --git a/Samples/7_libNVVM/ptxgen/README.md b/cpp/7_libNVVM/ptxgen/README.md similarity index 100% rename from Samples/7_libNVVM/ptxgen/README.md rename to cpp/7_libNVVM/ptxgen/README.md diff --git a/Samples/7_libNVVM/ptxgen/ptxgen.c b/cpp/7_libNVVM/ptxgen/ptxgen.c similarity index 100% rename from Samples/7_libNVVM/ptxgen/ptxgen.c rename to cpp/7_libNVVM/ptxgen/ptxgen.c diff --git a/Samples/7_libNVVM/ptxgen/test.ll b/cpp/7_libNVVM/ptxgen/test.ll similarity index 100% rename from Samples/7_libNVVM/ptxgen/test.ll rename to cpp/7_libNVVM/ptxgen/test.ll diff --git a/Samples/7_libNVVM/simple/CMakeLists.txt b/cpp/7_libNVVM/simple/CMakeLists.txt similarity index 100% rename from Samples/7_libNVVM/simple/CMakeLists.txt rename to cpp/7_libNVVM/simple/CMakeLists.txt diff --git a/Samples/7_libNVVM/simple/README.md b/cpp/7_libNVVM/simple/README.md similarity index 100% rename from Samples/7_libNVVM/simple/README.md rename to cpp/7_libNVVM/simple/README.md diff --git a/Samples/7_libNVVM/simple/simple-gpu64.ll b/cpp/7_libNVVM/simple/simple-gpu64.ll similarity index 100% rename from Samples/7_libNVVM/simple/simple-gpu64.ll rename to cpp/7_libNVVM/simple/simple-gpu64.ll diff --git a/Samples/7_libNVVM/simple/simple.c b/cpp/7_libNVVM/simple/simple.c similarity index 100% rename from Samples/7_libNVVM/simple/simple.c rename to cpp/7_libNVVM/simple/simple.c diff --git a/Samples/7_libNVVM/syscalls/CMakeLists.txt b/cpp/7_libNVVM/syscalls/CMakeLists.txt similarity index 100% rename from Samples/7_libNVVM/syscalls/CMakeLists.txt rename to cpp/7_libNVVM/syscalls/CMakeLists.txt diff --git a/Samples/7_libNVVM/syscalls/malloc-free.ll b/cpp/7_libNVVM/syscalls/malloc-free.ll similarity index 100% rename from Samples/7_libNVVM/syscalls/malloc-free.ll rename to cpp/7_libNVVM/syscalls/malloc-free.ll diff --git a/Samples/7_libNVVM/syscalls/vprintf.ll b/cpp/7_libNVVM/syscalls/vprintf.ll similarity index 100% rename from Samples/7_libNVVM/syscalls/vprintf.ll rename to cpp/7_libNVVM/syscalls/vprintf.ll diff --git a/Samples/7_libNVVM/utils/build.bat b/cpp/7_libNVVM/utils/build.bat similarity index 100% rename from Samples/7_libNVVM/utils/build.bat rename to cpp/7_libNVVM/utils/build.bat diff --git a/Samples/7_libNVVM/utils/build.sh b/cpp/7_libNVVM/utils/build.sh similarity index 100% rename from Samples/7_libNVVM/utils/build.sh rename to cpp/7_libNVVM/utils/build.sh diff --git a/Samples/7_libNVVM/uvmlite/CMakeLists.txt b/cpp/7_libNVVM/uvmlite/CMakeLists.txt similarity index 100% rename from Samples/7_libNVVM/uvmlite/CMakeLists.txt rename to cpp/7_libNVVM/uvmlite/CMakeLists.txt diff --git a/Samples/7_libNVVM/uvmlite/README.md b/cpp/7_libNVVM/uvmlite/README.md similarity index 100% rename from Samples/7_libNVVM/uvmlite/README.md rename to cpp/7_libNVVM/uvmlite/README.md diff --git a/Samples/7_libNVVM/uvmlite/uvmlite.c b/cpp/7_libNVVM/uvmlite/uvmlite.c similarity index 100% rename from Samples/7_libNVVM/uvmlite/uvmlite.c rename to cpp/7_libNVVM/uvmlite/uvmlite.c diff --git a/Samples/7_libNVVM/uvmlite/uvmlite64.ll b/cpp/7_libNVVM/uvmlite/uvmlite64.ll similarity index 100% rename from Samples/7_libNVVM/uvmlite/uvmlite64.ll rename to cpp/7_libNVVM/uvmlite/uvmlite64.ll diff --git a/Samples/8_Platform_Specific/Tegra/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu b/cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu rename to cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/README.md b/cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/README.md rename to cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/README.md diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/egl_common.h b/cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/egl_common.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/egl_common.h rename to cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/egl_common.h diff --git a/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/graphics_interface.h b/cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/graphics_interface.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/graphics_interface.h rename to cpp/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/graphics_interface.h diff --git a/Samples/8_Platform_Specific/Tegra/README.md b/cpp/8_Platform_Specific/Tegra/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/README.md rename to cpp/8_Platform_Specific/Tegra/README.md diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/README.md b/cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/README.md rename to cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/README.md diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/main.cu b/cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/main.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAErrorReporting/main.cu rename to cpp/8_Platform_Specific/Tegra/cuDLAErrorReporting/main.cu diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/README.md b/cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/README.md rename to cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/README.md diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/main.cu b/cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/main.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAHybridMode/main.cu rename to cpp/8_Platform_Specific/Tegra/cuDLAHybridMode/main.cu diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/README.md b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/README.md rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/README.md diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/main.cu b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/main.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/main.cu rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsHybrid/main.cu diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/README.md b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/README.md rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/README.md diff --git a/Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/main.cpp b/cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/main.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/main.cpp rename to cpp/8_Platform_Specific/Tegra/cuDLALayerwiseStatsStandalone/main.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/README.md b/cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/README.md rename to cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/README.md diff --git a/Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/main.cpp b/cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/main.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cuDLAStandaloneMode/main.cpp rename to cpp/8_Platform_Specific/Tegra/cuDLAStandaloneMode/main.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv b/cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv rename to cpp/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/README.md b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/README.md rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/README.md diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cudaNvSciNvMedia_Readme.pdf b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cudaNvSciNvMedia_Readme.pdf similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cudaNvSciNvMedia_Readme.pdf rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cudaNvSciNvMedia_Readme.pdf diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.cu b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.cu rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.cu diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/cuda_consumer.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/main.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/main.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/main.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/main.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_producer.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/cmdline.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/config_parser.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/image_utils.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/log_utils.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvmedia_utils/misc_utils.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.cpp b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.cpp rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.cpp diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.h b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.h rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/nvsci_setup.h diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/sample.cfg b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/sample.cfg similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/sample.cfg rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/sample.cfg diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/teapot.rgba b/cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/teapot.rgba similarity index 100% rename from Samples/8_Platform_Specific/Tegra/cudaNvSciNvMedia/teapot.rgba rename to cpp/8_Platform_Specific/Tegra/cudaNvSciNvMedia/teapot.rgba diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/fluidsGLES/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/fluidsGLES/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/README.md b/cpp/8_Platform_Specific/Tegra/fluidsGLES/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/README.md rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/README.md diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/data/ref_fluidsGLES.ppm b/cpp/8_Platform_Specific/Tegra/fluidsGLES/data/ref_fluidsGLES.ppm similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/data/ref_fluidsGLES.ppm rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/data/ref_fluidsGLES.ppm diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/defines.h b/cpp/8_Platform_Specific/Tegra/fluidsGLES/defines.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/defines.h rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/defines.h diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES.cpp b/cpp/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES.cpp rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES.cpp diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cu b/cpp/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cu rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cu diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cuh b/cpp/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cuh similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cuh rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.cuh diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.h b/cpp/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.h rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/fluidsGLES_kernels.h diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/graphics_interface.h b/cpp/8_Platform_Specific/Tegra/fluidsGLES/graphics_interface.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/graphics_interface.h rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/graphics_interface.h diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/mesh.frag.glsl b/cpp/8_Platform_Specific/Tegra/fluidsGLES/mesh.frag.glsl similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/mesh.frag.glsl rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/mesh.frag.glsl diff --git a/Samples/8_Platform_Specific/Tegra/fluidsGLES/mesh.vert.glsl b/cpp/8_Platform_Specific/Tegra/fluidsGLES/mesh.vert.glsl similarity index 100% rename from Samples/8_Platform_Specific/Tegra/fluidsGLES/mesh.vert.glsl rename to cpp/8_Platform_Specific/Tegra/fluidsGLES/mesh.vert.glsl diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/nbody_opengles/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/nbody_opengles/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/README.md b/cpp/8_Platform_Specific/Tegra/nbody_opengles/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/README.md rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/README.md diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystem.h b/cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystem.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystem.h rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystem.h diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu.h b/cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu.h rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu.h diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu_impl.h b/cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu_impl.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu_impl.h rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcpu_impl.h diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.cu b/cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.cu rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.cu diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.h b/cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.h rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda.h diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda_impl.h b/cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda_impl.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda_impl.h rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/bodysystemcuda_impl.h diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/galaxy_20K.bin b/cpp/8_Platform_Specific/Tegra/nbody_opengles/galaxy_20K.bin similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/galaxy_20K.bin rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/galaxy_20K.bin diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/nbody_opengles.cpp b/cpp/8_Platform_Specific/Tegra/nbody_opengles/nbody_opengles.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/nbody_opengles.cpp rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/nbody_opengles.cpp diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/render_particles.cpp b/cpp/8_Platform_Specific/Tegra/nbody_opengles/render_particles.cpp similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/render_particles.cpp rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/render_particles.cpp diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/render_particles.h b/cpp/8_Platform_Specific/Tegra/nbody_opengles/render_particles.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/render_particles.h rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/render_particles.h diff --git a/Samples/8_Platform_Specific/Tegra/nbody_opengles/tipsy.h b/cpp/8_Platform_Specific/Tegra/nbody_opengles/tipsy.h similarity index 100% rename from Samples/8_Platform_Specific/Tegra/nbody_opengles/tipsy.h rename to cpp/8_Platform_Specific/Tegra/nbody_opengles/tipsy.h diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/simpleGLES/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/simpleGLES/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/simpleGLES/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/simpleGLES/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/simpleGLES/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/README.md b/cpp/8_Platform_Specific/Tegra/simpleGLES/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/README.md rename to cpp/8_Platform_Specific/Tegra/simpleGLES/README.md diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/data/ref_simpleGL.bin b/cpp/8_Platform_Specific/Tegra/simpleGLES/data/ref_simpleGL.bin similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/data/ref_simpleGL.bin rename to cpp/8_Platform_Specific/Tegra/simpleGLES/data/ref_simpleGL.bin diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/graphics_interface.c b/cpp/8_Platform_Specific/Tegra/simpleGLES/graphics_interface.c similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/graphics_interface.c rename to cpp/8_Platform_Specific/Tegra/simpleGLES/graphics_interface.c diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/mesh.frag.glsl b/cpp/8_Platform_Specific/Tegra/simpleGLES/mesh.frag.glsl similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/mesh.frag.glsl rename to cpp/8_Platform_Specific/Tegra/simpleGLES/mesh.frag.glsl diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/mesh.vert.glsl b/cpp/8_Platform_Specific/Tegra/simpleGLES/mesh.vert.glsl similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/mesh.vert.glsl rename to cpp/8_Platform_Specific/Tegra/simpleGLES/mesh.vert.glsl diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES/simpleGLES.cu b/cpp/8_Platform_Specific/Tegra/simpleGLES/simpleGLES.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES/simpleGLES.cu rename to cpp/8_Platform_Specific/Tegra/simpleGLES/simpleGLES.cu diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/c_cpp_properties.json b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/c_cpp_properties.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/c_cpp_properties.json rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/c_cpp_properties.json diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/extensions.json b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/extensions.json similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/extensions.json rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/.vscode/extensions.json diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/CMakeLists.txt diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/README.md b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/README.md similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/README.md rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/README.md diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/data/ref_simpleGLES_EGLOutput.bin b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/data/ref_simpleGLES_EGLOutput.bin similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/data/ref_simpleGLES_EGLOutput.bin rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/data/ref_simpleGLES_EGLOutput.bin diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/graphics_interface_egloutput_via_egl.c b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/graphics_interface_egloutput_via_egl.c similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/graphics_interface_egloutput_via_egl.c rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/graphics_interface_egloutput_via_egl.c diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.frag.glsl b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.frag.glsl similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.frag.glsl rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.frag.glsl diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.vert.glsl b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.vert.glsl similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.vert.glsl rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/mesh.vert.glsl diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/simpleGLES_EGLOutput.cu b/cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/simpleGLES_EGLOutput.cu similarity index 100% rename from Samples/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/simpleGLES_EGLOutput.cu rename to cpp/8_Platform_Specific/Tegra/simpleGLES_EGLOutput/simpleGLES_EGLOutput.cu diff --git a/Samples/CMakeLists.txt b/cpp/CMakeLists.txt similarity index 100% rename from Samples/CMakeLists.txt rename to cpp/CMakeLists.txt diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..d4976205 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,59 @@ +[tool.ruff] +# Target Python 3.8+ for CUDA samples compatibility +target-version = "py38" + +# Include common Python directories +include = ["python/**/*.py"] + +# Exclude generated and vendor directories +exclude = [ + ".git", + "__pycache__", + "build", + "dist", + "*.egg-info", +] + +# Set line length to 88 (Black's default) +line-length = 88 +indent-width = 4 + +[tool.ruff.lint] +# Enable important linting rules +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "N", # pep8-naming + "UP", # pyupgrade + "YTT", # flake8-2020 + "S", # flake8-bandit + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "T20", # flake8-print +] + +# Ignore specific rules that may not fit CUDA examples +ignore = [ + "S101", # Allow assert statements (common in examples) + "T20", # Allow print statements (needed for demo output) + "S311", # Allow random for demo data generation + "N806", # Allow non-lowercase variable names (CUDA conventions) + "N999", # Allow numbered module names for sample organization +] + +[tool.ruff.format] +# Use double quotes for strings +quote-style = "double" + +# Keep magic trailing commas +skip-magic-trailing-comma = false + +# Use 4-space indentation +indent-style = "space" + +[tool.ruff.lint.isort] +# Sort imports +force-single-line = false +combine-as-imports = true diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 00000000..5d24ff14 --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,220 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +*.lcov +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi/* +!.pixi/config.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule* +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ +# Temporary file for partial code execution +tempCodeRunnerFile.py + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml diff --git a/python/1_GettingStarted/blurImageUnifiedMemory/README.md b/python/1_GettingStarted/blurImageUnifiedMemory/README.md new file mode 100644 index 00000000..7f228dfe --- /dev/null +++ b/python/1_GettingStarted/blurImageUnifiedMemory/README.md @@ -0,0 +1,168 @@ +# Sample: Image Blur with Unified Memory (Python) + +## Description + +Blur images on GPU using modern `cuda.core` APIs for kernel compilation, execution, and memory management. This sample demonstrates **zero-copy data sharing** between CPU and GPU using unified (managed) memory. + +## What You'll Learn + +- Compiling CUDA kernels at runtime with `cuda.core.Program` +- Launching kernels with `cuda.core.launch` and `LaunchConfig` +- Using unified memory with `cuda.core.ManagedMemoryResource` +- **Zero-copy CPU access** to unified memory via `np.from_dlpack()` +- Seamless CPU/GPU memory access without explicit transfers + +## Key Concepts + +### Kernel Compilation with cuda.core.Program + +```python +# Compile CUDA C++ kernel at runtime +program = Program(KERNEL_CODE, code_type="c++", options=options) +compiled = program.compile(target_type="cubin") +kernel = compiled.get_kernel("box_blur_3x3") +``` + +### Kernel Launch with cuda.core.launch + +```python +# Configure and launch kernel +config = LaunchConfig(grid=grid_size, block=block_size) + +# Buffers can be passed directly as kernel arguments +launch(stream, config, kernel, src_buf, dst_buf, H, W) +``` + +### Unified Memory (Managed Memory) + +This sample uses `ManagedMemoryResource` for simplicity: a single allocation is accessible from both CPU and GPU without explicit transfers. For performance-critical workloads, consider `LegacyPinnedMemoryResource` + `DeviceMemoryResource` instead, which gives explicit control over host/device placement and transfer costs. + +Unified memory is accessible from both CPU and GPU without explicit data transfers: + +```python +# Allocate unified memory +options = ManagedMemoryResourceOptions(preferred_location=device.device_id) +mr = ManagedMemoryResource(options) +src_buf = mr.allocate(n_bytes, stream) +dst_buf = mr.allocate(n_bytes, stream) +try: + # Synchronize to ensure allocations are complete before CPU access + stream.sync() + + # Create numpy views of unified memory using DLPack protocol (zero-copy) + src_np = np.from_dlpack(src_buf).view(np.float32).reshape(H, W) + dst_np = np.from_dlpack(dst_buf).view(np.float32).reshape(H, W) + + # CPU writes directly to unified memory + src_np[:] = input_data + + # Launch kernel - buffers can be passed directly as arguments + launch(stream, config, kernel, src_buf, dst_buf, H, W) + stream.sync() + + # Return zero-copy view; caller must close buffers when done + return dst_np, src_buf, dst_buf +except Exception: + src_buf.close() + dst_buf.close() + raise +``` + +When returning a zero-copy view, the caller must close the buffers after use (e.g., in a `try/finally` block) to avoid leaking managed memory. + +## Key APIs + +### From `cuda.core`: + +- `Device` - CUDA device management +- `Program` - Runtime kernel compilation (NVRTC) +- `ProgramOptions` - Compilation options (architecture target) +- `LaunchConfig` - Kernel launch configuration (grid/block dimensions) +- `launch` - Execute compiled kernel +- `ManagedMemoryResource` - Unified memory allocation + +### Zero-Copy Techniques: + +- `np.from_dlpack(buffer)` - Create numpy view of unified memory using DLPack protocol +- Pass `buffer` directly to `launch()` as kernel arguments +- When returning a zero-copy view, return `(view, src_buf, dst_buf)` and have the caller close buffers in `try/finally` after use + +## Kernel Techniques + +- **2D Thread Mapping** - Each thread computes one output pixel +- **Stencil Pattern** - Read neighboring pixels (3x3 neighborhood) +- **Boundary Handling** - Clamp to edge for border pixels +- **Box Filter** - 3x3 averaging for blur effect + +## Requirements + +### Hardware: + +- NVIDIA GPU with CUDA support +- Minimum GPU memory: 256 MB + +### Software: + +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- `cuda-python` package (13.0.0+) +- `cuda-core` package (>=0.6.0) +- `numpy` package (>=2.3.2) +- `pillow` package (10.0.0+) + +## Installation + +```bash +cd /path/to/cuda-samples/python/1_GettingStarted/blurImageUnifiedMemory +pip install -r requirements.txt +``` + +## How to Run + +```bash +python blurImageUnifiedMemory.py +``` + +## Expected Output + +``` +============================================================ +Image Blur with Unified Memory (cuda.core) +============================================================ + +Device: +Compute Capability: sm_ + +Compiling CUDA kernel with cuda.core.Program... + Compiled for architecture: sm_ + +Image size: 256x256 grayscale +Creating sample image... +Blurring image on GPU... + +Saving results... + Saved: original_image.png + Saved: blurred_image.png + +Verifying result... + Test PASSED + Max difference from original: +``` + +## Output Files + +- `original_image.png` - Test pattern image before blur +- `blurred_image.png` - Image after 3x3 box blur + +## Files + +- `blurImageUnifiedMemory.py` - Python implementation using cuda.core +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## See Also + +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [cuda.core.Program](https://nvidia.github.io/cuda-python/cuda-core/latest/generated/cuda.core.Program.html) +- [cuda.core.ManagedMemoryResource](https://nvidia.github.io/cuda-python/cuda-core/latest/generated/cuda.core.ManagedMemoryResource.html) +- [CUDA Managed Memory](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-unified-memory-programming-hd) diff --git a/python/1_GettingStarted/blurImageUnifiedMemory/blurImageUnifiedMemory.py b/python/1_GettingStarted/blurImageUnifiedMemory/blurImageUnifiedMemory.py new file mode 100644 index 00000000..254f5056 --- /dev/null +++ b/python/1_GettingStarted/blurImageUnifiedMemory/blurImageUnifiedMemory.py @@ -0,0 +1,269 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Image Blur with Unified Memory using cuda.core + +Demonstrates GPU image blurring using cuda.core APIs for kernel compilation, +launch, and unified memory allocation. +""" + +import sys + +try: + import numpy as np + from cuda.core import ( + Device, + LaunchConfig, + ManagedMemoryResource, + ManagedMemoryResourceOptions, + Program, + ProgramOptions, + launch, + ) + from PIL import Image +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +# CUDA kernel source code - compiled at runtime by cuda.core.Program +BOX_BLUR_KERNEL_CODE = r""" +extern "C" __global__ +void box_blur_3x3(const float* __restrict__ src, + float* __restrict__ dst, int H, int W) { + /* + * Simple 3x3 box blur CUDA kernel. + * + * Each thread computes one output pixel by averaging + * the 3x3 neighborhood of input pixels (stencil pattern). + */ + + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x >= W || y >= H) return; + + float sum = 0.0f; + int count = 0; + + // 3x3 stencil: iterate over neighborhood + for (int dy = -1; dy <= 1; dy++) { + for (int dx = -1; dx <= 1; dx++) { + int nx = x + dx; + int ny = y + dy; + + // Boundary check (clamp to edge) + if (nx >= 0 && nx < W && ny >= 0 && ny < H) { + sum += src[ny * W + nx]; + count++; + } + } + } + + dst[y * W + x] = sum / count; +} +""" + + +def make_test_image(h: int, w: int, dtype=np.uint8) -> np.ndarray: + """Create a test grayscale image for demonstration.""" + img = np.zeros((h, w), dtype=dtype) + + # Create horizontal stripes + for i in range(0, h, 50): + img[i : i + 25, :] = 255 + + # Create vertical stripes with different intensity + for j in range(0, w, 50): + img[:, j : j + 25] = 128 + + # Add circular pattern for interesting blur effects + center_y, center_x = h // 2, w // 2 + y, x = np.ogrid[:h, :w] + circle_mask = (x - center_x) ** 2 + (y - center_y) ** 2 <= (min(h, w) // 6) ** 2 + img[circle_mask] = 200 + + return np.ascontiguousarray(img) + + +def blur_image_unified_memory( + host_np: np.ndarray, device: Device, stream, kernel +) -> tuple[np.ndarray, object, object]: + """ + Blur image on GPU using unified memory with cuda.core. + + This function demonstrates: + 1. Allocate managed memory using ManagedMemoryResource + 2. Create zero-copy numpy views using np.from_dlpack() + 3. Launch kernel via cuda.core.launch + + Args: + host_np: NumPy array containing image data on CPU + device: CUDA device to use + stream: cuda.core Stream for async operations + kernel: Compiled cuda.core Kernel object + + Returns: + Tuple of (dst_np, src_buf, dst_buf). dst_np is a zero-copy view into + unified memory. Caller must close src_buf and dst_buf when done with + dst_np to avoid leaking managed memory. + """ + H, W = host_np.shape + n_bytes = H * W * np.dtype(np.float32).itemsize + + # Create managed memory resource for unified memory allocation + options = ManagedMemoryResourceOptions(preferred_location=device.device_id) + mr = ManagedMemoryResource(options) + + # Allocate unified memory buffers for source and destination images + src_buf = mr.allocate(n_bytes, stream) + dst_buf = mr.allocate(n_bytes, stream) + try: + # Synchronize to ensure allocations are complete before CPU access + stream.sync() + + # Create numpy views of unified memory using DLPack protocol (zero-copy) + src_np = np.from_dlpack(src_buf).view(np.float32).reshape(H, W) + dst_np = np.from_dlpack(dst_buf).view(np.float32).reshape(H, W) + + # Write input data to unified memory (CPU can access directly) + src_np[:] = host_np.astype(np.float32) / 255.0 + + # Configure kernel launch parameters + block_size = (16, 16) + grid_size = ( + (W + block_size[0] - 1) // block_size[0], + (H + block_size[1] - 1) // block_size[1], + ) + + # Create LaunchConfig for kernel execution + config = LaunchConfig(grid=grid_size, block=block_size) + + # Launch kernel - buffers can be passed directly as kernel arguments + launch( + stream, + config, + kernel, + src_buf, + dst_buf, + np.int32(H), + np.int32(W), + ) + + # Synchronize to ensure kernel completion before reading results + stream.sync() + + # Return zero-copy view; caller closes buffers when done + return (dst_np, src_buf, dst_buf) + except Exception: + src_buf.close() + dst_buf.close() + raise + + +def main(): + """ + Complete demonstration of GPU image blurring with cuda.core. + + This example shows: + 1. Device initialization with cuda.core.Device + 2. Kernel compilation with cuda.core.Program + 3. Unified memory with cuda.core.ManagedMemoryResource + 4. Kernel launch with cuda.core.launch and LaunchConfig + """ + print("=" * 60) + print("Image Blur with Unified Memory (cuda.core)") + print("=" * 60) + + # Initialize CUDA device + device = Device(0) + device.set_current() + + print(f"\nDevice: {device.name}") + print(f"Compute Capability: sm_{device.arch}") + + # Create stream for async operations + stream = device.create_stream() + try: + # Compile kernel using cuda.core.Program + print("\nCompiling CUDA kernel with cuda.core.Program...") + arch = f"sm_{device.arch}" + options = ProgramOptions(arch=arch) + program = Program(BOX_BLUR_KERNEL_CODE, code_type="c++", options=options) + compiled = program.compile(target_type="cubin") + kernel = compiled.get_kernel("box_blur_3x3") + print(f" Compiled for architecture: {arch}") + + # Image parameters + H, W = 256, 256 + print(f"\nImage size: {H}x{W} grayscale") + + # Create test image + print("Creating sample image...") + host_np = make_test_image(H, W, dtype=np.uint8) + + # Blur image on GPU using cuda.core (returns zero-copy view + buffers) + print("Blurring image on GPU...") + blurred_result, src_buf, dst_buf = blur_image_unified_memory( + host_np, device, stream, kernel + ) + try: + # Save images (use zero-copy view before releasing buffers) + print("\nSaving results...") + original_pil = Image.fromarray(host_np, mode="L") + original_pil.save("original_image.png") + print(" Saved: original_image.png") + + blurred_uint8 = (np.clip(blurred_result, 0, 1) * 255).astype(np.uint8) + blurred_pil = Image.fromarray(blurred_uint8, mode="L") + blurred_pil.save("blurred_image.png") + print(" Saved: blurred_image.png") + + # Verify blur was applied + print("\nVerifying result...") + original_float = host_np.astype(np.float32) / 255.0 + max_diff = np.max(np.abs(blurred_result - original_float)) + blur_applied = max_diff > 0.01 + + if blur_applied: + print(" Test PASSED") + else: + print(" Test FAILED - blur not applied") + sys.exit(1) + + print(f" Max difference from original: {max_diff:.4f}") + finally: + src_buf.close() + dst_buf.close() + finally: + stream.close() + + +if __name__ == "__main__": + main() diff --git a/python/1_GettingStarted/blurImageUnifiedMemory/requirements.txt b/python/1_GettingStarted/blurImageUnifiedMemory/requirements.txt new file mode 100644 index 00000000..c7a8608e --- /dev/null +++ b/python/1_GettingStarted/blurImageUnifiedMemory/requirements.txt @@ -0,0 +1,6 @@ +# Image Blur with Unified Memory Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +numpy>=2.3.2 +pillow>=10.0.0 diff --git a/python/1_GettingStarted/copyImageArraytoGPU/README.md b/python/1_GettingStarted/copyImageArraytoGPU/README.md new file mode 100644 index 00000000..f5b426d2 --- /dev/null +++ b/python/1_GettingStarted/copyImageArraytoGPU/README.md @@ -0,0 +1,119 @@ +# Sample: Image Array Copy to GPU (Python) + +## Description + +Copy image arrays between CPU and GPU memory using the modern `cuda.core` API with optimal performance through pinned memory and asynchronous transfers. + +## What You'll Learn + +- How to use pinned memory for faster CPU↔GPU transfers +- Using the `cuda.core` API for memory management +- Working with DLPack for zero-copy array views +- Performing asynchronous memory transfers with CUDA streams +- Interoperability between CUDA Core API and CuPy +- Proper CUDA resource management and cleanup + +## Key Libraries + +- `cuda.core` - Modern CUDA Python API +- `numpy` - Array operations and DLPack support +- `cupy` - GPU array operations and CUDA interoperability + +## Key APIs + +### From `cuda.core`: + +- `Device()` - Initialize and access CUDA device +- `Device.set_current()` - Set the current device for API calls +- `Device.create_stream()` - Create CUDA stream for async operations +- `Device.memory_resource` - Access device memory allocator +- `PinnedMemoryResource()` - Allocate pinned host memory +- `buffer.copy_to()` - Copy data between memory spaces +- `buffer.close()` - Release allocated memory + +### From `numpy`: + +- `np.from_dlpack()` - Create array view from DLPack capsule +- `np.copyto()` - Copy data between arrays + +### From `cupy`: + +- `cp.from_dlpack()` - Create GPU array view from DLPack capsule +- `cp.cuda.ExternalStream()` - Use external CUDA stream + +### From `cuda_samples_utils`: + +- `verify_array_result()` - Verify computation results + +## Requirements + +### Hardware: + +- NVIDIA GPU with CUDA support +- Sufficient GPU memory for image data (sample uses ~200KB for 256×256×3 image) + +### Software: + +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- NumPy 2.3.2 or newer (required for DLPack support) +- `cuda-python` package (>=13.0.0+) +- `cuda-core` package (>=0.6.0) +- `cupy-cuda13x` package (13.0.0+) + +## Installation + +Install the required packages from requirements.txt: + +```bash +cd /path/to/cuda-samples/python/1_GettingStarted/copyImageArraytoGPU +pip install -r requirements.txt +``` + +The requirements.txt installs: +- `numpy` (2.3.2+, required for DLPack) +- `cuda-python` (>=13.0.0+) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (13.0.0+) + +## How to Run + +### Basic usage: + +```bash +cd samples/python/1_GettingStarted/copyImageArraytoGPU +python copyImageArraytoGPU.py +``` + +## Expected Output + +``` +[Image Array Copy to GPU using CUDA Core API] +Device: NVIDIA GeForce RTX 4090 +[Image array copy of 256x256x3 image] +Creating sample image... +Copying image to GPU... +Creating CuPy view of GPU data... +Mean pixel value (computed on GPU): 127.50 +Copying image back from GPU... +Verifying result... +Test PASSED + +Done +``` + +**Note:** Device name will vary based on your GPU. + +## Files + +- `copyImageArraytoGPU.py` - Python implementation using cuda.core API +- `README.md` - This file +- `requirements.txt` - Sample dependencies +- `../../Utilities/cuda_samples_utils.py` - Common utilities (imported by this sample) + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [cuda.core API Guide](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [DLPack Specification](https://dmlc.github.io/dlpack/latest/) +- [CuPy Documentation](https://docs.cupy.dev/) diff --git a/python/1_GettingStarted/copyImageArraytoGPU/copyImageArraytoGPU.py b/python/1_GettingStarted/copyImageArraytoGPU/copyImageArraytoGPU.py new file mode 100644 index 00000000..d78a7b6a --- /dev/null +++ b/python/1_GettingStarted/copyImageArraytoGPU/copyImageArraytoGPU.py @@ -0,0 +1,239 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Image Array Copy to GPU using CUDA Core API + +This sample demonstrates how to copy image arrays between CPU and GPU memory +using NVIDIA's CUDA Core Python API with optimal performance. +""" + +import sys +from pathlib import Path + +# Add parent directory to path to import utilities +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result + +try: + import cupy as cp + import numpy as np + from cuda.core import Buffer, Device, PinnedMemoryResource, Stream +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +# ----------------------------- Helper Functions ------------------------------ + + +def make_random_image(h: int, w: int, c: int, dtype=np.uint8) -> np.ndarray: + """ + Create a random test image for demonstration. + + Args: + h: Image height in pixels + w: Image width in pixels + c: Number of channels (e.g., 3 for RGB) + dtype: NumPy data type (e.g., np.uint8 for 0-255 pixel values) + + Returns: + A contiguous NumPy array representing the image + """ + img = np.random.randint(0, 256, size=(h, w, c), dtype=dtype) + return np.ascontiguousarray(img) # Ensure memory is contiguous for GPU transfer + + +# ----------------------------- Core GPU Functions --------------------------- + + +def copy_image_to_gpu_cuda_core( + host_np: np.ndarray, dev: Device, stream: Stream +) -> tuple[Buffer, Buffer]: + """ + Copy image from CPU memory to GPU memory using optimal transfer method. + + This function demonstrates the recommended approach: + 1. Use pinned memory for faster transfers + 2. Use DLPack for zero-copy array views + 3. Perform async transfers on a CUDA stream + + Args: + host_np: NumPy array containing image data on CPU + dev: CUDA device object + stream: CUDA stream for async operations + + Returns: + Tuple of (device_buffer, pinned_buffer) - both need to be cleaned up later + """ + nbytes = host_np.nbytes # Calculate total bytes needed + + # Step 1: Set up memory resources + # Device memory resource - allocates on GPU + device_mr = dev.memory_resource + # Pinned memory resource - allocates CPU memory that GPU can access faster + pinned_mr = PinnedMemoryResource() + + # Step 2: Allocate memory buffers + pinned_buffer = pinned_mr.allocate(nbytes, stream=stream) # Fast CPU memory + device_buffer = device_mr.allocate(nbytes, stream=stream) # GPU memory + + # Step 3: Create a NumPy view of pinned memory using DLPack + # This allows us to work with pinned memory as if it's a regular NumPy array + pinned_view = ( + np.from_dlpack(pinned_buffer).view(dtype=host_np.dtype).reshape(host_np.shape) + ) + + # Step 4: Copy image data from regular CPU memory to pinned CPU memory + # This is a CPU-to-CPU copy, so it's very fast + np.copyto(pinned_view, host_np) + + # Step 5: Copy from pinned CPU memory to GPU memory + # This is the actual CPU-to-GPU transfer, done asynchronously + pinned_buffer.copy_to(device_buffer, stream=stream) + + return device_buffer, pinned_buffer + + +def copy_image_from_gpu_cuda_core( + device_buffer: Buffer, shape: tuple, dtype: type, dev: Device, stream: Stream +) -> np.ndarray: + """ + Copy image from GPU memory back to CPU memory. + + This function reverses the GPU-to-CPU transfer process: + 1. Allocate pinned CPU memory for fast transfer + 2. Copy from GPU to pinned CPU memory + 3. Create NumPy view and copy to regular CPU memory + + Args: + device_buffer: GPU buffer containing image data + shape: Original image shape tuple (height, width, channels) + dtype: Original image data type + dev: CUDA device object + stream: CUDA stream for async operations + + Returns: + NumPy array with image data copied from GPU + """ + nbytes = np.prod(shape) * np.dtype(dtype).itemsize # Calculate total bytes + + # Step 1: Create pinned memory for fast GPU-to-CPU transfer + pinned_mr = PinnedMemoryResource() + pinned_buffer = pinned_mr.allocate(nbytes, stream=stream) + + # Step 2: Copy from GPU memory to pinned CPU memory + device_buffer.copy_to(pinned_buffer, stream=stream) + stream.sync() # Wait for the GPU transfer to complete + + # Step 3: Create NumPy view of pinned memory using DLPack + pinned_view = np.from_dlpack(pinned_buffer).view(dtype=dtype).reshape(shape) + + # Step 4: Copy from pinned CPU memory to regular CPU memory + # This creates the final result that can be used normally + host_result = pinned_view.copy() + + # Step 5: Clean up the temporary pinned buffer + pinned_buffer.close(stream) + + return host_result + + +# ------------------------------ Main Demo ------------------------------------ + + +def main(): + """ + Complete demonstration of GPU image copying workflow. + + This example shows: + 1. Setting up CUDA device and stream + 2. Creating a sample image + 3. Copying image to GPU + 4. Accessing GPU data with CuPy (optional) + 5. Copying image back from GPU + 6. Verifying data integrity + 7. Proper cleanup of resources + """ + print("[Image Array Copy to GPU using CUDA Core API]") + + # Image parameters - modify these to test different sizes + H, W, C = 256, 256, 3 # Height=256, Width=256, Channels=3 (RGB) + dtype = np.uint8 # Standard image pixel type (0-255 values) + + # Step 1: Set up CUDA device and stream + dev = Device() # Get default CUDA device (GPU 0) + dev.set_current() # Make this device the active one + stream = dev.create_stream() # Create stream for async operations + + print(f"Device: {dev.name}") + print(f"[Image array copy of {H}x{W}x{C} image]") + + # Step 2: Configure CuPy to use our CUDA stream (for interoperability) + cp.cuda.ExternalStream(int(stream.handle)).use() + + # Step 3: Create a test image on CPU + print("Creating sample image...") + host_np = make_random_image(H, W, C, dtype=dtype) + + # Step 4: Copy image from CPU to GPU + print("Copying image to GPU...") + device_buffer, pinned_buffer = copy_image_to_gpu_cuda_core(host_np, dev, stream) + + # Step 5: (Optional) Get a CuPy view of GPU data for processing + # This shows how you can work with the GPU data without copying it back + print("Creating CuPy view of GPU data...") + device_cp = cp.from_dlpack(device_buffer).view(dtype=dtype).reshape(H, W, C) + + # Example: compute mean pixel value on GPU + mean_value = float(cp.mean(device_cp)) + print(f"Mean pixel value (computed on GPU): {mean_value:.2f}") + + # Step 6: Copy image back from GPU to CPU + print("Copying image back from GPU...") + host_back = copy_image_from_gpu_cuda_core( + device_buffer, host_np.shape, host_np.dtype, dev, stream + ) + + # Step 7: Verify that the data survived the round trip + print("Verifying result...") + host_back_cp = cp.asarray(host_back) + host_np_cp = cp.asarray(host_np) + verify_array_result(host_back_cp, host_np_cp, rtol=0, atol=0) + + # Step 8: Clean up all allocated resources + device_buffer.close(stream) # Free GPU memory + pinned_buffer.close(stream) # Free pinned CPU memory + stream.close() # Close CUDA stream + cp.cuda.Stream.null.use() # Reset CuPy's stream to default + + print("\nDone") + + +if __name__ == "__main__": + main() diff --git a/python/1_GettingStarted/copyImageArraytoGPU/requirements.txt b/python/1_GettingStarted/copyImageArraytoGPU/requirements.txt new file mode 100644 index 00000000..4e85c082 --- /dev/null +++ b/python/1_GettingStarted/copyImageArraytoGPU/requirements.txt @@ -0,0 +1,6 @@ +# Image Array Copy to GPU Sample Requirements + +numpy>=2.3.2 +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 diff --git a/python/1_GettingStarted/deviceQuery/README.md b/python/1_GettingStarted/deviceQuery/README.md new file mode 100644 index 00000000..52c3797a --- /dev/null +++ b/python/1_GettingStarted/deviceQuery/README.md @@ -0,0 +1,189 @@ +# Sample: Device Query (Python) + +## Description + +Query and display detailed properties of all CUDA-capable devices in your system using the modern `cuda.core` API. + +## What You'll Learn + +- How to enumerate CUDA devices in the system +- Using the `cuda.core` API for device management +- Querying comprehensive device properties (compute capability, memory, limits) +- Accessing low-level device attributes via `cuda.bindings` +- Checking peer-to-peer (P2P) access capabilities between GPUs + +## Key Libraries + +- `cuda.core` - Modern CUDA Python API +- `cuda.bindings` - Low-level CUDA bindings for runtime and driver APIs + +## Key APIs + +### From `cuda.core`: + +- `Device.get_all_devices()` - Get tuple of all available Device instances +- `Device(device_id)` - Get Device object for specific device ID +- `system.get_driver_version()` - Query CUDA driver version +- `Device.set_current()` - Set the current device for API calls +- `Device.properties` - Access comprehensive device properties +- `Device.name` - Get device name string +- `Device.can_access_peer()` - Check P2P access to peer device + +### From `cuda.bindings.runtime`: + +- `cudart.cudaRuntimeGetVersion()` - Get CUDA runtime version +- `cudart.cudaDeviceGetAttribute()` - Query specific device attributes + +### From `cuda.bindings.driver`: + +- `cuda.cuMemGetInfo()` - Get memory information for current device + +## Device Properties Queried + +### Compute Capabilities: +- Compute capability version (major.minor) +- Driver and runtime versions +- Number of multiprocessors and CUDA cores + +### Memory Information: +- Total global memory +- Memory clock rate and bus width +- L2 cache size +- Constant and shared memory sizes +- Maximum memory pitch + +### Execution Configuration Limits: +- Maximum threads per block and per multiprocessor +- Maximum block dimensions (x, y, z) +- Maximum grid dimensions (x, y, z) +- Warp size +- Registers per block + +### Texture Capabilities: +- Maximum texture dimensions (1D, 2D, 3D) +- Maximum layered texture sizes + +### Feature Support: +- Unified Addressing (UVA) +- Managed Memory +- Compute Preemption +- Cooperative Kernel Launch +- ECC support +- Host page-locked memory mapping +- Concurrent copy and kernel execution + +### System Information: +- PCI bus information +- Compute mode +- Driver mode (Windows only) +- P2P access matrix (multi-GPU systems) + +## Requirements + +### Hardware: + +- NVIDIA GPU with CUDA support (any compute capability) +- No specific GPU memory requirement (query only) + +### Software: + +- CUDA Toolkit 13.0 or newer (recommended; matches `cuda-python` 13.x) +- Python 3.10 or newer +- `cuda-python` package (>=13.0.0) +- `cuda-core` package (>=0.6.0) + +## Installation + +Install the required packages from requirements.txt: + +```bash +cd cuda-samples/python/1_GettingStarted/deviceQuery +pip install -r requirements.txt +``` + +The requirements.txt installs: +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) + +## How to Run + +### Basic usage: + +```bash +cd cuda-samples/python/1_GettingStarted/deviceQuery +python deviceQuery.py +``` + +### Skip P2P information: + +```bash +python deviceQuery.py --no-p2p +``` + +## Expected Output + +``` +[CUDA Device Query using CUDA Core API] +Detected 1 CUDA Capable device(s) + +Device 0: + CUDA Driver Version / Runtime Version 12.4 / 12.6 + CUDA Capability Major/Minor version number: 8.9 + Total amount of global memory: 24217 MBytes (25393954816 bytes) + (132) Multiprocessors, (128) CUDA Cores/MP: 16896 CUDA Cores + GPU Max Clock rate: 1980 MHz (1.98 GHz) + Memory Clock rate: 10501 Mhz + Memory Bus Width: 384-bit + L2 Cache Size: 67108864 bytes + Maximum Texture Dimension Size (x,y,z) 1D=(131072), 2D=(131072, 65536), 3D=(16384, 16384, 16384) + Maximum Layered 1D Texture Size, (num) layers 1D=(32768), 2048 layers + Maximum Layered 2D Texture Size, (num) layers 2D=(32768, 32768), 2048 layers + Total amount of constant memory: 65536 bytes + Total amount of shared memory per block: 49152 bytes + Total shared memory per multiprocessor: 102400 bytes + Total number of registers available per block: 65536 + Warp size: 32 + Maximum number of threads per multiprocessor: 1536 + Maximum number of threads per block: 1024 + Max dimension size of a thread block (x,y,z): (1024, 1024, 64) + Max dimension size of a grid size (x,y,z): (2147483647, 65535, 65535) + Maximum memory pitch: 2147483647 bytes + Texture alignment: 512 bytes + Concurrent copy and kernel execution: Yes with 2 copy engine(s) + Run time limit on kernels: Yes + Integrated GPU sharing Host Memory: No + Support host page-locked memory mapping: Yes + Device has ECC support: Enabled + Device supports Unified Addressing (UVA): Yes + Device supports Managed Memory: Yes + Device supports Compute Preemption: Yes + Supports Cooperative Kernel Launch: Yes + Device PCI Domain ID / Bus ID / location ID: 0 / 1 / 0 + Compute Mode: + < Default (multiple host threads can use cudaSetDevice() with device simultaneously) > + +Done +``` + +**Note:** Output will vary based on your specific GPU model and system configuration. + +For multi-GPU systems, the output will include information for all detected devices and a P2P access matrix showing which GPUs can directly access each other's memory. + +## Files + +- `deviceQuery.py` - Python implementation using cuda.core API +- `requirements.txt` - Sample dependencies + +## Use Cases + +- **System Diagnostics** - Verify CUDA installation and GPU detection +- **Hardware Profiling** - Understand GPU capabilities before optimization +- **Multi-GPU Systems** - Identify P2P topology for optimal data placement +- **Kernel Development** - Determine execution configuration limits +- **Compatibility Checks** - Verify compute capability requirements + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [cuda.core API Guide](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [CUDA Programming Guide - Device Information](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-enumeration) diff --git a/python/1_GettingStarted/deviceQuery/deviceQuery.py b/python/1_GettingStarted/deviceQuery/deviceQuery.py new file mode 100755 index 00000000..06285ca4 --- /dev/null +++ b/python/1_GettingStarted/deviceQuery/deviceQuery.py @@ -0,0 +1,389 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Device Query using CUDA Core API + +This sample enumerates the properties of the CUDA devices present in the system. +""" + +import platform +import sys + +# cuda.bindings used for properties not yet exposed in cuda.core (see comments below) +try: + from cuda.bindings import driver as cuda, runtime as cudart + from cuda.core import Device, system +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +def print_property(label, value, indent=2): + """ + Helper function to print device properties with aligned formatting. + + Parameters + ---------- + label : str + Property label + value : any + Property value + indent : int + Number of spaces for indentation (default: 2) + """ + field_width = 47 + spaces = " " * indent + print(f"{spaces}{label:<{field_width}}{value}") + + +def fmt_bytes(size_in_bytes): + """Format bytes to human-readable string with MBytes.""" + return f"{size_in_bytes / (1024 * 1024):.0f} MBytes ({size_in_bytes} bytes)" + + +def fmt_hz(rate_in_khz): + """Format frequency in kHz to MHz and GHz.""" + return f"{rate_in_khz * 1e-3:.0f} MHz ({rate_in_khz * 1e-6:.2f} GHz)" + + +def fmt_yes_no(val): + """Format boolean value to Yes/No string.""" + return "Yes" if val else "No" + + +def convert_sm_ver_to_cores(major, minor): + """ + Maps SM version to the number of CUDA cores per SM. + + Information taken from: + https://github.com/NVIDIA/cuda-samples/blob/master/Common/helper_cuda.h + + Parameters + ---------- + major : int + Major compute capability version + minor : int + Minor compute capability version + + Returns + ------- + int + Number of CUDA cores per SM, or 0 if unknown + """ + sm_to_cores = { + (3, 0): 192, + (3, 2): 192, + (3, 5): 192, + (3, 7): 192, + (5, 0): 128, + (5, 2): 128, + (5, 3): 128, + (6, 0): 64, + (6, 1): 128, + (6, 2): 128, + (7, 0): 64, + (7, 2): 64, + (7, 5): 64, + (8, 0): 64, + (8, 6): 128, + (8, 7): 128, + (8, 9): 128, + (9, 0): 128, + (10, 0): 128, + (10, 1): 128, + (10, 3): 128, + (11, 0): 128, + (12, 0): 128, + (12, 1): 128, + } + return sm_to_cores.get((major, minor), 0) + + +def print_device_info(dev_id, device): + """ + Print detailed information for a single CUDA device. + Uses device.properties (cuda.core) for most fields; cuda.bindings for + runtime version and global memory (not yet in high-level API). + """ + device.set_current() + props = device.properties + + print() + print(f"Device {dev_id}: {device.name}") + + # cuda.bindings workaround: runtime version not in cuda.core + driver_major, driver_minor = system.get_driver_version() + err, runtime_version = cudart.cudaRuntimeGetVersion() + if err != cudart.cudaError_t.cudaSuccess: + raise RuntimeError(f"Failed to get CUDA runtime version: {err}") + runtime_major = runtime_version // 1000 + runtime_minor = (runtime_version % 1000) // 10 + + print_property( + "CUDA Driver Version / Runtime Version", + f"{driver_major}.{driver_minor} / {runtime_major}.{runtime_minor}", + ) + print_property( + "CUDA Capability Major/Minor version number:", + f"{props.compute_capability_major}.{props.compute_capability_minor}", + ) + + # cuda.bindings workaround: global memory (free/total) not in device.properties + err, free_mem, total_mem_bytes = cuda.cuMemGetInfo() + if err != cuda.CUresult.CUDA_SUCCESS: + raise RuntimeError(f"Failed to get memory info: {err}") + print_property("Total amount of global memory:", fmt_bytes(total_mem_bytes)) + + sm_cores = convert_sm_ver_to_cores( + props.compute_capability_major, props.compute_capability_minor + ) + total_cores = sm_cores * props.multiprocessor_count + print_property( + f"({props.multiprocessor_count:3d}) Multiprocessors, " + f"({sm_cores:3d}) CUDA Cores/MP:", + f"{total_cores} CUDA Cores", + ) + + print_property("GPU Max Clock rate:", fmt_hz(props.clock_rate)) + print_property("Memory Clock rate:", f"{props.memory_clock_rate * 1e-3:.0f} Mhz") + print_property("Memory Bus Width:", f"{props.global_memory_bus_width}-bit") + if props.l2_cache_size > 0: + print_property("L2 Cache Size:", f"{props.l2_cache_size} bytes") + + print_property( + "Maximum Texture Dimension Size (x,y,z)", + f"1D=({props.maximum_texture1d_width}), " + f"2D=({props.maximum_texture2d_width}, {props.maximum_texture2d_height}), " + f"3D=({props.maximum_texture3d_width}, {props.maximum_texture3d_height}, " + f"{props.maximum_texture3d_depth})", + ) + print_property( + "Maximum Layered 1D Texture Size, (num) layers", + f"1D=({props.maximum_texture1d_layered_width}), " + f"{props.maximum_texture1d_layered_layers} layers", + ) + print_property( + "Maximum Layered 2D Texture Size, (num) layers", + f"2D=({props.maximum_texture2d_layered_width}, " + f"{props.maximum_texture2d_layered_height}), " + f"{props.maximum_texture2d_layered_layers} layers", + ) + + print_property( + "Total amount of constant memory:", f"{props.total_constant_memory} bytes" + ) + print_property( + "Total amount of shared memory per block:", + f"{props.max_shared_memory_per_block} bytes", + ) + print_property( + "Total shared memory per multiprocessor:", + f"{props.max_shared_memory_per_multiprocessor} bytes", + ) + print_property( + "Total number of registers available per block:", props.max_registers_per_block + ) + + print_property("Warp size:", props.warp_size) + print_property( + "Maximum number of threads per multiprocessor:", + props.max_threads_per_multiprocessor, + ) + print_property("Maximum number of threads per block:", props.max_threads_per_block) + print_property( + "Max dimension size of a thread block (x,y,z):", + f"({props.max_block_dim_x}, {props.max_block_dim_y}, {props.max_block_dim_z})", + ) + print_property( + "Max dimension size of a grid size (x,y,z):", + f"({props.max_grid_dim_x}, {props.max_grid_dim_y}, {props.max_grid_dim_z})", + ) + print_property("Maximum memory pitch:", f"{props.max_pitch} bytes") + print_property("Texture alignment:", f"{props.texture_alignment} bytes") + + print_property( + "Concurrent copy and kernel execution:", + f"{fmt_yes_no(props.gpu_overlap)} with " + f"{props.async_engine_count} copy engine(s)", + ) + print_property("Run time limit on kernels:", fmt_yes_no(props.kernel_exec_timeout)) + + print_property("Integrated GPU sharing Host Memory:", fmt_yes_no(props.integrated)) + print_property( + "Support host page-locked memory mapping:", + fmt_yes_no(props.can_map_host_memory), + ) + print_property( + "Device has ECC support:", "Enabled" if props.ecc_enabled else "Disabled" + ) + if platform.system() == "Windows": + mode = ( + "TCC (Tesla Compute Cluster Driver)" + if props.tcc_driver + else "WDDM (Windows Display Driver Model)" + ) + print_property("CUDA Device Driver Mode (TCC or WDDM):", mode) + + print_property( + "Device supports Unified Addressing (UVA):", + fmt_yes_no(props.unified_addressing), + ) + print_property("Device supports Managed Memory:", fmt_yes_no(props.managed_memory)) + print_property( + "Device supports Compute Preemption:", + fmt_yes_no(props.compute_preemption_supported), + ) + print_property( + "Supports Cooperative Kernel Launch:", fmt_yes_no(props.cooperative_launch) + ) + + print_property( + "Device PCI Domain ID / Bus ID / location ID:", + f"{props.pci_domain_id} / {props.pci_bus_id} / {props.pci_device_id}", + ) + compute_modes = { + 0: ( + "Default (multiple host threads can use cudaSetDevice() " + "with device simultaneously)" + ), + 1: ( + "Exclusive (only one host thread in one process is able to " + "use cudaSetDevice() with this device)" + ), + 2: "Prohibited (no host thread can use cudaSetDevice() with this device)", + 3: ( + "Exclusive Process (many threads in one process is able to " + "use cudaSetDevice() with this device)" + ), + } + print_property("Compute Mode:", "") + print(f" < {compute_modes.get(props.compute_mode, 'Unknown')} >") + + +def print_p2p_access_info(devices): + """ + Print peer-to-peer access information for multi-GPU systems. + + Parameters + ---------- + devices : tuple of Device + Tuple of CUDA device objects + """ + print() + print("Peer-to-Peer (P2P) access support:") + for i, dev_i in enumerate(devices): + for j, dev_j in enumerate(devices): + if i == j: + continue + try: + can_access = dev_i.can_access_peer(dev_j) + print( + f"> Peer access from {dev_i.name} (GPU{i}) -> " + f"{dev_j.name} (GPU{j}) : {fmt_yes_no(can_access)}" + ) + except Exception as e: + print( + "Warning: Could not check peer access between " + f"device {i} and {j}: {e}" + ) + + +def query_devices(show_p2p=True): + """ + Query and display information about all CUDA devices. + + Parameters + ---------- + show_p2p : bool + Whether to show peer-to-peer access information (default: True) + + Returns + ------- + bool + True if successful, False otherwise + """ + try: + print("[CUDA Device Query using CUDA Core API]") + devices = Device.get_all_devices() + except Exception as e: + print(f"Error: Failed to get devices: {e}") + import traceback + + traceback.print_exc() + return False + + if len(devices) == 0: + print("There are no available device(s) that support CUDA") + return True + + print(f"Detected {len(devices)} CUDA Capable device(s)") + + for dev_id, device in enumerate(devices): + try: + print_device_info(dev_id, device) + except Exception as e: + print(f"Error: Failed to get information for device {dev_id}: {e}") + import traceback + + traceback.print_exc() + return False + + if show_p2p and len(devices) >= 2: + print_p2p_access_info(devices) + + return True + + +def main(): + """ + Main entry point for the device query sample. + """ + import argparse + + parser = argparse.ArgumentParser( + description="Query CUDA Device Properties using CUDA Core API", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--no-p2p", action="store_true", help="Skip peer-to-peer access information" + ) + + args = parser.parse_args() + + success = query_devices(show_p2p=not args.no_p2p) + + if success: + print("\nDone") + return 0 + else: + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/1_GettingStarted/deviceQuery/requirements.txt b/python/1_GettingStarted/deviceQuery/requirements.txt new file mode 100644 index 00000000..9da207a3 --- /dev/null +++ b/python/1_GettingStarted/deviceQuery/requirements.txt @@ -0,0 +1,4 @@ +# Device Query Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 diff --git a/python/1_GettingStarted/kernelNsysProfile/README.md b/python/1_GettingStarted/kernelNsysProfile/README.md new file mode 100644 index 00000000..2979fe8d --- /dev/null +++ b/python/1_GettingStarted/kernelNsysProfile/README.md @@ -0,0 +1,72 @@ +# Sample: Kernel Nsys Profiling - CUDA C++ Kernel Profiling with cuda.core (Python) + +## Description + +This sample demonstrates how to profile custom CUDA C++ kernels compiled and launched with `cuda.core` using NVIDIA Nsight Systems. It implements three GPU operations (vector addition, SAXPY, vector transform) as custom kernels and shows how to instrument code with NVTX markers for profiling analysis. + +## What you will learn + +- How to write and compile CUDA C++ kernels with `cuda.core.Program` +- How to launch kernels with `LaunchConfig` and manage CUDA streams +- How to use NVTX markers (`nvtx.annotate()`) to annotate code sections +- How to profile kernels with Nsight Systems and analyze performance +- Modern CUDA Python workflow with `cuda.core.Device` and proper resource cleanup + +## Requirements + +- NVIDIA GPU with Compute Capability 7.0+ +- CUDA Toolkit 13.0+ +- Python 3.10+ +- Packages: `numpy`, `cuda-python`, `cuda-core`, `cupy-cuda13x`, `nvtx` (see `requirements.txt`; NumPy >=2.3.2) + +**Install:** +```bash +pip install -r requirements.txt +``` + +## How to run + +```bash +python kernelNsysProfile.py +python kernelNsysProfile.py --array-size 10000000 # Custom size +``` + +## Nsys Profiling + +**Basic profile:** +```bash +nsys profile -o gpu_profile python kernelNsysProfile.py +nsys-ui gpu_profile.nsys-rep # View results +``` + +The program uses color-coded NVTX markers: +- **Purple**: Phase 2 (cuda.core Custom Kernels - main focus) +- **Yellow/Blue/Green**: Other phases +- **Cyan**: Nested operations + +Focus on Phase 2 to analyze kernel execution times, launch overhead, and GPU utilization. + +**For detailed Nsys usage and analysis techniques, see the [NVIDIA Nsight Systems documentation](https://docs.nvidia.com/nsight-systems/).** + +## Troubleshooting + +**Missing packages:** +```bash +pip install -r requirements.txt +``` + +**Out of memory:** +```bash +python kernelNsysProfile.py -n 10000000 # Reduce array size +``` + +**Nsys not found:** +```bash +export PATH=/usr/local/cuda/bin:$PATH +``` + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [NVIDIA Nsight Systems Documentation](https://docs.nvidia.com/nsight-systems/) +- [CuPy Documentation](https://docs.cupy.dev/) diff --git a/python/1_GettingStarted/kernelNsysProfile/kernelNsysProfile.py b/python/1_GettingStarted/kernelNsysProfile/kernelNsysProfile.py new file mode 100644 index 00000000..0c599175 --- /dev/null +++ b/python/1_GettingStarted/kernelNsysProfile/kernelNsysProfile.py @@ -0,0 +1,327 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Kernel Nsys Profiling Sample - CUDA C++ Kernel Profiling with cuda.core + +This sample demonstrates how to profile custom CUDA C++ kernels compiled and +launched with cuda.core using NVIDIA Nsight Systems. + +The sample implements three common GPU operations as custom CUDA C++ kernels: +- Vector addition: c = a + b +- SAXPY: y = alpha * x + y +- Vector transform: sqrt(x*x + 1) + sin(x) + +Use Nsight Systems to analyze: +- Custom kernel execution times +- Kernel launch patterns and overhead +- GPU utilization and memory access patterns +- NVTX markers for structured profiling + +Workflow: +- Phase 1: Create GPU arrays +- Phase 2: Compile and execute cuda.core custom kernels (profiling focus) +- Phase 3: Verify correctness with CuPy reference implementation +- Phase 4: Validate results +""" + +import argparse +import sys +from pathlib import Path + +try: + import cupy as cp + import numpy as np + import nvtx + from cuda.core import Device, LaunchConfig, launch +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + +# Add parent directory to path to import utilities +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result # noqa: E402 + +# CUDA C++ kernel definitions +# For larger projects, separating kernels into a separate file is also valid. +KERNELS_CODE = """ +template +__global__ void vector_add(const T* a, const T* b, T* c, size_t N) { + const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (size_t i = tid; i < N; i += gridDim.x * blockDim.x) { + c[i] = a[i] + b[i]; + } +} + +template +__global__ void saxpy(const T alpha, const T* x, T* y, size_t N) { + const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (size_t i = tid; i < N; i += gridDim.x * blockDim.x) { + y[i] = alpha * x[i] + y[i]; + } +} + +template +__global__ void vector_transform(const T* a, T* b, size_t N) { + const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (size_t i = tid; i < N; i += gridDim.x * blockDim.x) { + T val = a[i]; + b[i] = sqrt(val * val + T(1.0)) + sin(val); + } +} +""" + + +def get_cuda_core_kernels(device): + """ + Compile cuda.core kernels and return them. + + Args: + device: cuda.core.Device object + + Returns: + dict: Dictionary of compiled kernels + """ + from cuda.core import Program, ProgramOptions + + # Compile all kernels at once + program_options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + prog = Program(KERNELS_CODE, code_type="c++", options=program_options) + mod = prog.compile( + "cubin", + name_expressions=( + "vector_add", + "saxpy", + "vector_transform", + ), + ) + + # Extract individual kernels + return { + "vector_add": mod.get_kernel("vector_add"), + "saxpy": mod.get_kernel("saxpy"), + "vector_transform": mod.get_kernel("vector_transform"), + } + + +def run(size): + """Main execution function""" + + # ================================================================= + # Device Initialization using cuda.core + # ================================================================= + with nvtx.annotate("Device Initialization", color="green"): + try: + # Create device object (defaults to device 0) + dev = Device() + dev.set_current() + + print() + print(f"Device: {dev.name}") + print(f"Compute Capability: sm_{dev.arch}") + print() + + # Synchronize device + dev.sync() + + except Exception as e: + print("ERROR: CUDA initialization failed!") + print(f"Error: {e}") + sys.exit(1) + + print("Profiling cuda.core Custom Kernels") + print(f"Array size: {size:,}\n") + + # Constant for SAXPY operation + alpha = 2.5 + + # Initialize random seed + rng = cp.random.default_rng(42) + + # ================================================================= + # Phase 1: Create GPU Arrays with CuPy + # ================================================================= + with nvtx.annotate("Create GPU Arrays", color="yellow"): + a_gpu = rng.standard_normal(size, dtype=cp.float32) + b_gpu = rng.standard_normal(size, dtype=cp.float32) + dev.sync() + + print("Phase 1: Created arrays on GPU with CuPy") + print(f" Array shape: {a_gpu.shape}") + print(f" Array dtype: {a_gpu.dtype}") + print( + f" Array a - Mean: {float(cp.mean(a_gpu)):.4f}, " + f"Std: {float(cp.std(a_gpu)):.4f}" + ) + print( + f" Array b - Mean: {float(cp.mean(b_gpu)):.4f}, " + f"Std: {float(cp.std(b_gpu)):.4f}\n" + ) + + # ================================================================= + # Phase 2: cuda.core Custom Kernels on GPU + # ================================================================= + with nvtx.annotate("cuda.core Custom Kernels", color="purple"): + print("Phase 2: cuda.core custom CUDA C++ kernels on GPU") + + # Create a stream for cuda.core operations + stream = dev.create_stream() + try: + with nvtx.annotate("Compile Kernels", color="cyan"): + kernels_dict = get_cuda_core_kernels(dev) + stream.sync() + print("Compiled custom CUDA C++ kernels") + + # Prepare launch configuration + # Grid-stride loops in kernels handle any grid size robustly + block = 256 + grid = (size + block - 1) // block + config = LaunchConfig(grid=grid, block=block) + + # Execute cuda.core vector_add kernel + with nvtx.annotate("Vector Add (cuda.core)", color="cyan"): + c_cuda = cp.empty_like(a_gpu) + launch( + stream, + config, + kernels_dict["vector_add"], + a_gpu.data.ptr, + b_gpu.data.ptr, + c_cuda.data.ptr, + cp.uint64(size), + ) + stream.sync() + + # Execute cuda.core SAXPY kernel + with nvtx.annotate("SAXPY (cuda.core)", color="cyan"): + y_cuda = b_gpu.copy() + launch( + stream, + config, + kernels_dict["saxpy"], + np.float32(alpha), + a_gpu.data.ptr, + y_cuda.data.ptr, + cp.uint64(size), + ) + stream.sync() + + # Execute cuda.core vector_transform kernel + with nvtx.annotate("Vector Transform (cuda.core)", color="cyan"): + transform_cuda = cp.empty_like(a_gpu) + launch( + stream, + config, + kernels_dict["vector_transform"], + a_gpu.data.ptr, + transform_cuda.data.ptr, + cp.uint64(size), + ) + stream.sync() + + print("Vector Addition (custom kernel)") + print("SAXPY (custom kernel)") + print("Vector Transform (custom kernel)\n") + finally: + stream.close() + + # ================================================================= + # Phase 3: Generate Reference Results with CuPy (for verification) + # ================================================================= + with nvtx.annotate("Generate Reference Results", color="blue"): + print("Phase 3: Generate reference results for verification") + + with nvtx.annotate("Vector Add (Reference)", color="cyan"): + c_cupy = a_gpu + b_gpu + dev.sync() + + with nvtx.annotate("SAXPY (Reference)", color="cyan"): + y_cupy = alpha * a_gpu + b_gpu + dev.sync() + + with nvtx.annotate("Vector Transform (Reference)", color="cyan"): + transform_cupy = cp.sqrt(a_gpu * a_gpu + 1.0) + cp.sin(a_gpu) + dev.sync() + + print("Reference results generated\n") + + # ================================================================= + # Phase 4: Verify Kernel Correctness + # ================================================================= + with nvtx.annotate("Verification", color="green"): + print("Phase 4: Verify kernel correctness") + + # Verify custom kernels against reference results + # Use relaxed tolerances for single-precision float comparisons + # Small differences can occur due to instruction ordering and + # compiler optimizations + print(" Validating cuda.core kernels:") + + print(" Vector Add: ", end="") + vec_add_match = verify_array_result(c_cuda, c_cupy, rtol=1e-5, atol=1e-6) + + print(" SAXPY: ", end="") + saxpy_match = verify_array_result(y_cuda, y_cupy, rtol=1e-5, atol=1e-6) + + print(" Transform: ", end="") + transform_match = verify_array_result( + transform_cuda, transform_cupy, rtol=1e-5, atol=1e-6 + ) + + all_pass = vec_add_match and saxpy_match and transform_match + + if not all_pass: + print("\n ERROR: Kernel verification failed!") + return 1 + print() + + # Final synchronization + dev.sync() + print("The sample is complete PASSED!") + + +def main(): + parser = argparse.ArgumentParser( + description="Kernel Nsys Profiling - Profile custom CUDA C++ " + "kernels with cuda.core" + ) + parser.add_argument( + "-n", + "--array-size", + type=int, + default=50000, + metavar="N", + help="Array size (default: 50,000)", + ) + + args = parser.parse_args() + run(size=args.array_size) + + +if __name__ == "__main__": + main() diff --git a/python/1_GettingStarted/kernelNsysProfile/requirements.txt b/python/1_GettingStarted/kernelNsysProfile/requirements.txt new file mode 100644 index 00000000..8145c408 --- /dev/null +++ b/python/1_GettingStarted/kernelNsysProfile/requirements.txt @@ -0,0 +1,7 @@ +# Nsight System Kernels Profiling Sample - Requirements + +numpy>=2.3.2 +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 +nvtx diff --git a/python/1_GettingStarted/numpyVsCupy/README.md b/python/1_GettingStarted/numpyVsCupy/README.md new file mode 100644 index 00000000..28a42848 --- /dev/null +++ b/python/1_GettingStarted/numpyVsCupy/README.md @@ -0,0 +1,73 @@ +# Sample: Numpy vs. Cupy (Python) + +## Description + +This sample demonstrates performance comparison between NumPy (CPU) and CuPy (GPU) for matrix multiplication operations. It benchmarks the execution time of matrix dot products on both CPU and GPU, showing the performance benefits of GPU acceleration for numerical computations. + +## What you will learn + +- How to set up and use CuPy for GPU-accelerated numerical computations. +- How to benchmark NumPy vs CuPy performance for matrix operations. +- How to transfer data between CPU (NumPy) and GPU (CuPy) memory using `cp.asarray()`. +- How to use CUDA device management with the cuda-core library. +- How to validate computational results between CPU and GPU implementations using `np.testing.assert_allclose()`. +- How to handle GPU warmup to avoid first-run overhead in benchmarking. +- How to create and manage explicit CUDA streams with `device.create_stream()`. +- How to properly cleanup streams with `stream.close()` in try/finally blocks. +- How to access GPU device information (name, compute capability). +- How to create timing context managers for performance measurement using CUDA events. + +## Key libraries + +- `numpy` +- `cupy` +- `cuda-core` + +## Key APIs + +**From cuda.core:** +- `Device()` – Get CUDA device object for specific GPU +- `device.create_stream()` – Create explicit CUDA stream +- `stream.close()` – Close and cleanup stream resources + +## Requirements +1. **NVIDIA Graphics Card** with CUDA support +2. **CUDA Drivers** installed on your system +3. **CUDA Toolkit** installed on your system +4. **Python 3.12 or newer** + +**Install packages:** +```bash +pip install -r requirements.txt +``` + +## How to run + +Basic usage: +```bash +# Pre-steps: +export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH +# Run from the Python directory: +cd /path/to/numpyVsCupy/Python +python -m 1_GettingStarted.numpyVsCupy.numpyVsCupy +``` + +With custom parameters: +```bash +python -m 1_GettingStarted.numpyVsCupy.numpyVsCupy --n_size 5000 +``` + +### Command line arguments + +- `--n_size`, `-n`: Size of the matrix (n * n) for benchmarking (default: 4096) + +## Expected Output +``` +Validation PASSED: NumPy and CuPy results match within tolerance +Demo completed successfully! +``` + +## Files +- `numpyVsCupy.py` – Python implementation +- `README.md` – This file +- `requirements.txt` – Required packages diff --git a/python/1_GettingStarted/numpyVsCupy/numpyVsCupy.py b/python/1_GettingStarted/numpyVsCupy/numpyVsCupy.py new file mode 100644 index 00000000..9a51cfc2 --- /dev/null +++ b/python/1_GettingStarted/numpyVsCupy/numpyVsCupy.py @@ -0,0 +1,141 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import contextlib +import sys +import time +from pathlib import Path + +try: + import cupy as cp + import numpy as np + from cuda.core import Device, EventOptions +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + +# Add parent directory to path to import utilities +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result + + +@contextlib.contextmanager +def timer(message): + """CPU timing context manager.""" + start = time.time() + yield + end = time.time() + print(f"{message}: {(end - start):.6f} seconds") + + +@contextlib.contextmanager +def gpu_timer(message, stream): + """GPU timing context manager using cuda.core CUDA events.""" + event_options = EventOptions(enable_timing=True) + start_event = stream.record(options=event_options) + yield + end_event = stream.record(options=event_options) + end_event.sync() + + elapsed_time_ms = end_event - start_event # Returns milliseconds + elapsed_time_s = elapsed_time_ms / 1000.0 # Convert to seconds + print(f"{message}: {elapsed_time_s:.6f} seconds") + + +def warmup(): + # Pre-runs a simple GPU operation to avoid first-run overhead in benchmarking. + print("Warmup...") + a_cp = cp.ones((16, 16)) + b_cp = cp.ones((16, 16)) + result_cp = cp.dot(a_cp, b_cp) + return result_cp + + +def run(n): + # Benchmarks NumPy vs. CuPy matrix multiplication for n x n random arrays. + # Prints timing results. + + device = Device() # Use device 0 explicitly + device.set_current() + major, minor = device.compute_capability + print() + print(f"Device Name: {device.name}, SM: {major}.{minor}") + print() + + # Create explicit stream + stream = device.create_stream() + + try: + # Warm up GPU before measuring + warmup() + stream.sync() + + # Generate random matrices on CPU + a_np = np.random.rand(n, n) + b_np = np.random.rand(n, n) + + # NumPy dot product (CPU) + with timer(f"NumPy dot of {n}*{n} arrays"): + result_np = np.dot(a_np, b_np) + + # Transfer NumPy arrays to GPU (using events for timing) + with gpu_timer("Transfer arrays to GPU", stream): + a_cp = cp.asarray(a_np) + b_cp = cp.asarray(b_np) + + # CuPy dot product (GPU) - using events for accurate GPU timing + with gpu_timer(f"CuPy dot of {n}*{n} arrays", stream): + result_cp = cp.dot(a_cp, b_cp) + + print() + # Result validation + if not verify_array_result(result_np, result_cp.get()): + print( + "Validation FAILED: NumPy and CuPy results do not match " + "within tolerance" + ) + sys.exit(1) + + print("Validation PASSED: NumPy and CuPy results match within tolerance") + finally: + stream.close() + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--n_size", "-n", default=4096, type=int, help="Size of the matrix(n * n)." + ) + args = parser.parse_args() + run(args.n_size) + print("Demo completed successfully!") + + +if __name__ == "__main__": + main() diff --git a/python/1_GettingStarted/numpyVsCupy/requirements.txt b/python/1_GettingStarted/numpyVsCupy/requirements.txt new file mode 100644 index 00000000..bd5f9171 --- /dev/null +++ b/python/1_GettingStarted/numpyVsCupy/requirements.txt @@ -0,0 +1,7 @@ +# Numpy vs. Cupy - Requirements +# Install with: pip install -r requirements.txt + +numpy>=2.3.2 +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 diff --git a/python/1_GettingStarted/simplePrint/README.md b/python/1_GettingStarted/simplePrint/README.md new file mode 100644 index 00000000..72a7d01d --- /dev/null +++ b/python/1_GettingStarted/simplePrint/README.md @@ -0,0 +1,263 @@ +# simplePrint - Printing from CUDA Kernels + +## Description + +This sample demonstrates how to use `printf()` inside CUDA kernels using **two different approaches**: + +1. **CUDA C++ kernels** compiled with `cuda.core.Program` - Full C++ features and control +2. **Numba CUDA kernels** - Pythonic kernel authoring using `numba.cuda.grid()` for modern indexing + +The sample shows basic device management, kernel compilation with inline CUDA C++ code, and multi-dimensional kernel launches (2D grid × 3D blocks) using modern CUDA Python. The Numba example demonstrates the recommended `numba.cuda.grid()` indexing style while also showing how it relates to classic CUDA C++ block/thread IDs. Both approaches use `cuda.core` APIs for stream management and synchronization, demonstrating interoperability. + +This is the Python equivalent of the C++ `simplePrintf` sample, enhanced with Numba CUDA examples. + +## Key Concepts + +CUDA Python (cuda.core), Numba CUDA, Kernel Compilation, Printf in Kernels, Multi-dimensional Launch, Pythonic GPU Programming, Modern Thread Indexing (grid()), Stream-based Execution, cuda.core/Numba Interoperability + +## CUDA APIs involved + +### [cuda.core (cuda-python)](https://nvidia.github.io/cuda-python/) + +- `Device()` - Device management +- `Device.create_stream()` - Create CUDA streams +- `Stream.sync()` - Synchronize stream execution +- `Program()` - Compile CUDA C++ kernels +- `LaunchConfig()` - Configure kernel launch +- `launch()` - Execute kernels on streams + +### [Numba CUDA](https://nvidia.github.io/numba-cuda/) + +- `@cuda.jit` - JIT compile Python functions to CUDA kernels +- `cuda.grid()` - Get global thread position (recommended modern approach) +- `cuda.blockIdx`, `cuda.threadIdx` - Thread/block indices (classic style) +- `cuda.gridDim`, `cuda.blockDim` - Grid/block dimensions +- **Note:** Uses `cuda.core` APIs for stream management (interoperability) + +### CUDA Kernel Functions + +- `printf()` - Print from device code (C++) +- `print()` - Print from device code (Numba, limited formatting) +- `blockIdx`, `threadIdx` - Thread/block indices +- `gridDim`, `blockDim` - Grid/block dimensions + +### What You Learn + +- Device initialization with `cuda.core.Device` +- Compiling CUDA C++ kernels with `Program` and `ProgramOptions` +- Writing Pythonic CUDA kernels with Numba's `@cuda.jit` decorator +- Using `numba.cuda.grid()` for modern thread indexing (recommended approach) +- Understanding the relationship between global coordinates and classic block/thread IDs +- **Interoperability**: Using `cuda.core` streams with Numba CUDA kernels +- Comparing CUDA C++ vs Pythonic kernel authoring approaches +- Multi-dimensional kernel launches (2D grid, 3D blocks) +- Using streams for kernel execution and synchronization +- Using `printf()` and `print()` in GPU kernels for debugging +- Understanding print limitations in Numba CUDA (no f-strings) +- Proper error handling and resource management + +## Requirements + +### Hardware: + +- NVIDIA GPU with Compute Capability 7.0 or higher +- Minimum GPU memory: 512 MB + +### Software: + +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- `cuda-python` package (13.0+) +- `cuda-core` package (>=0.6.0) +- `numba-cuda` package (0.24.0+, for Pythonic kernel authoring) + +Download and install: +- [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads) +- [cuda-python package](https://nvidia.github.io/cuda-python/): `pip install cuda-python` +- [numba-cuda](https://nvidia.github.io/numba-cuda/): `pip install numba-cuda` + +## Build and Run + +```bash +# Install dependencies +pip install -r requirements.txt + +# Run the sample +python simplePrint.py +``` + +## Expected Output + +``` +Simple Print - Printing from CUDA Kernels +Demonstrating both CUDA C++ and Numba CUDA approaches + +Device: +Compute Capability: sm_ + +====================================================================== +METHOD 1: CUDA C++ Kernel (via cuda.core.Program) +====================================================================== +Advantage: Full C++ features, better for complex kernels + +Compiling CUDA C++ kernel... +Kernel compiled successfully. + +Kernel configuration: + Grid: (2, 2) + Block: (2, 2, 2) + Total threads: 32 + +Launching kernel with value=10. Output: + +[0, 0]: Value is: 10 +[0, 1]: Value is: 10 +[0, 2]: Value is: 10 +[0, 3]: Value is: 10 +[0, 4]: Value is: 10 +[0, 5]: Value is: 10 +[0, 6]: Value is: 10 +[0, 7]: Value is: 10 +[1, 0]: Value is: 10 +... +[3, 7]: Value is: 10 + +CUDA C++ kernel execution complete. + + +====================================================================== +METHOD 2: Numba CUDA Kernel (Pythonic / modern indexing) +====================================================================== +Advantage: Uses numba.cuda.grid(3) for global indexing, + while still showing classic CUDA C++ IDs for reference. + Uses cuda.core for stream management (interoperability). + +Kernel configuration: + Grid: (2, 2) + Block: (2, 2, 2) + Total threads: 32 + +Launching Numba CUDA kernel (grid(3) + classic IDs) with value=10: +Uses numba.cuda.grid(3) to get global (x, y, z), +and prints the corresponding blockId/threadId like the C++ sample. +Stream managed by cuda.core for consistency with C++ example. + +global[ 0 , 0 , 0 ] -> [ 0 , 0 ]: Value is: 10 +global[ 1 , 0 , 0 ] -> [ 0 , 1 ]: Value is: 10 +global[ 0 , 1 , 0 ] -> [ 0 , 2 ]: Value is: 10 +... +global[ 3 , 3 , 1 ] -> [ 3 , 7 ]: Value is: 10 + +Numba CUDA kernel execution complete. + +====================================================================== +Done! Both kernel approaches demonstrated successfully. +====================================================================== +``` + +## Understanding the Output + +- **Grid**: 2×2 = 4 blocks (labeled 0-3) +- **Block**: 2×2×2 = 8 threads per block (labeled 0-7) +- **Total**: 32 threads, each printing its position and value + +### CUDA C++ Kernel: +Each thread calculates: +- Block ID (linear): `blockIdx.y * gridDim.x + blockIdx.x` +- Thread ID (linear): `threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x` + +### Numba CUDA Kernel: +Each thread shows: +- **Global position** using `numba.cuda.grid(3)` → `(x, y, z)` coordinates across entire grid +- **Classic IDs** (block ID, thread ID) calculated the same way as C++ for comparison +- This demonstrates how modern indexing relates to traditional CUDA C++ style + +### Comparing the Two Approaches + +**CUDA C++ Kernel (Method 1):** +- Uses C++ syntax and `printf()` with full formatting control +- Requires compilation via `cuda.core.Program` +- Best for complex kernels needing C++ features (templates, libraries, etc.) +- Uses classic block/thread ID indexing +- Output: `[0, 0]: Value is: 10` (clean formatting) + +**Numba CUDA Kernel (Method 2):** +- Uses Python syntax with `@cuda.jit` decorator +- JIT compiled automatically when called +- Best for prototyping and simpler kernels +- **Modern indexing**: Uses `numba.cuda.grid(3)` to get global thread coordinates (recommended) +- Also shows classic block/thread IDs to help relate the two indexing models +- **Interoperability**: Uses `cuda.core` streams via `stream` for consistency +- Demonstrates that numba-cuda kernels can work seamlessly with cuda.core infrastructure +- Limited print formatting (no f-strings, basic `print()` only; adds spaces between arguments) +- Output: `global[ 0 , 0 , 0 ] -> [ 0 , 0 ]: Value is: 10` (shows both indexing styles; note extra spaces due to `print()` behavior) + +## Experiments + +Try modifying: + +### For Both Approaches: +- **Grid size**: Change `grid=(4, 4)` for 16 blocks +- **Block size**: Change `block=(4, 4, 4)` for 64 threads per block +- **Conditional printing**: Print only from specific threads (e.g., `if threadId == 0:`) + +### CUDA C++ Specific: +- **Format strings**: Experiment with different `printf()` formats +- **Kernel code**: Add complex C++ computations before printing +- **External libraries**: Include CUDA math libraries or device functions (e.g., ``, ``) + +### Numba CUDA Specific: +- **Grid indexing**: Try `numba.cuda.grid(1)` or `numba.cuda.grid(2)` for different dimensions +- **Conditional printing**: Print only from threads where `x == 0` or `y == z` +- **Python operations**: Use NumPy-like operations in the kernel +- **Device math libraries**: Use [nvmath-python device APIs](https://docs.nvidia.com/cuda/nvmath-python/latest/device-apis/index.html) for optimized math operations (similar to CUDA math libraries in C++) +- **Shared memory**: Add `numba.cuda.shared.array()` for fast inter-thread communication +- **Atomic operations**: Try `numba.cuda.atomic.add()` for thread-safe updates +- **Print variations**: Experiment with what numba-cuda's `print()` can and cannot handle +- **Streams**: Create multiple `cuda.core` streams and launch numba-cuda kernels on them concurrently +- **Interoperability**: Mix numba-cuda kernels and CUDA C++ kernels on the same stream + +## Notes + +### General: +- Printing from GPU is relatively slow - use sparingly in production code +- Printf output is buffered and limited (~1MB buffer on most GPUs) + +### CUDA C++ Kernels: +- Always call `stream.sync()` after kernel launch to flush printf output +- Full `printf()` format string support (%, flags, width, precision) + +### Numba CUDA Kernels: +- **Recommended**: Use `numba.cuda.grid(ndim)` for thread indexing (modern, Pythonic) + - `grid(1)` for 1D indexing, `grid(2)` for 2D, `grid(3)` for 3D + - Returns global thread position across the entire grid +- **Interoperability**: Use `cuda.core` streams with Numba kernels via `stream` + - Create streams: `stream = device.create_stream()` + - Launch kernels: `kernel[grid, block, stream](args)` + - Synchronize: `stream.sync()` +- Numba's `print()` has limited capabilities compared to Python's `print()` +- F-strings are NOT supported in Numba CUDA kernels +- Use comma-separated arguments: `print("Value:", x)` instead of f-strings +- **Note**: `print()` automatically adds spaces between comma-separated arguments (e.g., `print("[", x, "]")` outputs `[ 0 ]` not `[0]`) +- Always synchronize the stream to flush output + +## Files + +- `simplePrint.py` - Python implementation using cuda.core API +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## See Also + +### CUDA Python (cuda.core): +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/) +- [CUDA Python Examples](https://github.com/NVIDIA/cuda-python/tree/main/cuda_core/examples) + +### Numba CUDA: +- [Numba CUDA Documentation](https://nvidia.github.io/numba-cuda/) +- [numba.cuda.grid() Reference](https://nvidia.github.io/numba-cuda/reference/kernel.html#numba.cuda.grid) +- [nvmath-python Device APIs](https://docs.nvidia.com/cuda/nvmath-python/latest/device-apis/index.html) - Optimized math operations for Numba CUDA kernels + +### CUDA References: +- [CUDA C Programming Guide - Printf](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output) +- [C++ simplePrintf Sample](https://github.com/NVIDIA/cuda-samples/tree/master/Samples/0_Introduction/simplePrintf) diff --git a/python/1_GettingStarted/simplePrint/requirements.txt b/python/1_GettingStarted/simplePrint/requirements.txt new file mode 100644 index 00000000..7f53de68 --- /dev/null +++ b/python/1_GettingStarted/simplePrint/requirements.txt @@ -0,0 +1,7 @@ +# Simple Printf Sample - Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +# Numba JIT uses nvJitLink from pip; keep in step with cuda-bindings (e.g. 13.2.x). +nvidia-nvjitlink>=13.2.0 +numba-cuda>=0.24.0 diff --git a/python/1_GettingStarted/simplePrint/simplePrint.py b/python/1_GettingStarted/simplePrint/simplePrint.py new file mode 100644 index 00000000..44107bbf --- /dev/null +++ b/python/1_GettingStarted/simplePrint/simplePrint.py @@ -0,0 +1,287 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Simple Print - Printing from CUDA Kernels + +This sample demonstrates how to print output from CUDA kernels using printf(). +It shows: +1. Device management with cuda.core.Device +2. Compiling CUDA C++ code that uses printf() +3. Launching kernels with 2D grids and 3D blocks +4. Seeing kernel output printed to stdout +5. Using Numba CUDA for Pythonic kernel authoring + +This sample demonstrates both approaches: +- CUDA C++ kernels compiled via cuda.core.Program (more control, C++ features) +- Numba CUDA kernels (more Pythonic, easier to write) + +This is the Python equivalent of the C++ simplePrintf sample. +""" + +import sys +import traceback + +try: + from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + +try: + from numba import cuda as numba_cuda + + NUMBA_AVAILABLE = True +except ImportError: + NUMBA_AVAILABLE = False + print("Warning: numba not found. Numba CUDA example will be skipped.") + print("To install: pip install numba") + + +# CUDA C++ kernel with printf +# This kernel prints the block index, thread index, and a value from each thread +PRINTF_KERNEL = """ +extern "C" +__global__ void printKernel(int val) { + // Calculate linear block index from 2D grid + int blockId = blockIdx.y * gridDim.x + blockIdx.x; + + // Calculate linear thread index from 3D block + int threadId = threadIdx.z * blockDim.x * blockDim.y + + threadIdx.y * blockDim.x + + threadIdx.x; + + // Print from each thread + printf("[%d, %d]:\\t\\tValue is: %d\\n", blockId, threadId, val); +} +""" + + +# Numba CUDA kernel - Pythonic equivalent using numba.cuda.grid() +# This demonstrates the same functionality using Numba's Python-based kernel syntax +if NUMBA_AVAILABLE: + + @numba_cuda.jit + def numba_print_kernel(val): + """ + Numba CUDA kernel showing the *recommended* grid() indexing style, + while also relating it to the classic CUDA C++ blockId/threadId. + + - Primary view: global 3D coordinates from numba.cuda.grid(3) + (modern, Pythonic way to index work for a 3D thread layout). + - Secondary view: linear blockId / threadId matching the CUDA C++ + printf sample, to help CUDA C++ users connect the two models. + """ + # Modern / recommended view: global 3D thread coordinates + x, y, z = numba_cuda.grid(3) + + # Classic CUDA-style indices, same formulas as the C++ sample + block_id = numba_cuda.blockIdx.y * numba_cuda.gridDim.x + numba_cuda.blockIdx.x + + thread_id = ( + numba_cuda.threadIdx.z * numba_cuda.blockDim.x * numba_cuda.blockDim.y + + numba_cuda.threadIdx.y * numba_cuda.blockDim.x + + numba_cuda.threadIdx.x + ) + + # Print both views side-by-side + # Note: Numba print() adds spaces between comma-separated args + print( + "global[", + x, + ",", + y, + ",", + z, + "] -> [", + block_id, + ",", + thread_id, + "]:\t\tValue is:", + val, + ) + + +def run_cuda_cpp_kernel(device, test_value=10): + """ + Demonstrate printing from CUDA C++ kernel compiled with cuda.core. + + This approach gives you full access to CUDA C++ features and allows + for more complex kernel implementations. + """ + print("=" * 70) + print("METHOD 1: CUDA C++ Kernel (via cuda.core.Program)") + print("=" * 70) + print("Advantage: Full C++ features, better for complex kernels") + print() + + # Compile the kernel + print("Compiling CUDA C++ kernel...") + program_options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + prog = Program(PRINTF_KERNEL, code_type="c++", options=program_options) + mod = prog.compile("cubin", name_expressions=("printKernel",)) + kernel = mod.get_kernel("printKernel") + print("Kernel compiled successfully.\n") + + # Create stream for kernel execution + stream = device.create_stream() + + # Configure kernel launch + # Using 2D grid (2x2) and 3D blocks (2x2x2) + grid_x, grid_y = 2, 2 + block_x, block_y, block_z = 2, 2, 2 + + print("Kernel configuration:") + print(f" Grid: ({grid_x}, {grid_y})") + print(f" Block: ({block_x}, {block_y}, {block_z})") + print(f" Total threads: {grid_x * grid_y * block_x * block_y * block_z}") + print() + + # Launch configuration with 2D grid and 3D block + config = LaunchConfig(grid=(grid_x, grid_y), block=(block_x, block_y, block_z)) + + print(f"Launching kernel with value={test_value}. Output:\n") + try: + # Launch kernel + launch(stream, config, kernel, test_value) + + # Synchronize to ensure printf output is flushed + stream.sync() + + print("\nCUDA C++ kernel execution complete.") + except Exception as e: + print(f"\nError during kernel execution: {e}") + traceback.print_exc() + return 1 + finally: + # Cleanup + stream.close() + + return 0 + + +def run_numba_kernel(device, test_value=10): + """ + Demonstrate printing from a Numba CUDA kernel. + + This example uses numba.cuda.grid(3) as the primary indexing mechanism + (recommended modern style), and also prints the equivalent blockId / + threadId used in the CUDA C++ printf sample for side-by-side comparison. + + Uses cuda.core APIs for stream management, demonstrating interoperability + between Numba CUDA kernels and cuda.core infrastructure. + """ + print("\n") + print("=" * 70) + print("METHOD 2: Numba CUDA Kernel (Pythonic / modern indexing)") + print("=" * 70) + print("Advantage: Uses numba.cuda.grid(3) for global indexing,") + print(" while still showing classic CUDA C++ IDs for reference.") + print(" Uses cuda.core for stream management (interoperability).") + print() + + # Same launch configuration as the C++ version + grid_x, grid_y = 2, 2 + block_x, block_y, block_z = 2, 2, 2 + + print("Kernel configuration:") + print(f" Grid: ({grid_x}, {grid_y})") + print(f" Block: ({block_x}, {block_y}, {block_z})") + print(f" Total threads: {grid_x * grid_y * block_x * block_y * block_z}") + print() + + # Use cuda.core stream (same as C++ example) instead of numba.cuda.stream() + stream = device.create_stream() + + print(f"Launching Numba kernel (grid(3) + classic IDs) with value={test_value}:") + print("Uses numba.cuda.grid(3) to get global (x, y, z),") + print("and prints the corresponding blockId/threadId like the C++ sample.") + print("Stream managed by cuda.core for consistency with C++ example.\n") + + try: + # Launch Numba kernel on cuda.core stream + numba_print_kernel[(grid_x, grid_y), (block_x, block_y, block_z), stream]( + test_value + ) + + # Synchronize cuda.core stream (same as C++ example) + stream.sync() + print("\nNumba CUDA kernel execution complete.") + except Exception as e: + print(f"\nError during Numba kernel execution: {e}") + traceback.print_exc() + return 1 + finally: + # Cleanup + stream.close() + + return 0 + + +def main(): + """Main function demonstrating printing from CUDA kernels using both approaches""" + + print("Simple Print - Printing from CUDA Kernels") + print("Demonstrating both CUDA C++ and Numba CUDA approaches") + print() + # Initialize device + device = Device() + device.set_current() + + # Get device properties + print(f"Device: {device.name}") + print(f"Compute Capability: sm_{device.arch}") + print() + + # Value to pass to both kernels + test_value = 10 + + # Run CUDA C++ kernel + result = run_cuda_cpp_kernel(device, test_value) + if result != 0: + return result + + # Run Numba kernel if available + if NUMBA_AVAILABLE: + result = run_numba_kernel(device, test_value) + if result != 0: + return result + else: + print("\n" + "=" * 70) + print("Numba CUDA example skipped (numba not installed)") + print("To run the Numba example: pip install numba") + print("=" * 70) + + print("\n" + "=" * 70) + print("Done! Both kernel approaches demonstrated successfully.") + print("=" * 70) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/1_GettingStarted/systemInfo/README.md b/python/1_GettingStarted/systemInfo/README.md new file mode 100644 index 00000000..6e15f04f --- /dev/null +++ b/python/1_GettingStarted/systemInfo/README.md @@ -0,0 +1,152 @@ +# Sample: System Information Query (Python) + +## Description + +This sample demonstrates how to inspect the CUDA driver, NVML, and every +installed GPU through the +[`cuda.core.system`](https://nvidia.github.io/cuda-python/cuda-core/latest/) +module. + +`cuda.core.system` wraps the NVIDIA Management Library (NVML) and can be +imported without CUDA being installed or initialized, so it is useful as a +lightweight pre-flight check before any CUDA context is created. The script +prints driver and NVML versions, the current process name, per-device +metadata (name, compute capability, architecture, memory, PCI info, +temperature, performance state), and, on multi-GPU systems, the topology +and peer-to-peer capabilities between each pair of devices. + +## What You'll Learn + +- Querying CUDA driver and NVML versions with `cuda.core.system` +- Enumerating GPUs without creating a CUDA context +- Reading per-device metadata exposed by NVML (name, UUID, memory usage, + temperature, performance state) +- Inspecting GPU-to-GPU topology and peer-to-peer (P2P) capabilities + +## Key Libraries + +- [`cuda.core.system`](https://nvidia.github.io/cuda-python/cuda-core/latest/) - Python wrapper around NVML + +## Key APIs + +From `cuda.core.system`: + +- `get_driver_version()`, `get_driver_version_full()`, `get_driver_branch()` - CUDA driver version tuple and branch string +- `get_nvml_version()` - NVML library version +- `get_num_devices()` - number of GPUs visible to NVML +- `get_process_name(pid)` - process name for a given PID +- `Device(index=...)` - NVML-backed device handle (no CUDA context required) + - `name`, `uuid`, `cuda_compute_capability`, `arch`, `brand` + - `memory_info` (`total`, `used`, `free`) + - `pci_info` (`domain`, `bus`, `device`, `bus_id`) + - `temperature.sensor(TemperatureSensors.TEMPERATURE_GPU)` + - `performance_state` +- `get_topology_common_ancestor(dev0, dev1)` - `GpuTopologyLevel` between two devices +- `get_p2p_status(dev0, dev1, GpuP2PCapsIndex.P2P_CAPS_INDEX_READ)` - peer-access capability between two devices + +Import stable symbols from the top-level `cuda.core` package (not `cuda.core.experimental`). + +## Requirements + +1. **NVIDIA Graphics Card** with CUDA support +2. **CUDA Drivers** installed on your system +3. **CUDA Toolkit** installed on your system +4. **Python 3.12 or newer** + +### Hardware + +- One or more NVIDIA GPUs +- Driver compatible with `cuda-python` 13.x + +### Software + +- CUDA Toolkit 13.0 or newer (matches `cuda-python` 13.x) +- Python 3.10 or newer +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) + +## Installation + +Install the required packages from `requirements.txt`: + +```bash +cd /path/to/cuda-samples/python/1_GettingStarted/systemInfo +pip install -r requirements.txt +``` + +The `requirements.txt` installs: + +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) + +## How to Run + +### Basic usage + +```bash +cd cuda-samples/python/1_GettingStarted/systemInfo +python systemInfo.py +``` + +### Skip topology queries + +Useful on machines with only one GPU or to shorten the output: + +```bash +python systemInfo.py --no-topology +``` + +## Expected Output + +Output varies with your hardware. On a machine with two GPUs you should see +something like: + +``` +====================================================================== +Driver / NVML +====================================================================== +CUDA driver version: 13.2 +CUDA driver version (full): (13, 2, 0) +NVML version: (13, 595, 58, 3) +Driver branch: r595_88 +Current process: /usr/bin/python + +====================================================================== +Devices detected: 2 +====================================================================== + +-- Device 0 -- +Name: +UUID: ... +Compute capability: 8.9 +Architecture: ADA +Brand: BRAND_GEFORCE +Memory: total=23.99 GiB, used=960.00 KiB, free=23.52 GiB +PCI: domain=0000 bus=41 device=00 id=00000000:41:00.0 +Temperature (GPU sensor): 47 C +Performance state: + +... + +====================================================================== +GPU topology and peer-to-peer +====================================================================== +Device 0 <-> Device 1: topology=TOPOLOGY_HOSTBRIDGE, p2p_read=..., p2p_write=... + +Done +``` + +**Note:** Device names, compute capability, temperatures, and topology +details will vary based on your GPUs and system. + +## Files + +- `systemInfo.py` - Python implementation using `cuda.core.system` +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [`cuda.core.system` API reference](https://nvidia.github.io/cuda-python/cuda-core/latest/api.html) +- [NVML reference](https://docs.nvidia.com/deploy/nvml-api/) diff --git a/python/1_GettingStarted/systemInfo/requirements.txt b/python/1_GettingStarted/systemInfo/requirements.txt new file mode 100644 index 00000000..79fef8ab --- /dev/null +++ b/python/1_GettingStarted/systemInfo/requirements.txt @@ -0,0 +1,4 @@ +# System Information Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 diff --git a/python/1_GettingStarted/systemInfo/systemInfo.py b/python/1_GettingStarted/systemInfo/systemInfo.py new file mode 100644 index 00000000..dd4b28ec --- /dev/null +++ b/python/1_GettingStarted/systemInfo/systemInfo.py @@ -0,0 +1,215 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +System Information via cuda.core.system (NVML) + +Demonstrates the ``cuda.core.system`` module, which wraps NVIDIA Management +Library (NVML) functionality. + +This sample prints: + * Driver and NVML versions + * Current process name + * Per-device: name, UUID, compute capability / arch, PCI info, memory usage, + temperature, performance state + * GPU-to-GPU topology and peer-to-peer status (when more than one GPU) +""" + +import os +import sys + +try: + from cuda.core import system + from cuda.core.system import ( + CUDA_BINDINGS_NVML_IS_COMPATIBLE, + GpuP2PCapsIndex, + TemperatureSensors, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +def print_header(title: str) -> None: + print() + print("=" * 70) + print(title) + print("=" * 70) + + +def format_bytes(nbytes: int) -> str: + """Format a byte count as a human-readable string.""" + units = ["B", "KiB", "MiB", "GiB", "TiB"] + size = float(nbytes) + for unit in units: + if size < 1024.0: + return f"{size:.2f} {unit}" + size /= 1024.0 + return f"{size:.2f} PiB" + + +def print_driver_info() -> None: + print_header("Driver / NVML") + major, minor = system.get_driver_version() + print(f"CUDA driver version: {major}.{minor}") + print(f"CUDA driver version (full): {system.get_driver_version_full()}") + if CUDA_BINDINGS_NVML_IS_COMPATIBLE: + print(f"NVML version: {system.get_nvml_version()}") + try: + print(f"Driver branch: {system.get_driver_branch()}") + except Exception as e: # noqa: BLE001 - driver branch is informational + print(f"Driver branch: unavailable ({e})") + else: + print( + "NVML bindings are not compatible with this driver; " + "device info will be limited." + ) + print(f"Current process: {system.get_process_name(os.getpid())}") + + +def print_device_info(device: "system.Device") -> None: + print(f"\n-- Device {device.index} --") + print(f"Name: {device.name}") + print(f"UUID: {device.uuid}") + try: + cc_major, cc_minor = device.cuda_compute_capability + print(f"Compute capability: {cc_major}.{cc_minor}") + except Exception as e: # noqa: BLE001 + print(f"Compute capability: unavailable ({e})") + try: + print(f"Architecture: {device.arch.name}") + except Exception as e: # noqa: BLE001 + print(f"Architecture: unavailable ({e})") + try: + print(f"Brand: {device.brand.name}") + except Exception as e: # noqa: BLE001 + print(f"Brand: unavailable ({e})") + + # Memory + try: + mem = device.memory_info + print( + f"Memory: total={format_bytes(mem.total)}, " + f"used={format_bytes(mem.used)}, " + f"free={format_bytes(mem.free)}" + ) + except Exception as e: # noqa: BLE001 + print(f"Memory: unavailable ({e})") + + # PCI + try: + pci = device.pci_info + print( + f"PCI: domain={pci.domain:04x} bus={pci.bus:02x} " + f"device={pci.device:02x} id={pci.bus_id}" + ) + except Exception as e: # noqa: BLE001 + print(f"PCI: unavailable ({e})") + + # Temperature (GPU sensor) + try: + temp_c = device.temperature.sensor(TemperatureSensors.TEMPERATURE_GPU) + print(f"Temperature (GPU sensor): {temp_c} C") + except Exception as e: # noqa: BLE001 + print(f"Temperature: unavailable ({e})") + + # Performance state + try: + pstate = device.performance_state + print(f"Performance state: {pstate}") + except Exception as e: # noqa: BLE001 + print(f"Performance state: unavailable ({e})") + + +def print_topology(devices: list) -> None: + if len(devices) < 2: + return + print_header("GPU topology and peer-to-peer") + for i, d0 in enumerate(devices): + for d1 in devices[i + 1 :]: + try: + level = system.get_topology_common_ancestor(d0, d1) + level_name = level.name + except Exception as e: # noqa: BLE001 + level_name = f"unavailable ({e})" + try: + read = system.get_p2p_status( + d0, d1, GpuP2PCapsIndex.P2P_CAPS_INDEX_READ + ) + write = system.get_p2p_status( + d0, d1, GpuP2PCapsIndex.P2P_CAPS_INDEX_WRITE + ) + read_name = read.name + write_name = write.name + except Exception as e: # noqa: BLE001 + read_name = write_name = f"unavailable ({e})" + print( + f"Device {d0.index} <-> Device {d1.index}: " + f"topology={level_name}, p2p_read={read_name}, p2p_write={write_name}" + ) + + +def main() -> int: + import argparse + + parser = argparse.ArgumentParser( + description="Print CUDA system / NVML information via cuda.core.system" + ) + parser.add_argument( + "--no-topology", + action="store_true", + help="Skip cross-device topology/P2P queries", + ) + args = parser.parse_args() + + print_driver_info() + + num_devices = system.get_num_devices() + print_header(f"Devices detected: {num_devices}") + if num_devices == 0: + print("No CUDA-capable devices found.") + return 0 + if not CUDA_BINDINGS_NVML_IS_COMPATIBLE: + print( + "NVML is not compatible with the installed driver; skipping device detail." + ) + return 0 + + devices = [system.Device(index=i) for i in range(num_devices)] + for device in devices: + print_device_info(device) + + if not args.no_topology: + print_topology(devices) + + print("\nDone") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/1_GettingStarted/vectorAdd/README.md b/python/1_GettingStarted/vectorAdd/README.md new file mode 100644 index 00000000..6ceee879 --- /dev/null +++ b/python/1_GettingStarted/vectorAdd/README.md @@ -0,0 +1,130 @@ +# Sample: Vector Addition (Python) + +## Description + +Run your first GPU kernel: add two vectors element-wise on the GPU using the [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) API with runtime compilation. + +## What You'll Learn + +- Writing CUDA kernels in C++ with template support +- Runtime compilation of CUDA kernels from Python +- Using `cuda.core` for device management, programs, and launches +- Configuring and launching kernels with grid and block dimensions +- Using CuPy for GPU memory management +- Verifying GPU results against CPU computation + +## Key Libraries + +- [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) — Pythonic access to CUDA runtime and compilation +- `cupy` — GPU array library for Python + +## Key APIs + +### From `cuda.core` + +- `Device` — Initialize and manage CUDA device +- `Program` — Create program from kernel source code +- `ProgramOptions` — Set compilation options (C++ standard, architecture) +- `LaunchConfig` — Configure kernel launch parameters +- `launch` — Execute kernel on specified stream + +Import stable symbols from the top-level package (not `cuda.core.experimental`). See the [cuda.core documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/). + +### From CuPy + +- `cp.random.rand()` — Generate random arrays on GPU +- `cp.empty()` — Allocate uninitialized GPU arrays +- `cp.allclose()` — Verify results with tolerance + +### From `cuda_samples_utils` + +- `verify_array_result()` — Verify computation results + +## Kernel Techniques + +- **1D Grid-Stride Loop** — Handle arbitrary array sizes with fixed grid +- **Template Programming** — Generic kernel for different data types +- **Bounds Checking** — Prevent out-of-bounds memory access + +## Requirements + +### Hardware + +- NVIDIA GPU with Compute Capability 7.0 or higher +- Minimum GPU memory: 512 MB + +### Software + +- CUDA Toolkit 13.0 or newer (matches `cuda-python` 13.x) +- Python 3.10 or newer +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## Installation + +Install the required packages from requirements.txt: + +```bash +cd /path/to/cuda-samples/python/1_GettingStarted/vectorAdd +pip install -r requirements.txt +``` + +The requirements.txt installs: + +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## How to Run + +### Basic usage + +```bash +cd samples/python/1_GettingStarted/vectorAdd +python vectorAdd.py +``` + +### With custom parameters + +```bash +# Custom vector size +python vectorAdd.py --elements 1000000 + +# Use specific GPU +python vectorAdd.py --device 1 + +# Skip verification for benchmarking +python vectorAdd.py --no-verify +``` + +## Expected Output + +``` +[Vector addition using CUDA Core API] +Device: +Compute Capability: sm_ +Compiling kernel 'vectorAdd'... +Kernel compiled successfully +[Vector addition of 50000 elements] +CUDA kernel launch with 196 blocks of 256 threads +Verifying result... +Test PASSED + +Done +``` + +**Note:** Device name and compute capability will vary based on your GPU. + +## Files + +- `vectorAdd.py` — Python implementation using cuda.core API +- `README.md` — This file +- `requirements.txt` — Sample dependencies +- `../../Utilities/cuda_samples_utils.py` — Common utilities (imported by this sample) + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [cuda.core API](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [CuPy Documentation](https://docs.cupy.dev/) diff --git a/python/1_GettingStarted/vectorAdd/requirements.txt b/python/1_GettingStarted/vectorAdd/requirements.txt new file mode 100644 index 00000000..06b950e4 --- /dev/null +++ b/python/1_GettingStarted/vectorAdd/requirements.txt @@ -0,0 +1,5 @@ +# Vector Addition Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 diff --git a/python/1_GettingStarted/vectorAdd/vectorAdd.py b/python/1_GettingStarted/vectorAdd/vectorAdd.py new file mode 100755 index 00000000..8f3f7ff0 --- /dev/null +++ b/python/1_GettingStarted/vectorAdd/vectorAdd.py @@ -0,0 +1,196 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Vector Addition using CUDA Core API + +This sample demonstrates element-wise vector addition: C = A + B +using cuda.core for runtime compilation and kernel launch. +""" + +import sys +from pathlib import Path + +# Add parent directory to path to import utilities +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result # noqa: E402 + +try: + import cupy as cp + from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +# CUDA kernel source code +VECTOR_ADD_KERNEL = """ +/** + * CUDA Kernel for vector addition + * Computes the vector addition of A and B into C. + */ +template +__global__ void vectorAdd(const T *A, const T *B, T *C, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + C[i] = A[i] + B[i]; + } +} +""" + + +def vector_add_cuda_core(num_elements=50000, device_id=0, verify=True): + """ + Perform vector addition using cuda.core API. + + Parameters + ---------- + num_elements : int + Number of elements in each vector + device_id : int + CUDA device ID to use + verify : bool + Whether to verify the result + + Returns + ------- + bool + True if successful, False otherwise + """ + try: + # Initialize device + print("[Vector addition using CUDA Core API]") + device = Device(device_id) + device.set_current() + + print(f"Device: {device.name}") + print(f"Compute Capability: sm_{device.arch}") + + stream = device.create_stream() + + # Compile kernel + print("Compiling kernel 'vectorAdd'...") + program_options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + program = Program(VECTOR_ADD_KERNEL, code_type="c++", options=program_options) + module = program.compile("cubin", name_expressions=("vectorAdd",)) + kernel = module.get_kernel("vectorAdd") + print("Kernel compiled successfully") + + # Allocate and initialize vectors + print(f"[Vector addition of {num_elements} elements]") + dtype = cp.float32 + + a = cp.random.rand(num_elements).astype(dtype) + b = cp.random.rand(num_elements).astype(dtype) + c = cp.empty(num_elements, dtype=dtype) + + # Synchronize before kernel launch + device.sync() + + # Configure and launch kernel + threads_per_block = 256 + blocks_per_grid = (num_elements + threads_per_block - 1) // threads_per_block + + print( + f"CUDA kernel launch with {blocks_per_grid} blocks " + f"of {threads_per_block} threads" + ) + + config = LaunchConfig(grid=blocks_per_grid, block=threads_per_block) + + # Launch kernel + launch( + stream, + config, + kernel, + a.data.ptr, + b.data.ptr, + c.data.ptr, + cp.int32(num_elements), + ) + stream.sync() + + # Verify result + if verify: + print("Verifying result...") + expected = a + b + if not verify_array_result(c, expected): + return False + + return True + + except Exception as e: + print(f"Error: {e}") + import traceback + + traceback.print_exc() + return False + + +def main(): + """ + Main entry point for the vector addition sample. + """ + import argparse + + parser = argparse.ArgumentParser(description="Vector Addition using CUDA Core API") + parser.add_argument( + "--elements", + type=int, + default=50000, + help="Number of elements in vectors (default: 50000)", + ) + parser.add_argument( + "--device", type=int, default=0, help="CUDA device ID (default: 0)" + ) + parser.add_argument( + "--no-verify", action="store_true", help="Skip result verification" + ) + + args = parser.parse_args() + + if args.elements <= 0: + print("Error: Number of elements must be positive") + return 1 + + success = vector_add_cuda_core( + num_elements=args.elements, device_id=args.device, verify=not args.no_verify + ) + + if success: + print("\nDone") + return 0 + else: + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/2_CoreConcepts/blockwiseSum/README.md b/python/2_CoreConcepts/blockwiseSum/README.md new file mode 100644 index 00000000..19fcc922 --- /dev/null +++ b/python/2_CoreConcepts/blockwiseSum/README.md @@ -0,0 +1,102 @@ +# Sample: Block-wise Array Sum (Python) + +## Description + +Demonstrates fundamental CUDA thread cooperation: thread/block indexing, strided loops, and block-wise reduction using shared memory. This sample shows three progressively complex kernel patterns using the **cuda.core API**: + +1. **Simple indexing** - One thread per element +2. **Strided loop** - Each thread processes multiple elements +3. **Block partial sum** - Shared memory reduction within each block + +## What You'll Learn + +- How to calculate global thread ID from block and thread indices +- Strided loop pattern for processing arrays larger than grid size +- Block-level cooperation using shared memory and `__syncthreads()` + +## Key Concepts + +### Thread and Block Indexing + +``` +Global Thread ID = blockIdx.x * blockDim.x + threadIdx.x +Stride = blockDim.x * gridDim.x +``` + +### Strided Loop Pattern + +Each thread processes multiple elements, enabling fixed grid size for arbitrary array lengths: + +```c +for (size_t i = tid; i < N; i += stride) { + output[i] = input[i] * 2.0f; +} +``` + +## Key APIs + +### From `cuda.core`: + +- `Device` - Device management and context +- `Program` - Compile CUDA C++ kernels +- `ProgramOptions` - Kernel compilation options (architecture target) +- `LaunchConfig` - Configure grid/block dimensions and shared memory +- `launch()` - Execute kernel +- `EventOptions` - GPU timing configuration + +### From CuPy: + +- `cp.asarray()` - Transfer data to GPU +- `cp.zeros_like()` - Allocate GPU arrays + +## Requirements + +### Hardware: + +- NVIDIA GPU with CUDA support + +### Software: + +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- See `requirements.txt` for Python packages + +## Installation + +```bash +pip install -r requirements.txt +``` + +## How to Run + +```bash +python blockwiseSum.py +``` + +## Expected Output + +``` +Device: +Compute Capability: sm_XX +Array size: 1,048,576 elements + +Simple indexing: Test PASSED +Strided loop: Test PASSED +Block-wise sum: Test PASSED + +Kernel time: X.XXX ms, Bandwidth: XXX.X GB/s + +Done +``` + +## Files + +- `blockwiseSum.py` - Python implementation with CUDA kernels +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## See Also + +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [CUDA Shared Memory](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory) +- [CuPy Documentation](https://docs.cupy.dev/) diff --git a/python/2_CoreConcepts/blockwiseSum/blockwiseSum.py b/python/2_CoreConcepts/blockwiseSum/blockwiseSum.py new file mode 100644 index 00000000..8471bd14 --- /dev/null +++ b/python/2_CoreConcepts/blockwiseSum/blockwiseSum.py @@ -0,0 +1,259 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Block-wise Array Sum with Threaded Access + +Demonstrates thread/block indexing, strided loops, and block-wise reduction. + +Key Concepts: + Global Thread ID = blockIdx.x * blockDim.x + threadIdx.x + Stride = blockDim.x * gridDim.x +""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result + +try: + import cupy as cp + import numpy as np + from cuda.core import ( + Device, + EventOptions, + LaunchConfig, + Program, + ProgramOptions, + launch, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Install with: pip install -r requirements.txt") + sys.exit(1) + + +KERNELS_CODE: str = r""" +// Each thread processes one element +extern "C" __global__ +void simple_indexing(const float* input, float* output, size_t N) { + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid < N) { + output[tid] = input[tid] * 2.0f; + } +} + +// Each thread processes multiple elements via strided access +extern "C" __global__ +void strided_loop(const float* input, float* output, size_t N) { + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = (size_t)blockDim.x * gridDim.x; + for (size_t i = tid; i < N; i += stride) { + output[i] = input[i] * 2.0f; + } +} + +// Block-wise partial sum with shared memory reduction +extern "C" __global__ +void block_partial_sum(const float* input, float* partial_sums, size_t N) { + extern __shared__ float sdata[]; + + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int local_tid = threadIdx.x; + size_t stride = (size_t)blockDim.x * gridDim.x; + + // Each thread accumulates multiple elements (strided) + float sum = 0.0f; + for (size_t i = tid; i < N; i += stride) { + sum += input[i]; + } + sdata[local_tid] = sum; + __syncthreads(); + + // Block-level tree reduction + for (int s = blockDim.x / 2; s > 0; s >>= 1) { + if (local_tid < s) { + sdata[local_tid] += sdata[local_tid + s]; + } + __syncthreads(); + } + + if (local_tid == 0) { + partial_sums[blockIdx.x] = sdata[0]; + } +} +""" + + +def run_sample(num_elements: int = 1024 * 1024, device_id: int = 0) -> bool: + """ + Run block-wise sum demonstration. + + Parameters + ---------- + num_elements : int + Number of array elements + device_id : int + CUDA device ID + + Returns + ------- + bool + True if all tests passed + """ + threads_per_block = 256 + num_blocks = 64 + + device = Device(device_id) + device.set_current() + stream = device.create_stream() + + arch = f"sm_{device.arch}" + print(f"Device: {device.name}") + print(f"Compute Capability: {arch}") + print(f"Array size: {num_elements:,} elements\n") + + try: + # Make CuPy use our stream + cp.cuda.ExternalStream(int(stream.handle)).use() + + # Compile kernels + program = Program( + KERNELS_CODE, code_type="c++", options=ProgramOptions(arch=arch) + ) + module = program.compile(target_type="cubin") + kernel_simple = module.get_kernel("simple_indexing") + kernel_strided = module.get_kernel("strided_loop") + kernel_sum = module.get_kernel("block_partial_sum") + + # Test data + h_input = np.arange(num_elements, dtype=np.float32) + d_input = cp.asarray(h_input) + d_output = cp.zeros_like(d_input) + expected = cp.asarray(h_input * 2.0) + + # Demo 1: Simple indexing (1 thread = 1 element) + full_blocks = (num_elements + threads_per_block - 1) // threads_per_block + config = LaunchConfig(grid=full_blocks, block=threads_per_block) + launch( + stream, + config, + kernel_simple, + d_input.data.ptr, + d_output.data.ptr, + cp.uint64(num_elements), + ) + stream.sync() + print("Simple indexing: ", end="") + test1 = verify_array_result(d_output, expected) + + # Demo 2: Strided loop (threads process multiple elements) + d_output.fill(0) + config = LaunchConfig(grid=num_blocks, block=threads_per_block) + launch( + stream, + config, + kernel_strided, + d_input.data.ptr, + d_output.data.ptr, + cp.uint64(num_elements), + ) + stream.sync() + print("Strided loop: ", end="") + test2 = verify_array_result(d_output, expected) + + # Demo 3: Block-wise sum with shared memory + d_ones = cp.ones(num_elements, dtype=cp.float32) + d_partial = cp.zeros(num_blocks, dtype=cp.float32) + shared_mem = threads_per_block * 4 + + config = LaunchConfig( + grid=num_blocks, block=threads_per_block, shmem_size=shared_mem + ) + launch( + stream, + config, + kernel_sum, + d_ones.data.ptr, + d_partial.data.ptr, + cp.uint64(num_elements), + ) + stream.sync() + + # Each block sums num_elements/num_blocks elements (strided access). + # Requires num_elements % num_blocks == 0 for correct expected values. + assert ( + num_elements % num_blocks == 0 + ), "num_elements must be divisible by num_blocks for block_partial_sum" + expected_partial = cp.full( + num_blocks, num_elements / num_blocks, dtype=cp.float32 + ) + print("Block-wise sum: ", end="") + test3 = verify_array_result(d_partial, expected_partial) + + # Performance timing + event_opts = EventOptions(enable_timing=True) + iterations = 100 + + stream.sync() + start = stream.record(options=event_opts) + for _ in range(iterations): + launch( + stream, + config, + kernel_sum, + d_ones.data.ptr, + d_partial.data.ptr, + cp.uint64(num_elements), + ) + end = stream.record(options=event_opts) + end.sync() + + time_ms = (end - start) / iterations + bandwidth = (num_elements * 4) / (time_ms * 1e6) + print(f"\nKernel time: {time_ms:.3f} ms, Bandwidth: {bandwidth:.1f} GB/s") + + return test1 and test2 and test3 + + finally: + # Explicit resource cleanup + cp.cuda.Stream.null.use() + stream.close() + + +def main() -> None: + """Entry point.""" + success = run_sample() + if success: + print("\nDone") + else: + print("\nSome tests failed") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/blockwiseSum/requirements.txt b/python/2_CoreConcepts/blockwiseSum/requirements.txt new file mode 100644 index 00000000..f7bdef83 --- /dev/null +++ b/python/2_CoreConcepts/blockwiseSum/requirements.txt @@ -0,0 +1,6 @@ +# Block-wise Array Sum Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/cudaGraphs/README.md b/python/2_CoreConcepts/cudaGraphs/README.md new file mode 100644 index 00000000..60f1d8ba --- /dev/null +++ b/python/2_CoreConcepts/cudaGraphs/README.md @@ -0,0 +1,140 @@ +# cudaGraphs (Python) + +## Description + +This sample demonstrates how to capture a multi-stage kernel pipeline as a +CUDA graph with `cuda.core` and replay it with a single driver call. + +The sample runs a three-stage elementwise pipeline +`r3 = (a + b) * c - a` in two modes: + +1. **Individual launches** - one `launch(stream, ...)` per stage, repeated + for every iteration of the pipeline. +2. **CUDA graph replay** - the same three launches are recorded into a + `Graph` once and replayed with `graph.launch(stream)` on each + iteration. + +Both paths are timed over N iterations and their results are verified +against a reference computation. The sample also re-launches the graph +after mutating the input buffers to show that the graph captures +pointers (not data), so the same graph can process new inputs without +rebuilding. + +## What You'll Learn + +- Creating a `GraphBuilder` from a stream with `stream.create_graph_builder()` +- Capturing launches with `begin_building()` and `end_building()` +- Completing a graph with `builder.complete()` and uploading it to a stream +- Replaying the graph with `graph.launch(stream)` +- Measuring the launch-overhead savings for small kernels +- Re-running the same graph against updated input data + +## Key Libraries + +- [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) - Pythonic access to CUDA runtime, programs, and graphs +- `cupy` - input buffers and result verification +- `numpy` - scalar kernel arguments + +## Key APIs + +### From `cuda.core` + +- `Stream.create_graph_builder()` - obtain a `GraphBuilder` +- `GraphBuilder.begin_building()` / `end_building()` - begin and finish recording launches issued against the builder +- `GraphBuilder.complete()` - produce an executable `Graph` +- `Graph.upload(stream)` - upload the graph structure to the device +- `Graph.launch(stream)` - replay the entire graph +- `launch(graph_builder, config, kernel, ...)` - record a kernel launch into the graph being built + +### From `cuda_samples_utils` + +- `print_gpu_info()` - print device name and compute capability + +## Requirements + +### Hardware + +- NVIDIA GPU with Compute Capability 7.0 or higher +- Minimum GPU memory: 512 MB + +### Software + +- CUDA Toolkit 13.0 or newer (matches `cuda-python` 13.x) +- Python 3.10 or newer +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## Installation + +Install the required packages from `requirements.txt`: + +```bash +cd /path/to/cuda-samples/python/2_CoreConcepts/cudaGraphs +pip install -r requirements.txt +``` + +The `requirements.txt` installs: + +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## How to Run + +### Basic usage + +```bash +cd cuda-samples/python/2_CoreConcepts/cudaGraphs +python cudaGraphs.py +``` + +### With custom parameters + +```bash +# Larger vectors and more iterations +python cudaGraphs.py --elements 4096 --iters 2000 + +# Use a specific GPU +python cudaGraphs.py --device 1 +``` + +Short vectors exaggerate the launch-overhead savings; larger vectors +will show the two approaches converging because per-launch overhead +becomes negligible next to kernel runtime. + +## Expected Output + +Speedup numbers vary with GPU and host CPU. + +``` +Device: +Compute Capability: + +Individual launches: 1000 iters in 0.0085s (8.49 us/iter) + +Building CUDA graph... +Graph replay: 1000 iters in 0.0034s (3.41 us/iter) +Graph speedup: 2.49x + +Graph replay on updated data verified (same graph, new buffer contents) + +Done +``` + +**Note:** Device name, compute capability, and speedup will vary based on +your GPU and host CPU. + +## Files + +- `cudaGraphs.py` - Python implementation using `cuda.core` CUDA graphs +- `README.md` - This file +- `requirements.txt` - Sample dependencies +- `../../Utilities/cuda_samples_utils.py` - Common utilities (imported by this sample) + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [`cuda.core` graphs API](https://nvidia.github.io/cuda-python/cuda-core/latest/api.html#cuda-graphs) +- Upstream `cuda.core` example: [`cuda_graphs.py`](https://github.com/NVIDIA/cuda-python/blob/main/cuda_core/examples/cuda_graphs.py) +- [CUDA Graphs programming guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#cuda-graphs) diff --git a/python/2_CoreConcepts/cudaGraphs/cudaGraphs.py b/python/2_CoreConcepts/cudaGraphs/cudaGraphs.py new file mode 100644 index 00000000..204bc4f9 --- /dev/null +++ b/python/2_CoreConcepts/cudaGraphs/cudaGraphs.py @@ -0,0 +1,266 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +CUDA Graphs with cuda.core + +CUDA graphs let you record a DAG of operations once, then replay the entire +graph with a single driver call. For workflows that issue many small kernels +this can significantly reduce CPU-side launch overhead. + +This sample runs a three-stage elementwise pipeline (add -> multiply -> +subtract) in two modes: + + 1. Individually launched kernels on a stream. + 2. A single CUDA graph that captures the same three launches and is + replayed with ``graph.launch(stream)``. + +We then measure the wall-clock time of each mode across many iterations to +illustrate the graph replay advantage for short kernels, and demonstrate that +a graph can be relaunched against new data (the pointers are baked in, but +the contents of those buffers are not). +""" + +import sys +import time +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import cupy as cp + import numpy as np + from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch + from cuda_samples_utils import print_gpu_info # noqa: E402 +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +PIPELINE_KERNELS = r""" +extern "C" __global__ +void vec_add(const float* A, const float* B, float* C, size_t N) { + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = (size_t)gridDim.x * blockDim.x; + for (size_t i = tid; i < N; i += stride) C[i] = A[i] + B[i]; +} + +extern "C" __global__ +void vec_mul(const float* A, const float* B, float* C, size_t N) { + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = (size_t)gridDim.x * blockDim.x; + for (size_t i = tid; i < N; i += stride) C[i] = A[i] * B[i]; +} + +extern "C" __global__ +void vec_sub(const float* A, const float* B, float* C, size_t N) { + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = (size_t)gridDim.x * blockDim.x; + for (size_t i = tid; i < N; i += stride) C[i] = A[i] - B[i]; +} +""" + + +def run_pipeline_individual(stream, kernels, config, buffers, size, n_iters): + """Run the 3-stage pipeline `n_iters` times with one launch per stage.""" + add_k, mul_k, sub_k = kernels + a, b, c, r1, r2, r3 = buffers + stream.sync() + t0 = time.perf_counter() + for _ in range(n_iters): + launch( + stream, config, add_k, a.data.ptr, b.data.ptr, r1.data.ptr, np.uint64(size) + ) + launch( + stream, config, mul_k, r1.data.ptr, c.data.ptr, r2.data.ptr, np.uint64(size) + ) + launch( + stream, config, sub_k, r2.data.ptr, a.data.ptr, r3.data.ptr, np.uint64(size) + ) + stream.sync() + return time.perf_counter() - t0 + + +def build_graph(stream, kernels, config, buffers, size): + """Capture the 3-stage pipeline into a CUDA graph and return it.""" + add_k, mul_k, sub_k = kernels + a, b, c, r1, r2, r3 = buffers + + graph_builder = stream.create_graph_builder() + graph_builder.begin_building() + launch( + graph_builder, + config, + add_k, + a.data.ptr, + b.data.ptr, + r1.data.ptr, + np.uint64(size), + ) + launch( + graph_builder, + config, + mul_k, + r1.data.ptr, + c.data.ptr, + r2.data.ptr, + np.uint64(size), + ) + launch( + graph_builder, + config, + sub_k, + r2.data.ptr, + a.data.ptr, + r3.data.ptr, + np.uint64(size), + ) + graph_builder.end_building() + graph = graph_builder.complete() + graph.upload(stream) + return graph_builder, graph + + +def run_pipeline_graph(stream, graph, n_iters): + """Launch the compiled graph `n_iters` times.""" + stream.sync() + t0 = time.perf_counter() + for _ in range(n_iters): + graph.launch(stream) + stream.sync() + return time.perf_counter() - t0 + + +def main() -> int: + import argparse + + parser = argparse.ArgumentParser(description="CUDA Graphs demo with cuda.core") + parser.add_argument( + "--elements", + type=int, + default=1 << 12, + help="Elements per vector (default: 4096 - small to emphasize launch overhead)", + ) + parser.add_argument( + "--iters", + type=int, + default=1000, + help="Number of pipeline iterations to time (default: 1000)", + ) + parser.add_argument("--device", type=int, default=0, help="CUDA device id") + args = parser.parse_args() + + device = Device(args.device) + device.set_current() + print_gpu_info(device) + + stream = device.create_stream() + # Tell CuPy to order its allocations on our stream so buffer initialization + # below is serialized with the kernels we launch. + cp.cuda.ExternalStream(int(stream.handle)).use() + + graph_builder = graph = None + try: + program_options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + program = Program(PIPELINE_KERNELS, code_type="c++", options=program_options) + module = program.compile("cubin") + add_k = module.get_kernel("vec_add") + mul_k = module.get_kernel("vec_mul") + sub_k = module.get_kernel("vec_sub") + kernels = (add_k, mul_k, sub_k) + + N = args.elements + rng = cp.random.default_rng(seed=0) + a = rng.random(N, dtype=cp.float32) + b = rng.random(N, dtype=cp.float32) + c = rng.random(N, dtype=cp.float32) + r1 = cp.empty_like(a) + r2 = cp.empty_like(a) + r3 = cp.empty_like(a) + buffers = (a, b, c, r1, r2, r3) + + expected = (a + b) * c - a + + config = LaunchConfig(grid=(N + 255) // 256, block=256) + device.sync() + + # Warm up compilation/caches, then measure individual launches. + run_pipeline_individual(stream, kernels, config, buffers, N, n_iters=5) + t_individual = run_pipeline_individual( + stream, kernels, config, buffers, N, n_iters=args.iters + ) + assert cp.allclose(r3, expected, rtol=1e-5, atol=1e-5), ( + "Individual pipeline produced incorrect results" + ) + print( + f"\nIndividual launches: {args.iters} iters in {t_individual:.4f}s" + f" ({t_individual * 1e6 / args.iters:.2f} us/iter)" + ) + + # Capture the same pipeline as a graph and measure the replay. + print("\nBuilding CUDA graph...") + graph_builder, graph = build_graph(stream, kernels, config, buffers, N) + + run_pipeline_graph(stream, graph, n_iters=5) # warm up + t_graph = run_pipeline_graph(stream, graph, n_iters=args.iters) + assert cp.allclose(r3, expected, rtol=1e-5, atol=1e-5), ( + "Graph pipeline produced incorrect results" + ) + print( + f"Graph replay: {args.iters} iters in {t_graph:.4f}s" + f" ({t_graph * 1e6 / args.iters:.2f} us/iter)" + ) + if t_graph > 0: + print(f"Graph speedup: {t_individual / t_graph:.2f}x") + + # Demonstrate that the graph replays against current buffer contents. + a[:] = cp.ones(N, dtype=cp.float32) + b[:] = cp.full(N, 2.0, dtype=cp.float32) + c[:] = cp.full(N, 3.0, dtype=cp.float32) + device.sync() + # r3 = (a + b) * c - a = (1 + 2) * 3 - 1 = 8 + graph.launch(stream) + stream.sync() + assert cp.allclose(r3, 8.0), "Graph replay with new data produced wrong result" + print( + "\nGraph replay on updated data verified (same graph, new buffer contents)" + ) + + print("\nDone") + return 0 + finally: + if graph is not None: + graph.close() + if graph_builder is not None: + graph_builder.close() + stream.close() + cp.cuda.Stream.null.use() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/2_CoreConcepts/cudaGraphs/requirements.txt b/python/2_CoreConcepts/cudaGraphs/requirements.txt new file mode 100644 index 00000000..063b924f --- /dev/null +++ b/python/2_CoreConcepts/cudaGraphs/requirements.txt @@ -0,0 +1,5 @@ +# CUDA Graphs Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 diff --git a/python/2_CoreConcepts/fftSignalAnalysis/README.md b/python/2_CoreConcepts/fftSignalAnalysis/README.md new file mode 100644 index 00000000..a90816b4 --- /dev/null +++ b/python/2_CoreConcepts/fftSignalAnalysis/README.md @@ -0,0 +1,136 @@ +# Sample: FFT Signal Analysis (Python) + +## Description + +Analyze signal frequencies using Fast Fourier Transform (FFT) on the GPU. This sample demonstrates CuPy's cuFFT for GPU-accelerated frequency analysis: generating composite signals, computing magnitude spectrum, detecting dominant frequencies via peak detection, and comparing GPU vs CPU FFT performance. + +## What You'll Learn + +- Using CuPy's `cp.fft.rfft()` for real-to-complex FFT on GPU +- Computing magnitude spectrum from FFT results +- Peak detection to identify dominant frequencies +- Comparing GPU (cuFFT) vs CPU (NumPy) FFT performance +- Uses cuda.core APIs for device management and CUDA event timing + +## Key Concepts + +- **FFT (Fast Fourier Transform)**: Efficiently computes the Discrete Fourier Transform +- **Magnitude Spectrum**: `|FFT(signal)| * 2 / N` gives amplitude at each frequency +- **rfft**: Real FFT - optimized for real-valued input signals +- **Peak Detection**: Finding local maxima to identify dominant frequencies + +### Stream Interop + +This sample demonstrates CuPy integration with cuda.core streams: + +```python +# Create stream with cuda.core +stream = device.create_stream() + +# Use with CuPy operations +cp.cuda.ExternalStream(int(stream.handle)).use() +``` + +## Key APIs + +### From `cuda.core`: + +- `Device` - Device management and context +- `EventOptions` - Configure events for GPU timing +- `stream.record()` - Record events for timing + +### From CuPy: + +- `cp.fft.rfft()` - Real-to-complex FFT (GPU-accelerated via cuFFT) +- `cp.fft.rfftfreq()` - Generate frequency bins for rfft +- `cp.cuda.ExternalStream()` - Interop with cuda.core streams + +### From NumPy: + +- `np.fft.rfft()` - CPU FFT for comparison + +## Requirements + +### Hardware: + +- NVIDIA GPU with CUDA support + +### Software: + +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- See `requirements.txt` for Python packages + +## Installation + +```bash +pip install -r requirements.txt +``` + +## How to Run + +```bash +python fftSignalAnalysis.py +``` + +## Expected Output + +``` +============================================================ +FFT Signal Analysis +============================================================ + +Device: +Compute Capability: sm_XX + +Signal Parameters: + Samples: 1,048,576 + Sample Rate: 44,100 Hz + ... + +------------------------------------------------------------ +GPU FFT (cuFFT) +------------------------------------------------------------ +Time: X.XXX ms + +Detected Frequencies: + 440.0 Hz (magnitude: X.XXXX) + ... + +------------------------------------------------------------ +CPU FFT (NumPy) +------------------------------------------------------------ +Time: XX.XXX ms + +------------------------------------------------------------ +PERFORMANCE SUMMARY +------------------------------------------------------------ +GPU (cuFFT): X.XXX ms +CPU (NumPy): XX.XXX ms +Speedup: XXx + +------------------------------------------------------------ +VERIFICATION +------------------------------------------------------------ +GPU vs CPU FFT magnitude: Test PASSED + +Frequency Detection Accuracy: + 440 Hz: ✓ + ... + +Done +``` + +**Note:** Times and speedup vary by hardware. + +## Files + +- `fftSignalAnalysis.py` - Main sample using cuda.core and CuPy +- `README.md` - This file +- `requirements.txt` - Dependencies + +## See Also + +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [CuPy FFT Documentation](https://docs.cupy.dev/en/stable/reference/fft.html) +- [cuFFT Documentation](https://docs.nvidia.com/cuda/cufft/) diff --git a/python/2_CoreConcepts/fftSignalAnalysis/fftSignalAnalysis.py b/python/2_CoreConcepts/fftSignalAnalysis/fftSignalAnalysis.py new file mode 100644 index 00000000..d1582ad5 --- /dev/null +++ b/python/2_CoreConcepts/fftSignalAnalysis/fftSignalAnalysis.py @@ -0,0 +1,318 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +FFT Signal Analysis + +Demonstrates how to analyze signal frequencies using Fast Fourier Transform (FFT): +- Generate composite signals with multiple frequency components +- Use CuPy's cuFFT for GPU-accelerated frequency analysis +- Detect dominant frequencies (peak detection) +- Compare GPU vs CPU FFT performance + +Uses cuda.core APIs for device management and timing. +""" + +import sys +import time +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result + +try: + import cupy as cp + import numpy as np + from cuda.core import Device, EventOptions +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Install with: pip install -r requirements.txt") + sys.exit(1) + + +def generate_composite_signal( + num_samples: int, + sample_rate: float, + frequencies: list[float], + amplitudes: list[float], +) -> np.ndarray: + """ + Generate a composite signal with multiple frequency components. + + Parameters + ---------- + num_samples : int + Number of samples in the signal + sample_rate : float + Sampling rate in Hz + frequencies : list[float] + List of frequency components in Hz + amplitudes : list[float] + List of amplitudes for each frequency component + + Returns + ------- + np.ndarray + Signal array + """ + t = np.arange(num_samples, dtype=np.float32) / sample_rate + signal = np.zeros(num_samples, dtype=np.float32) + + for freq, amp in zip(frequencies, amplitudes): + signal += amp * np.sin(2 * np.pi * freq * t) + + return signal + + +def find_dominant_frequencies( + fft_magnitude: cp.ndarray, + frequencies: cp.ndarray, + num_peaks: int = 5, + threshold_ratio: float = 0.1, +) -> list[tuple[float, float]]: + """ + Find dominant frequencies from FFT magnitude spectrum. + + Uses CPU-based peak detection (transfers magnitude/frequencies via cp.asnumpy). + Suitable for small-to-medium spectra; for large-scale analysis, consider + GPU-native peak detection. + + Parameters + ---------- + fft_magnitude : cp.ndarray + Magnitude of FFT (positive frequencies only) + frequencies : cp.ndarray + Frequency bins + num_peaks : int + Maximum number of peaks to return + threshold_ratio : float + Minimum peak height as ratio of max peak + + Returns + ------- + list[tuple[float, float]] + List of (frequency, magnitude) tuples for detected peaks + """ + # Find peaks above threshold + max_magnitude = float(cp.max(fft_magnitude)) + threshold = max_magnitude * threshold_ratio + + # Simple peak detection: find local maxima above threshold + magnitude_cpu = cp.asnumpy(fft_magnitude) + freq_cpu = cp.asnumpy(frequencies) + + peaks = [] + for i in range(1, len(magnitude_cpu) - 1): + if magnitude_cpu[i] > threshold: + if ( + magnitude_cpu[i] > magnitude_cpu[i - 1] + and magnitude_cpu[i] > magnitude_cpu[i + 1] + ): + peaks.append((freq_cpu[i], magnitude_cpu[i])) + + # Sort by magnitude and return top peaks + peaks.sort(key=lambda x: x[1], reverse=True) + return peaks[:num_peaks] + + +def run_fft_analysis( + num_samples: int = 2**20, + sample_rate: float = 44100.0, + device_id: int = 0, + num_iterations: int = 10, +) -> bool: + """ + Run FFT signal analysis benchmark. + + device_id and num_iterations are not exposed via CLI; modify defaults + or call this function directly for customization. + + Parameters + ---------- + num_samples : int + Number of samples (power of 2 recommended for FFT) + sample_rate : float + Sampling rate in Hz + device_id : int + CUDA device ID + num_iterations : int + Number of iterations for timing + + Returns + ------- + bool + True if analysis succeeded + """ + print("=" * 60) + print("FFT Signal Analysis") + print("=" * 60) + + # Initialize device + device = Device(device_id) + device.set_current() + stream = device.create_stream() + + try: + print(f"\nDevice: {device.name}") + print(f"Compute Capability: sm_{device.arch}") + + # Make CuPy use our cuda.core stream + cp.cuda.ExternalStream(int(stream.handle)).use() + + # Define test signal: composite of multiple frequencies + test_frequencies = [440.0, 880.0, 1320.0, 2000.0, 5000.0] # Hz + test_amplitudes = [1.0, 0.5, 0.3, 0.7, 0.4] + + print("\nSignal Parameters:") + print(f" Samples: {num_samples:,}") + print(f" Sample Rate: {sample_rate:,.0f} Hz") + print(f" Duration: {num_samples / sample_rate:.3f} seconds") + print(f" Input Frequencies: {test_frequencies} Hz") + print(f" Input Amplitudes: {test_amplitudes}") + + # Generate composite signal on CPU + h_signal = generate_composite_signal( + num_samples, sample_rate, test_frequencies, test_amplitudes + ) + + # Transfer to GPU + d_signal = cp.asarray(h_signal) + + # --------------------------------------------------------------------- + # GPU FFT (cuFFT via CuPy) + # --------------------------------------------------------------------- + print("\n" + "-" * 60) + print("GPU FFT (cuFFT)") + print("-" * 60) + + event_opts = EventOptions(enable_timing=True) + + # Warmup + d_fft_result = cp.fft.rfft(d_signal) + stream.sync() + + # Timed runs + start = stream.record(options=event_opts) + for _ in range(num_iterations): + d_fft_result = cp.fft.rfft(d_signal) + end = stream.record(options=event_opts) + end.sync() + + gpu_time_ms = (end - start) / num_iterations + print(f"Time: {gpu_time_ms:.3f} ms") + + # Compute magnitude spectrum + d_magnitude = cp.abs(d_fft_result) * 2 / num_samples + d_frequencies = cp.fft.rfftfreq(num_samples, 1 / sample_rate) + + # Find dominant frequencies + detected_peaks = find_dominant_frequencies(d_magnitude, d_frequencies) + + print("\nDetected Frequencies:") + for freq, mag in detected_peaks: + print(f" {freq:8.1f} Hz (magnitude: {mag:.4f})") + + # --------------------------------------------------------------------- + # CPU FFT (NumPy) for comparison + # --------------------------------------------------------------------- + print("\n" + "-" * 60) + print("CPU FFT (NumPy)") + print("-" * 60) + + # Warmup + h_fft_result = np.fft.rfft(h_signal) + + # Timed runs + cpu_start = time.perf_counter() + for _ in range(num_iterations): + h_fft_result = np.fft.rfft(h_signal) + cpu_end = time.perf_counter() + + cpu_time_ms = (cpu_end - cpu_start) * 1000 / num_iterations + print(f"Time: {cpu_time_ms:.3f} ms") + + # --------------------------------------------------------------------- + # Performance Summary + # --------------------------------------------------------------------- + print("\n" + "-" * 60) + print("PERFORMANCE SUMMARY") + print("-" * 60) + speedup = cpu_time_ms / gpu_time_ms + print(f"GPU (cuFFT): {gpu_time_ms:.3f} ms") + print(f"CPU (NumPy): {cpu_time_ms:.3f} ms") + print(f"Speedup: {speedup:.1f}x") + + # --------------------------------------------------------------------- + # Verification + # --------------------------------------------------------------------- + print("\n" + "-" * 60) + print("VERIFICATION") + print("-" * 60) + + # Compare GPU and CPU results + h_magnitude = ( + cp.asarray(np.abs(h_fft_result).astype(np.float32)) * 2 / num_samples + ) + + print("GPU vs CPU FFT magnitude: ", end="") + success = verify_array_result( + d_magnitude, + h_magnitude, + rtol=1e-4, + atol=1e-6, + ) + + # Verify detected frequencies match input + print("\nFrequency Detection Accuracy:") + detected_freqs = [freq for freq, _ in detected_peaks] + all_found = True + for expected_freq in test_frequencies: + found = any(abs(f - expected_freq) < 10 for f in detected_freqs) + status = "✓" if found else "✗" + print(f" {expected_freq:6.0f} Hz: {status}") + all_found = all_found and found + + success = success and all_found + return success + + finally: + # Cleanup - always close resources + cp.cuda.Stream.null.use() + stream.close() + + +def main() -> None: + """Entry point.""" + success = run_fft_analysis() + if success: + print("\nDone") + else: + print("\nAnalysis completed with errors") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/fftSignalAnalysis/requirements.txt b/python/2_CoreConcepts/fftSignalAnalysis/requirements.txt new file mode 100644 index 00000000..655b86c2 --- /dev/null +++ b/python/2_CoreConcepts/fftSignalAnalysis/requirements.txt @@ -0,0 +1,6 @@ +# FFT Signal Analysis Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/greenContext/README.md b/python/2_CoreConcepts/greenContext/README.md new file mode 100644 index 00000000..3afd8083 --- /dev/null +++ b/python/2_CoreConcepts/greenContext/README.md @@ -0,0 +1,250 @@ +# greenContext (Python) + +## Description + +This sample demonstrates how to use **green contexts** with +`cuda.core` to statically partition a GPU's streaming multiprocessors +(SMs) so that independent kernels can run on dedicated subsets of the +device. + +This examples takes A long-running kernel that fills the GPU's SMs, +and a short but latency-sensitive "critical" kernel is launched shortly after. +Without green contexts, the critical kernel must wait for SMs to +free up. With green contexts, the GPU's SMs are partitioned so the +critical kernel has its own dedicated SMs and can start immediately. + +Three timed scenarios are compared: + +1. **Reference**: the critical kernel alone on the primary context, + with no competing work. Establishes the pure compute time of the + critical kernel when every SM on the device is available to it. +2. **Baseline**: both kernels run on the device's primary context, + on two non-blocking streams that contend for all SMs. +3. **Green contexts**: the SMs are split into two disjoint groups + (e.g. 112 + 16). Each kernel runs on a stream that belongs to its + own green context, so the critical kernel never waits for SMs + held by the long-running kernel. + +The headline metric is the total wall time of the critical kernel +from launch to completion. In the baseline it is dominated by time +spent waiting behind the long-running kernel. With green contexts it +reflects the kernel's own compute time on its (smaller) SM +partition. The reference row lets you separate those two effects: + +- `baseline - reference` is roughly the time the critical kernel + spent waiting for SMs in the baseline run (the cost that green + contexts eliminate). +- `green / reference` is the compute slowdown caused by running on + a smaller SM partition (the cost that green contexts introduce). + +## What You'll Learn + +- Querying a device's SM resources via `Device.resources.sm` and + reading `sm_count`, `min_partition_size`, `coscheduled_alignment` +- Splitting an `SMResource` into disjoint partitions with + `sm.split(SMResourceOptions(count=(A, B)))` +- Creating a green context from an SM partition via + `Device.create_context(ContextOptions(resources=[group]))` +- Creating a non-blocking stream on a green context with + `ctx.create_stream()` +- Using CUDA events with timing enabled to measure kernel wall time + across streams +- Cleaning up green contexts safely with `ctx.close()` + +## Key Libraries + +- [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) - device management, SM partitioning, green contexts, compilation, and launching +- `numpy` - scalar kernel arguments + +## Key APIs + +### From `cuda.core` + +- `Device.resources.sm` - the device's SM-type device resource +- `SMResource.split(SMResourceOptions(count=(A, B)))` - partition SMs + into disjoint groups (plus an optional remainder) +- `Device.create_context(ContextOptions(resources=[sm_group]))` - + create a green context provisioned with a specific SM partition +- `Context.is_green` / `Context.resources` - introspect a green + context +- `Context.create_stream()` - create a non-blocking stream that is + tied to the green context's SM partition +- `Context.close()` - destroy a green context (must not be the + thread's current context when closed) +- `Device.create_event(EventOptions(timing_enabled=True))` / + `Stream.record(event)` / `event2 - event1` - measure elapsed time + in milliseconds between two events on the device +- `Program(..., ProgramOptions(std="c++17", arch=f"sm_{device.arch}"))` + / `program.compile("cubin", name_expressions=(...))` - compile the + delay and critical kernels in one TU +- `launch(stream, LaunchConfig(grid=..., block=...), kernel, ...)` - + submit a kernel on a specific stream + +## Requirements + +### Hardware + +- Any NVIDIA GPU supported by green contexts. +- Green-context SM partitioning is designed for larger server GPUs + (H100, H200, B200, ...) but works on any supported GPU as long as + the SM count is large enough to split meaningfully. + +### Software + +- NVIDIA driver >= 12.4 +- CUDA Toolkit 13.0 or newer. +- Python 3.10 or newer. +- `cuda-core` (`>=1.0.0`) + +## Installation + +Install the required packages from `requirements.txt`: + +```bash +cd /path/to/cuda-samples/python/2_CoreConcepts/greenContext +pip install -r requirements.txt +``` + +## How to Run + +### Basic usage + +The auto-default split reserves a small partition (~16 SMs) for the +critical kernel and gives the rest to the long-running kernel. The +exact sizes are chosen by probing the driver with a dry-run `sm.split`, +escalating the alignment granularity in powers of two until the driver +accepts the pair. This handles architectures where the driver enforces +stricter alignment (e.g. TPC/GPC-pair alignment on Blackwell) than the +reported `min_partition_size`. When that happens the sample prints a +`Note:` line with the granularity it landed on. + +```bash +cd cuda-samples/python/2_CoreConcepts/greenContext +python greenContext.py +``` + +### Match the CUDA programming guide example (112 + 16) + +```bash +python greenContext.py --split 112,16 +``` + +### Tune the workload + +```bash +# Longer long-running kernel, larger host launch gap +python greenContext.py --delay-us 3000 --launch-gap-ms 2.0 + +# Smaller/lighter critical kernel so its own compute time is negligible +python greenContext.py --critical-n 65536 --critical-iters 128 + +# Symmetric split: maximum SMs for the critical kernel, long kernel is +# roughly 2x slower but the critical kernel runs close to its reference time. +python greenContext.py --split 64,64 + +# Use a specific GPU +python greenContext.py --device 1 +``` + +### All options + +``` +--device CUDA device ID (default: 0) +--split SM split as 'LONG,CRITICAL', e.g. '112,16'. + Each side must be a multiple of the device's + min_partition_size, and the driver may enforce additional + architecture-specific alignment (e.g. TPC/partition-grid + alignment on Blackwell). Omit --split to auto-select a + driver-accepted split. +--delay-us Per-block busy-wait of the delay kernel, in us (default: 2000) +--delay-waves Number of waves of the delay kernel on the long + partition. Drives the default --delay-blocks (default: 16) +--delay-blocks Number of blocks for the delay kernel. Overrides + --delay-waves if set. + (default: --delay-waves * device SM count) +--critical-n Work size of the critical kernel (default: 4194304) +--critical-iters Inner math-loop iterations inside the critical kernel. + Higher values make the critical kernel's own compute + time more substantial relative to its wait time + (default: 1024) +--launch-gap-ms Host delay between launching the long and critical + kernels (default: 1.0 ms) +``` + +## Expected Output + +The output depends on the GPU and the number of SMs. +On an RTX 4090 (128 SMs) with the default auto split: + +``` +[Green Context Sample using CUDA Core API] +Device: NVIDIA GeForce RTX 4090 +Compute Capability: sm_89 +Total SMs: 128 +Min. SM partition size: 2 +SM co-scheduled alignment: 2 +SM split (long/critical): 112 / 16 +Workload parameters: + delay kernel: 2048 blocks, 2000 us/block (~32.0 ms on 128 SMs) + critical kernel: 4194304 elements, 1024 inner iterations + host launch gap: 1.0 ms + +Compiling kernels ... +Running reference scenario (critical kernel alone) ... +Running baseline scenario (primary context) ... +Running green context scenario ... + +scenario SMs (long/crit) long (ms) crit total (ms) crit offset (ms) +------------------------------------------------------------------------------------------------------- +crit alone (primary ctx) -/128 - 0.425 - +baseline (primary ctx) 128/128 32.034 30.024 1.090 +green ctx (112+16 SMs) 112/16 38.017 2.696 1.075 + +long (ms) : wall time of the delay kernel +crit total (ms) : launch-to-complete wall time of the critical kernel +crit offset (ms) : when the critical stream started, relative to the long stream start + +Critical-kernel latency speedup (baseline vs green ctx): 11.14x +Green-ctx compute cost vs unconstrained (crit alone): 6.34x +Baseline time spent waiting for SMs (not computing): ~29.60 ms + +Done +``` + +**What to look for:** + +- The critical kernel alone (reference row) takes only a fraction of + a millisecond; almost all of the baseline's `crit total` is time + spent queued waiting for SMs, not compute. +- The **critical kernel's wall time drops sharply** in the + green-context scenario (from ~30 ms to a few ms in the example + above) because it no longer waits for SMs held by the long-running + kernel. +- The **long-running kernel's duration may increase** proportional + to the reduction in SMs available to it (128 -> 112 SMs ~= 14% + slower; 128 -> 64 SMs ~= 2x slower). This is an expected tradeoff: + you reserve SMs for a critical kernel by taking them away from the + background workload. +- The **compute cost** ratio (`green / reference`) shows how close + the critical kernel is to ideal linear scaling with its SM count. + A 112/16 split gives the critical kernel only 12.5% of the SMs and + costs it roughly 6-7x its reference time; a 64/64 split gives it + half the SMs and costs roughly 1.5-2x. +- The `crit offset` column is approximately `--launch-gap-ms` in + both full scenarios; it confirms the host launched the critical + kernel the same amount of time after the long kernel in both runs. + +Exact timings vary with GPU model, driver version, clock state, and +other concurrent GPU work. + +## Files + +- `greenContext.py` - Python implementation using `cuda.core` green-context APIs +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [Green Contexts in the CUDA C++ Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#green-contexts) +- [`cuda.core` green-context test suite](https://github.com/NVIDIA/cuda-python/blob/main/cuda_core/tests/test_green_context.py) - the authoritative API reference diff --git a/python/2_CoreConcepts/greenContext/greenContext.py b/python/2_CoreConcepts/greenContext/greenContext.py new file mode 100644 index 00000000..008a3807 --- /dev/null +++ b/python/2_CoreConcepts/greenContext/greenContext.py @@ -0,0 +1,752 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Green Context Sample using CUDA Core API. + +Three scenarios are timed with CUDA events and compared: + + 1. Reference: the critical kernel alone on the primary context, + with no competing work. Establishes the pure compute time of + the critical kernel with access to every SM on the device. + 2. Baseline: both kernels run on the device's primary context, + on two non-blocking streams. They contend for all SMs. + 3. Green contexts: SMs are split into two disjoint groups; each + kernel runs on a stream belonging to its own green context. + +The headline metric is the total wall time of the critical kernel +from launch to completion on its stream. In the baseline it is +dominated by waiting behind the long-running kernel; with green +contexts it reflects only the kernel's own compute time on a +smaller SM partition. The reference row separates those effects. + +Note: Parallel execution on the GPU is never guaranteed. Green +contexts remove one common source of contention (shared SMs) but +they are not a hard scheduling promise. +""" + +import argparse +import sys +import time +from dataclasses import dataclass +from typing import List, Optional, Tuple + +import numpy as np +from cuda.core import ( + ContextOptions, + Device, + EventOptions, + LaunchConfig, + Program, + ProgramOptions, + SMResourceOptions, + launch, +) + +# Two CUDA kernels: +# 1. The delay kernel spins until `cycles` SM clock ticks have elapsed. +# 2. The critical kernel does a small amount of useful work. + +KERNEL_SRC = r""" +extern "C" __global__ void delay_kernel(unsigned long long cycles) +{ + unsigned long long start = clock64(); + while ((unsigned long long)(clock64() - start) < cycles) { } +} + +extern "C" __global__ void critical_kernel(float *out, int n, int iters) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < n) { + // Two dependent accumulators so the compiler cannot collapse the + // loop into a closed-form expression. `iters` is a runtime argument + // for the same reason. + float v = (float)i * 1e-6f + 1.0f; + float u = (float)i * 1e-7f + 0.5f; + for (int k = 0; k < iters; ++k) { + v = v * 1.000001f + u; + u = u * 0.999999f + v * 1e-7f; + } + out[i] = v + u; + } +} +""" + + +@dataclass +class ScenarioResult: + name: str + critical_total_ms: float + critical_sm_count: int + long_ms: Optional[float] = None + critical_offset_ms: Optional[float] = None + long_sm_count: Optional[int] = None + + +def print_sm_topology(device: Device) -> None: + sm = device.resources.sm + print("[Green Context Sample using CUDA Core API]") + print(f"Device: {device.name}") + print(f"Compute Capability: sm_{device.arch}") + print(f"Total SMs: {sm.sm_count}") + print(f"Min. SM partition size: {sm.min_partition_size}") + print(f"SM co-scheduled alignment: {sm.coscheduled_alignment}") + + +def _align_down(n: int, k: int) -> int: + if k <= 0: + return n + return (n // k) * k + + +def _driver_accepts_split(sm, long_count: int, critical_count: int) -> bool: + if long_count <= 0 or critical_count <= 0: + return False + try: + groups, _ = sm.split( + SMResourceOptions(count=(long_count, critical_count)), + dry_run=True, + ) + except Exception: + return False + actual = tuple(g.sm_count for g in groups) + return actual == (long_count, critical_count) + + +def _find_working_split( + sm, prefer_critical: Optional[int] = None +) -> Optional[Tuple[int, int, int]]: + """ + Probe the driver for a (long, critical) split it actually accepts. + + Escalates the alignment granularity from `min_partition_size` upward in + powers of two, requiring BOTH sides to be multiples of the current + granularity. This handles architectures where the driver's true + allocation granularity is larger than the reported + `min_partition_size` (e.g. TPC/GPC-pair alignment on Blackwell: on a + 188-SM part `min_partition_size` is 8 but the driver actually requires + each side to be a multiple of 16). + + Returns (long_count, critical_count, granularity) or None. The + granularity is the smallest power-of-two multiple of + `min_partition_size` at which both sides are aligned and the driver + accepts the pair. + """ + total = sm.sm_count + min_part = sm.min_partition_size + if min_part <= 0: + return None + + if prefer_critical is None or prefer_critical <= 0: + prefer_critical = max(min_part, min(16, total // 8)) + + # Escalate granularity in powers of two. The upper bound is half of + # `total` because below that we cannot fit two partitions of size + # >= granularity. + granularity = min_part + while granularity * 2 <= total: + base = max(granularity, _align_down(prefer_critical, granularity)) + + candidates: List[int] = [] + seen = set() + + def push(c: int) -> None: + if c >= granularity and c <= total - granularity and c not in seen: + seen.add(c) + candidates.append(c) + + # Walk outward from `base` (the preferred critical size, aligned + # down to the current granularity) in steps of granularity. + push(base) + max_steps = max(total // granularity, 1) + for step in range(1, max_steps + 1): + push(base + step * granularity) + push(base - step * granularity) + + for critical in candidates: + long_count = _align_down(total - critical, granularity) + if long_count < granularity: + continue + if _driver_accepts_split(sm, long_count, critical): + return long_count, critical, granularity + + granularity *= 2 + + return None + + +def _format_suggestion(sm, prefer_critical: Optional[int]) -> Optional[str]: + """ + Return a '--split A,B' string the driver is known to accept, or None + if we couldn't find one. + """ + found = _find_working_split(sm, prefer_critical=prefer_critical) + if found is None: + return None + long_count, critical_count, _granularity = found + return f"--split {long_count},{critical_count}" + + +def parse_split(arg: Optional[str], device: Device) -> Tuple[int, int]: + """ + Parse the --split "A,B" CLI argument and validate it against the device. + + Returns (long_count, critical_count). + """ + sm = device.resources.sm + total = sm.sm_count + min_part = sm.min_partition_size + + if arg is None: + # Auto: reserve a small aligned slice for the critical kernel and + # hand the rest (also aligned) to the long-running kernel. We + # can't trust `min_partition_size` alone: on some GPUs (e.g. + # 188-SM Blackwell) the driver requires stricter alignment than + # it reports. Escalate the granularity until the driver accepts + # a pair. + prefer_critical = max(min_part, min(16, total // 8)) + found = _find_working_split(sm, prefer_critical=prefer_critical) + if found is None: + print( + "Error: could not find an SM split that the driver accepts " + f"on this device (total SMs={total}, " + f"min_partition_size={min_part})." + ) + print( + " The driver enforces architecture-specific alignment " + "rules beyond min_partition_size; try passing an explicit " + "--split." + ) + sys.exit(1) + long_count, critical_count, granularity = found + if granularity > min_part: + print( + f"Note: driver required stricter alignment than " + f"min_partition_size={min_part}; selected split uses " + f"granularity={granularity} SMs." + ) + return long_count, critical_count + + # User-provided split. + try: + parts = [int(x.strip()) for x in arg.split(",")] + except ValueError: + print(f"Error: --split must look like 'A,B', got: {arg!r}") + sys.exit(1) + if len(parts) != 2: + print( + "Error: --split must contain exactly two comma-separated " + f"integers, got: {arg!r}" + ) + sys.exit(1) + long_count, critical_count = parts + + errors = [] + if long_count <= 0 or critical_count <= 0: + errors.append("both partition sizes must be positive") + if long_count % min_part != 0 or critical_count % min_part != 0: + errors.append(f"each size must be a multiple of min_partition_size={min_part}") + if long_count + critical_count > total: + errors.append( + f"sum {long_count + critical_count} exceeds device total of {total} SMs" + ) + + if errors: + print("Error: invalid --split value:") + for e in errors: + print(f" - {e}") + suggestion = _format_suggestion( + sm, prefer_critical=critical_count if critical_count > 0 else None + ) + if suggestion is not None: + print(f"Tip: a driver-accepted split on this device is {suggestion}") + sys.exit(1) + + # Confirm the driver itself accepts the split. The well-known alignment + # checks above are necessary but not sufficient on every architecture. + try: + groups, _ = sm.split( + SMResourceOptions(count=(long_count, critical_count)), + dry_run=True, + ) + except Exception as e: + print(f"Error: driver rejected the requested split: {e}") + print( + " The sample's own alignment checks are not exhaustive on " + "every architecture; the driver enforces additional hardware " + "constraints (for example TPC/partition-grid alignment)." + ) + suggestion = _format_suggestion(sm, prefer_critical=critical_count) + if suggestion is not None: + print(f"Tip: a driver-accepted split on this device is {suggestion}") + sys.exit(1) + + actual = tuple(g.sm_count for g in groups) + if actual != (long_count, critical_count): + print(f"Error: driver adjusted the requested split to {actual}.") + suggestion = _format_suggestion(sm, prefer_critical=critical_count) + if suggestion is not None: + print(f"Tip: a driver-accepted split on this device is {suggestion}") + else: + print(" Pick a different --split, or omit it for the auto default.") + sys.exit(1) + + return long_count, critical_count + + +def compile_kernels(device: Device): + options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + program = Program(KERNEL_SRC, code_type="c++", options=options) + module = program.compile( + "cubin", + name_expressions=("delay_kernel", "critical_kernel"), + ) + return module.get_kernel("delay_kernel"), module.get_kernel("critical_kernel") + + +def microseconds_to_cycles(device: Device, microseconds: float) -> int: + """ + Convert microseconds to SM clock cycles, using the reported GPU clock rate. + clock_rate is in kHz, so 1 us = clock_rate_kHz / 1000 cycles. + """ + clock_khz = device.properties.clock_rate + return int(microseconds * clock_khz / 1000.0) + + +def _run_one( + device: Device, + name: str, + long_stream, + critical_stream, + long_sm_count: int, + critical_sm_count: int, + delay_kernel, + critical_kernel, + delay_cycles: int, + delay_blocks: int, + critical_out_ptr: int, + critical_n: int, + critical_iters: int, + launch_gap_s: float, +) -> ScenarioResult: + """ + Launch the delay kernel on `long_stream`, wait `launch_gap_s` on the host, + launch the critical kernel on `critical_stream`, and time both with events. + """ + + # Create events with timing enabled. + opts = EventOptions(timing_enabled=True) + e_long_start = device.create_event(opts) + e_long_end = device.create_event(opts) + e_crit_start = device.create_event(opts) + e_crit_end = device.create_event(opts) + + # 1024 threads/block ensures at most one delay block is resident per SM + # on current architectures, so grid size directly controls the number of + # waves: delay_blocks / sm_count_visible_to_stream. + delay_block = 1024 + delay_cfg = LaunchConfig(grid=delay_blocks, block=delay_block) + critical_block = 256 + critical_grid = (critical_n + critical_block - 1) // critical_block + critical_cfg = LaunchConfig(grid=critical_grid, block=critical_block) + + # Start of timed region + long_stream.record(e_long_start) + launch(long_stream, delay_cfg, delay_kernel, np.uint64(delay_cycles)) + long_stream.record(e_long_end) + + time.sleep(launch_gap_s) + + critical_stream.record(e_crit_start) + launch( + critical_stream, + critical_cfg, + critical_kernel, + critical_out_ptr, + np.int32(critical_n), + np.int32(critical_iters), + ) + critical_stream.record(e_crit_end) + + # Sync both streams so every event has completed and is measurable. + long_stream.sync() + critical_stream.sync() + # End of timed region + + return ScenarioResult( + name=name, + long_ms=e_long_end - e_long_start, + critical_total_ms=e_crit_end - e_crit_start, + critical_offset_ms=e_crit_start - e_long_start, + long_sm_count=long_sm_count, + critical_sm_count=critical_sm_count, + ) + + +def run_critical_alone( + device: Device, + critical_kernel, + critical_n: int, + critical_iters: int, +) -> ScenarioResult: + """ + Critical kernel alone on the primary context, no competing work. + Establishes the pure compute time with every SM on the device available. + """ + stream = device.create_stream() + out = device.allocate(critical_n * 4) + total_sm = device.resources.sm.sm_count + try: + opts = EventOptions(timing_enabled=True) + e_start = device.create_event(opts) + e_end = device.create_event(opts) + block = 256 + grid = (critical_n + block - 1) // block + cfg = LaunchConfig(grid=grid, block=block) + + stream.record(e_start) + launch( + stream, + cfg, + critical_kernel, + int(out.handle), + np.int32(critical_n), + np.int32(critical_iters), + ) + stream.record(e_end) + stream.sync() + + return ScenarioResult( + name="crit alone (primary ctx)", + critical_total_ms=e_end - e_start, + critical_sm_count=total_sm, + ) + finally: + out.close() + + +def run_baseline( + device: Device, + delay_kernel, + critical_kernel, + delay_cycles: int, + delay_blocks: int, + critical_n: int, + critical_iters: int, + launch_gap_s: float, +) -> ScenarioResult: + """Both kernels on the primary context, two non-blocking streams.""" + long_stream = device.create_stream() + critical_stream = device.create_stream() + out = device.allocate(critical_n * 4) + total_sm = device.resources.sm.sm_count + try: + return _run_one( + device, + name="baseline (primary ctx)", + long_stream=long_stream, + critical_stream=critical_stream, + long_sm_count=total_sm, + critical_sm_count=total_sm, + delay_kernel=delay_kernel, + critical_kernel=critical_kernel, + delay_cycles=delay_cycles, + delay_blocks=delay_blocks, + critical_out_ptr=int(out.handle), + critical_n=critical_n, + critical_iters=critical_iters, + launch_gap_s=launch_gap_s, + ) + finally: + out.close() + + +def run_green_context( + device: Device, + split: Tuple[int, int], + delay_kernel, + critical_kernel, + delay_cycles: int, + delay_blocks: int, + critical_n: int, + critical_iters: int, + launch_gap_s: float, +) -> ScenarioResult: + """Each kernel on its own green context, with disjoint SM partitions.""" + long_count, critical_count = split + sm = device.resources.sm + groups, _remainder = sm.split(SMResourceOptions(count=(long_count, critical_count))) + assert len(groups) == 2 + long_group, critical_group = groups + + # Create the large ctx last so it's closed first: order matters only for + # ensuring we never try to close a ctx that's currently the thread's + # active ctx. + ctx_long = device.create_context(ContextOptions(resources=[long_group])) + ctx_crit = None + out = None + try: + ctx_crit = device.create_context(ContextOptions(resources=[critical_group])) + + long_stream = ctx_long.create_stream() + critical_stream = ctx_crit.create_stream() + out = device.allocate(critical_n * 4) + + return _run_one( + device, + name=f"green ctx ({long_count}+{critical_count} SMs)", + long_stream=long_stream, + critical_stream=critical_stream, + long_sm_count=ctx_long.resources.sm.sm_count, + critical_sm_count=ctx_crit.resources.sm.sm_count, + delay_kernel=delay_kernel, + critical_kernel=critical_kernel, + delay_cycles=delay_cycles, + delay_blocks=delay_blocks, + critical_out_ptr=int(out.handle), + critical_n=critical_n, + critical_iters=critical_iters, + launch_gap_s=launch_gap_s, + ) + finally: + if out is not None: + out.close() + # Streams must be released before their owning ctx; letting them go out + # of scope here is sufficient since no references escape this frame. + if ctx_crit is not None: + ctx_crit.close() + ctx_long.close() + + +def _fmt_ms(value: Optional[float], width: int) -> str: + if value is None: + return f"{'-':>{width}}" + return f"{value:>{width}.3f}" + + +def print_results(results: List[ScenarioResult]) -> None: + print() + header = ( + f"{'scenario':<32}{'SMs (long/crit)':>20}" + f"{'long (ms)':>14}{'crit total (ms)':>18}{'crit offset (ms)':>19}" + ) + print(header) + print("-" * len(header)) + for r in results: + long_sm = "-" if r.long_sm_count is None else str(r.long_sm_count) + sms = f"{long_sm}/{r.critical_sm_count}" + print( + f"{r.name:<32}{sms:>20}" + f"{_fmt_ms(r.long_ms, 14)}{_fmt_ms(r.critical_total_ms, 18)}" + f"{_fmt_ms(r.critical_offset_ms, 19)}" + ) + print() + print("long (ms) : wall time of the delay kernel") + print("crit total (ms) : launch-to-complete wall time of the critical kernel") + print( + "crit offset (ms) : when the critical stream started, relative to the" + " long stream start" + ) + + +def report_speedup( + alone: ScenarioResult, + baseline: ScenarioResult, + green: ScenarioResult, +) -> None: + """ + Print three headline numbers that put the raw scenario timings in context: + """ + if baseline.critical_total_ms <= 0 or alone.critical_total_ms <= 0: + return + latency_speedup = baseline.critical_total_ms / max(green.critical_total_ms, 1e-9) + compute_cost = green.critical_total_ms / alone.critical_total_ms + wait_ms = max(0.0, baseline.critical_total_ms - alone.critical_total_ms) + print() + print( + f"Critical-kernel latency speedup (baseline vs green ctx): " + f"{latency_speedup:.2f}x" + ) + print( + f"Green-ctx compute cost vs unconstrained (crit alone): {compute_cost:.2f}x" + ) + print(f"Baseline time spent waiting for SMs (not computing): ~{wait_ms:.2f} ms") + + +def main(): + parser = argparse.ArgumentParser( + description="Green Context sample using CUDA Core API", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--device", type=int, default=0, help="CUDA device ID (default: 0)" + ) + parser.add_argument( + "--split", + type=str, + default=None, + help="SM split as 'LONG,CRITICAL', e.g. '112,16'. Default: auto.", + ) + parser.add_argument( + "--delay-us", + type=int, + default=2000, + help=( + "Per-block busy-wait duration of the delay kernel, " + "in microseconds (default: 2000)" + ), + ) + parser.add_argument( + "--delay-waves", + type=int, + default=16, + help=( + "Number of waves of the delay kernel on the long partition. " + "Drives the default --delay-blocks (default: 16)." + ), + ) + parser.add_argument( + "--delay-blocks", + type=int, + default=None, + help=( + "Number of blocks launched for the delay kernel. " + "Overrides --delay-waves if set. " + "Default: --delay-waves * device SM count." + ), + ) + parser.add_argument( + "--critical-n", + type=int, + default=1 << 22, + help="Work size of the critical kernel (default: 4194304)", + ) + parser.add_argument( + "--critical-iters", + type=int, + default=1024, + help=( + "Iterations of the inner math loop inside the critical kernel. " + "Higher values make the critical kernel's compute time more " + "substantial (default: 1024)." + ), + ) + parser.add_argument( + "--launch-gap-ms", + type=float, + default=1.0, + help=( + "Host delay between launching the long and critical kernels, " + "in ms (default: 1.0)" + ), + ) + args = parser.parse_args() + + try: + device = Device(args.device) + device.set_current() + except Exception as e: + print(f"Error: failed to initialize CUDA device {args.device}: {e}") + return 1 + + print_sm_topology(device) + + long_count, critical_count = parse_split(args.split, device) + print(f"SM split (long/critical): {long_count} / {critical_count}") + + sm_count = device.resources.sm.sm_count + delay_blocks = args.delay_blocks or args.delay_waves * sm_count + delay_cycles = microseconds_to_cycles(device, args.delay_us) + launch_gap_s = max(0.0, args.launch_gap_ms / 1000.0) + + # Rough estimate of the long kernel's duration on the full device. Mostly + # informational; the real value is reported after the run. + est_long_ms = (delay_blocks / sm_count) * (args.delay_us / 1000.0) + + print("Workload parameters:") + print( + f" delay kernel: {delay_blocks} blocks, {args.delay_us} us/block " + f"(~{est_long_ms:.1f} ms on {sm_count} SMs)" + ) + print( + f" critical kernel: {args.critical_n} elements, " + f"{args.critical_iters} inner iterations" + ) + print(f" host launch gap: {args.launch_gap_ms} ms") + + print() + print("Compiling kernels ...") + delay_k, crit_k = compile_kernels(device) + + try: + print("Running reference scenario (critical kernel alone) ...") + alone = run_critical_alone( + device, + crit_k, + args.critical_n, + args.critical_iters, + ) + + print("Running baseline scenario (primary context) ...") + baseline = run_baseline( + device, + delay_k, + crit_k, + delay_cycles, + delay_blocks, + args.critical_n, + args.critical_iters, + launch_gap_s, + ) + + print("Running green context scenario ...") + green = run_green_context( + device, + (long_count, critical_count), + delay_k, + crit_k, + delay_cycles, + delay_blocks, + args.critical_n, + args.critical_iters, + launch_gap_s, + ) + except Exception as e: + print(f"Error: scenario failed: {e}") + import traceback + + traceback.print_exc() + return 1 + + print_results([alone, baseline, green]) + report_speedup(alone, baseline, green) + + print("\nDone") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/2_CoreConcepts/greenContext/requirements.txt b/python/2_CoreConcepts/greenContext/requirements.txt new file mode 100644 index 00000000..c79eb06c --- /dev/null +++ b/python/2_CoreConcepts/greenContext/requirements.txt @@ -0,0 +1,3 @@ +cuda-python>=13.0.0 +cuda-core>=0.7.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/jitLtoLinking/README.md b/python/2_CoreConcepts/jitLtoLinking/README.md new file mode 100644 index 00000000..0e2bfc76 --- /dev/null +++ b/python/2_CoreConcepts/jitLtoLinking/README.md @@ -0,0 +1,133 @@ +# JIT Compilation and Link-Time Optimization (Python) + +## Description + +This sample demonstrates how to build a kernel out of two independently +compiled translation units and link them at runtime with +`cuda.core.Linker`. This is the pattern a library would use to accept +user-supplied device code as a plug-in without recompiling its own +kernels from scratch. + +The sample runs the same program in two linking modes: + +1. **PTX linking** - each module is compiled with + `ProgramOptions(relocatable_device_code=True)` down to PTX, and the + `Linker` emits a final cubin. The two modules stay independently + compiled (no cross-module inlining). +2. **Link-Time Optimization (LTO)** - each module is compiled with + `ProgramOptions(link_time_optimization=True)` down to LTO IR, and the + `Linker` is configured with `LinkerOptions(link_time_optimization=True)` + so the optimizer runs again across both modules, typically matching + the code generation of a single-source build. + +The "main" kernel `apply_transform` calls a `user_transform` device +function that lives in a separate source string, and the results of both +linking modes are verified against a NumPy reference. + +## What You'll Learn + +- Compiling multiple `Program` objects into PTX or LTO IR +- Linking independent object codes into a single cubin with `Linker` +- Choosing between `relocatable_device_code` and `link_time_optimization` +- How a library's main kernel can call into user-supplied device code +- When to prefer LTO over plain PTX linking + +## Key Libraries + +- [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) - Pythonic access to CUDA runtime, programs, and the JIT linker +- `cupy` - input and output buffers on the GPU +- `numpy` - reference computation on the host + +## Key APIs + +### From `cuda.core` + +- `ProgramOptions(relocatable_device_code=True)` + `Program.compile("ptx")` - produce relocatable PTX +- `ProgramOptions(link_time_optimization=True)` + `Program.compile("ltoir")` - produce LTO IR +- `Linker(*object_codes, options=LinkerOptions(...))` - create a JIT linker over multiple object codes +- `LinkerOptions(link_time_optimization=True)` - opt into LTO during linking +- `Linker.link("cubin")` - produce a loadable module +- `ObjectCode.get_kernel(name)` - fetch a kernel from the linked module + +### From `cuda_samples_utils` + +- `print_gpu_info()` - print device name and compute capability + +## Requirements + +### Hardware + +- NVIDIA GPU with Compute Capability 7.0 or higher + +### Software + +- CUDA Toolkit 13.0 or newer (matches `cuda-python` 13.x) +- Python 3.10 or newer +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## Installation + +Install the required packages from `requirements.txt`: + +```bash +cd /path/to/cuda-samples/python/2_CoreConcepts/jitLtoLinking +pip install -r requirements.txt +``` + +The `requirements.txt` installs: + +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## How to Run + +### Basic usage + +```bash +cd cuda-samples/python/2_CoreConcepts/jitLtoLinking +python jitLtoLinking.py +``` + +### With custom parameters + +```bash +# Larger element count +python jitLtoLinking.py --elements 1048576 + +# Use a specific GPU +python jitLtoLinking.py --device 1 +``` + +## Expected Output + +``` +Device: +Compute Capability: + +[1] PTX linking (no LTO) + [ptx] result verified against NumPy reference + +[2] LTO linking (link-time optimization) + [lto] result verified against NumPy reference + +Both PTX and LTO linked kernels produced matching results. Done +``` + +**Note:** Device name and compute capability will vary based on your GPU. + +## Files + +- `jitLtoLinking.py` - Python implementation using `cuda.core.Linker` +- `README.md` - This file +- `requirements.txt` - Sample dependencies +- `../../Utilities/cuda_samples_utils.py` - Common utilities (imported by this sample) + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [`cuda.core` compilation API](https://nvidia.github.io/cuda-python/cuda-core/latest/api.html#cuda-compilation-toolchain) +- Upstream `cuda.core` example: [`jit_lto_fractal.py`](https://github.com/NVIDIA/cuda-python/blob/main/cuda_core/examples/jit_lto_fractal.py) +- [NVIDIA nvJitLink documentation](https://docs.nvidia.com/cuda/nvjitlink/index.html) diff --git a/python/2_CoreConcepts/jitLtoLinking/jitLtoLinking.py b/python/2_CoreConcepts/jitLtoLinking/jitLtoLinking.py new file mode 100644 index 00000000..56831000 --- /dev/null +++ b/python/2_CoreConcepts/jitLtoLinking/jitLtoLinking.py @@ -0,0 +1,223 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +JIT Compilation and Link-Time Optimization with cuda.core + +Real-world GPU code is rarely a single source string. Libraries ship a +"main" kernel that is compiled once, then link in user-supplied device +functions at runtime to customize behavior without recompiling the whole +program. + +cuda.core exposes this pattern through ``Program`` (NVRTC compilation) +and ``Linker`` (JIT linking of multiple object codes). Two modes are +shown here: + + * **PTX linking**: compile each translation unit with + ``relocatable_device_code=True`` to PTX and link to a CUBIN. + The two modules remain independently compiled: no cross-module + inlining. + + * **LTO (Link-Time Optimization)**: compile each translation unit + with ``link_time_optimization=True`` to LTO IR, then link with + ``LinkerOptions(link_time_optimization=True)``. The linker reruns + the optimizer across both modules and can inline the device function + into the main kernel, typically matching a single-source build. + +The same kernel math runs in both modes and is verified against a +NumPy reference. +""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import cupy as cp + import numpy as np + from cuda.core import ( + Device, + LaunchConfig, + Linker, + LinkerOptions, + Program, + ProgramOptions, + launch, + ) + from cuda_samples_utils import print_gpu_info # noqa: E402 +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +# -------------------------------------------------------------------------- +# Module A: the "library" main kernel. It forwards each element through a +# user-supplied device function (resolved at link time) and writes the result. +# -------------------------------------------------------------------------- +MAIN_SRC = r""" +// Forward declare the user-supplied hook. Its definition lives in a separate +// translation unit and is resolved by the Linker at runtime. +extern "C" __device__ float user_transform(float x); + +extern "C" __global__ +void apply_transform(const float* __restrict__ in, + float* __restrict__ out, + size_t N) +{ + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = (size_t)gridDim.x * blockDim.x; + for (size_t i = tid; i < N; i += stride) { + out[i] = user_transform(in[i]); + } +} +""" + +# -------------------------------------------------------------------------- +# Module B: the user-supplied "plug-in" device function. A different +# implementation of ``user_transform`` here produces different results without +# rebuilding MAIN_SRC. +# -------------------------------------------------------------------------- +USER_SRC = r""" +extern "C" __device__ +float user_transform(float x) +{ + // A deliberately non-trivial expression so LTO has something to inline / + // optimize across the module boundary. + float y = x * x + 3.0f * x - 1.0f; + return y > 0.0f ? y : 0.0f; +} +""" + + +def host_reference(x: np.ndarray) -> np.ndarray: + y = x * x + 3.0 * x - 1.0 + return np.where(y > 0.0, y, 0.0).astype(np.float32) + + +def link_ptx(device): + """Compile both modules to PTX and link them into a cubin (no LTO).""" + prog_opts = ProgramOptions( + std="c++17", arch=f"sm_{device.arch}", relocatable_device_code=True + ) + main_obj = Program(MAIN_SRC, "c++", options=prog_opts).compile("ptx") + user_obj = Program(USER_SRC, "c++", options=prog_opts).compile("ptx") + + linker = Linker(main_obj, user_obj, options=LinkerOptions(arch=f"sm_{device.arch}")) + return linker.link("cubin") + + +def link_lto(device): + """Compile both modules to LTO IR and link with LTO enabled.""" + prog_opts = ProgramOptions( + std="c++17", arch=f"sm_{device.arch}", link_time_optimization=True + ) + main_obj = Program(MAIN_SRC, "c++", options=prog_opts).compile("ltoir") + user_obj = Program(USER_SRC, "c++", options=prog_opts).compile("ltoir") + + linker_opts = LinkerOptions( + arch=f"sm_{device.arch}", link_time_optimization=True + ) + linker = Linker(main_obj, user_obj, options=linker_opts) + return linker.link("cubin") + + +def run_one_mode(mode, module, stream, d_in, d_out, size, expected): + kernel = module.get_kernel("apply_transform") + config = LaunchConfig(grid=(size + 255) // 256, block=256) + launch( + stream, + config, + kernel, + d_in.data.ptr, + d_out.data.ptr, + np.uint64(size), + ) + stream.sync() + actual = cp.asnumpy(d_out) + if not np.allclose(actual, expected, rtol=1e-5, atol=1e-5): + max_err = np.max(np.abs(actual - expected)) + print(f" [{mode}] verification FAILED (max_err={max_err})") + return False + print(f" [{mode}] result verified against NumPy reference") + return True + + +def main() -> int: + import argparse + + parser = argparse.ArgumentParser( + description="JIT + LTO linking of two device modules with cuda.core" + ) + parser.add_argument( + "--elements", type=int, default=1 << 16, + help="Number of float32 elements (default: 65536)", + ) + parser.add_argument("--device", type=int, default=0, help="CUDA device id") + args = parser.parse_args() + + device = Device(args.device) + device.set_current() + print_gpu_info(device) + + stream = device.create_stream() + cp.cuda.ExternalStream(int(stream.handle)).use() + + try: + N = args.elements + rng = np.random.default_rng(seed=0) + host_in = rng.standard_normal(N).astype(np.float32) + expected = host_reference(host_in) + + d_in = cp.asarray(host_in) + d_out = cp.empty(N, dtype=cp.float32) + device.sync() + + print("\n[1] PTX linking (no LTO)") + ptx_module = link_ptx(device) + ok_ptx = run_one_mode("ptx", ptx_module, stream, d_in, d_out, N, expected) + + d_out.fill(0) + device.sync() + + print("\n[2] LTO linking (link-time optimization)") + lto_module = link_lto(device) + ok_lto = run_one_mode("lto", lto_module, stream, d_in, d_out, N, expected) + + print() + if ok_ptx and ok_lto: + print("Both PTX and LTO linked kernels produced matching results. Done") + return 0 + return 1 + finally: + stream.close() + cp.cuda.Stream.null.use() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/2_CoreConcepts/jitLtoLinking/requirements.txt b/python/2_CoreConcepts/jitLtoLinking/requirements.txt new file mode 100644 index 00000000..ff318f02 --- /dev/null +++ b/python/2_CoreConcepts/jitLtoLinking/requirements.txt @@ -0,0 +1,5 @@ +# JIT + LTO Linking Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 diff --git a/python/2_CoreConcepts/launchConfigTuning/README.md b/python/2_CoreConcepts/launchConfigTuning/README.md new file mode 100644 index 00000000..56b83bf2 --- /dev/null +++ b/python/2_CoreConcepts/launchConfigTuning/README.md @@ -0,0 +1,184 @@ +# Sample: Launch Configuration Tuning (Python) + +## Description + +Benchmark different CUDA kernel launch configurations to find the optimal block-size setting using `cuda.core` APIs. This sample demonstrates **performance tuning** by measuring execution time across various thread block sizes. + +## What You'll Learn + +- Compiling CUDA kernels at runtime with `cuda.core.Program` +- Launching kernels with different `LaunchConfig` settings +- Benchmarking kernel performance with precise timing +- Understanding how thread block size affects performance +- Tuning for memory-bound vs compute-bound kernels + +## Key Concepts + +### Launch Configuration with cuda.core + +```python +# Configure kernel launch with specific thread block size +config = LaunchConfig( + grid=(grid_size,), + block=(block_size,), + shmem_size=shared_memory_bytes +) + +# Launch kernel +launch(stream, config, kernel, *args) +stream.sync() +``` + +### Thread Block Sizing + +Thread block size significantly impacts performance due to: + +| Factor | Impact | +|--------|--------| +| **Occupancy** | More active warps can hide memory latency | +| **Registers** | More threads/block = fewer registers/thread | +| **Shared Memory** | Divided among blocks on each SM | +| **Warp Efficiency** | Block size should be multiple of 32 | + +### Benchmarking Approach + +```python +# Use CUDA events for accurate GPU timing (not CPU wall-clock) +start_event = device.create_event(options=EventOptions(enable_timing=True)) +end_event = device.create_event(options=EventOptions(enable_timing=True)) + +stream.record(start_event) +for _ in range(n_iterations): + launch(stream, config, kernel, *args) +stream.record(end_event) +end_event.sync() +elapsed_ms = (end_event - start_event) / n_iterations +``` + +## Key APIs + +### From `cuda.core`: + +- `Device` - CUDA device management +- `Program` - Runtime kernel compilation (NVRTC) +- `ProgramOptions` - Compilation options (architecture target) +- `LaunchConfig` - Kernel launch configuration (grid/block dimensions) +- `launch` - Execute compiled kernel (accepts Buffer objects directly) +- `EventOptions` - GPU timing with CUDA events +- `ManagedMemoryResource` - Device-preferred unified memory +- `ManagedMemoryResourceOptions` - Set preferred_location for representative benchmarks + +### From `numpy`: + +- `np.from_dlpack()` - Zero-copy view of GPU buffers via DLPack + +### Benchmarked Kernels: + +- **vector_add** - Simple memory-bound kernel (C = A + B) - low sensitivity to block size +- **reduce_sum** - Shared memory reduction - high sensitivity to block size + +## Requirements + +### Hardware: + +- NVIDIA GPU with CUDA support +- Minimum GPU memory: 512 MB + +### Software: + +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- See `requirements.txt` for Python packages + +## Installation + +```bash +pip install -r requirements.txt +``` + +## How to Run + +```bash +python launchConfigTuning.py +``` + +## Expected Output + +``` +============================================================ +Launch Configuration Tuning (cuda.core) +Finding the Best Block Size for Your Kernel +============================================================ + +Device: +Compute Capability: X.X + +Compiling CUDA kernels with cuda.core.Program... + Target architecture: sm_XX + ✓ vector_add kernel compiled + ✓ reduce_sum kernel compiled + +============================================================ +VECTOR ADDITION - Launch Configuration Tuning +============================================================ + +Problem size: 10,000,000 elements +Kernel: vector_add (C = A + B) + +Testing thread configurations: [32, 64, 128, 256, 512, 1024] +------------------------------------------------------------ +Block Size: 32 | Blocks: 312500 | Time: X.XXXX ± X.XXXX ms +Block Size: 64 | Blocks: 156250 | Time: X.XXXX ± X.XXXX ms +... +------------------------------------------------------------ + +✓ OPTIMAL: block_size=XXX (X.XXXX ms) +✗ WORST: block_size=XXX (X.XXXX ms) + Speedup: X.XXx + +✓ Results verified correct! + +... + +============================================================ +SAMPLE COMPLETE +============================================================ + +Key Takeaway: The optimal thread configuration depends on your +specific kernel characteristics. Always benchmark to find the best! +``` + +## Tuning Guidelines + +### Start Here +- **128-256 threads/block** is a good starting point for most kernels +- Always use **multiples of 32** (warp size) + +### Memory-Bound Kernels +- Less sensitive to thread configuration +- Focus on memory access patterns +- Higher thread counts help hide latency + +### Compute-Bound Kernels +- More sensitive to thread configuration +- Watch for register pressure at high thread counts +- Profile with Nsight Compute + +### Reduction Kernels +- Block size affects shared memory usage +- Power-of-2 sizes simplify reduction logic +- Often 256-512 threads works well + +## Files + +- `launchConfigTuning.py` - Python implementation using cuda.core +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## See Also + +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [cuda.core.LaunchConfig](https://nvidia.github.io/cuda-python/cuda-core/latest/generated/cuda.core.LaunchConfig.html) +- [CUDA Occupancy Calculator](https://docs.nvidia.com/cuda/cuda-occupancy-calculator/) +- [CUDA Best Practices Guide - Execution Configuration](https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#execution-configuration-optimizations) +- [Nsight Compute Profiler](https://developer.nvidia.com/nsight-compute) diff --git a/python/2_CoreConcepts/launchConfigTuning/launchConfigTuning.py b/python/2_CoreConcepts/launchConfigTuning/launchConfigTuning.py new file mode 100644 index 00000000..8ba9f5b5 --- /dev/null +++ b/python/2_CoreConcepts/launchConfigTuning/launchConfigTuning.py @@ -0,0 +1,388 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Launch Configuration Tuning + +Demonstrates how to find the optimal threads-per-block configuration for CUDA +kernels using cuda.core APIs. Benchmarks different thread layouts to answer: +"What is the best threads-per-block for my kernel?" +""" + +import sys + +try: + import numpy as np + from cuda.core import ( + Device, + EventOptions, + LaunchConfig, + ManagedMemoryResource, + ManagedMemoryResourceOptions, + Program, + ProgramOptions, + launch, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +# ============================================================================= +# CUDA Kernel Source Code +# ============================================================================= + +# Vector Addition Kernel - Simple memory-bound kernel (grid-stride loop) +VECTOR_ADD_KERNEL = r""" +extern "C" __global__ +void vector_add(const float* __restrict__ a, + const float* __restrict__ b, + float* __restrict__ c, + int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (int i = idx; i < n; i += stride) { + c[i] = a[i] + b[i]; + } +} +""" + +# Reduction Kernel - Sensitive to block size due to shared memory (grid-stride load) +REDUCTION_KERNEL = r""" +extern "C" __global__ +void reduce_sum(const float* __restrict__ input, + float* __restrict__ partial_sums, + int n) { + extern __shared__ float sdata[]; + + unsigned int tid = threadIdx.x; + unsigned int stride = blockDim.x * gridDim.x; + + // Load data into shared memory (grid-stride loop) + float sum = 0.0f; + for (unsigned int i = blockIdx.x * blockDim.x + tid; i < n; i += stride) { + sum += input[i]; + } + sdata[tid] = sum; + __syncthreads(); + + // Perform reduction in shared memory + for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + sdata[tid] += sdata[tid + s]; + } + __syncthreads(); + } + + // Write result for this block + if (tid == 0) { + partial_sums[blockIdx.x] = sdata[0]; + } +} +""" + + +# ============================================================================= +# Utility Functions +# ============================================================================= + + +def compile_kernel(device, kernel_code, kernel_name): + """Compile a CUDA kernel using cuda.core.Program.""" + arch = f"sm_{device.arch}" + options = ProgramOptions(arch=arch) + program = Program(kernel_code, code_type="c++", options=options) + compiled = program.compile(target_type="cubin") + return compiled.get_kernel(kernel_name) + + +def benchmark_kernel_1d( + device, + stream, + kernel, + args, + n_elements, + block_size, + n_iterations=100, + shared_mem_bytes=0, +): + """ + Benchmark a 1D kernel with given threads-per-block configuration. + Uses CUDA events for accurate GPU timing. + + Returns timing statistics as a dictionary. + """ + grid_size = (n_elements + block_size - 1) // block_size + + config = LaunchConfig( + grid=(grid_size,), block=(block_size,), shmem_size=shared_mem_bytes + ) + + # Warm-up run + launch(stream, config, kernel, *args) + stream.sync() + + # Timed runs with CUDA events + event_opts = EventOptions(enable_timing=True) + start_event = device.create_event(options=event_opts) + end_event = device.create_event(options=event_opts) + + stream.record(start_event) + for _ in range(n_iterations): + launch(stream, config, kernel, *args) + stream.record(end_event) + end_event.sync() + + elapsed_ms = (end_event - start_event) / n_iterations + + return { + "block_size": block_size, + "grid_size": grid_size, + "mean_time_ms": elapsed_ms, + "std_time_ms": 0.0, # Single measurement with events + } + + +def print_gpu_info(device): + """Print GPU information relevant to launch configuration.""" + print(f"\nDevice: {device.name}") + cc = device.compute_capability + print(f"Compute Capability: {cc.major}.{cc.minor}") + + +def allocate_managed_array(mr, stream, n_elements, dtype=np.float32): + """Allocate device-preferred unified memory and return buffer with numpy view.""" + n_bytes = n_elements * np.dtype(dtype).itemsize + buffer = mr.allocate(n_bytes, stream) + stream.sync() + + # Zero-copy numpy view via DLPack (holds reference to buffer) + np_view = np.from_dlpack(buffer).view(dtype).reshape(n_elements) + return buffer, np_view + + +# ============================================================================= +# Benchmark Demonstrations +# ============================================================================= + + +def demo_vector_add_tuning(device, stream, mr, kernel): + """Demonstrate launch configuration tuning for vector addition.""" + print("\n" + "=" * 60) + print("VECTOR ADDITION - Launch Configuration Tuning") + print("=" * 60) + + N = 10_000_000 # 10 million elements + print(f"\nProblem size: {N:,} elements") + print("Kernel: vector_add (C = A + B)") + + # Allocate device-preferred unified memory via cuda.core + d_a, np_a = allocate_managed_array(mr, stream, N) + d_b, np_b = allocate_managed_array(mr, stream, N) + d_c, np_c = allocate_managed_array(mr, stream, N) + try: + # Initialize data via numpy views + np_a[:] = np.random.rand(N).astype(np.float32) + np_b[:] = np.random.rand(N).astype(np.float32) + stream.sync() + + # Thread configurations to test (multiples of warp size = 32) + thread_configs = [32, 64, 128, 256, 512, 1024] + + print(f"\nTesting thread configurations: {thread_configs}") + print("-" * 60) + + results = [] + for tpb in thread_configs: + result = benchmark_kernel_1d( + device, + stream, + kernel, + (d_a, d_b, d_c, np.int32(N)), + N, + tpb, + n_iterations=100, + ) + results.append(result) + print( + f"Block Size: {tpb:4d} | Blocks: {result['grid_size']:6d} | " + f"Time: {result['mean_time_ms']:.4f} ms" + ) + + # Find optimal and worst configurations + best = min(results, key=lambda x: x["mean_time_ms"]) + worst = max(results, key=lambda x: x["mean_time_ms"]) + + print("-" * 60) + print( + f"\n✓ OPTIMAL: block_size={best['block_size']} " + f"({best['mean_time_ms']:.4f} ms)" + ) + print( + f"✗ WORST: block_size={worst['block_size']} " + f"({worst['mean_time_ms']:.4f} ms)" + ) + print(f" Speedup: {worst['mean_time_ms']/best['mean_time_ms']:.2f}x") + + # Verify result + stream.sync() + expected = np_a + np_b + if np.allclose(np_c, expected): + print("\n✓ Results verified correct!") + + return results + finally: + d_a.close() + d_b.close() + d_c.close() + + +def demo_reduction_tuning(device, stream, mr, kernel): + """Demonstrate launch config tuning for reduction (shared memory).""" + print("\n" + "=" * 60) + print("REDUCTION - Launch Configuration Tuning") + print("=" * 60) + + N = 16_777_216 # 16M elements (power of 2) + + print(f"\nProblem size: {N:,} elements") + print("Kernel: reduce_sum (parallel reduction)") + print("Note: Reduction uses shared memory - more sensitive to block size!") + + # Allocate device-preferred unified memory via cuda.core + d_input, np_input = allocate_managed_array(mr, stream, N) + try: + np_input[:] = np.random.rand(N).astype(np.float32) + stream.sync() + + thread_configs = [32, 64, 128, 256, 512, 1024] + + print(f"\nTesting thread configurations: {thread_configs}") + print("-" * 60) + + results = [] + for tpb in thread_configs: + # Allocate partial sums array + n_blocks = (N + tpb - 1) // tpb + d_partial, _ = allocate_managed_array(mr, stream, n_blocks) + try: + # Shared memory size = block_size * sizeof(float) + shared_mem_bytes = tpb * 4 + + result = benchmark_kernel_1d( + device, + stream, + kernel, + (d_input, d_partial, np.int32(N)), + N, + tpb, + n_iterations=50, + shared_mem_bytes=shared_mem_bytes, + ) + results.append(result) + print( + f"Block Size: {tpb:4d} | Blocks: {result['grid_size']:6d} | " + f"Time: {result['mean_time_ms']:.4f} ms" + ) + finally: + d_partial.close() + + best = min(results, key=lambda x: x["mean_time_ms"]) + worst = max(results, key=lambda x: x["mean_time_ms"]) + + print("-" * 60) + print(f"\n✓ OPTIMAL: block_size={best['block_size']}") + print( + f" Speedup over worst: {worst['mean_time_ms']/best['mean_time_ms']:.2f}x" + ) + + return results + finally: + d_input.close() + + +# ============================================================================= +# Main +# ============================================================================= + + +def main(): + """ + Complete demonstration of CUDA launch configuration tuning. + + This sample shows: + 1. Device initialization with cuda.core.Device + 2. Kernel compilation with cuda.core.Program + 3. Benchmarking different thread block configurations + 4. Finding optimal threads-per-block for various kernel types + """ + print("=" * 60) + print("Launch Configuration Tuning (cuda.core)") + print("Finding the Best Block Size for Your Kernel") + print("=" * 60) + + # Initialize CUDA device + device = Device(0) + device.set_current() + + # Print GPU information + print_gpu_info(device) + + # Create stream and device-preferred memory resource + stream = device.create_stream() + mr_options = ManagedMemoryResourceOptions(preferred_location=device.device_id) + mr = ManagedMemoryResource(mr_options) + + try: + # Compile kernels + print("\nCompiling CUDA kernels with cuda.core.Program...") + arch = f"sm_{device.arch}" + print(f" Target architecture: {arch}") + + vec_add_kernel = compile_kernel(device, VECTOR_ADD_KERNEL, "vector_add") + print(" ✓ vector_add kernel compiled") + + reduction_kernel = compile_kernel(device, REDUCTION_KERNEL, "reduce_sum") + print(" ✓ reduce_sum kernel compiled") + + # Run demonstrations + demo_vector_add_tuning(device, stream, mr, vec_add_kernel) + demo_reduction_tuning(device, stream, mr, reduction_kernel) + + print("\n" + "=" * 60) + print("SAMPLE COMPLETE") + print("=" * 60) + print("\nKey Takeaway: The optimal thread configuration depends on your") + print("specific kernel characteristics. Always benchmark to find the best!") + print() + finally: + stream.close() + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/launchConfigTuning/requirements.txt b/python/2_CoreConcepts/launchConfigTuning/requirements.txt new file mode 100644 index 00000000..f493d29a --- /dev/null +++ b/python/2_CoreConcepts/launchConfigTuning/requirements.txt @@ -0,0 +1,6 @@ +# Launch Configuration Tuning Sample Requirements +# Requires Python 3.10+, CUDA Toolkit 13.0+ + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/matrixMulSharedMem/README.md b/python/2_CoreConcepts/matrixMulSharedMem/README.md new file mode 100644 index 00000000..37affdf2 --- /dev/null +++ b/python/2_CoreConcepts/matrixMulSharedMem/README.md @@ -0,0 +1,163 @@ +# Matrix Multiplication with Shared Memory (GEMM) + +Demonstrates efficient matrix multiplication using nvmath-python APIs and custom CUDA kernels with tiling, shared memory, and loop unrolling. + +## Overview + +- Uses nvmath.linalg.advanced.Matmul for high-performance GEMM via cuBLASLt +- Compares with custom CUDA kernel using tiling and shared memory +- Shows how tiling reduces global memory bandwidth requirements +- Demonstrates shared memory for data reuse within thread blocks +- Uses loop unrolling to improve instruction-level parallelism + +## What You'll Learn + +- How to use nvmath stateful API for optimized matrix multiplication +- How to tile matrix operations for better cache locality +- Using shared memory to reduce redundant global memory accesses +- Loop unrolling techniques for GPU kernels +- Benchmarking and comparing kernel performance + +## Key Libraries + +- `nvmath-python` - NVIDIA math library with cuBLASLt access +- `cuda.core` - Modern CUDA Python API for custom kernel compilation +- `cupy` - GPU array library for Python + +## Key APIs + +### From `nvmath.linalg.advanced`: + +- `Matmul()` - Stateful matrix multiplication with planning and execution phases +- `MatmulComputeType` - Compute type options for mixed-precision + +### From `cuda.core`: + +- `Device()` - CUDA device management and properties +- `Program()` - Runtime kernel compilation (NVRTC) +- `LaunchConfig()` - Kernel launch configuration (grid/block dimensions) +- `launch()` - Kernel execution on a stream +- `Stream.record_event()` / `Event.elapsed_time()` - GPU timing + +## Requirements + +### Hardware: + +- NVIDIA GPU with Compute Capability 7.0 or higher +- Minimum GPU memory: 256 MB (for 1024×1024 matrices) + +### Software: + +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- See requirements.txt for package dependencies + +## Installation + +```bash +cd cuda-samples/python/2_CoreConcepts/matrixMulSharedMem +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt +``` + +## How to Run + +```bash +python matrixMulSharedMem.py +``` + +## Expected Output + +``` +====================================================================== +Matrix Multiplication with Shared Memory (GEMM) +Using nvmath and cuda.core APIs +====================================================================== + +Device: NVIDIA GeForce RTX 4090 +Compute Capability: sm_89 + +Custom kernel compiled ✓ + +Matrix dimensions: A(1024x1024) × B(1024x1024) = C(1024x1024) +Custom kernel tile size: 16x16 + +---------------------------------------------------------------------- +NVMATH MATMUL (cuBLASLt) +---------------------------------------------------------------------- +Using nvmath.linalg.advanced.Matmul stateful API +Average time: X.XXX ms +Performance: XXXX.XX GFLOPS + +---------------------------------------------------------------------- +CUSTOM KERNEL (Tiled + Shared Memory + Loop Unrolling) +---------------------------------------------------------------------- +Grid: (64, 64), Block: (16, 16) +Average time: X.XXX ms +Performance: XXX.XX GFLOPS + +---------------------------------------------------------------------- +VERIFICATION +---------------------------------------------------------------------- +nvmath : PASSED (max error: X.XXe-XX) +Custom kernel : PASSED (max error: X.XXe-XX) + +====================================================================== +PERFORMANCE SUMMARY +====================================================================== +Implementation Time (ms) GFLOPS +---------------------------------------------------------------------- +nvmath (cuBLASLt) X.XXX XXXX.XX +Custom (shared mem + unroll) X.XXX XXX.XX +``` + +## Tiling Concept + +``` + Matrix A (M×K) Matrix B (K×N) Matrix C (M×N) + ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ + │ T00 │ T01 │...│ │ T00 │ T01 │...│ │ │ │ │ + ├─────┼─────┼───┤ ├─────┼─────┼───┤ ├─────┼─────┼───┤ + │ T10 │ T11 │...│ × │ T10 │ T11 │...│ = │ │ Cij │ │ + ├─────┼─────┼───┤ ├─────┼─────┼───┤ ├─────┼─────┼───┤ + │ ... │ ... │...│ │ ... │ ... │...│ │ │ │ │ + └───────────────┘ └───────────────┘ └───────────────┘ + + Cij = Σ (A_tile_row × B_tile_col) for all tiles along K +``` + +## nvmath Stateful API + +```python +import nvmath.linalg.advanced as nvmath_advanced + +# Create matrices (CuPy arrays) +A = cp.random.rand(m, k).astype(cp.float32) +B = cp.random.rand(k, n).astype(cp.float32) + +# Use stateful API for fine-grained control +with nvmath_advanced.Matmul(A, B) as mm: + mm.plan() # Find optimal algorithm + C = mm.execute() # Execute computation +``` + +## Memory Access Optimization (Custom Kernel) + +| Implementation | Global Reads per C element | Reduction | +|---------------|---------------------------|-----------| +| Naive | 2 × K | (baseline)| +| Tiled (16×16) | 2 × K / 16 | 16× | + +## Files + +- `matrixMulSharedMem.py` - Python implementation comparing nvmath vs custom kernel +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## See Also + +- [nvmath-python Documentation](https://docs.nvidia.com/cuda/nvmath-python/) +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [cuda.core API Guide](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [CuPy Documentation](https://docs.cupy.dev/) diff --git a/python/2_CoreConcepts/matrixMulSharedMem/matrixMulSharedMem.py b/python/2_CoreConcepts/matrixMulSharedMem/matrixMulSharedMem.py new file mode 100644 index 00000000..8f809f3e --- /dev/null +++ b/python/2_CoreConcepts/matrixMulSharedMem/matrixMulSharedMem.py @@ -0,0 +1,243 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Matrix Multiplication with Shared Memory (GEMM) + +Demonstrates efficient matrix multiplication using: +- nvmath.linalg.advanced.Matmul for high-performance GEMM via cuBLASLt +- Custom CUDA kernel with tiling, shared memory, and loop unrolling + +Uses cuda.core APIs with CuPy arrays via ExternalStream. +""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import cupy as cp + import numpy as np + import nvmath.linalg.advanced as nvmath_advanced + from cuda.core import ( + Device, + EventOptions, + LaunchConfig, + Program, + ProgramOptions, + launch, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Install with: pip install -r requirements.txt") + sys.exit(1) + + +TILE_SIZE: int = 16 + +MATMUL_KERNEL: str = r""" +#define TILE_SIZE 16 + +extern "C" __global__ +void matmul_shared(const float* A, const float* B, float* C, + int M, int N, int K) { + __shared__ float As[TILE_SIZE][TILE_SIZE]; + __shared__ float Bs[TILE_SIZE][TILE_SIZE]; + + int bx = blockIdx.x, by = blockIdx.y; + int tx = threadIdx.x, ty = threadIdx.y; + int row = by * TILE_SIZE + ty; + int col = bx * TILE_SIZE + tx; + + float sum = 0.0f; + int numTiles = (K + TILE_SIZE - 1) / TILE_SIZE; + + for (int t = 0; t < numTiles; t++) { + int aCol = t * TILE_SIZE + tx; + int bRow = t * TILE_SIZE + ty; + + As[ty][tx] = (row < M && aCol < K) ? A[row * K + aCol] : 0.0f; + Bs[ty][tx] = (bRow < K && col < N) ? B[bRow * N + col] : 0.0f; + __syncthreads(); + + #pragma unroll + for (int k = 0; k < TILE_SIZE; k += 4) { + sum += As[ty][k] * Bs[k][tx]; + sum += As[ty][k + 1] * Bs[k + 1][tx]; + sum += As[ty][k + 2] * Bs[k + 2][tx]; + sum += As[ty][k + 3] * Bs[k + 3][tx]; + } + __syncthreads(); + } + + if (row < M && col < N) { + C[row * N + col] = sum; + } +} +""" + + +def run_matmul_benchmark( + m: int = 1024, + n: int = 1024, + k: int = 1024, + device_id: int = 0, + num_iterations: int = 10, +) -> bool: + """Run matrix multiplication benchmark comparing nvmath vs custom kernel.""" + print("=" * 60) + print("Matrix Multiplication with Shared Memory (GEMM)") + print("=" * 60) + + # Initialize device and stream + device = Device(device_id) + device.set_current() + stream = device.create_stream() + print(f"\nDevice: {device.name}") + print(f"Compute Capability: sm_{device.arch}") + + # Make CuPy use our cuda.core stream + cp.cuda.ExternalStream(int(stream.handle)).use() + + # Compile custom kernel + arch = f"sm_{device.arch}" + program = Program(MATMUL_KERNEL, code_type="c++", options=ProgramOptions(arch=arch)) + kernel = program.compile(target_type="cubin").get_kernel("matmul_shared") + print("Custom kernel compiled ✓") + + # Setup + print(f"\nMatrix: A({m}x{k}) × B({k}x{n}) = C({m}x{n})") + total_ops = 2 * m * n * k + event_opts = EventOptions(enable_timing=True) + + # Allocate matrices + rng = cp.random.default_rng(42) + d_A = rng.random((m, k), dtype=cp.float32) + d_B = rng.random((k, n), dtype=cp.float32) + d_C_custom = cp.zeros((m, n), dtype=cp.float32) + + success = True + try: + # ------------------------------------------------------------------------- + # nvmath GEMM (cuBLASLt) + # ------------------------------------------------------------------------- + print("\n" + "-" * 60) + print("NVMATH (cuBLASLt) - plan once, execute many") + print("-" * 60) + + with nvmath_advanced.Matmul(d_A, d_B, stream=int(stream.handle)) as mm: + mm.plan() + d_C_nvmath = mm.execute() + stream.sync() + + start = stream.record(options=event_opts) + for _ in range(num_iterations): + d_C_nvmath = mm.execute() + end = stream.record(options=event_opts) + end.sync() + + nvmath_ms = (end - start) / num_iterations + nvmath_gflops = (total_ops / 1e9) / (nvmath_ms / 1e3) + print(f"Time: {nvmath_ms:.3f} ms | {nvmath_gflops:.2f} GFLOPS") + + # ------------------------------------------------------------------------- + # Custom kernel (tiled + shared memory + unroll) + # ------------------------------------------------------------------------- + print("\n" + "-" * 60) + print("CUSTOM KERNEL (tiled + shared memory + unroll)") + print("-" * 60) + + block = (TILE_SIZE, TILE_SIZE) + grid = ((n + TILE_SIZE - 1) // TILE_SIZE, (m + TILE_SIZE - 1) // TILE_SIZE) + config = LaunchConfig(grid=grid, block=block) + + launch( + stream, + config, + kernel, + d_A.data.ptr, + d_B.data.ptr, + d_C_custom.data.ptr, + np.int32(m), + np.int32(n), + np.int32(k), + ) + stream.sync() + + start = stream.record(options=event_opts) + for _ in range(num_iterations): + launch( + stream, + config, + kernel, + d_A.data.ptr, + d_B.data.ptr, + d_C_custom.data.ptr, + np.int32(m), + np.int32(n), + np.int32(k), + ) + end = stream.record(options=event_opts) + end.sync() + + custom_ms = (end - start) / num_iterations + custom_gflops = (total_ops / 1e9) / (custom_ms / 1e3) + print(f"Time: {custom_ms:.3f} ms | {custom_gflops:.2f} GFLOPS") + + # ------------------------------------------------------------------------- + # Verification + # ------------------------------------------------------------------------- + print("\n" + "-" * 60) + print("VERIFICATION") + print("-" * 60) + + d_C_ref = d_A @ d_B + + # Host-side verification: cp.allclose triggers NVRTC failure on sm_120 + # (ldexp_cexp undefined). Use asnumpy + np.allclose instead. + ref_host = cp.asnumpy(d_C_ref) + for name, d_C in [("nvmath", d_C_nvmath), ("custom", d_C_custom)]: + print(f"{name}: ", end="") + passed = np.allclose(cp.asnumpy(d_C), ref_host, rtol=1e-4, atol=1e-4) + print("Test PASSED" if passed else "Test FAILED") + success = success and passed + + return success + finally: + cp.cuda.Stream.null.use() + stream.close() + + +def main() -> bool: + """Entry point. Returns True if benchmark passed.""" + return run_matmul_benchmark() + + +if __name__ == "__main__": + success = main() + if not success: + sys.exit(1) diff --git a/python/2_CoreConcepts/matrixMulSharedMem/requirements.txt b/python/2_CoreConcepts/matrixMulSharedMem/requirements.txt new file mode 100644 index 00000000..2c6cf2b2 --- /dev/null +++ b/python/2_CoreConcepts/matrixMulSharedMem/requirements.txt @@ -0,0 +1,7 @@ +# Matrix Multiplication with Shared Memory (GEMM) Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 +numpy>=2.3.2 +nvmath-python[cu13]>=0.3.0 diff --git a/python/2_CoreConcepts/memoryResources/README.md b/python/2_CoreConcepts/memoryResources/README.md new file mode 100644 index 00000000..cc563fe5 --- /dev/null +++ b/python/2_CoreConcepts/memoryResources/README.md @@ -0,0 +1,139 @@ +# Sample: Memory Resources and Buffers (Python) + +## Description + +This sample demonstrates the `cuda.core` memory management model: a +`MemoryResource` owns a pool of memory and hands out `Buffer` objects that +can be passed to kernels, copied between resources with +`Buffer.copy_to()`, and viewed as NumPy or CuPy arrays through DLPack. The +script exercises three common resources side-by-side: + +1. **`DeviceMemoryResource`** - device-local GPU memory. Every `Device` + exposes a default pool via `Device.memory_resource`, and applications + can create additional pools explicitly. +2. **`PinnedMemoryResource`** - page-locked host memory, used here as the + input and output staging buffers around a GPU kernel (the canonical + pinned-H2D / compute / pinned-D2H pattern). +3. **`ManagedMemoryResource`** - unified memory that the driver migrates + between host and device on demand; host views see the GPU's writes + without an explicit copy. + +The same `scale_and_bias` kernel runs on each resource and every result is +verified on the host. + +## What You'll Learn + +- Creating and using `DeviceMemoryResource`, `PinnedMemoryResource`, and + `ManagedMemoryResource` +- Allocating `Buffer` objects from a resource with a bound stream +- Copying between buffers across resources with `Buffer.copy_to()` +- Taking zero-copy NumPy or CuPy views of a `Buffer` via DLPack +- Releasing buffers with stream-ordered `close(stream)` semantics + +## Key Libraries + +- [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) - Pythonic access to CUDA runtime, programs, and memory resources +- `cupy` - GPU array views of device buffers +- `numpy` - host array views of pinned and managed buffers + +## Key APIs + +### From `cuda.core` + +- `Device.memory_resource` - default memory pool attached to a device +- `DeviceMemoryResource`, `PinnedMemoryResource`, `ManagedMemoryResource` - allocate buffers of the corresponding memory kind +- `MemoryResource.allocate(nbytes, stream=...)` - returns a `Buffer` +- `Buffer.copy_to(dst_buffer, stream=...)` - async, stream-ordered copy +- `Buffer.close(stream)` - stream-ordered deallocation +- `Buffer` supports `__dlpack__` for zero-copy views + +### From CuPy and NumPy + +- `cp.from_dlpack()` / `np.from_dlpack()` - zero-copy array view of a `Buffer` + +### From `cuda_samples_utils` + +- `print_gpu_info()` - print device name and compute capability + +## Requirements + +### Hardware + +- NVIDIA GPU with Compute Capability 7.0 or higher +- Managed memory support (most discrete GPUs on Linux and Windows) + +### Software + +- CUDA Toolkit 13.0 or newer (matches `cuda-python` 13.x) +- Python 3.10 or newer +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## Installation + +Install the required packages from `requirements.txt`: + +```bash +cd /path/to/cuda-samples/python/2_CoreConcepts/memoryResources +pip install -r requirements.txt +``` + +The `requirements.txt` installs: + +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## How to Run + +### Basic usage + +```bash +cd cuda-samples/python/2_CoreConcepts/memoryResources +python memoryResources.py +``` + +### With custom parameters + +```bash +# Larger buffer size +python memoryResources.py --elements 1048576 + +# Use a specific GPU +python memoryResources.py --device 1 +``` + +## Expected Output + +``` +Device: +Compute Capability: + +[1] DeviceMemoryResource + PinnedMemoryResource (staging) + Pinned staging, device kernel, and copy_to verified + +[2] ManagedMemoryResource (unified memory) + GPU writes observed directly through the host-visible mapping + +[3] Explicit DeviceMemoryResource + Explicit DeviceMemoryResource allocation verified + +All memory resource demos passed. +``` + +**Note:** Device name and compute capability will vary based on your GPU. + +## Files + +- `memoryResources.py` - Python implementation using `cuda.core` memory resources +- `README.md` - This file +- `requirements.txt` - Sample dependencies +- `../../Utilities/cuda_samples_utils.py` - Common utilities (imported by this sample) + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [`cuda.core` memory API](https://nvidia.github.io/cuda-python/cuda-core/latest/api.html#memory-management) +- Upstream `cuda.core` example: [`memory_ops.py`](https://github.com/NVIDIA/cuda-python/blob/main/cuda_core/examples/memory_ops.py) +- Upstream `cuda.core` example: [`memory_pool_resources.py`](https://github.com/NVIDIA/cuda-python/blob/main/cuda_core/examples/memory_pool_resources.py) diff --git a/python/2_CoreConcepts/memoryResources/memoryResources.py b/python/2_CoreConcepts/memoryResources/memoryResources.py new file mode 100644 index 00000000..a4573a49 --- /dev/null +++ b/python/2_CoreConcepts/memoryResources/memoryResources.py @@ -0,0 +1,248 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Memory management with cuda.core: Buffers and Memory Resources + +Demonstrates the Memory Resource / Buffer abstraction in cuda.core: + + * ``DeviceMemoryResource`` - GPU-only memory (device pool) + * ``PinnedMemoryResource`` - page-locked host memory accessible by the GPU + * ``ManagedMemoryResource`` - unified memory that migrates between + host and device on demand + +Each resource hands out ``Buffer`` objects that can be: + * passed to kernels as pointers + * copied between each other with ``buffer.copy_to(...)`` + * viewed as NumPy or CuPy arrays via DLPack (``__dlpack__``) + +The kernel below performs a fused scale + bias on both a device buffer +and a pinned buffer, then we copy the result across resources to confirm +each pathway works end-to-end. +""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import cupy as cp + import numpy as np + from cuda.core import ( + Device, + DeviceMemoryResource, + LaunchConfig, + ManagedMemoryResource, + PinnedMemoryResource, + Program, + ProgramOptions, + launch, + ) + from cuda_samples_utils import print_gpu_info # noqa: E402 +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +SCALE_BIAS_KERNEL = r""" +extern "C" __global__ +void scale_and_bias(float* data, size_t N, float scale, float bias) { + const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; + const unsigned int stride = blockDim.x * gridDim.x; + for (size_t i = tid; i < N; i += stride) { + data[i] = data[i] * scale + bias; + } +} +""" + + +def demo_device_and_pinned(device, stream, kernel, size): + """Use pinned host memory as a staging area for a device-side kernel. + + Canonical H2D / compute / D2H pattern: + host (pinned) -> device -> launch -> device -> host (pinned) + """ + print("\n[1] DeviceMemoryResource + PinnedMemoryResource (staging)") + dtype = np.float32 + nbytes = size * dtype().itemsize + + # The device's built-in memory resource is a good default for GPU memory. + device_mr = device.memory_resource + pinned_mr = PinnedMemoryResource() + + pinned_in = pinned_mr.allocate(nbytes, stream=stream) + pinned_out = pinned_mr.allocate(nbytes, stream=stream) + device_buffer = device_mr.allocate(nbytes, stream=stream) + try: + # Wrap each Buffer as a typed array via DLPack (no copies). + pinned_in_view = np.from_dlpack(pinned_in).view(dtype=dtype) + pinned_out_view = np.from_dlpack(pinned_out).view(dtype=dtype) + + # Initialize host-side input. + pinned_in_view[:] = np.arange(size, dtype=dtype) + original = pinned_in_view.copy() + + # Stage H2D: pinned -> device. + pinned_in.copy_to(device_buffer, stream=stream) + + # Launch kernel on the device buffer. + config = LaunchConfig(grid=(size + 255) // 256, block=256) + launch( + stream, + config, + kernel, + device_buffer, + np.uint64(size), + np.float32(3.0), + np.float32(-0.5), + ) + + # Stage D2H: device -> pinned. + device_buffer.copy_to(pinned_out, stream=stream) + stream.sync() + + expected = original * 3.0 - 0.5 + assert np.allclose(pinned_out_view, expected), "H2D -> kernel -> D2H mismatch" + print(" Pinned staging, device kernel, and copy_to verified") + finally: + device_buffer.close(stream) + pinned_out.close(stream) + pinned_in.close(stream) + + +def demo_managed(device, stream, kernel, size): + """Allocate a managed (unified) buffer; kernel writes are visible on host.""" + print("\n[2] ManagedMemoryResource (unified memory)") + dtype = np.float32 + nbytes = size * dtype().itemsize + + managed_mr = ManagedMemoryResource() + managed_buffer = managed_mr.allocate(nbytes, stream=stream) + try: + managed_view = np.from_dlpack(managed_buffer).view(dtype=dtype) + + managed_view[:] = np.arange(size, dtype=dtype) + original = managed_view.copy() + # Before launching, make sure host writes have reached the GPU. + device.sync() + + config = LaunchConfig(grid=(size + 255) // 256, block=256) + launch( + stream, + config, + kernel, + managed_buffer, + np.uint64(size), + np.float32(0.5), + np.float32(10.0), + ) + stream.sync() + + # No explicit copy: the same numpy view observes the GPU's writes. + assert np.allclose(managed_view, original * 0.5 + 10.0), ( + "Managed memory result mismatch" + ) + print(" GPU writes observed directly through the host-visible mapping") + finally: + managed_buffer.close(stream) + + +def demo_explicit_device_pool(device, stream, kernel, size): + """Allocate from a user-created DeviceMemoryResource with default options.""" + print("\n[3] Explicit DeviceMemoryResource") + dtype = np.float32 + nbytes = size * dtype().itemsize + + # Explicitly create a pool tied to this device. Use .close() to tear it down. + explicit_mr = DeviceMemoryResource(device) + buffer = explicit_mr.allocate(nbytes, stream=stream) + try: + view = cp.from_dlpack(buffer).view(dtype=cp.float32) + view[:] = cp.arange(size, dtype=cp.float32) + device.sync() + + config = LaunchConfig(grid=(size + 255) // 256, block=256) + launch( + stream, + config, + kernel, + buffer, + np.uint64(size), + np.float32(1.0), + np.float32(100.0), + ) + stream.sync() + + expected = cp.arange(size, dtype=cp.float32) + 100.0 + assert cp.allclose(view, expected), "Explicit device pool result mismatch" + print(" Explicit DeviceMemoryResource allocation verified") + finally: + buffer.close(stream) + explicit_mr.close() + + +def main(): + import argparse + + parser = argparse.ArgumentParser( + description="Demonstrate cuda.core memory resources (Buffer + MR)" + ) + parser.add_argument( + "--elements", + type=int, + default=1 << 16, + help="Number of float32 elements per buffer (default: 65536)", + ) + parser.add_argument("--device", type=int, default=0, help="CUDA device id") + args = parser.parse_args() + + device = Device(args.device) + device.set_current() + print_gpu_info(device) + + stream = device.create_stream() + + try: + program_options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + program = Program(SCALE_BIAS_KERNEL, code_type="c++", options=program_options) + module = program.compile("cubin") + kernel = module.get_kernel("scale_and_bias") + + demo_device_and_pinned(device, stream, kernel, args.elements) + demo_managed(device, stream, kernel, args.elements) + demo_explicit_device_pool(device, stream, kernel, args.elements) + + print("\nDone") + return 0 + finally: + stream.close() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/2_CoreConcepts/memoryResources/requirements.txt b/python/2_CoreConcepts/memoryResources/requirements.txt new file mode 100644 index 00000000..5aa8ca4c --- /dev/null +++ b/python/2_CoreConcepts/memoryResources/requirements.txt @@ -0,0 +1,5 @@ +# Memory Resources Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 diff --git a/python/2_CoreConcepts/pageRank/README.md b/python/2_CoreConcepts/pageRank/README.md new file mode 100644 index 00000000..64ac4b48 --- /dev/null +++ b/python/2_CoreConcepts/pageRank/README.md @@ -0,0 +1,158 @@ +# Sample: PageRank Algorithm (Python) + +## Description + +Demonstrates GPU-accelerated PageRank computation for graph analysis using RAPIDS cuGraph, with cuda.core for device, stream, and GPU timing. This sample focuses on cuda.core integration with high-level libraries (cuGraph/cuDF); for custom kernel programming (Program, LaunchConfig, launch), see the blockwiseSum sample. + +## What You'll Learn + +- Graph representation using cuDF DataFrames for edge lists +- GPU-optimized PageRank via RAPIDS cuGraph library +- Performance comparison between cuGraph GPU and CPU reference implementation +- cuda.core device/stream management and GPU timing + +## Key Libraries + +- `cugraph` - RAPIDS GPU-accelerated graph analytics +- `cudf` - RAPIDS GPU DataFrame library +- `cuda.core` - Device, stream, and event APIs for GPU timing +- `cupy` - GPU array library (ExternalStream for cuDF/cuGraph) +- `numpy` - CPU reference implementation + +## Key APIs + +### From cuda.core: + +- `Device(0)` - Create device, `device.set_current()`, `device.create_stream()` +- `EventOptions(enable_timing=True)` - GPU timing via `stream.record()` +- `cp.cuda.ExternalStream(stream.handle).use()` - Make cuDF/cuGraph use cuda.core stream + +### From cuGraph: + +- `cugraph.Graph(directed=True)` - Create directed graph structure +- `Graph.from_cudf_edgelist()` - Build graph from edge list DataFrame +- `cugraph.pagerank()` - GPU-accelerated PageRank algorithm + +### From cuDF: + +- `cudf.DataFrame()` - GPU DataFrame for edge lists + +## Requirements + +### Hardware: + +- NVIDIA GPU with Compute Capability 7.0 or higher +- Minimum GPU memory: 512 MB (for 10K node graph) + +### Software: + +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- See requirements.txt for package dependencies + +## Installation + +```bash +cd /path/to/cuda-samples/python/2_CoreConcepts/pageRank +python3 -m venv venv +source venv/bin/activate +pip install -r requirements.txt +``` + +## How to Run + +```bash +python pageRank.py +``` + +## Algorithm + +The PageRank formula iteratively computes node importance: + +``` +PR(v) = (1-d)/N + d * Σ PR(u)/out_degree(u) +``` + +Where: +- `d` = damping factor (typically 0.85) +- `N` = total number of nodes +- Sum is over all nodes `u` that link to `v` + +## Expected Output + +``` +============================================================ +PageRank Algorithm (using RAPIDS cuGraph) +============================================================ + +Device: NVIDIA GeForce RTX ... +Compute Capability: sm_XX + +Graph Parameters: + Nodes: 10,000 + Avg edges/node: 15 + Total edges: ~150,000 + Avg in-degree: 14.9 + +------------------------------------------------------------ +GPU PageRank (RAPIDS cuGraph) +------------------------------------------------------------ +Time: X.XXX ms + +Top 5 nodes by PageRank: + 1. Node XXXXX: 0.XXXXXX + ... + +------------------------------------------------------------ +CPU PageRank (Reference) +------------------------------------------------------------ +Time: XXXX.XXX ms +Iterations: XX + +------------------------------------------------------------ +PERFORMANCE SUMMARY +------------------------------------------------------------ +GPU (cuGraph): X.XXX ms +CPU (Reference): XXXX.XXX ms +Speedup: XXXX.Xx + +------------------------------------------------------------ +VERIFICATION +------------------------------------------------------------ +GPU vs CPU PageRank scores: Test PASSED + +PageRank Properties: + Sum of scores: 1.000000 (should be ~1.0) + Sum check: ✓ + +Done +``` + +## Files + +- `pageRank.py` - Python implementation using RAPIDS cuGraph +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## Why cuGraph? + +RAPIDS cuGraph provides production-grade, GPU-accelerated graph analytics: + +- **Highly optimized** - Uses advanced GPU parallelization techniques +- **Scalable** - Handles graphs with billions of edges +- **Easy to use** - Simple Python API similar to NetworkX +- **Integrated** - Works seamlessly with cuDF, cuML, and other RAPIDS libraries + +## Applications + +- Web page ranking (original Google PageRank) +- Social network influence analysis +- Citation network analysis +- Recommendation systems +- Fraud detection in financial networks + +## See Also + +- [RAPIDS cuGraph Documentation](https://docs.rapids.ai/api/cugraph/stable/) +- [cuGraph GitHub Repository](https://github.com/rapidsai/cugraph) +- [RAPIDS Installation Guide](https://rapids.ai/start.html) diff --git a/python/2_CoreConcepts/pageRank/pageRank.py b/python/2_CoreConcepts/pageRank/pageRank.py new file mode 100644 index 00000000..97a74b94 --- /dev/null +++ b/python/2_CoreConcepts/pageRank/pageRank.py @@ -0,0 +1,346 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +PageRank Algorithm + +Demonstrates GPU-accelerated PageRank computation for graph analysis: +- Graph representation using edge lists and cuDF DataFrames +- GPU-optimized PageRank via RAPIDS cuGraph library +- Performance comparison: cuGraph GPU vs CPU reference + +Uses RAPIDS cuGraph for production-grade graph analytics on GPU. + +PageRank Algorithm: + PR(v) = (1-d)/N + d * sum(PR(u)/out_degree(u)) for all u linking to v + where d = damping factor (typically 0.85), N = number of nodes +""" + +import sys +import time +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import print_gpu_info, verify_array_result # noqa: E402 + +try: + import cudf + import cugraph + import cupy as cp + import numpy as np + from cuda.core import Device, EventOptions, Stream +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Install with: pip install -r requirements.txt") + sys.exit(1) + + +def generate_random_graph( + num_nodes: int, + avg_edges_per_node: int = 10, + seed: int = 42, +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Generate a random directed graph as edge list. + + Parameters + ---------- + num_nodes : int + Number of nodes in the graph + avg_edges_per_node : int + Average number of outgoing edges per node + seed : int + Random seed for reproducibility + + Returns + ------- + tuple[np.ndarray, np.ndarray, np.ndarray] + (sources, destinations, out_degree) arrays + """ + rng = np.random.default_rng(seed) + + sources_list: list[int] = [] + destinations_list: list[int] = [] + out_degree = np.zeros(num_nodes, dtype=np.int32) + + for src in range(num_nodes): + # Random number of outgoing edges (Poisson distribution) + n_edges = max(1, rng.poisson(avg_edges_per_node)) + n_edges = min(n_edges, num_nodes - 1) + # Random destinations (no self-loops); rejection sampling avoids O(N²) memory + dests: set[int] = set() + while len(dests) < n_edges: + d = int(rng.integers(0, num_nodes)) + if d != src: + dests.add(d) + dests = np.array(list(dests), dtype=np.int32) + for dst in dests: + sources_list.append(src) + destinations_list.append(dst) + out_degree[src] = len(dests) + + sources = np.array(sources_list, dtype=np.int32) + destinations = np.array(destinations_list, dtype=np.int32) + + return sources, destinations, out_degree + + +def pagerank_cpu( + sources: np.ndarray, + destinations: np.ndarray, + out_degree: np.ndarray, + num_nodes: int, + damping: float = 0.85, + max_iterations: int = 100, + tolerance: float = 1e-6, +) -> tuple[np.ndarray, int]: + """ + Compute PageRank on CPU using iterative method. + + Parameters + ---------- + sources : np.ndarray + Source nodes of edges + destinations : np.ndarray + Destination nodes of edges + out_degree : np.ndarray + Outgoing degree for each node + num_nodes : int + Number of nodes + damping : float + Damping factor (default: 0.85) + max_iterations : int + Maximum iterations + tolerance : float + Convergence tolerance + + Returns + ------- + tuple[np.ndarray, int] + (PageRank scores, iterations until convergence) + """ + # Build incoming edges list for each node + incoming: list[list[int]] = [[] for _ in range(num_nodes)] + for src, dst in zip(sources, destinations): + incoming[dst].append(src) + + # Initialize PageRank uniformly + pr = np.ones(num_nodes, dtype=np.float32) / num_nodes + pr_new = np.zeros(num_nodes, dtype=np.float32) + + base_score = (1.0 - damping) / num_nodes + + for iteration in range(max_iterations): + # Handle dangling nodes (nodes with no outgoing edges) + dangling_sum = np.sum(pr[out_degree == 0]) + dangling_contrib = damping * dangling_sum / num_nodes + + for v in range(num_nodes): + # Sum contributions from incoming neighbors + incoming_sum = 0.0 + for u in incoming[v]: + if out_degree[u] > 0: + incoming_sum += pr[u] / out_degree[u] + + pr_new[v] = base_score + damping * incoming_sum + dangling_contrib + + # Check convergence + diff = np.sum(np.abs(pr_new - pr)) + pr, pr_new = pr_new, pr + + if diff < tolerance: + return pr, iteration + 1 + + return pr, max_iterations + + +def run_pagerank_benchmark( + num_nodes: int = 10000, + avg_edges: int = 15, + max_iterations: int = 100, +) -> bool: + """ + Run PageRank benchmark comparing cuGraph GPU and CPU performance. + + Parameters + ---------- + num_nodes : int + Number of nodes in the graph + avg_edges : int + Average edges per node + max_iterations : int + Maximum PageRank iterations + + Returns + ------- + bool + True if benchmark succeeded + """ + print("=" * 60) + print("PageRank Algorithm (using RAPIDS cuGraph)") + print("=" * 60) + + # Initialize cuda.core device and stream + device = Device(0) + device.set_current() + stream: Stream = device.create_stream() + print() + print_gpu_info(device) + + # Make CuPy/cuDF use our cuda.core stream + cp.cuda.ExternalStream(int(stream.handle)).use() + + # Generate random graph + print("\nGraph Parameters:") + print(f" Nodes: {num_nodes:,}") + print(f" Avg edges/node: {avg_edges}") + + sources, destinations, out_degree = generate_random_graph( + num_nodes, avg_edges, seed=42 + ) + + total_edges = len(sources) + print(f" Total edges: {total_edges:,}") + print(f" Avg in-degree: {total_edges / num_nodes:.1f}") + + # ------------------------------------------------------------------------- + # GPU PageRank (cuGraph) + # ------------------------------------------------------------------------- + print("\n" + "-" * 60) + print("GPU PageRank (RAPIDS cuGraph)") + print("-" * 60) + + # Create cuGraph graph from edge list with store_transposed for optimal perf + gdf = cudf.DataFrame( + { + "src": sources, + "dst": destinations, + } + ) + G = cugraph.Graph(directed=True) + G.from_cudf_edgelist(gdf, source="src", destination="dst", store_transposed=True) + + event_opts = EventOptions(enable_timing=True) + + try: + # Warmup + _ = cugraph.pagerank(G, alpha=0.85, max_iter=100, tol=1e-5) + stream.sync() + + # Timed run using cuda.core events + start = stream.record(options=event_opts) + pr_result = cugraph.pagerank(G, alpha=0.85, max_iter=max_iterations, tol=1e-6) + end = stream.record(options=event_opts) + end.sync() + + gpu_time_ms = end - start + print(f"Time: {gpu_time_ms:.3f} ms") + + # Extract results sorted by vertex ID (to numpy for verification) + pr_df = pr_result.sort_values("vertex").reset_index(drop=True) + pr_gpu = pr_df["pagerank"].to_numpy() + + # Show top PageRank nodes + top_k = 5 + top_df = pr_result.nlargest(top_k, "pagerank") + print(f"\nTop {top_k} nodes by PageRank:") + for i, row in enumerate(top_df.to_pandas().itertuples()): + print(f" {i + 1}. Node {row.vertex:5d}: {row.pagerank:.6f}") + + # ------------------------------------------------------------------------- + # CPU PageRank + # ------------------------------------------------------------------------- + print("\n" + "-" * 60) + print("CPU PageRank (Reference)") + print("-" * 60) + + cpu_start = time.perf_counter() + pr_cpu, cpu_iters = pagerank_cpu( + sources, destinations, out_degree, num_nodes, max_iterations=max_iterations + ) + cpu_end = time.perf_counter() + + cpu_time_ms = (cpu_end - cpu_start) * 1000 + print(f"Time: {cpu_time_ms:.3f} ms") + print(f"Iterations: {cpu_iters}") + + # ------------------------------------------------------------------------- + # Performance Summary + # ------------------------------------------------------------------------- + print("\n" + "-" * 60) + print("PERFORMANCE SUMMARY") + print("-" * 60) + speedup = cpu_time_ms / gpu_time_ms + print(f"GPU (cuGraph): {gpu_time_ms:.3f} ms") + print(f"CPU (Reference): {cpu_time_ms:.3f} ms") + print(f"Speedup: {speedup:.1f}x") + + # ------------------------------------------------------------------------- + # Verification + # ------------------------------------------------------------------------- + print("\n" + "-" * 60) + print("VERIFICATION") + print("-" * 60) + + # Compare GPU and CPU results (cuGraph and CPU ref may converge differently) + print("GPU vs CPU PageRank scores: ", end="") + success = verify_array_result( + pr_gpu, pr_cpu, rtol=1e-2, atol=1e-4, verbose=True + ) + + # Verify PageRank properties + print("\nPageRank Properties:") + pr_sum = float(np.sum(pr_gpu)) + print(f" Sum of scores: {pr_sum:.6f} (should be ~1.0)") + + pr_min = float(np.min(pr_gpu)) + pr_max = float(np.max(pr_gpu)) + print(f" Min score: {pr_min:.6f}") + print(f" Max score: {pr_max:.6f}") + + # Check that sum is approximately 1 + sum_ok = abs(pr_sum - 1.0) < 0.01 + print(f" Sum check: {'✓' if sum_ok else '✗'}") + + success = success and sum_ok + return success + finally: + cp.cuda.Stream.null.use() + stream.close() + + +def main() -> None: + """Entry point.""" + success = run_pagerank_benchmark() + if success: + print("\nDone") + else: + print("\nBenchmark completed with errors") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/pageRank/requirements.txt b/python/2_CoreConcepts/pageRank/requirements.txt new file mode 100644 index 00000000..7ed31698 --- /dev/null +++ b/python/2_CoreConcepts/pageRank/requirements.txt @@ -0,0 +1,8 @@ +# PageRank Requirements (RAPIDS cuGraph) + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cugraph-cu13>=25.0.0 +cudf-cu13>=25.0.0 +cupy-cuda13x>=13.0.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/parallelHistogram/README.md b/python/2_CoreConcepts/parallelHistogram/README.md new file mode 100644 index 00000000..2820102d --- /dev/null +++ b/python/2_CoreConcepts/parallelHistogram/README.md @@ -0,0 +1,117 @@ +# Sample: Parallel Histogram with Atomics (Python) + +## Description + +Compute histograms on the GPU using atomic operations to handle concurrent updates from multiple threads. This sample demonstrates the modern **cuda.core API** for kernel compilation and launch, comparing two approaches: + +1. **Global Atomics** - All threads atomically update a single global histogram +2. **Privatized Histograms** - Each block uses shared memory, then merges to global + +## What You'll Learn + +- Compiling CUDA C kernels with `cuda.core.Program` +- Configuring kernel launches with `cuda.core.LaunchConfig` +- Launching kernels with `cuda.core.launch()` +- Using **atomic operations** (`atomicAdd`) for thread-safe updates +- Optimizing with **shared memory privatization** +- GPU timing with `cuda.core` Events + +## Key Concepts + +### Atomic Operations + +When multiple threads update the same histogram bin, a race condition occurs. Atomic operations ensure thread-safe updates: + +```cuda +atomicAdd(&histogram[data[i]], 1); // Thread-safe increment +``` + +### Global vs Privatized Atomics + +| Approach | Pros | Cons | +|----------|------|------| +| Global | Simple | High contention on popular bins | +| Privatized | Significantly faster | Extra shared memory, synchronization | + +## Key APIs + +### From `cuda.core`: + +- `Device` - Device management and context +- `Program` - Compile CUDA C source code +- `ProgramOptions` - Set architecture, optimization flags +- `LaunchConfig` - Configure grid and block dimensions +- `launch()` - Launch compiled kernel +- `Stream` - Async stream management +- `EventOptions` - Configure events for GPU timing +- `stream.record()` - Record events for timing + +### From `cupy`: + +- `cp.random.randint()` - Generate random data directly on GPU +- `cp.zeros()` - Allocate zeroed GPU arrays + +### CUDA Atomic Functions (in kernel): + +- `atomicAdd()` - Thread-safe addition + +## Requirements + +### Hardware: +- NVIDIA GPU with CUDA support + +### Software: +- CUDA Toolkit 13.0 or newer +- Python 3.10 or newer +- See `requirements.txt` for Python packages + +## Installation + +```bash +pip install -r requirements.txt +``` + +## How to Run + +```bash +python parallelHistogram.py +``` + +## Expected Output + +``` +============================================================ +Parallel Histogram with Atomics (cuda.core) +============================================================ + +Device: +Compute Capability: ComputeCapability(major=X, minor=Y) + +Compiling CUDA kernels with cuda.core.Program... + Compiled for architecture: sm_XY + +Generating 10,000,000 random values on GPU... + +Verifying correctness... + Global atomics: PASSED + Privatized atomics: PASSED + +Benchmarking (100 iterations)... + Global atomics: X.XXX ms + Privatized atomics: X.XXX ms + Speedup: XXx + +Test PASSED +``` + +## Files + +- `parallelHistogram.py` - Main sample using cuda.core +- `README.md` - This file +- `requirements.txt` - Dependencies + +## See Also + +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [CUDA Atomic Functions](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions) +- [CUDA Shared Memory](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory) diff --git a/python/2_CoreConcepts/parallelHistogram/parallelHistogram.py b/python/2_CoreConcepts/parallelHistogram/parallelHistogram.py new file mode 100644 index 00000000..d613ef5b --- /dev/null +++ b/python/2_CoreConcepts/parallelHistogram/parallelHistogram.py @@ -0,0 +1,237 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Parallel Histogram with Atomics using cuda.core + +This sample demonstrates GPU histogram computation using atomic operations, +showcasing the modern cuda.core API for: +- Kernel compilation (Program, ProgramOptions) +- Kernel launch configuration (LaunchConfig) +- Stream management (Stream) +- Event timing (EventOptions) + +Two histogram approaches are compared: +1. Global Atomics - All threads atomically update global memory +2. Privatized Histograms - Shared memory reduces global atomic contention +""" + +import sys + +try: + import cupy as cp + import numpy as np + from cuda.core import ( + Device, + EventOptions, + LaunchConfig, + Program, + ProgramOptions, + launch, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install: pip install -r requirements.txt") + sys.exit(1) + + +NUM_BINS = 256 + +# CUDA C source code for both histogram kernels +HISTOGRAM_KERNELS = r""" +// Global Atomics - simple but high contention on popular bins +extern "C" __global__ +void histogram_global(const unsigned char* data, unsigned int* histogram, int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + + for (int i = idx; i < n; i += stride) { + atomicAdd(&histogram[data[i]], 1); + } +} + +// Privatized - uses shared memory to reduce global atomic contention +extern "C" __global__ +void histogram_privatized(const unsigned char* data, unsigned int* histogram, int n) { + __shared__ unsigned int local_hist[256]; + + int tid = threadIdx.x; + int idx = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + + // Initialize shared memory + for (int i = tid; i < 256; i += blockDim.x) + local_hist[i] = 0; + __syncthreads(); + + // Accumulate into shared memory (fast) + for (int i = idx; i < n; i += stride) + atomicAdd(&local_hist[data[i]], 1); + __syncthreads(); + + // Merge to global (fewer atomics) + for (int i = tid; i < 256; i += blockDim.x) + if (local_hist[i] > 0) + atomicAdd(&histogram[i], local_hist[i]); +} +""" + + +def main(): + print("=" * 60) + print("Parallel Histogram with Atomics (cuda.core)") + print("=" * 60) + + # Initialize device using cuda.core + device = Device(0) + device.set_current() + print(f"\nDevice: {device.name}") + print(f"Compute Capability: {device.compute_capability}") + + # Create stream using cuda.core + stream = device.create_stream() + + # Make CuPy use the same stream for correct ordering (avoids null-stream sync) + cp.cuda.Stream.from_external(stream).use() + + try: + _run_histogram(device, stream) + finally: + cp.cuda.Stream.null.use() # Restore CuPy to default stream before closing + stream.close() + + +def _run_histogram(device, stream): + """Run histogram computation and benchmarking.""" + # Compile CUDA kernels using cuda.core.Program + print("\nCompiling CUDA kernels with cuda.core.Program...") + arch = f"sm_{device.arch}" + options = ProgramOptions(arch=arch) + program = Program(HISTOGRAM_KERNELS, code_type="c++", options=options) + object_code = program.compile("cubin") + + kernel_global = object_code.get_kernel("histogram_global") + kernel_privatized = object_code.get_kernel("histogram_privatized") + print(f" Compiled for architecture: {arch}") + + # Generate test data directly on GPU (more efficient than CPU->GPU copy) + n = 10_000_000 + print(f"\nGenerating {n:,} random values on GPU...") + data_gpu = cp.random.randint(0, 256, size=n, dtype=cp.uint8) + hist_gpu = cp.zeros(NUM_BINS, dtype=cp.uint32) + + # Compute reference histogram on CPU for verification + data_cpu = cp.asnumpy(data_gpu) + hist_cpu, _ = np.histogram(data_cpu, bins=NUM_BINS, range=(0, 256)) + hist_cpu = hist_cpu.astype(np.uint32) + + # Configure kernel launch using cuda.core.LaunchConfig + block_size = 256 + grid_size = min((n + block_size - 1) // block_size, 1024) + config = LaunchConfig(grid=(grid_size,), block=(block_size,)) + + print("\nVerifying correctness...") + + # Ensure CuPy allocations complete before kernel launch on our stream + stream.sync() + + # Launch global atomics kernel (hist_gpu is already zeros from cp.zeros) + launch( + stream, config, kernel_global, data_gpu.data.ptr, hist_gpu.data.ptr, np.int32(n) + ) + stream.sync() + + hist_global = cp.asnumpy(hist_gpu) + global_ok = np.array_equal(hist_cpu, hist_global) + print(f" Global atomics: {'PASSED' if global_ok else 'FAILED'}") + + # Reset histogram and launch privatized kernel (fill on same stream) + hist_gpu.fill(0) + launch( + stream, + config, + kernel_privatized, + data_gpu.data.ptr, + hist_gpu.data.ptr, + np.int32(n), + ) + stream.sync() + + hist_privatized = cp.asnumpy(hist_gpu) + privatized_ok = np.array_equal(hist_cpu, hist_privatized) + print(f" Privatized atomics: {'PASSED' if privatized_ok else 'FAILED'}") + + if not (global_ok and privatized_ok): + sys.exit(1) + + # Benchmark using cuda.core Events (explicit Event objects recorded on stream) + print("\nBenchmarking (100 iterations)...") + num_iterations = 100 + event_opts = EventOptions(enable_timing=True) + start_event = device.create_event(options=event_opts) + end_event = device.create_event(options=event_opts) + + # Benchmark global atomics + stream.record(start_event) + for _ in range(num_iterations): + hist_gpu.fill(0) + launch( + stream, + config, + kernel_global, + data_gpu.data.ptr, + hist_gpu.data.ptr, + np.int32(n), + ) + stream.record(end_event) + end_event.sync() + time_global = (end_event - start_event) / num_iterations + + # Benchmark privatized + stream.record(start_event) + for _ in range(num_iterations): + hist_gpu.fill(0) + launch( + stream, + config, + kernel_privatized, + data_gpu.data.ptr, + hist_gpu.data.ptr, + np.int32(n), + ) + stream.record(end_event) + end_event.sync() + time_privatized = (end_event - start_event) / num_iterations + + print(f" Global atomics: {time_global:.3f} ms") + print(f" Privatized atomics: {time_privatized:.3f} ms") + print(f" Speedup: {time_global / time_privatized:.1f}x") + + print("\nTest PASSED") + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/parallelHistogram/requirements.txt b/python/2_CoreConcepts/parallelHistogram/requirements.txt new file mode 100644 index 00000000..da3c7a95 --- /dev/null +++ b/python/2_CoreConcepts/parallelHistogram/requirements.txt @@ -0,0 +1,7 @@ +# Parallel Histogram with Atomics Sample Requirements +# Requires Python 3.10+, CUDA Toolkit 13.0+ + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +numpy>=2.3.2 +cupy-cuda13x>=13.0.0 diff --git a/python/2_CoreConcepts/parallelReduction/README.md b/python/2_CoreConcepts/parallelReduction/README.md new file mode 100644 index 00000000..d25712d3 --- /dev/null +++ b/python/2_CoreConcepts/parallelReduction/README.md @@ -0,0 +1,119 @@ +# Sample: Parallel Reduction (Python) + +## Description + +Efficiently sum a large array on GPU using parallel reduction. This sample demonstrates: +1. **Custom CUDA kernel** showing reduction tree pattern and synchronization +2. **cuda.compute.reduce_into()** for production-ready reduction + +## What You'll Learn + +- **Reduction tree pattern**: Divide-and-conquer parallel algorithm +- **Thread synchronization**: Using `__syncthreads()` for coordination +- **Avoiding warp divergence**: Sequential thread IDs vs strided IDs + +## Key Concepts + +### Reduction Tree Pattern + +Parallel reduction uses a tree-based approach where each iteration halves active elements: + +``` +Initial: [a0, a1, a2, a3, a4, a5, a6, a7] +Step 1: [a0+a4, a1+a5, a2+a6, a3+a7] threads 0-3 active +Step 2: [a0+a2+a4+a6, a1+a3+a5+a7] threads 0-1 active +Step 3: [sum of all] thread 0 only +``` + +This requires only `log2(N)` steps to reduce N elements. + +### Avoiding Warp Divergence + +```c +// Good: Sequential thread IDs (warps stay coherent) +if (tid < s) { + sdata[tid] += sdata[tid + s]; +} + +// Bad: Strided IDs (causes warp divergence) +if (tid % (2 * s) == 0) { // Don't do this! + sdata[tid] += sdata[tid + s]; +} +``` + +## Requirements + +### Hardware + +- NVIDIA GPU with CUDA support + +### Software + +- CUDA Toolkit 13.0+ +- Python 3.10+ +- `cuda-python` (13.0.0+) +- `cuda-core` (>=0.6.0) +- `cuda-cccl` (1.0.0+) +- `cupy` (13.0.0+) +- `numpy` (>=2.3.2) + +## Installation + +```bash +pip install -r requirements.txt +``` + +## How to Run + +```bash +python parallelReduction.py +``` + +## Expected Output + +``` +====================================================================== +Parallel Reduction - Efficient GPU Array Summation +====================================================================== + +Device: +Compute Capability: + +Array size: 1,048,576 elements (4.2 MB) +Expected sum: + +Compiling custom CUDA kernel... + +====================================================================== +PART 1: Custom Kernel (Educational) +====================================================================== + +Reduction tree kernel: +Expected: +Error: +Time: ms + +====================================================================== +PART 2: cuda.compute.reduce_into() (Production) +====================================================================== + +cuda.compute result: +Expected: +Error: +Time: ms + +Test PASSED! +``` + +Note: Exact values vary due to random input data. `cuda.compute.reduce_into()` is typically faster than the custom kernel because it calls CUB's `DeviceReduce`, which uses highly tuned, architecture‑specific kernels and optimized memory access patterns. + +## Files + +- `parallelReduction.py` - Custom kernel + cuda.compute comparison +- `README.md` - This documentation +- `requirements.txt` - Python dependencies + +## See Also + +- [Mark Harris - Optimizing Parallel Reduction in CUDA](https://developer.download.nvidia.com/assets/cuda/files/reduction.pdf) +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) diff --git a/python/2_CoreConcepts/parallelReduction/parallelReduction.py b/python/2_CoreConcepts/parallelReduction/parallelReduction.py new file mode 100644 index 00000000..9f853034 --- /dev/null +++ b/python/2_CoreConcepts/parallelReduction/parallelReduction.py @@ -0,0 +1,375 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Parallel Reduction using cuda.core and cuda.compute + +Demonstrates efficient parallel summation of large arrays on GPU: +1. Custom CUDA kernel showing reduction tree pattern and synchronization +2. cuda.compute.reduce_into() for production-ready reduction + +Key Concepts: +- Reduction tree pattern: Divide-and-conquer parallel algorithm +- Thread synchronization: Using __syncthreads() for coordination +- Sequential thread IDs: How to avoid warp divergence +- cuda.core Stream integration with CuPy via ExternalStream +""" + +import math +import sys +from pathlib import Path + +# Add Utilities to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import cupy as cp + import numpy as np + from cuda.compute import OpKind, reduce_into + from cuda.core import ( + Device, + Kernel, + LaunchConfig, + Program, + ProgramOptions, + Stream, + launch, + ) + from cuda_samples_utils import print_gpu_info, verify_array_result +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +# ============================================================================= +# CUDA Kernel: Parallel Reduction (optimized - no warp divergence) +# ============================================================================= +REDUCTION_KERNEL: str = r""" +extern "C" __global__ +void reduce_sum(const float* __restrict__ input, + float* __restrict__ output, int n) { + /* + * Parallel reduction using grid-stride loop (canonical pattern) and + * sequential thread IDs for the reduction tree (avoids warp divergence). + * + * Grid-stride loop: each thread processes multiple elements + * for (i = tid; i < n; i += gridDim.x * blockDim.x) + * + * Reduction tree: sequential addressing keeps warps coherent. + */ + extern __shared__ float sdata[]; + + unsigned int tid = threadIdx.x; + unsigned int grid_stride = (unsigned int)gridDim.x * blockDim.x; + + float sum = 0.0f; + for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; + i += grid_stride) { + sum += input[i]; + } + sdata[tid] = sum; + __syncthreads(); + + // Reduction in shared memory (sequential addressing - no divergence) + for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) { + if (tid < s) { + sdata[tid] += sdata[tid + s]; + } + __syncthreads(); // Wait for all threads before next iteration + } + + // Thread 0 writes block result + if (tid == 0) { + output[blockIdx.x] = sdata[0]; + } +} +""" + + +def compile_kernel(device: Device) -> Kernel: + """Compile the reduction kernel for the given device.""" + arch = f"sm_{device.arch}" + options = ProgramOptions(arch=arch) + program = Program(REDUCTION_KERNEL, code_type="c++", options=options) + return program.compile(target_type="cubin").get_kernel("reduce_sum") + + +def reduction_stage_output_counts(n: int, block_size: int) -> list[int]: + """Lengths of intermediate arrays for each multi-launch reduction stage.""" + counts: list[int] = [] + while n > 1: + num_blocks = math.ceil(n / block_size) + counts.append(num_blocks) + n = num_blocks + return counts + + +def reduce_custom( + stream: Stream, + kernel: Kernel, + d_input: cp.ndarray, + block_size: int = 256, + sync: bool = True, + work_buffers: list[cp.ndarray] | None = None, +) -> float | cp.ndarray: + """ + Perform parallel reduction using custom CUDA kernel. + + Uses multiple kernel launches to reduce array to single value. + Each launch reduces by factor of block_size. + + When sync=True (default), syncs and returns the scalar result. + When sync=False, returns the 1-element array without syncing; + caller must sync before reading (avoids host overhead in benchmarks). + + work_buffers: optional list of device arrays, one per stage, with length + at least each stage's output count (from ``reduction_stage_output_counts``). + When provided, avoids per-call allocation (e.g. for benchmarking). + """ + n = len(d_input) + current = d_input + stage = 0 + + if work_buffers is not None: + expected_counts = reduction_stage_output_counts(n, block_size) + if len(work_buffers) != len(expected_counts): + msg = ( + f"work_buffers length {len(work_buffers)} != " + f"{len(expected_counts)} stages" + ) + raise ValueError(msg) + + while n > 1: + num_blocks = math.ceil(n / block_size) + if work_buffers is not None: + d_output = work_buffers[stage] + if d_output.size < num_blocks: + msg = f"work_buffers[{stage}] size {d_output.size} < {num_blocks}" + raise ValueError(msg) + if d_output.size != num_blocks: + d_output = d_output[:num_blocks] + else: + d_output = cp.empty(num_blocks, dtype=cp.float32) + + config = LaunchConfig( + grid=(num_blocks, 1, 1), + block=(block_size, 1, 1), + shmem_size=block_size * 4, # float = 4 bytes + ) + + launch( + stream, + config, + kernel, + current.data.ptr, + d_output.data.ptr, + np.int32(n), + ) + + current = d_output + n = num_blocks + stage += 1 + + if sync: + stream.sync() + return float(current[0]) + return current + + +def benchmark_custom( + stream: Stream, + kernel: Kernel, + d_input: cp.ndarray, + num_runs: int = 10, + block_size: int = 256, +) -> tuple[float, float]: + """Benchmark custom reduction kernel using cuda.core events.""" + stage_counts = reduction_stage_output_counts(len(d_input), block_size) + work_buffers = [cp.empty(c, dtype=cp.float32) for c in stage_counts] + + # Warmup run (with sync to get valid result) + _ = reduce_custom( + stream, kernel, d_input, block_size=block_size, work_buffers=work_buffers + ) + + event_opts = {"enable_timing": True} + start_event = stream.device.create_event(options=event_opts) + end_event = stream.device.create_event(options=event_opts) + + times: list[float] = [] + result = 0.0 + + for _ in range(num_runs): + stream.record(start_event) + d_result = reduce_custom( + stream, + kernel, + d_input, + block_size=block_size, + sync=False, + work_buffers=work_buffers, + ) + stream.record(end_event) + end_event.sync() + result = float(d_result[0]) + + times.append(end_event - start_event) + + return result, float(np.mean(times)) + + +def benchmark_cuda_compute( + stream: Stream, + d_input: cp.ndarray, + num_runs: int = 10, +) -> tuple[float, float]: + """Benchmark cuda.compute.reduce_into() using cuda.core events.""" + h_init = np.array([0.0], dtype=np.float32) + + # Warmup (includes JIT compilation) + d_warmup = cp.empty(1, dtype=cp.float32) + reduce_into( + d_in=d_input, + d_out=d_warmup, + op=OpKind.PLUS, + num_items=len(d_input), + h_init=h_init, + stream=stream, + ) + stream.sync() + + d_output = cp.empty(1, dtype=cp.float32) + event_opts = {"enable_timing": True} + start_event = stream.device.create_event(options=event_opts) + end_event = stream.device.create_event(options=event_opts) + + times: list[float] = [] + result = 0.0 + + for _ in range(num_runs): + stream.record(start_event) + reduce_into( + d_in=d_input, + d_out=d_output, + op=OpKind.PLUS, + num_items=len(d_input), + h_init=h_init, + stream=stream, + ) + stream.record(end_event) + end_event.sync() + + result = float(d_output[0]) + times.append(end_event - start_event) + + return result, float(np.mean(times)) + + +def main() -> bool: + """Main function demonstrating parallel reduction.""" + print("=" * 70) + print("Parallel Reduction - Efficient GPU Array Summation") + print("=" * 70) + + device = Device(0) + device.set_current() + stream = device.create_stream() + cp_stream = cp.cuda.ExternalStream(int(stream.handle)) + + print() + print_gpu_info(device) + + array_size = 1 << 20 # 1M elements + h_input = np.random.rand(array_size).astype(np.float32) + expected_sum = float(np.sum(h_input)) + + print(f"\nArray size: {array_size:,} elements ({array_size * 4 / 1e6:.1f} MB)") + print(f"Expected sum: {expected_sum:.6f}") + + print("\nCompiling custom CUDA kernel...") + kernel = compile_kernel(device) + + try: + with cp_stream: + d_input = cp.asarray(h_input) + + # ====================================================================== + # Part 1: Custom Kernel + # ====================================================================== + print("\n" + "=" * 70) + print("PART 1: Custom Kernel (Educational)") + print("=" * 70) + + result, time_ms = benchmark_custom(stream, kernel, d_input) + + print(f"\nReduction tree kernel: {result:>14.2f}") + print(f"Expected: {expected_sum:>14.2f}") + print(f"Time: {time_ms:>14.3f} ms") + + # ====================================================================== + # Part 2: cuda.compute (Production) + # ====================================================================== + print("\n" + "=" * 70) + print("PART 2: cuda.compute.reduce_into() (Production)") + print("=" * 70) + + result_cc, time_cc = benchmark_cuda_compute(stream, d_input) + + print(f"\ncuda.compute result: {result_cc:>14.2f}") + print(f"Expected: {expected_sum:>14.2f}") + print(f"Time: {time_cc:>14.3f} ms") + + # Verify both results using principled rtol/atol + with cp_stream: + d_expected = cp.array([expected_sum], dtype=cp.float32) + custom_ok = verify_array_result( + cp.array([result], dtype=cp.float32), + d_expected, + rtol=1e-5, + atol=1e-8, + verbose=False, + ) + compute_ok = verify_array_result( + cp.array([result_cc], dtype=cp.float32), + d_expected, + rtol=1e-5, + atol=1e-8, + verbose=False, + ) + if custom_ok and compute_ok: + print("\nTest PASSED!") + return True + else: + print("\nTest FAILED - Error too large!") + return False + finally: + stream.close() + + +if __name__ == "__main__": + sys.exit(0 if main() else 1) diff --git a/python/2_CoreConcepts/parallelReduction/requirements.txt b/python/2_CoreConcepts/parallelReduction/requirements.txt new file mode 100644 index 00000000..d0b1eff5 --- /dev/null +++ b/python/2_CoreConcepts/parallelReduction/requirements.txt @@ -0,0 +1,7 @@ +# Parallel Reduction Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cuda-cccl>=1.0.0 +cupy-cuda13x>=13.0.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/prefixSum/README.md b/python/2_CoreConcepts/prefixSum/README.md new file mode 100644 index 00000000..d60ba56e --- /dev/null +++ b/python/2_CoreConcepts/prefixSum/README.md @@ -0,0 +1,83 @@ +# Prefix Sum (Scan) + +Demonstrates parallel prefix sum (scan) algorithms using cuda.compute with cuda.core stream management. + +## Overview + +- Inclusive scan: `output[i] = [init_value] + input[0] + input[1] + ... + input[i]` +- Exclusive scan: `output[i] = init_value + input[0] + input[1] + ... + input[i-1]` +- Uses cuda.compute APIs for optimized CUB-based implementations +- Uses cuda.core APIs for device and stream management +- Demonstrates CuPy integration via `ExternalStream` + +## Requirements + +### Hardware + +- NVIDIA GPU with CUDA support + +### Software + +- CUDA Toolkit 13.0+ +- Python 3.10+ +- `cuda-python` (13.0.0+) +- `cuda-core` (>=0.6.0) +- `cuda-cccl` (1.0.0+) +- `cupy-cuda13x` (13.0.0+) +- `numpy` (>=2.3.2) + +## Usage + +```bash +# Create and activate virtual environment +python -m venv venv +source venv/bin/activate # Linux/macOS +# venv\Scripts\activate # Windows + +# Install dependencies +pip install -r requirements.txt + +# Run sample +python prefixSum.py +``` + +## Key Concepts + +| Scan Type | Formula | First Element | +|-----------|---------|---------------| +| Inclusive | `output[i] = [init_value] + Σ input[0..i]` | `[init_value] + input[0]` | +| Exclusive | `output[i] = init_value + Σ input[0..i-1]` | `init_value` (typically `0`, the identity for sum) | + +### Stream Management + +This sample demonstrates proper stream usage across libraries: + +```python +# Create stream with cuda.core +stream = device.create_stream() + +# Wrap for CuPy compatibility (requires int handle) +cp_stream = cp.cuda.ExternalStream(int(stream.handle)) + +# Use with CuPy operations +with cp_stream: + d_input = cp.asarray(data) + d_output = cp.empty_like(d_input) + +# Pass to cuda.compute +inclusive_scan( + d_in=d_input, + d_out=d_output, + op=OpKind.PLUS, + init_value=None, + num_items=len(d_input), + stream=stream, +) +``` + +## Applications + +- Stream compaction +- Radix sort +- Histogram computation +- Polynomial evaluation diff --git a/python/2_CoreConcepts/prefixSum/prefixSum.py b/python/2_CoreConcepts/prefixSum/prefixSum.py new file mode 100644 index 00000000..8ca413e8 --- /dev/null +++ b/python/2_CoreConcepts/prefixSum/prefixSum.py @@ -0,0 +1,199 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Prefix Sum (Scan) + +Demonstrates parallel prefix sum algorithms using cuda.compute: +- Inclusive scan: output[i] = [init_value] + input[0] + ... + input[i] +- Exclusive scan: output[i] = init_value + input[0] + ... + input[i-1] + +Uses cuda.compute APIs for optimized CUB-based scan operations. +Uses cuda.core APIs for device and stream management. +""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import cupy as cp + import numpy as np + from cuda.compute import OpKind, exclusive_scan, inclusive_scan + from cuda.core import Device, EventOptions + from cuda_samples_utils import print_gpu_info, verify_array_result +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +def main() -> bool: + """Run prefix sum sample. Returns True if all tests passed.""" + print("=" * 60) + print("Prefix Sum (Scan) - Using cuda.compute") + print("=" * 60) + + device = Device(0) + device.set_current() + stream = device.create_stream() + cp_stream = cp.cuda.ExternalStream(int(stream.handle)) + + ok = True + try: + print() + print_gpu_info(device) + + h_input = np.array([3, 1, 4, 1, 5, 9, 2, 6], dtype=np.int32) + init_value = np.array([0], dtype=np.int32) + + # ========================================================================= + # Inclusive Scan + # ========================================================================= + print("\n" + "-" * 60) + print("INCLUSIVE SCAN") + print("-" * 60) + print( + "Formula: output[i] = [init_value] + input[0] + input[1] + ... + input[i]" + ) + + with cp_stream: + d_input = cp.asarray(h_input) + d_output = cp.empty_like(d_input) + + print(f"\nInput: {h_input.tolist()}") + + inclusive_scan( + d_in=d_input, + d_out=d_output, + op=OpKind.PLUS, + init_value=None, + num_items=len(h_input), + stream=stream, + ) + stream.sync() + print(f"Output: {cp.asnumpy(d_output).tolist()}") + + with cp_stream: + expected = cp.asarray(np.cumsum(h_input)) + ok &= verify_array_result(d_output, expected, rtol=0, atol=0) + + # ========================================================================= + # Exclusive Scan + # ========================================================================= + print("\n" + "-" * 60) + print("EXCLUSIVE SCAN") + print("-" * 60) + print("Formula: output[i] = init_value + input[0] + ... + input[i-1]") + + with cp_stream: + d_output = cp.empty_like(d_input) + + print(f"\nInput: {h_input.tolist()}") + + exclusive_scan( + d_in=d_input, + d_out=d_output, + op=OpKind.PLUS, + init_value=init_value, + num_items=len(h_input), + stream=stream, + ) + stream.sync() + print(f"Output: {cp.asnumpy(d_output).tolist()}") + + with cp_stream: + expected = cp.asarray(np.concatenate([init_value, np.cumsum(h_input)[:-1]])) + ok &= verify_array_result(d_output, expected, rtol=0, atol=0) + + # ========================================================================= + # Large Array Performance + # ========================================================================= + print("\n" + "-" * 60) + print("PERFORMANCE (10M elements)") + print("-" * 60) + + N = 10_000_000 + with cp_stream: + d_large_in = cp.ones(N, dtype=np.int32) + d_large_out = cp.empty_like(d_large_in) + + inclusive_scan( + d_in=d_large_in, + d_out=d_large_out, + op=OpKind.PLUS, + init_value=None, + num_items=N, + stream=stream, + ) + stream.sync() + + event_opts = EventOptions(enable_timing=True) + start_event = device.create_event(options=event_opts) + end_event = device.create_event(options=event_opts) + + num_iterations = 10 + stream.record(start_event) + for _ in range(num_iterations): + inclusive_scan( + d_in=d_large_in, + d_out=d_large_out, + op=OpKind.PLUS, + init_value=None, + num_items=N, + stream=stream, + ) + stream.record(end_event) + end_event.sync() + elapsed_ms = (end_event - start_event) / num_iterations + + print(f"Inclusive scan: {elapsed_ms:.3f} ms") + print(f"Throughput: {N / elapsed_ms / 1e6:.1f} M elements/ms") + + # ========================================================================= + # Summary + # ========================================================================= + print("\n" + "=" * 60) + print("KEY CONCEPTS") + print("=" * 60) + print("• Inclusive: output[i] includes input[i]") + print("• Exclusive: output[i] excludes input[i], starts with init_value") + print("• cuda.compute provides CUB-based optimized implementations") + print("• cuda.core Stream integrates with CuPy via ExternalStream") + print("• Applications: stream compaction, radix sort, histograms") + print("=" * 60) + return ok + finally: + cp.cuda.Stream.null.use() + stream.close() + + +if __name__ == "__main__": + success = main() + if not success: + sys.exit(1) diff --git a/python/2_CoreConcepts/prefixSum/requirements.txt b/python/2_CoreConcepts/prefixSum/requirements.txt new file mode 100644 index 00000000..2bb41bf7 --- /dev/null +++ b/python/2_CoreConcepts/prefixSum/requirements.txt @@ -0,0 +1,8 @@ +# Prefix Sum Sample Requirements +# Requires Python 3.10+, CUDA Toolkit 13.0+ + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cuda-cccl>=1.0.0 +cupy-cuda13x>=13.0.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/processCheckpoint/README.md b/python/2_CoreConcepts/processCheckpoint/README.md new file mode 100644 index 00000000..74c4f2c3 --- /dev/null +++ b/python/2_CoreConcepts/processCheckpoint/README.md @@ -0,0 +1,206 @@ +# processCheckpoint (Python) + +## Description + +This sample demonstrates how to use the **CUDA process checkpoint API** +via `cuda.core.checkpoint.Process` to suspend, capture, and restore the +CUDA state of a running Linux process. + +CUDA process checkpointing is the driver-level primitive that powers +CRIU + `cuda-checkpoint` integration. + +The sample: + +1. Allocates a GPU buffer and fills it with a deterministic pattern + via a small kernel. +2. Reads the buffer back to host and computes a SHA-256 hash. +3. Runs the full checkpoint lifecycle on its own process: + `lock → checkpoint → restore → unlock`. +4. Reads the buffer back again and verifies that the hash is + unchanged, proving that GPU memory contents survived the round + trip. + +The sample prints the CUDA process state after each step so the +full state machine is visible: + +``` + lock() checkpoint() restore() unlock() +running ---------> locked ------------> checkpointed -----------> locked ---------> running +``` + +## What You'll Learn + +- Creating a `cuda.core.checkpoint.Process` for the current process + by PID and observing its `.state` transitions. +- Running the full `lock → checkpoint → restore → unlock` cycle with + a lock timeout. +- The fact that `restore()` leaves the process in the `locked` state; + you must still call `unlock()` to return to `running`. +- Verifying that GPU memory is preserved across the checkpoint + round-trip by comparing SHA-256 hashes of the buffer before and + after. +- The rough cost of each step (checkpoint and restore dominate and + scale with the device-memory footprint being captured). + +## Key Libraries + +- [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) + - device management, memory allocation, kernel compilation and + launch, and the `checkpoint.Process` wrapper. +- [`cuda.bindings`](https://nvidia.github.io/cuda-python/cuda-bindings/latest/) + - used directly for a pageable `cuMemcpyDtoH`. + +## Key APIs + +### From `cuda.core.checkpoint` + +- `checkpoint.Process(pid)` - create a handle to a CUDA process by + PID. Accepts `os.getpid()` for the self-checkpoint case shown + here. +- `Process.state` - one of `"running"`, `"locked"`, `"checkpointed"`, + or `"failed"`. +- `Process.lock(timeout_ms=…)` - block further CUDA API calls on the + process; completes already-submitted work. Always pass a non-zero + timeout to avoid deadlocks. +- `Process.checkpoint()` - copy device memory to host-side driver + allocations and release GPU resources. Process state becomes + `checkpointed`. +- `Process.restore(gpu_mapping=None)` - re-acquire GPU resources and + copy memory back to device. Leaves the process in the `locked` + state. +- `Process.unlock()` - return the process to `running`. +- `Process.restore_thread_id` - thread ID that `restore()` must be + called from in the target process (not used in the self-checkpoint + case here). + +### From `cuda.core` + +- `Device.set_current()` / `Device.memory_resource.allocate(...)` / + `Stream`, `LaunchConfig`, `Program`, `launch` - standard device, + compile, and launch primitives used to produce the buffer + contents. + +### From `cuda.bindings.driver` + +- `cuMemcpyDtoH(host_ptr, device_handle, nbytes)` - synchronous D2H + copy into a pageable host buffer. + +## Requirements + +### Hardware + +- Any NVIDIA GPU supported by CUDA process checkpointing. CUDA + checkpointing is currently limited to x86-64 Linux. + +### Software + +- Linux (the CUDA checkpoint API is Linux-only). +- NVIDIA driver with CUDA process checkpoint support. +- CUDA Toolkit 13.0 or newer. +- Python 3.10 or newer. +- `cuda-core >= 1.0.0`. + +## Installation + +Install the required packages from `requirements.txt`: + +```bash +cd /path/to/cuda-samples/python/2_CoreConcepts/processCheckpoint +pip install -r requirements.txt +``` + +## How to Run + +### Basic usage + +```bash +python processCheckpoint.py +``` + +### Larger GPU footprint to see checkpoint time scale + +```bash +python processCheckpoint.py --buffer-mib 512 +``` + +### Use a specific GPU + +```bash +python processCheckpoint.py --device 1 +``` + +### All options + +``` +--device CUDA device ID (default: 0) +--buffer-mib GPU buffer size in MiB (default: 16) +--lock-timeout-ms Timeout passed to Process.lock in ms (default: 5000) +``` + +## Expected Output + +On an RTX 4090 with a 16 MiB buffer: + +``` +[Process Checkpoint Sample using CUDA Core API] +PID: 748330 +Device: NVIDIA GeForce RTX 4090 +Compute Capability: sm_89 +Buffer size: 16 MiB +Lock timeout: 5000 ms + +Compiling kernel ... +Writing deterministic pattern to GPU buffer ... +Buffer hash (before): b045f7975dc23352 + +Running checkpoint lifecycle on self ... + +step duration (ms) state after +-------------------------------------------------- +initial - running +lock 0.578 locked +checkpoint 268.369 checkpointed +restore 235.024 locked +unlock 1.648 running +-------------------------------------------------- +total 505.618 + +Buffer hash (before): b045f7975dc23352 +Buffer hash (after): b045f7975dc23352 + +PASS: GPU buffer contents survived checkpoint/restore. + +Done +``` + +**What to look for:** + +- The **four state transitions** are all observable: `running → +locked → checkpointed → locked → running`. Note that `restore()` + leaves the process in `locked`, not `running`. +- The **checkpoint and restore steps dominate** the wall-clock time + (hundreds of ms even for a small buffer) - they copy GPU memory to + and from driver-managed host allocations. Increasing + `--buffer-mib` visibly increases the checkpoint time. +- The `lock` and `unlock` steps are essentially free (sub-ms) - they + just flip the process state. +- The SHA-256 **hashes before and after match**, proving the GPU + memory contents survived the round trip. + +Exact timings vary with GPU model, driver version, system load, and +the size of the device memory footprint being captured. + +## Files + +- `processCheckpoint.py` - Python implementation using `cuda.core.checkpoint` +- `README.md` - This file +- `requirements.txt` - Sample dependencies + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [`NVIDIA/cuda-checkpoint`](https://github.com/NVIDIA/cuda-checkpoint) + - the CUDA checkpoint/restore utility, the CRIU plugin, and C + reference programs (`r570-features.c`, `r580-migration-api.c`). +- [Checkpointing CUDA Applications with CRIU](https://developer.nvidia.com/blog/checkpointing-cuda-applications-with-criu/) + - NVIDIA technical blog post on the broader CRIU workflow. diff --git a/python/2_CoreConcepts/processCheckpoint/processCheckpoint.py b/python/2_CoreConcepts/processCheckpoint/processCheckpoint.py new file mode 100644 index 00000000..4115bda9 --- /dev/null +++ b/python/2_CoreConcepts/processCheckpoint/processCheckpoint.py @@ -0,0 +1,248 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS `AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Process Checkpointing Sample using CUDA Core API. + +The sample allocates a GPU buffer, fills it with a deterministic +pattern via a kernel, hashes the contents, runs the full +lock/checkpoint/restore/unlock cycle on its own PID, and re-hashes +the buffer afterwards to verify that the GPU memory contents +survived the round trip. +""" + +import argparse +import hashlib +import os +import sys +import time +from dataclasses import dataclass +from typing import List + +import numpy as np +from cuda.bindings import driver as cudrv +from cuda.core import ( + Device, + LaunchConfig, + Program, + ProgramOptions, + checkpoint, + launch, +) + +# Small fill kernel: deterministic, non-trivial pattern so the before/after +# hashes would disagree on any bit flip. +KERNEL_SRC = r""" +extern "C" __global__ void fill_pattern(float *out, unsigned long long n) +{ + unsigned long long i = (unsigned long long)blockIdx.x * blockDim.x + threadIdx.x; + + if (i < n) { + float v = (float)(i & 0xFFFFu) * 1e-3f + 1.0f; + float u = (float)((i >> 16) & 0xFFFFu) * 1e-4f + 0.5f; + // A handful of dependent ops per element. Deterministic given i. + for (int k = 0; k < 8; ++k) { + v = v * 1.000001f + u; + u = u * 0.999999f + v * 1e-6f; + } + out[i] = v + u; + } +} +""" + + +@dataclass +class StepTiming: + label: str + duration_ms: float + state_after: str + + +def _cu_check(result) -> None: + err = result[0] + if int(err) != 0: + raise RuntimeError(f"CUDA driver call failed: {err}") + + +def compile_fill_kernel(device: Device): + options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + program = Program(KERNEL_SRC, code_type="c++", options=options) + module = program.compile("cubin", name_expressions=("fill_pattern",)) + return module.get_kernel("fill_pattern") + + +def hash_device_buffer(device_buffer, host: np.ndarray) -> str: + _cu_check( + cudrv.cuMemcpyDtoH( + host.ctypes.data, + device_buffer.handle, + host.nbytes, + ) + ) + return hashlib.sha256(host.tobytes()).hexdigest()[:16] + + +def _time_call(fn, *args, **kwargs) -> float: + t0 = time.monotonic() + fn(*args, **kwargs) + return (time.monotonic() - t0) * 1000.0 + + +def run_lifecycle(proc: checkpoint.Process, lock_timeout_ms: int) -> List[StepTiming]: + """ + Drive the full `lock -> checkpoint -> restore -> unlock` cycle on + `proc` and return per-step timings with the state observed after + each step. + + Note on state after `restore()`: the driver leaves the process in + the `locked` state. You must still call `unlock()` to return to + `running`. + """ + timings: List[StepTiming] = [StepTiming("initial", 0.0, proc.state)] + + ms = _time_call(proc.lock, timeout_ms=lock_timeout_ms) + timings.append(StepTiming("lock", ms, proc.state)) + + ms = _time_call(proc.checkpoint) + timings.append(StepTiming("checkpoint", ms, proc.state)) + + ms = _time_call(proc.restore) + timings.append(StepTiming("restore", ms, proc.state)) + + ms = _time_call(proc.unlock) + timings.append(StepTiming("unlock", ms, proc.state)) + + return timings + + +def print_timings(timings: List[StepTiming]) -> None: + print() + header = f"{'step':<14}{'duration (ms)':>18}{'state after':>18}" + print(header) + print("-" * len(header)) + total = 0.0 + for t in timings: + if t.label == "initial": + dur = "-" + else: + dur = f"{t.duration_ms:.3f}" + total += t.duration_ms + print(f"{t.label:<14}{dur:>18}{t.state_after:>18}") + print("-" * len(header)) + print(f"{'total':<14}{total:>18.3f}{'':>18}") + + +def main(): + parser = argparse.ArgumentParser( + description="CUDA process checkpoint sample using cuda.core", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--device", type=int, default=0, help="CUDA device ID (default: 0)" + ) + parser.add_argument( + "--buffer-mib", + type=int, + default=16, + help="GPU buffer size in MiB (default: 16)", + ) + parser.add_argument( + "--lock-timeout-ms", + type=int, + default=5000, + help="Timeout passed to Process.lock in ms (default: 5000)", + ) + args = parser.parse_args() + + if sys.platform != "linux": + print("Error: CUDA process checkpointing is Linux-only.") + return 1 + + if args.buffer_mib <= 0: + print("Error: --buffer-mib must be positive") + return 1 + + print("[Process Checkpoint Sample using CUDA Core API]") + print(f"PID: {os.getpid()}") + + device = Device(args.device) + device.set_current() + print(f"Device: {device.name}") + print(f"Compute Capability: sm_{device.arch}") + print(f"Buffer size: {args.buffer_mib} MiB") + print(f"Lock timeout: {args.lock_timeout_ms} ms") + + print() + print("Compiling kernel ...") + fill_kernel = compile_fill_kernel(device) + + buffer_bytes = args.buffer_mib * 1024 * 1024 + n_elements = buffer_bytes // 4 # float32 + + stream = device.create_stream() + device_buffer = device.memory_resource.allocate(buffer_bytes, stream=stream) + try: + print("Writing deterministic pattern to GPU buffer ...") + block = 256 + grid = (n_elements + block - 1) // block + cfg = LaunchConfig(grid=grid, block=block) + launch(stream, cfg, fill_kernel, device_buffer, np.uint64(n_elements)) + stream.sync() + + host = np.empty(n_elements, dtype=np.float32) + + hash_before = hash_device_buffer(device_buffer, host) + print(f"Buffer hash (before): {hash_before}") + + print() + print("Running checkpoint lifecycle on self ...") + proc = checkpoint.Process(os.getpid()) + timings = run_lifecycle(proc, args.lock_timeout_ms) + print_timings(timings) + + hash_after = hash_device_buffer(device_buffer, host) + + print() + print(f"Buffer hash (before): {hash_before}") + print(f"Buffer hash (after): {hash_after}") + + if hash_before != hash_after: + print() + print("FAIL: GPU buffer contents changed across checkpoint/restore.") + return 1 + + print() + print("PASS: GPU buffer contents survived checkpoint/restore.") + finally: + device_buffer.close(stream) + + print() + print("Done") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/2_CoreConcepts/processCheckpoint/requirements.txt b/python/2_CoreConcepts/processCheckpoint/requirements.txt new file mode 100644 index 00000000..c79eb06c --- /dev/null +++ b/python/2_CoreConcepts/processCheckpoint/requirements.txt @@ -0,0 +1,3 @@ +cuda-python>=13.0.0 +cuda-core>=0.7.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/reduction/README.md b/python/2_CoreConcepts/reduction/README.md new file mode 100644 index 00000000..a70f767d --- /dev/null +++ b/python/2_CoreConcepts/reduction/README.md @@ -0,0 +1,137 @@ +# Sample: Fast Array Sum using Shared Memory (Python) + +## Description + +Two-stage parallel reduction: each GPU block sums its chunk in **shared memory** (tree reduction, two elements per thread), writes one partial sum per block; the host combines partial sums for the final result. + +**Stack:** `cuda-core` for `Device`, stream, events, `Program` / `launch()`. **CuPy** allocates device memory and copies; `launch()` takes device pointers as `ndarray.data.ptr` (Python `int`). Copies run on the same CUDA stream as the kernel via `cp.cuda.Stream.from_external(stream)` (cuda.core `Stream` implements the CUDA stream protocol) and `with cp_stream:`. + +## What you will learn + +- Shared-memory block reduction and sequential-addressing tree reduction +- `LaunchConfig` with dynamic shared memory and `launch()` with pointer arguments +- Aligning CuPy transfers with a `cuda.core` stream (`Stream.from_external`) +- GPU timing with `EventOptions` / `device.create_event()` + +## Key libraries + +| Library | Role | +|------------|------| +| `cuda-core`| Device, stream, events, compile, launch | +| `cupy` | `cp.empty`, `cp.asarray`, `cp.asnumpy`, `Stream.from_external` | +| `numpy` | Host data and CPU reference sum | + +## Key APIs (quick reference) + +- **cuda.core:** `Device`, `create_stream`, `Program` / `ProgramOptions`, `LaunchConfig`, `launch`, `EventOptions`, `create_event` +- **CuPy:** `cp.empty`, `cp.asarray`, `cp.cuda.Stream.from_external(stream)`, `with cp_stream:`, `cp.asnumpy` + +## Requirements + +- NVIDIA GPU, CUDA-capable driver; **CUDA Toolkit 13+** (for toolchain alignment with `cuda-core`) +- **Python 3.10+** + +```bash +pip install -r requirements.txt +``` + +## How to run + +```bash +python reduction.py +``` + +Defaults: 2²⁴ elements, 256 threads/block, `float`, 100 benchmark iterations. + +**Change data type** (selects `blockReduceKernel_int` / `_float` / `_double`): + +```bash +python reduction.py --type float # default; 32-bit float +python reduction.py --type double # 64-bit float +python reduction.py --type int # 32-bit integer (exact equality check) +``` + +Combine with other flags as needed, e.g. `python reduction.py --type int --n 1048576`. + +Other main flags: `--n`, `--threads`, `--iterations`. Full list: `python reduction.py --help`. + +## Output + +Example run (`python reduction.py`, defaults) on **Tesla T10**, compute capability **7.5**: + +``` +====================================================================== +Fast Array Sum using Shared Memory - Two-Stage Reduction +====================================================================== + +Demonstrates: Efficient parallel reduction using shared memory + +Device Information: + Name: Tesla T10 + Compute Capability: sm_7.5 + +Configuration: + Array size: 16,777,216 elements + Data type: float + Memory: 64.00 MB + Threads per block: 256 + +Two-Stage Reduction Strategy: + Stage 1: GPU block reduction + - Number of blocks: 32768 + - Elements per block: 512 + - Output: 32768 partial sums + Stage 2: CPU final reduction + - Combine 32768 partial sums → 1 final result + +Compiling CUDA kernel... + Kernel 'blockReduceKernel_float' compiled successfully + +> Generating random input data... +> Computing reference result on CPU... + CPU time: 2.428208 seconds + +> Warming up GPU... + Warm-up completed + +> Benchmarking Stage 1 (GPU block reduction)... + Running 100 iterations... + +> Running Stage 2 (CPU final reduction)... + +====================================================================== +Performance Results +====================================================================== + +Stage 1 (GPU block reduction): + Average time: 0.338404 ms + Throughput: 198.31 GB/s + +Stage 2 (CPU final reduction): + Time: 0.078073 ms + (32768 partial sums) + +Total time: 0.416477 ms +Speedup vs CPU: 5830.35x + +> Validating results... + GPU result: 2147639808.00000000 + CPU result: 2147639929.62027407 +Test PASSED + +====================================================================== +Summary +====================================================================== +Key optimizations: + - Load 2 elements per thread: 8,388,608 global reads (50% savings) + - Shared memory for reduction: ~10-20x faster than global memory + - Parallel block outputs: 32768 independent writes +Result: 198.31 GB/s throughput +====================================================================== +Two-Stage Reduction completed successfully! +====================================================================== +``` + +## Files + +`reduction.py` · `requirements.txt` · `README.md` diff --git a/python/2_CoreConcepts/reduction/reduction.py b/python/2_CoreConcepts/reduction/reduction.py new file mode 100644 index 00000000..cf838e2f --- /dev/null +++ b/python/2_CoreConcepts/reduction/reduction.py @@ -0,0 +1,485 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Fast Array Sum using Shared Memory - Two-Stage Reduction + +Demonstrates efficient parallel reduction using shared memory and +two-stage approach to avoid atomic operation bottlenecks. + +Key Features: +- Block-level reduction using shared memory +- Each thread loads 2 elements to reduce global memory traffic +- Sequential addressing tree reduction pattern +- No atomic operations - eliminates serialization bottleneck +- Device memory via CuPy; ``launch()`` takes pointers as ``ndarray.data.ptr`` +- CuPy uses ``cp.cuda.Stream.from_external(stream)``. +""" + +import argparse +import os +import sys +import time + +try: + import cupy as cp + import numpy as np + from cuda.core import ( + Device, + EventOptions, + LaunchConfig, + Program, + ProgramOptions, + launch, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install dependencies:") + print(" pip install -r requirements.txt") + sys.exit(1) + +# Import utilities +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "Utilities")) +from cuda_samples_utils import verify_array_result # noqa: E402 + +# Two-stage block reduction kernel +REDUCTION_KERNEL = """ +/* + * Block-level reduction kernel using shared memory + * + * Strategy: + * - Each block processes blockSize * 2 elements + * - Uses shared memory for fast intra-block reduction + * - Outputs one partial sum per block (no atomics) + * + * Key optimizations: + * - Load 2 elements per thread (reduces global memory traffic by 50%) + * - Tree reduction with sequential addressing (avoids divergence) + * - Shared memory instead of atomic operations (eliminates bottleneck) + * + * Note: This sample provides separate implementations for each data type + * for clarity. Production code typically uses templates with SharedMemory + * or reinterpret_cast to avoid duplication. See NVIDIA reduction guide for + * template-based approaches. + */ + +extern "C" __global__ void blockReduceKernel_int( + const int *__restrict__ input, + int *__restrict__ blockSums, + unsigned int n) +{ + extern __shared__ int sdata_int[]; + + unsigned int tid = threadIdx.x; + unsigned int blockSize = blockDim.x; + unsigned int gid = blockIdx.x * (blockSize * 2) + tid; + + // Load 2 elements per thread + int sum = 0; + if (gid < n) sum += input[gid]; + if (gid + blockSize < n) sum += input[gid + blockSize]; + + sdata_int[tid] = sum; + __syncthreads(); + + // Tree reduction with sequential addressing + for (unsigned int s = blockSize / 2; s > 0; s >>= 1) { + if (tid < s) { + sdata_int[tid] += sdata_int[tid + s]; + } + __syncthreads(); + } + + // Write block result + if (tid == 0) { + blockSums[blockIdx.x] = sdata_int[0]; + } +} + +extern "C" __global__ void blockReduceKernel_float( + const float *__restrict__ input, + float *__restrict__ blockSums, + unsigned int n) +{ + extern __shared__ float sdata_float[]; + + unsigned int tid = threadIdx.x; + unsigned int blockSize = blockDim.x; + unsigned int gid = blockIdx.x * (blockSize * 2) + tid; + + // Load 2 elements per thread + float sum = 0.0f; + if (gid < n) sum += input[gid]; + if (gid + blockSize < n) sum += input[gid + blockSize]; + + sdata_float[tid] = sum; + __syncthreads(); + + // Tree reduction with sequential addressing + for (unsigned int s = blockSize / 2; s > 0; s >>= 1) { + if (tid < s) { + sdata_float[tid] += sdata_float[tid + s]; + } + __syncthreads(); + } + + // Write block result + if (tid == 0) { + blockSums[blockIdx.x] = sdata_float[0]; + } +} + +extern "C" __global__ void blockReduceKernel_double( + const double *__restrict__ input, + double *__restrict__ blockSums, + unsigned int n) +{ + extern __shared__ double sdata_double[]; + + unsigned int tid = threadIdx.x; + unsigned int blockSize = blockDim.x; + unsigned int gid = blockIdx.x * (blockSize * 2) + tid; + + // Load 2 elements per thread + double sum = 0.0; + if (gid < n) sum += input[gid]; + if (gid + blockSize < n) sum += input[gid + blockSize]; + + sdata_double[tid] = sum; + __syncthreads(); + + // Tree reduction with sequential addressing + for (unsigned int s = blockSize / 2; s > 0; s >>= 1) { + if (tid < s) { + sdata_double[tid] += sdata_double[tid + s]; + } + __syncthreads(); + } + + // Write block result + if (tid == 0) { + blockSums[blockIdx.x] = sdata_double[0]; + } +} +""" + + +def reduce_cpu(data): + """Compute sum using Kahan summation for numerical accuracy.""" + if len(data) == 0: + return 0 + + sum_val = float(data[0]) + c = 0.0 + + for i in range(1, len(data)): + y = float(data[i]) - c + t = sum_val + y + c = (t - sum_val) - y + sum_val = t + + return sum_val + + +def _validate_threads_per_block(threads_per_block): + if threads_per_block <= 0 or threads_per_block > 1024: + return "threads per block must be between 1 and 1024" + if (threads_per_block & (threads_per_block - 1)) != 0: + return ( + "threads per block must be a power of 2 " + "(required by the shared-memory tree reduction kernel)" + ) + return None + + +def run( + num_elements=1 << 24, threads_per_block=256, test_iterations=100, datatype="float" +): + """Run two-stage reduction benchmark.""" + + print("\n" + "=" * 70) + print("Fast Array Sum using Shared Memory - Two-Stage Reduction") + print("=" * 70) + print("\nDemonstrates: Efficient parallel reduction using shared memory") + + # Map datatype + dtype_map = {"int": np.int32, "float": np.float32, "double": np.float64} + if datatype not in dtype_map: + print(f"Unknown datatype '{datatype}', using 'float'") + datatype = "float" + dtype = dtype_map[datatype] + itemsize = np.dtype(dtype).itemsize + + # Initialize device + device = Device() + device.set_current() + major, minor = device.compute_capability + + print("\nDevice Information:") + print(f" Name: {device.name}") + print(f" Compute Capability: sm_{major}.{minor}") + + # Configuration + print("\nConfiguration:") + print(f" Array size: {num_elements:,} elements") + print(f" Data type: {datatype}") + print(f" Memory: {num_elements * itemsize / (1024**2):.2f} MB") + print(f" Threads per block: {threads_per_block}") + + # Calculate number of blocks + # Each block processes threads_per_block * 2 elements + num_blocks = (num_elements + threads_per_block * 2 - 1) // (threads_per_block * 2) + + print("\nTwo-Stage Reduction Strategy:") + print(" Stage 1: GPU block reduction") + print(f" - Number of blocks: {num_blocks}") + print(f" - Elements per block: {threads_per_block * 2}") + print(f" - Output: {num_blocks} partial sums") + print(" Stage 2: CPU final reduction") + print(f" - Combine {num_blocks} partial sums → 1 final result") + + # Compile kernel + print("\nCompiling CUDA kernel...") + program_options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + prog = Program(REDUCTION_KERNEL, code_type="c++", options=program_options) + mod = prog.compile("cubin") + kernel_name = f"blockReduceKernel_{datatype}" + kernel = mod.get_kernel(kernel_name) + print(f" Kernel '{kernel_name}' compiled successfully") + + # Generate input data + print("\n> Generating random input data...") + rng = np.random.default_rng(42) + if datatype == "int": + h_input = rng.integers(0, 256, size=num_elements, dtype=dtype) + else: + h_input = (rng.random(num_elements) * 256).astype(dtype) + + # cuda.core stream for launch/events; CuPy copies use the same stream via + # Stream.from_external. + stream = device.create_stream() + cp_stream = cp.cuda.Stream.from_external(stream) + try: + d_blockSums = cp.empty(num_blocks, dtype=dtype) + with cp_stream: + d_input = cp.asarray(h_input, dtype=dtype) + stream.sync() + + # Compute CPU reference + print("> Computing reference result on CPU...") + cpu_start = time.perf_counter() + cpu_result = reduce_cpu(h_input) + cpu_time = time.perf_counter() - cpu_start + print(f" CPU time: {cpu_time:.6f} seconds") + + # Configure launch + shared_mem_bytes = threads_per_block * itemsize + config = LaunchConfig( + grid=num_blocks, block=threads_per_block, shmem_size=shared_mem_bytes + ) + + # Warm-up + print("\n> Warming up GPU...") + launch( + stream, + config, + kernel, + d_input.data.ptr, + d_blockSums.data.ptr, + np.uint32(num_elements), + ) + stream.sync() + print(" Warm-up completed") + + # Benchmark Stage 1 (GPU) + print("\n> Benchmarking Stage 1 (GPU block reduction)...") + print(f" Running {test_iterations} iterations...") + + # cuda.core event elapsed time (end - start) is in milliseconds (CUDA API). + stage1_times_ms = [] + event_options = EventOptions(enable_timing=True) + start_event = stream.device.create_event(options=event_options) + end_event = stream.device.create_event(options=event_options) + for _ in range(test_iterations): + stream.record(start_event) + launch( + stream, + config, + kernel, + d_input.data.ptr, + d_blockSums.data.ptr, + np.uint32(num_elements), + ) + stream.record(end_event) + end_event.sync() + stage1_times_ms.append(float(end_event - start_event)) + + avg_stage1_ms = np.mean(stage1_times_ms) + avg_stage1_s = avg_stage1_ms / 1000.0 + + # Stage 2 (CPU) + print("\n> Running Stage 2 (CPU final reduction)...") + # Device → Host: after stream sync, partial sums are visible on host. + stream.sync() + with cp_stream: + h_blockSums = cp.asnumpy(d_blockSums) + stage2_start = time.perf_counter() + gpu_result = float(np.sum(h_blockSums)) + stage2_time = time.perf_counter() - stage2_start + + total_time = avg_stage1_s + stage2_time + + # Performance metrics (use seconds for throughput; CPU times are in seconds) + bytes_processed = num_elements * itemsize + throughput = bytes_processed / avg_stage1_s / 1e9 + + print("\n" + "=" * 70) + print("Performance Results") + print("=" * 70) + print("\nStage 1 (GPU block reduction):") + print(f" Average time: {avg_stage1_ms:.6f} ms") + print(f" Throughput: {throughput:.2f} GB/s") + print("\nStage 2 (CPU final reduction):") + print(f" Time: {stage2_time * 1000:.6f} ms") + print(f" ({num_blocks} partial sums)") + print(f"\nTotal time: {total_time * 1000:.6f} ms") + print(f"Speedup vs CPU: {cpu_time / total_time:.2f}x") + + # Validation + print("\n> Validating results...") + if datatype == "int": + print(f" GPU result: {int(gpu_result):,}") + print(f" CPU result: {int(cpu_result):,}") + rtol, atol = 0.0, 0.0 + else: + precision = 8 if datatype == "float" else 12 + print(f" GPU result: {gpu_result:.{precision}f}") + print(f" CPU result: {cpu_result:.{precision}f}") + if datatype == "float": + rtol, atol = 1e-5, 1e-8 * num_elements + else: # double + rtol, atol = 1e-8, 1e-12 * num_elements + + success = verify_array_result( + np.array([gpu_result]), + np.array([cpu_result]), + rtol=rtol, + atol=atol, + verbose=True, + ) + + # Summary + print("\n" + "=" * 70) + print("Summary") + print("=" * 70) + print("Key optimizations:") + half_reads = num_elements // 2 + print( + " - Load 2 elements per thread: " + f"{half_reads:,} global reads (50% savings)" + ) + print(" - Shared memory for reduction: ~10-20x faster than global memory") + print(f" - Parallel block outputs: {num_blocks} independent writes") + print(f"Result: {throughput:.2f} GB/s throughput") + + print("=" * 70) + if success: + print("Two-Stage Reduction completed successfully!") + else: + print("Two-Stage Reduction FAILED!") + print("=" * 70 + "\n") + + return 0 if success else 1 + finally: + stream.close() + + +def main(): + """Main entry point with argument parsing.""" + parser = argparse.ArgumentParser( + description="Two-Stage Reduction with Shared Memory", + epilog="See README.md for usage examples and detailed documentation.", + ) + + parser.add_argument( + "--n", + type=int, + default=1 << 24, + help="Number of elements to reduce (default: 16777216 = 2^24)", + ) + + parser.add_argument( + "--threads", + type=int, + default=256, + help="Threads per block, power of 2 in [1, 1024] (default: 256)", + ) + + parser.add_argument( + "--type", + type=str, + default="float", + choices=["int", "float", "double"], + help="Data type for reduction (default: float)", + ) + + parser.add_argument( + "--iterations", + type=int, + default=100, + help="Number of benchmark iterations (default: 100)", + ) + + args = parser.parse_args() + + # Validate arguments + if args.n <= 0: + print("Error: n must be positive") + return 1 + + err = _validate_threads_per_block(args.threads) + if err: + print(f"Error: {err}") + return 1 + + try: + exit_code = run( + num_elements=args.n, + threads_per_block=args.threads, + test_iterations=args.iterations, + datatype=args.type, + ) + sys.exit(exit_code) + except Exception as e: + print(f"\nError: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/reduction/requirements.txt b/python/2_CoreConcepts/reduction/requirements.txt new file mode 100644 index 00000000..17ed73c7 --- /dev/null +++ b/python/2_CoreConcepts/reduction/requirements.txt @@ -0,0 +1,8 @@ +# Python CUDA Reduction Sample Requirements +# Install with: pip install -r requirements.txt + +numpy>=2.3.2 +cuda-core>=0.6.0 +cuda-python>=13.0.0 +# Use cupy-cuda13x>=14.0.0 for cp.cuda.Stream.from_external(stream) +cupy-cuda13x>=14.0.0 diff --git a/python/2_CoreConcepts/reductionMultiBlockCG/README.md b/python/2_CoreConcepts/reductionMultiBlockCG/README.md new file mode 100644 index 00000000..bb359f0b --- /dev/null +++ b/python/2_CoreConcepts/reductionMultiBlockCG/README.md @@ -0,0 +1,129 @@ +# Sample: Single-Pass Multi-Block Reduction with Cooperative Groups (Python) + +## Description + +Single-kernel, two-stage reduction using **Cooperative Groups** and `grid.sync()` so all blocks synchronize inside one launch—no second kernel or CPU stage for the reduction tree. + +**Stack:** `cuda-core` (device, compile, cooperative `launch()`, stream, **CUDA events** for GPU timing). **CuPy** for H↔D copies on the same stream (`Stream.from_external(cuda.core_stream)`, `ndarray.data.ptr` to `launch()`). **`try`/`finally`** closes the stream if cooperative launch fails. Requires **compute capability > 6.0** (Pascal+). + +## What you will learn + +- `cooperative_groups::grid_group` and `grid.sync()` across the grid +- Cooperative `LaunchConfig(..., cooperative_launch=True)` and sizing blocks for residency +- Timing the GPU path with `EventOptions` / `stream.record()` / event elapsed time + +## Key libraries + +| Library | Role | +|---------|------| +| `cuda-core` | Device, stream, events, `Program` / `ProgramOptions`, cooperative `launch()` | +| `cupy` | `cp.empty`, `cp.asarray`, `cp.asnumpy`, `Stream.from_external` | +| `numpy` | Host data, reference sum, `default_rng` | + +## Requirements + +- NVIDIA GPU, **Pascal or newer**; **CUDA Toolkit 13+**; **Python 3.10+** +- NVRTC must see **`cooperative_groups.h`** and **CCCL** headers (`cuda/std/*`) + +```bash +pip install -r requirements.txt +``` + +Pick a CuPy wheel that matches your CUDA major version (e.g. `cupy-cuda13x` in `requirements.txt`). + +## How to run + +**`--cuda-include-dir` is required** (colon-separated list). Typical desktop layout: + +```bash +python reductionMultiBlockCG.py \ + --cuda-include-dir /usr/local/cuda/include/cccl:/usr/local/cuda/include +``` + +**Jetson / split include trees:** pass every directory NVRTC needs in one `--cuda-include-dir` argument, e.g. +`/usr/local/cuda/include/cccl:/usr/local/cuda/targets/sbsa-linux/include` (adjust paths to your image). If headers are scattered, you can instead merge them into one tree with symlinks and point `--cuda-include-dir` at that folder. + +Defaults: **2²⁵** elements, threads = device max (capped at 1024), auto `--maxblocks`, **100** iterations. Other flags: `--n`, `--threads`, `--maxblocks`, `--iterations`. See **`python reductionMultiBlockCG.py --help`**. + +## Output + +``` +====================================================================== +Single-Pass Multi-Block Reduction with Cooperative Groups +====================================================================== + +Demonstrates: Multi-stage reduction in a single kernel using grid.sync() + +Device Information: + Name: NVIDIA Thor + Compute Capability: sm_11.0 + +Reduction Configuration: + Number of elements: 33,554,432 + Data size: 128.00 MB + +Compiling CUDA kernel... + Kernel compiled successfully + +Launch Configuration: + Threads per block: 1024 + Number of blocks: 20 + Total threads: 20,480 + Shared memory per block: 4096 bytes + Launch mode: Cooperative (grid-wide sync enabled) + +> Generating random input data... +> Computing reference result on CPU... + CPU time: 0.008903 seconds + +> Warming up GPU... + Warm-up successful + +> Running benchmark (100 iterations)... + +> Performance Results: + Average GPU time: 0.977166 ms + Throughput: 137.35 GB/s + Speedup vs CPU: 9.11x + +> Validating results... +Test PASSED + +====================================================================== +Summary +====================================================================== + +Single-kernel two-stage reduction: + Stage 1: 20 blocks → 20 partial sums + grid.sync() ← All blocks synchronize (KEY innovation) + Stage 2: Block 0 → 1 final result + Total: 1 kernel launch, 137.35 GB/s + +Comparison: + • Traditional: 2 kernel launches or kernel + CPU + • This sample: 1 kernel with grid.sync() between stages + • Benefit: Eliminates ~5-20μs launch overhead per stage + +====================================================================== +Single-Pass Multi-Block Reduction completed successfully! +====================================================================== +``` + +## Troubleshooting (short) + +- **Cooperative launch not supported / fails:** need sm_60+; reduce `--maxblocks` or `--threads` so all blocks can be resident. +- **Compile errors missing headers:** extend `--cuda-include-dir` with the path that contains CCCL / cooperative groups (see Jetson note above). +- **Low throughput:** often block count vs occupancy; try defaults first, then tune `--threads` / `--maxblocks`. + +## Related samples + +**blockArraySum** (atomics + grid-stride) → **reduction** (two-stage shared memory) → **this sample** (single kernel + `grid.sync()`). + +## Further reading + +- [CUDA Cooperative Groups](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#cooperative-groups) +- [Reduction whitepaper (PDF)](https://developer.download.nvidia.com/assets/cuda/files/reduction.pdf) + +## Files + +`reductionMultiBlockCG.py` · `requirements.txt` · `README.md` diff --git a/python/2_CoreConcepts/reductionMultiBlockCG/reductionMultiBlockCG.py b/python/2_CoreConcepts/reductionMultiBlockCG/reductionMultiBlockCG.py new file mode 100644 index 00000000..bcb5c91a --- /dev/null +++ b/python/2_CoreConcepts/reductionMultiBlockCG/reductionMultiBlockCG.py @@ -0,0 +1,472 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Single-Pass Multi-Block Reduction with Cooperative Groups + +Demonstrates single-kernel multi-stage reduction using grid-wide +synchronization. Traditional reduction needs multiple kernel launches, +but with grid.sync() from Cooperative Groups, we can complete all +stages in ONE kernel. + +Key Features: +- Grid-wide synchronization with grid.sync() +- Two-stage reduction in a single kernel (no atomic operations) +- Requires compute capability 6.0+ and cooperative launch +- Achieves 400-700 GB/s on modern GPUs + +How it differs from other samples: +- blockArraySum.py: Basic thread/block indexing + atomicAdd +- reduction.py: High-performance shared memory, two-kernel approach +- This sample: Single-kernel multi-stage with grid.sync() + +Transfers use CuPy on the same CUDA stream as ``launch()`` (``Stream.from_external``), +not ``cuda.bindings.driver`` memcpy. GPU timing uses CUDA events. +""" + +import argparse +import os +import sys +import time + +try: + import cupy as cp + import numpy as np + from cuda.core import ( + Device, + EventOptions, + LaunchConfig, + Program, + ProgramOptions, + launch, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install dependencies:") + print(" pip install -r requirements.txt") + sys.exit(1) + +# Import utilities +utilities_path = os.path.join(os.path.dirname(__file__), "..", "..", "Utilities") +sys.path.insert(0, utilities_path) +from cuda_samples_utils import verify_array_result # noqa: E402 + + +def _validate_threads_arg(threads): + if threads is None: + return None + if threads <= 0 or threads > 1024: + return "threads must be between 1 and 1024" + if (threads & (threads - 1)) != 0: + return ( + "threads must be a power of 2 " + "(required by the shared-memory tree reduction kernel)" + ) + return None + + +# Single-pass multi-block reduction kernel with grid-wide sync +REDUCTION_KERNEL = """ +/* + * Single-Kernel Multi-Stage Reduction using grid.sync() + * + * Strategy: + * Stage 1: Each block reduces its portion → partial sum + * grid.sync() ← KEY: All blocks synchronize + * Stage 2: Block 0 reduces all partial sums → final result + * + * Key feature: grid.sync() enables multi-stage within ONE kernel + */ + +#include + +namespace cg = cooperative_groups; +extern "C" __global__ void reduceSinglePassMultiBlockCG( + const float *__restrict__ g_idata, + float *__restrict__ g_odata, + unsigned int n) +{ + cg::thread_block cta = cg::this_thread_block(); + cg::grid_group grid = cg::this_grid(); + extern __shared__ float sdata[]; + + unsigned int tid = threadIdx.x; + unsigned int blockSize = blockDim.x; + + // Stage 1: Grid-stride loop + block reduction + float sum = 0.0f; + for (unsigned int i = grid.thread_rank(); i < n; i += grid.size()) { + sum += g_idata[i]; + } + + sdata[tid] = sum; + cg::sync(cta); + + // Block reduction (sequential addressing) + for (unsigned int s = blockSize / 2; s > 0; s >>= 1) { + if (tid < s) { + sdata[tid] += sdata[tid + s]; + } + cg::sync(cta); + } + + if (tid == 0) { + g_odata[blockIdx.x] = sdata[0]; + } + + // KEY: Grid-wide synchronization (all blocks wait here) + grid.sync(); + + // Stage 2: Block 0 reduces all partial sums → final result + // Use a stride loop so all gridDim.x partial sums are covered even + // when gridDim.x > blockDim.x. + if (blockIdx.x == 0) { + // mySum stays 0.0f when tid >= gridDim.x (loop never executes), + // implicitly zero-filling sdata for threads beyond the partial-sum count. + float mySum = 0.0f; + for (unsigned int i = tid; i < gridDim.x; i += blockSize) { + mySum += g_odata[i]; + } + sdata[tid] = mySum; + cg::sync(cta); + + for (unsigned int s = blockSize / 2; s > 0; s >>= 1) { + if (tid < s) { + sdata[tid] += sdata[tid + s]; + } + cg::sync(cta); + } + + if (tid == 0) { + g_odata[0] = sdata[0]; + } + } +} +""" + + +def get_max_cooperative_blocks(device, kernel, threads_per_block, shared_mem_bytes): + """ + Calculate max blocks for cooperative launch (all must be resident). + + This is a conservative estimate that ignores shared memory limits; + for precise tuning, use cudaOccupancyMaxActiveBlocksPerMultiprocessor. + """ + # Get device properties + prop = device.properties + + # Calculate maximum blocks per SM + # Note: We use cudaOccupancyMaxActiveBlocksPerMultiprocessor functionality + # For simplicity in Python, we'll use a conservative estimate + num_sms = prop.multiprocessor_count + max_threads_per_sm = prop.max_threads_per_multiprocessor + max_blocks_per_sm = max_threads_per_sm // threads_per_block + + # Total blocks = blocks per SM × number of SMs + max_blocks = max_blocks_per_sm * num_sms + + # Also respect max_grid_dim_x + max_blocks = min(max_blocks, prop.max_grid_dim_x) + + return max_blocks + + +def run( + num_elements=1 << 25, + max_threads=None, + max_blocks=None, + test_iterations=100, + cuda_include_dir=None, +): + """Run single-pass multi-block reduction benchmark.""" + + if cuda_include_dir is None: + raise ValueError("cuda_include_dir is required") + + print("\n" + "=" * 70) + print("Single-Pass Multi-Block Reduction with Cooperative Groups") + print("=" * 70) + msg = "Multi-stage reduction in a single kernel using grid.sync()" + print(f"\nDemonstrates: {msg}") + + # Initialize device + device = Device() + device.set_current() + major, minor = device.compute_capability + + print("\nDevice Information:") + print(f" Name: {device.name}") + print(f" Compute Capability: sm_{major}.{minor}") + + # Get device properties for configuration + prop = device.properties + + # Determine threads per block + if max_threads is None: + max_threads = prop.max_threads_per_block + threads_per_block = min(max_threads, 1024) + + # Define data type and itemsize + itemsize = np.dtype(np.float32).itemsize + + print("\nReduction Configuration:") + print(f" Number of elements: {num_elements:,}") + print(f" Data size: {num_elements * itemsize / (1024**2):.2f} MB") + + # Compile kernel + print("\nCompiling CUDA kernel...") + # Support colon-separated multiple include paths + include_paths = cuda_include_dir.split(":") + program_options = ProgramOptions( + std="c++17", arch=f"sm_{device.arch}", include_path=include_paths + ) + prog = Program(REDUCTION_KERNEL, code_type="c++", options=program_options) + mod = prog.compile("cubin") + kernel = mod.get_kernel("reduceSinglePassMultiBlockCG") + print(" Kernel compiled successfully") + + # Calculate blocks for cooperative launch + shared_mem_bytes = threads_per_block * itemsize + + if max_blocks is None: + max_blocks = get_max_cooperative_blocks( + device, kernel, threads_per_block, shared_mem_bytes + ) + + # Calculate optimal blocks (all must be resident) + num_blocks = min( + max_blocks, (num_elements + threads_per_block - 1) // threads_per_block + ) + + print("\nLaunch Configuration:") + print(f" Threads per block: {threads_per_block}") + print(f" Number of blocks: {num_blocks}") + print(f" Total threads: {num_blocks * threads_per_block:,}") + print(f" Shared memory per block: {shared_mem_bytes} bytes") + print(" Launch mode: Cooperative (grid-wide sync enabled)") + + # Generate random input data + print("\n> Generating random input data...") + rng = np.random.default_rng(42) + h_idata = (rng.random(num_elements) * 256).astype(np.float32) + + stream = device.create_stream() + cp_stream = cp.cuda.Stream.from_external(stream) + try: + d_odata = cp.empty(num_blocks, dtype=np.float32) + with cp_stream: + d_idata = cp.asarray(h_idata, dtype=np.float32) + stream.sync() + + # Compute CPU reference + print("> Computing reference result on CPU...") + cpu_start = time.perf_counter() + cpu_result = float(np.sum(h_idata)) + cpu_time = time.perf_counter() - cpu_start + print(f" CPU time: {cpu_time:.6f} seconds") + + # Warm-up + print("\n> Warming up GPU...") + + launch_config = LaunchConfig( + grid=(num_blocks, 1, 1), + block=(threads_per_block, 1, 1), + shmem_size=shared_mem_bytes, + cooperative_launch=True, + ) + + n_u32 = np.uint32(num_elements) + ptr_in = d_idata.data.ptr + ptr_out = d_odata.data.ptr + + try: + launch(stream, launch_config, kernel, ptr_in, ptr_out, n_u32) + except Exception as e: + print(f" Cooperative launch failed: {e}") + return 1 + + stream.sync() + print(" Warm-up successful") + + # Benchmark (CUDA events — not host wall clock around the whole loop) + print(f"\n> Running benchmark ({test_iterations} iterations)...") + event_options = EventOptions(enable_timing=True) + start_event = stream.device.create_event(options=event_options) + end_event = stream.device.create_event(options=event_options) + # cuda.core event elapsed time (end - start) is in milliseconds (CUDA API). + gpu_times_ms = [] + for _ in range(test_iterations): + try: + stream.record(start_event) + launch(stream, launch_config, kernel, ptr_in, ptr_out, n_u32) + stream.record(end_event) + end_event.sync() + gpu_times_ms.append(float(end_event - start_event)) + except Exception as e: + print(f"Benchmark iteration failed: {e}") + return 1 + + avg_gpu_ms = float(np.mean(gpu_times_ms)) + avg_gpu_s = avg_gpu_ms / 1000.0 + + stream.sync() + with cp_stream: + h_result = cp.asnumpy(d_odata[:1]) + gpu_result = float(h_result[0]) + + # Performance metrics use seconds for throughput and speedup. + # CPU time is already in seconds. + bytes_processed = num_elements * 4 + throughput_gb_s = bytes_processed / avg_gpu_s / 1e9 + + print("\n> Performance Results:") + print(f" Average GPU time: {avg_gpu_ms:.6f} ms") + print(f" Throughput: {throughput_gb_s:.2f} GB/s") + print(f" Speedup vs CPU: {cpu_time / avg_gpu_s:.2f}x") + + # Validate results + print("\n> Validating results...") + success = verify_array_result( + np.array([gpu_result]), + np.array([cpu_result]), + rtol=1e-5, + atol=1e-5, + ) + + # Summary + print("\n" + "=" * 70) + print("Summary") + print("=" * 70) + print(f""" +Single-kernel two-stage reduction: + Stage 1: {num_blocks} blocks → {num_blocks} partial sums + grid.sync() ← All blocks synchronize (KEY innovation) + Stage 2: Block 0 → 1 final result + Total: 1 kernel launch, {throughput_gb_s:.2f} GB/s + +Comparison: + • Traditional: 2 kernel launches or kernel + CPU + • This sample: 1 kernel with grid.sync() between stages + • Benefit: Eliminates ~5-20μs launch overhead per stage + """) + + print("=" * 70) + if success: + print("Single-Pass Multi-Block Reduction completed successfully!") + else: + print("Single-Pass Multi-Block Reduction FAILED!") + print("=" * 70 + "\n") + + return 0 if success else 1 + finally: + stream.close() + + +def main(): + """Main entry point with argument parsing.""" + parser = argparse.ArgumentParser( + description="Single-Pass Multi-Block Reduction with Cooperative Groups" + ) + + parser.add_argument( + "--n", + type=int, + default=1 << 25, + help="Number of elements to reduce (default: 33554432 = 2^25)", + ) + + parser.add_argument( + "--threads", + type=int, + default=None, + help=( + "Threads per block, power of 2 in [1, 1024]; " + "default: device maximum (typically 1024)" + ), + ) + + parser.add_argument( + "--maxblocks", + type=int, + default=None, + help=( + "Maximum number of blocks " + "(default: auto-calculated for cooperative launch)" + ), + ) + + parser.add_argument( + "--iterations", + type=int, + default=100, + help="Number of benchmark iterations (default: 100)", + ) + + parser.add_argument( + "--cuda-include-dir", + type=str, + required=True, + help=( + "CUDA include directory for NVRTC " + "(can use colon-separated paths, e.g., /path1:/path2)" + ), + ) + + args = parser.parse_args() + + # Validate arguments + if args.n <= 0: + print("Error: n must be positive") + return 1 + + err_threads = _validate_threads_arg(args.threads) + if err_threads: + print(f"Error: {err_threads}") + return 1 + + if args.maxblocks is not None and args.maxblocks <= 0: + print("Error: maxblocks must be positive") + return 1 + + try: + exit_code = run( + num_elements=args.n, + max_threads=args.threads, + max_blocks=args.maxblocks, + test_iterations=args.iterations, + cuda_include_dir=args.cuda_include_dir, + ) + sys.exit(exit_code) + except Exception as e: + print(f"\nError: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/reductionMultiBlockCG/requirements.txt b/python/2_CoreConcepts/reductionMultiBlockCG/requirements.txt new file mode 100644 index 00000000..8d4d1a14 --- /dev/null +++ b/python/2_CoreConcepts/reductionMultiBlockCG/requirements.txt @@ -0,0 +1,10 @@ +# Python Multi-Block Cooperative Reduction Sample Requirements +# Install with: pip install -r requirements.txt + +numpy>=2.3.2 +cuda-core>=0.6.0 +cuda-python>=13.0.0 +# Headers for NVRTC: cooperative_groups.h includes cuda/std/* (CCCL) +cuda-cccl>=1.0.0 +# Use cupy-cuda13x>=14.0.0 for cp.cuda.Stream.from_external(stream) +cupy-cuda13x>=14.0.0 diff --git a/python/2_CoreConcepts/simpleZeroCopy/README.md b/python/2_CoreConcepts/simpleZeroCopy/README.md new file mode 100644 index 00000000..ab2f91cb --- /dev/null +++ b/python/2_CoreConcepts/simpleZeroCopy/README.md @@ -0,0 +1,141 @@ +# Sample: simpleZeroCopy (Python) + +## Description + +This sample demonstrates zero-copy access using **`cuda.core`** to compile and launch a kernel, and **`cuda.bindings.runtime`** for mapped pinned host memory (`cudaHostAlloc` with `cudaHostAllocMapped`, `cudaHostGetDevicePointer`, and `cudaFreeHost`). The GPU loads and stores through **device** addresses that refer to that host memory—no `cudaMemcpy` in or out. The example is vector add with inputs and output as NumPy views of the host side of those buffers. + +## What you will learn + +- How to allocate **mapped** pinned host memory with `cudaHostAlloc` (via `cuda.bindings.runtime`) so the GPU can use `cudaHostGetDevicePointer` addresses in a kernel +- How `cuda.core.PinnedMemoryResource` differs (staging/copies; not guaranteed to be `cudaHostAllocMapped` for direct kernel access) +- How to build NumPy views of host addresses with `ctypes` and `numpy.frombuffer` +- How to launch CUDA kernels with `cuda.core`’s `Program` and `launch`, passing **device** pointers for mapped buffers +- When zero-copy is beneficial vs. device memory with explicit transfers +- How to validate results on the host without a D2H memcpy + +## Key libraries + +- `numpy` – CPU arrays and reference computation +- `cuda-core` – `Device`, stream, `Program`, `LaunchConfig`, `launch` +- `cuda-python` (`cuda.bindings.runtime`) – `cudaHostAlloc` / `cudaHostGetDevicePointer` / `cudaFreeHost` for mapped host memory + +## Key APIs + +**From cuda.core:** `Device`, `device.create_stream()`, `Program`, `ProgramOptions`, `LaunchConfig`, `launch` + +**From cuda.bindings.runtime:** `cudaHostAlloc` (with `cudaHostAllocMapped` | `cudaHostAllocPortable`), `cudaHostGetDevicePointer`, `cudaFreeHost` + +**From the standard library:** `ctypes` – wrap host pointers for `numpy.frombuffer` float32 views + +**Memory management:** Free host memory with `cudaFreeHost` in a `finally` block; call `stream.close()` when done. + +## Zero-Copy Memory: When to Use + +### Benefits +- **No explicit transfers**: Simplifies code by eliminating `cudaMemcpy` calls +- **Automatic synchronization**: Host can access results immediately after kernel completes +- **Good for small data**: Overhead of explicit transfers can exceed benefits for small arrays +- **Excellent for integrated GPUs**: On systems like Jetson (Tegra), CPU and GPU share physical memory + +### Limitations +- **Slower access**: Limited by PCIe bandwidth vs. device memory bandwidth +- **Not for compute-intensive**: Device memory is much faster for frequently accessed data +- **Discrete GPU overhead**: Each access crosses PCIe bus + +### Best Use Cases +1. Small data sets where transfer overhead dominates +2. Data accessed infrequently by GPU +3. Integrated GPU platforms (shared memory) +4. Streaming data from host to device +5. Prototyping and debugging (simplifies memory management) + +## Requirements + +1. **NVIDIA GPU** and a **driver** compatible with your installed `cuda-python` / `cuda-core` wheels. +2. **Python 3.10 or newer** +3. Install **`pip install -r requirements.txt`** (NumPy, `cuda-python`, `cuda-core`). A **system** CUDA Toolkit is not strictly required if the process can load the driver/runtime; use `LD_LIBRARY_PATH` in *How to run* if you hit missing-library errors. + +**Install packages:** +```bash +pip install -r requirements.txt +``` + +Or manually: +```bash +pip install numpy>=2.3.2 cuda-core>=0.6.0 cuda-python>=13.0.0 +``` + +## How to run + +Basic usage: +```bash +# Pre-steps: Set library path +export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH + +# Run with default parameters (1M elements) +python simpleZeroCopy.py +``` + +With custom parameters: +```bash +# Use 2M elements +python simpleZeroCopy.py --num_elements 2097152 + +# Show help +python simpleZeroCopy.py --help +``` + +### Command line arguments + +- `--num_elements`: Number of elements in vectors (default: 1048576) + - Each vector uses `num_elements * 4 bytes` (float32) + - Default: ~4 MB per vector, ~12 MB total + +## Expected Output + +Device name and compute capability **depend on your system**; the rest of the log should match this shape when validation passes. + +``` +====================================================================== +simpleZeroCopy - CUDA Python Sample +====================================================================== + +Device Information: + Name: + Compute Capability: . + +> Memory: mapped pinned host (cudaHostAlloc + cudaHostGetDevicePointer) + +Compiling CUDA kernel... + Kernel compiled successfully + +Allocating memory: + Vector size: 1,048,576 elements + Memory per vector: 4.00 MB + Total memory: 12.00 MB + +> Allocating mapped pinned host memory... + Mapped host memory allocated successfully + +> Initializing vectors on host... +> Computing reference result on CPU... + +> Launching vectorAddGPU kernel... + Note: GPU accesses host memory directly (zero-copy) + Kernel execution complete + +> Checking results from vectorAddGPU()... + Comparing 1,048,576 elements... + Relative error: 0.000000e+00 + Validation PASSED + +====================================================================== +simpleZeroCopy completed successfully! +====================================================================== +``` + +## Files + +- `simpleZeroCopy.py` – Main Python implementation +- `README.md` – This file +- `requirements.txt` – Python package dependencies diff --git a/python/2_CoreConcepts/simpleZeroCopy/requirements.txt b/python/2_CoreConcepts/simpleZeroCopy/requirements.txt new file mode 100644 index 00000000..e3f69898 --- /dev/null +++ b/python/2_CoreConcepts/simpleZeroCopy/requirements.txt @@ -0,0 +1,6 @@ +# simpleZeroCopy - Requirements +# Install with: pip install -r requirements.txt + +numpy>=2.3.2 +cuda-python>=13.0.0 +cuda-core>=0.6.0 diff --git a/python/2_CoreConcepts/simpleZeroCopy/simpleZeroCopy.py b/python/2_CoreConcepts/simpleZeroCopy/simpleZeroCopy.py new file mode 100644 index 00000000..43312299 --- /dev/null +++ b/python/2_CoreConcepts/simpleZeroCopy/simpleZeroCopy.py @@ -0,0 +1,275 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import ctypes +import sys +from pathlib import Path + +try: + import numpy as np + from cuda.bindings import runtime as cuda_rt + from cuda.core import ( + Device, + LaunchConfig, + Program, + ProgramOptions, + launch, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + +# Add parent directory to path to import utilities +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + + +def _mapped_host_alloc(num_floats, stream): + """ + Allocate page-locked host memory mapped for device access; return + (host_ptr, device_ptr) for CPU views and for ``launch()``. + """ + nbytes = int(num_floats) * np.dtype(np.float32).itemsize + if nbytes <= 0: + return 0, 0 + err, h_ptr = cuda_rt.cudaHostAlloc( + nbytes, cuda_rt.cudaHostAllocMapped | cuda_rt.cudaHostAllocPortable + ) + if err != cuda_rt.cudaError_t.cudaSuccess: + raise RuntimeError(f"cudaHostAlloc failed: {err}") + err, d_ptr = cuda_rt.cudaHostGetDevicePointer(h_ptr, 0) + if err != cuda_rt.cudaError_t.cudaSuccess: + cuda_rt.cudaFreeHost(h_ptr) + raise RuntimeError(f"cudaHostGetDevicePointer failed: {err}") + # Ensure prior work on this stream is visible before host fills buffers. + if stream is not None: + stream.sync() + return h_ptr, d_ptr + + +def _float_view(host_ptr, num_floats): + return np.frombuffer( + (ctypes.c_float * num_floats).from_address(host_ptr), + dtype=np.float32, + count=num_floats, + ) + + +# CUDA C++: vector add with grid-stride loop +VECTOR_ADD_KERNEL = """ +extern "C" __global__ +void vectorAddGPU(float* c, const float* a, const float* b, int N) { + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = gridDim.x * blockDim.x; + + for (size_t i = tid; i < N; i += stride) { + c[i] = a[i] + b[i]; + } +} +""" + + +def run(num_elements=1048576): + """ + Zero-copy vector add: map host memory, launch kernel with device + pointers, validate on CPU. + + This function shows how to: + 1. Allocate pinned (page-locked) host memory + 2. Map host memory into GPU address space (zero-copy) + 3. Access host memory directly from GPU kernel + 4. Validate results + + Parameters + ---------- + num_elements : int + Number of elements in vectors (default: 1048576) + """ + print("\n" + "=" * 70) + print("simpleZeroCopy - CUDA Python Sample") + print("=" * 70) + + # Initialize device + device = Device() + device.set_current() + major, minor = device.compute_capability + + print("\nDevice Information:") + print(f" Name: {device.name}") + print(f" Compute Capability: {major}.{minor}") + + # Create stream + stream = device.create_stream() + mapped_host_ptrs = [] + + try: + print( + "\n> Memory: mapped pinned host " + "(cudaHostAlloc + cudaHostGetDevicePointer)" + ) + + print("\nCompiling CUDA kernel...") + program_options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + prog = Program(VECTOR_ADD_KERNEL, code_type="c++", options=program_options) + mod = prog.compile("cubin") + kernel = mod.get_kernel("vectorAddGPU") + print(" Kernel compiled successfully") + + bytes_total = num_elements * np.dtype(np.float32).itemsize + print("\nAllocating memory:") + print(f" Vector size: {num_elements:,} elements") + print(f" Memory per vector: {bytes_total / (1024**2):.2f} MB") + print(f" Total memory: {3 * bytes_total / (1024**2):.2f} MB") + + print("\n> Allocating mapped pinned host memory...") + h_a, d_a = _mapped_host_alloc(num_elements, stream) + mapped_host_ptrs.append(h_a) + h_b, d_b = _mapped_host_alloc(num_elements, stream) + mapped_host_ptrs.append(h_b) + h_c, d_c = _mapped_host_alloc(num_elements, stream) + mapped_host_ptrs.append(h_c) + + a = _float_view(h_a, num_elements) + b = _float_view(h_b, num_elements) + c = _float_view(h_c, num_elements) + + print(" Mapped host memory allocated successfully") + + print("\n> Initializing vectors on host...") + rng = np.random.default_rng(42) + a[:] = rng.random(num_elements).astype(np.float32) + b[:] = rng.random(num_elements).astype(np.float32) + c[:] = 0 + + print("> Computing reference result on CPU...") + reference = a + b + + print("\n> Launching vectorAddGPU kernel...") + print(" Note: GPU accesses host memory directly (zero-copy)") + + block_size = 256 + grid_size = (num_elements + block_size - 1) // block_size + config = LaunchConfig(grid=grid_size, block=block_size) + + # Pass device pointers from cudaHostGetDevicePointer, not raw host VAs. + launch( + stream, + config, + kernel, + int(d_c), + int(d_a), + int(d_b), + np.int32(num_elements), + ) + stream.sync() + + print(" Kernel execution complete") + + print("\n> Checking results from vectorAddGPU()...") + print(f" Comparing {num_elements:,} elements...") + + # ``c`` is a host view of the same buffer; no cudaMemcpy D2H needed. + if np.allclose(c, reference, rtol=1e-5, atol=1e-6): + error_norm = np.linalg.norm(c - reference) + ref_norm = np.linalg.norm(reference) + relative_error = error_norm / ref_norm + print(f" Relative error: {relative_error:.6e}") + print(" Validation PASSED") + success = True + else: + max_error = np.max(np.abs(c - reference)) + print(f" Max error: {max_error}") + print(" Validation FAILED") + success = False + + print("\n" + "=" * 70) + if success: + print("simpleZeroCopy completed successfully!") + else: + print("simpleZeroCopy FAILED!") + print("=" * 70 + "\n") + + return 0 if success else 1 + finally: + for h in reversed(mapped_host_ptrs): + if h: + cuda_rt.cudaFreeHost(h) + stream.close() + + +def main(): + """Parse CLI, call ``run()``, and exit with validation status.""" + parser = argparse.ArgumentParser( + description="Demonstrate zero-copy memory access with CUDA", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python simpleZeroCopy.py + python simpleZeroCopy.py --num_elements 2097152 +What is Zero-Copy Memory? + Zero-copy allows the GPU to directly access host (CPU) memory without + explicit memory transfers. This is useful for: + - Small data that doesn't benefit from explicit transfers + - Data that is accessed infrequently + - Integrated GPUs that share memory with CPU + + Trade-offs: + - Slower than device memory (PCIe bandwidth limited) + - No explicit transfers needed (simpler code) + - Good for discrete GPUs with small data + - Excellent for integrated GPUs (e.g., Tegra) + """, + ) + + parser.add_argument( + "--num_elements", + type=int, + default=1048576, + help="Number of elements in vectors (default: 1048576)", + ) + + args = parser.parse_args() + + if args.num_elements <= 0: + print("Error: num_elements must be positive") + sys.exit(1) + + try: + exit_code = run(num_elements=args.num_elements) + except Exception as e: + print(f"\nError: {e}") + import traceback + + traceback.print_exc() + exit_code = 1 + + sys.exit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/streamingCopyComputeOverlap/README.md b/python/2_CoreConcepts/streamingCopyComputeOverlap/README.md new file mode 100644 index 00000000..6d352b61 --- /dev/null +++ b/python/2_CoreConcepts/streamingCopyComputeOverlap/README.md @@ -0,0 +1,98 @@ +# Sample: Streaming Copy + Compute Overlap (Python) + +## Description + +Demonstrate how to overlap memory transfers (H2D/D2H) with kernel computation using CUDA streams. This technique hides transfer latency and improves GPU utilization. + +## What You'll Learn + +- Using `PinnedMemoryResource` for async-capable host memory +- Using `DeviceMemoryResource` for GPU memory allocation +- Creating multiple streams with `Device.create_stream()` +- Async memory copies with `Buffer.copy_to()` +- Overlapping H2D transfers, kernel execution, and D2H transfers + +## Key Concept + +**Without overlap (sequential):** +``` +[====H2D====][====Compute====][====D2H====] +``` + +**With overlap (multiple streams):** +``` +Stream 0: [H2D][Compute][D2H] +Stream 1: [H2D][Compute][D2H] +Stream 2: [H2D][Compute][D2H] +``` + +## Key APIs (all from `cuda.core`) + +- `Device` - Device management +- `Device.create_stream()` - Create CUDA streams +- `Stream.sync()` - Synchronize stream +- `PinnedMemoryResource` - Pinned host memory (required for async transfers) +- `DeviceMemoryResource` - GPU device memory +- `Buffer.copy_to(dst, stream=stream)` - Async memory copy +- `Program`, `LaunchConfig`, `launch` - Kernel compilation and execution + +### From `numpy`: + +- `np.from_dlpack()` - Zero-copy view of pinned memory buffers + +## Requirements + +- CUDA Toolkit 13.0+ +- Python 3.10+ +- `cuda-python`, `cuda-core`, `numpy` + +## Installation + +```bash +pip install -r requirements.txt +``` + +## How to Run + +```bash +python streamingCopyComputeOverlap.py +``` + +## Expected Output + +``` +============================================================ +Streaming Copy + Compute Overlap +Using pure cuda.core APIs +============================================================ + +Device: NVIDIA GeForce RTX XXXX +Kernel compiled ✓ + +Problem size: 16,000,000 elements (61 MB) + +--- Sequential (no overlap) --- +Timeline: [H2D][Compute][D2H] +Time: X.XX ms (±X.XX) + +--- Streamed (with overlap) --- +Stream 0: [H2D][Compute][D2H] +Stream 1: [H2D][Compute][D2H] +Stream 2: [H2D][Compute][D2H] +... +2 streams: X.XX ms (±X.XX) - speedup: X.XXx +4 streams: X.XX ms (±X.XX) - speedup: X.XXx +8 streams: X.XX ms (±X.XX) - speedup: X.XXx + +============================================================ +Key: Pinned memory + multiple streams = overlap transfers with compute + +Note: Speedup depends on hardware characteristics. This technique +benefits most when transfer time is significant relative to compute. +============================================================ +``` + +## See Also + +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [CUDA Streams Best Practices](https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#overlapping-data-transfers) diff --git a/python/2_CoreConcepts/streamingCopyComputeOverlap/requirements.txt b/python/2_CoreConcepts/streamingCopyComputeOverlap/requirements.txt new file mode 100644 index 00000000..15612e66 --- /dev/null +++ b/python/2_CoreConcepts/streamingCopyComputeOverlap/requirements.txt @@ -0,0 +1,6 @@ +# Streaming Copy Compute Overlap Sample Requirements +# Requires Python 3.10 or newer + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +numpy>=2.3.2 diff --git a/python/2_CoreConcepts/streamingCopyComputeOverlap/streamingCopyComputeOverlap.py b/python/2_CoreConcepts/streamingCopyComputeOverlap/streamingCopyComputeOverlap.py new file mode 100644 index 00000000..665cce9c --- /dev/null +++ b/python/2_CoreConcepts/streamingCopyComputeOverlap/streamingCopyComputeOverlap.py @@ -0,0 +1,312 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Streaming Copy + Compute Overlap + +Demonstrates how to overlap memory transfers with kernel computation using +CUDA streams to maximize GPU utilization. + +Uses pure cuda.core APIs: + - Device, Stream for device and stream management + - PinnedMemoryResource, DeviceMemoryResource for memory allocation + - Buffer.copy_to() for async memory copies + - Program, LaunchConfig, launch for kernel compilation and execution +""" + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import numpy as np + from cuda.core import ( + Device, + DeviceMemoryResource, + EventOptions, + LaunchConfig, + PinnedMemoryResource, + Program, + ProgramOptions, + launch, + ) + from cuda_samples_utils import print_gpu_info +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Install with: pip install -r requirements.txt") + sys.exit(1) + + +# CUDA Kernel - compute-intensive vector operation (grid-stride loop) +VECTOR_SCALE_KERNEL = r""" +extern "C" __global__ +void vector_scale(const float* input, float* output, float scale, size_t N) { + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = (size_t)gridDim.x * blockDim.x; + for (size_t i = tid; i < N; i += stride) { + float val = input[i] * scale; + // Add compute work to make kernel non-trivial + for (int j = 0; j < 50; j++) { + val = sqrtf(val * val + 1.0f); + } + output[i] = val; + } +} +""" + + +def buffer_to_numpy(buffer, n_elements): + """Create numpy view of cuda.core Buffer via DLPack.""" + return np.from_dlpack(buffer).view(np.float32).reshape(n_elements) + + +def main(): + print("=" * 60) + print("Streaming Copy + Compute Overlap") + print("Using pure cuda.core APIs") + print("=" * 60) + + # Initialize device + device = Device(0) + device.set_current() + print() + print_gpu_info(device) + + # Compile kernel + arch = f"sm_{device.arch}" + program = Program( + VECTOR_SCALE_KERNEL, code_type="c++", options=ProgramOptions(arch=arch) + ) + kernel = program.compile(target_type="cubin").get_kernel("vector_scale") + print("Kernel compiled ✓") + + # Parameters + N = 16_000_000 # 16M elements + n_bytes = N * 4 + scale = 2.5 + n_runs = 10 + + print(f"\nProblem size: {N:,} elements ({n_bytes / 1024 / 1024:.0f} MB)") + + # Create memory resources + pinned_mr = PinnedMemoryResource() + device_mr = DeviceMemoryResource(device.device_id) + default_stream = device.create_stream() + + # ========================================================================= + # Sequential Execution + # ========================================================================= + print("\n--- Sequential (no overlap) ---") + print("Timeline: [H2D][Compute][D2H]") + + h_in = h_out = d_in = d_out = None + try: + # Pre-allocate buffers + h_in = pinned_mr.allocate(n_bytes, default_stream) + h_out = pinned_mr.allocate(n_bytes, default_stream) + d_in = device_mr.allocate(n_bytes, default_stream) + d_out = device_mr.allocate(n_bytes, default_stream) + # Sync before numpy access (numpy operations aren't stream ordered) + default_stream.sync() + + # Initialize input + np_in = buffer_to_numpy(h_in, N) + np_in[:] = np.random.rand(N).astype(np.float32) * 100 + + config = LaunchConfig(grid=((N + 255) // 256,), block=(256,)) + event_opts = EventOptions(enable_timing=True) + + # Warm up + h_in.copy_to(d_in, stream=default_stream) + launch( + default_stream, + config, + kernel, + d_in, + d_out, + np.float32(scale), + np.uint64(N), + ) + d_out.copy_to(h_out, stream=default_stream) + default_stream.sync() + + # Benchmark with CUDA events + times = [] + for _ in range(n_runs): + start_ev = device.create_event(options=event_opts) + end_ev = device.create_event(options=event_opts) + default_stream.record(start_ev) + h_in.copy_to(d_in, stream=default_stream) # Async H2D + launch( + default_stream, + config, + kernel, + d_in, + d_out, + np.float32(scale), + np.uint64(N), + ) + d_out.copy_to(h_out, stream=default_stream) # Async D2H + default_stream.record(end_ev) + default_stream.sync() + times.append(end_ev - start_ev) + + seq_time = np.mean(times) + print(f"Time: {seq_time:.2f} ms (±{np.std(times):.2f})") + + # Verification: compute expected on CPU and compare + default_stream.sync() + np_out = buffer_to_numpy(h_out, N) + expected = np_in.astype(np.float32) * scale + for _ in range(50): + expected = np.sqrt(expected * expected + 1.0).astype(np.float32) + if np.allclose(np_out, expected, rtol=1e-4, atol=1e-4): + print("Verification: PASSED") + else: + print("Verification: FAILED") + finally: + for buf in (h_in, h_out, d_in, d_out): + if buf is not None: + buf.close() + default_stream.close() + + # ========================================================================= + # Streamed Execution + # ========================================================================= + print("\n--- Streamed (with overlap) ---") + print("Stream 0: [H2D][Compute][D2H]") + print("Stream 1: [H2D][Compute][D2H]") + print("Stream 2: [H2D][Compute][D2H]") + print("...") + + for n_streams in [2, 4, 8]: + chunk_size = N // n_streams + chunk_bytes = chunk_size * 4 + + # Create streams + streams = [device.create_stream() for _ in range(n_streams)] + + # Pre-allocate per-stream buffers + h_ins, h_outs, d_ins, d_outs = [], [], [], [] + try: + for i in range(n_streams): + h_ins.append(pinned_mr.allocate(chunk_bytes, streams[i])) + h_outs.append(pinned_mr.allocate(chunk_bytes, streams[i])) + d_ins.append(device_mr.allocate(chunk_bytes, streams[i])) + d_outs.append(device_mr.allocate(chunk_bytes, streams[i])) + + # Initialize input data + for i in range(n_streams): + streams[i].sync() + np_view = buffer_to_numpy(h_ins[i], chunk_size) + np_view[:] = np.random.rand(chunk_size).astype(np.float32) * 100 + + chunk_config = LaunchConfig(grid=((chunk_size + 255) // 256,), block=(256,)) + + # Warm up + for i in range(n_streams): + h_ins[i].copy_to(d_ins[i], stream=streams[i]) + launch( + streams[i], + chunk_config, + kernel, + d_ins[i], + d_outs[i], + np.float32(scale), + np.uint64(chunk_size), + ) + d_outs[i].copy_to(h_outs[i], stream=streams[i]) + for stream in streams: + stream.sync() + + # Benchmark with CUDA events (use stream 0 for timing) + times = [] + event_opts = EventOptions(enable_timing=True) + for _ in range(n_runs): + start_ev = device.create_event(options=event_opts) + end_ev = device.create_event(options=event_opts) + streams[0].record(start_ev) + + # Issue all operations - they overlap across streams + for i in range(n_streams): + h_ins[i].copy_to(d_ins[i], stream=streams[i]) # Async H2D + launch( + streams[i], + chunk_config, + kernel, + d_ins[i], + d_outs[i], + np.float32(scale), + np.uint64(chunk_size), + ) + d_outs[i].copy_to(h_outs[i], stream=streams[i]) # Async D2H + + # Wait for all streams, record end on stream 0 + for stream in streams: + stream.sync() + streams[0].record(end_ev) + streams[0].sync() + times.append(end_ev - start_ev) + + avg = np.mean(times) + speedup = seq_time / avg + print( + f"{n_streams} streams: {avg:.2f} ms (±{np.std(times):.2f}) " + f"- speedup: {speedup:.2f}x" + ) + + # Verification (streamed): concatenate chunks and compare to expected + for s in streams: + s.sync() + out_chunks = [ + buffer_to_numpy(h_outs[i], chunk_size) for i in range(n_streams) + ] + in_chunks = [ + buffer_to_numpy(h_ins[i], chunk_size) for i in range(n_streams) + ] + np_out = np.concatenate(out_chunks) + np_in = np.concatenate(in_chunks) + expected = np_in.astype(np.float32) * scale + for _ in range(50): + expected = np.sqrt(expected * expected + 1.0).astype(np.float32) + if not np.allclose(np_out, expected, rtol=1e-4, atol=1e-4): + print(f" Verification: FAILED for {n_streams} streams") + finally: + for buf in h_ins + h_outs + d_ins + d_outs: + buf.close() + for s in streams: + s.close() + + print("\n" + "=" * 60) + print("Key: Pinned memory + multiple streams = overlap transfers with compute") + print("\nNote: Speedup depends on hardware characteristics. This technique") + print("benefits most when transfer time is significant relative to compute.") + print("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/python/2_CoreConcepts/tmaTensorMap/README.md b/python/2_CoreConcepts/tmaTensorMap/README.md new file mode 100644 index 00000000..1ae7d781 --- /dev/null +++ b/python/2_CoreConcepts/tmaTensorMap/README.md @@ -0,0 +1,138 @@ +# tmaTensorMap (Python) + +## Description + +This sample demonstrates how to use Tensor Memory Accelerator (TMA) +descriptors with `cuda.core` on Hopper and later GPUs (compute +capability >= 9.0). TMA enables efficient bulk data movement between +global and shared memory using hardware-managed tensor map +descriptors, which are a key building block for modern GEMM kernels +and large shared-memory tile loads. + +The sample: + +1. Creates a TMA tiled descriptor from a CuPy device array via + `StridedMemoryView.from_any_interface(...).as_tensor_map(...)`. +2. Passes the descriptor by value (as `__grid_constant__`) to a + kernel that uses libcudacxx TMA/barrier wrappers to bulk-load a + tile into shared memory, then copies it out to verify correctness. +3. Reuses the same descriptor against a new source tensor with + `replace_address()` to avoid rebuilding it. + +## What You'll Learn + +- Creating a TMA descriptor from a strided device tensor via + `StridedMemoryView.as_tensor_map(box_dim=...)` +- Passing a tensor map to a kernel by value using + `__grid_constant__` +- Using libcudacxx (`cuda/barrier`) to coordinate TMA loads with a + block-scoped barrier +- Reusing a descriptor against a new source buffer via + `tensor_map.replace_address(new_tensor)` +- Compiling a kernel to CUBIN for a specific target arch so Hopper + features are available +- Using `cuda.pathfinder` to locate the CUDA toolkit include directory + CCCL headers and libcudacxx + +## Key Libraries + +- [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) - compilation, launching, and tensor-map helpers +- `cuda.pathfinder` - locate the CUDA toolkit include directory +- `cupy` - allocate and fill device tensors +- `numpy` - scalar kernel arguments + +## Key APIs + +### From `cuda.core` + +- `StridedMemoryView.from_any_interface(tensor, stream_ptr=-1)` - build a typed view from any DLPack/CUDA-array-interface tensor +- `StridedMemoryView.as_tensor_map(box_dim=(...))` - produce a TMA descriptor for the given tile shape +- `tensor_map.replace_address(new_tensor)` - retarget an existing descriptor at a new tensor +- `Program(code, code_type="c++", options=ProgramOptions(std="c++17", arch="sm_90", include_path=[...]))` - compile a C++ kernel against libcudacxx +- `program.compile("cubin")` - produce a CUBIN so `__grid_constant__` and TMA intrinsics are fully supported +- `launch(stream, config, kernel, tensor_map, ...)` - pass the TMA descriptor as a kernel argument + +### From `cuda.pathfinder` + +- `get_cuda_path_or_home()` - return the detected CUDA toolkit root for locating `include/cccl` + +### From `cuda_samples_utils` + +- `print_gpu_info()` - print device name and compute capability + +## Requirements + +### Hardware + +- NVIDIA Hopper or newer GPU with Compute Capability 9.0 or higher (H100, H200, B200, ...) +- On GPUs older than Hopper the sample exits cleanly without running the kernel +- Minimum GPU memory: 512 MB + +### Software + +- CUDA Toolkit 13.0 or newer with libcudacxx (cccl) headers +- Python 3.10 or newer +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## Installation + +Install the required packages from `requirements.txt`: + +```bash +cd /path/to/cuda-samples/python/2_CoreConcepts/tmaTensorMap +pip install -r requirements.txt +``` + +The `requirements.txt` installs: + +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## How to Run + +### Basic usage + +```bash +cd cuda-samples/python/2_CoreConcepts/tmaTensorMap +python tmaTensorMap.py +``` + +### With custom parameters + +```bash +# Larger tensor (must be a multiple of the 128-element tile) +python tmaTensorMap.py --elements 8192 + +# Use a specific GPU +python tmaTensorMap.py --device 1 +``` + +## Expected Output + +On a Hopper (sm_90) GPU: + +``` +Device: NVIDIA H100 PCIe +Compute Capability: 9.0 + +TMA copy verified: 1024 elements across 8 tiles +replace_address verified: descriptor reused with new source tensor +``` + +**Note:** Device name and compute capability will vary based on your GPU. + +## Files + +- `tmaTensorMap.py` - Python implementation using `cuda.core` TMA APIs +- `README.md` - This file +- `requirements.txt` - Sample dependencies +- `../../Utilities/cuda_samples_utils.py` - Common utilities (imported by this sample) + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [TMA in the CUDA C++ Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#tensor-memory-accelerator) +- [`cuda::barrier` reference](https://nvidia.github.io/cccl/libcudacxx/extended_api/synchronization_primitives/barrier.html) diff --git a/python/2_CoreConcepts/tmaTensorMap/requirements.txt b/python/2_CoreConcepts/tmaTensorMap/requirements.txt new file mode 100644 index 00000000..c65cd4fb --- /dev/null +++ b/python/2_CoreConcepts/tmaTensorMap/requirements.txt @@ -0,0 +1,4 @@ +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 +numpy>=1.24.0 diff --git a/python/2_CoreConcepts/tmaTensorMap/tmaTensorMap.py b/python/2_CoreConcepts/tmaTensorMap/tmaTensorMap.py new file mode 100644 index 00000000..82a38713 --- /dev/null +++ b/python/2_CoreConcepts/tmaTensorMap/tmaTensorMap.py @@ -0,0 +1,281 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +TMA Tensor Map with cuda.core + +This sample demonstrates how to use Tensor Memory Accelerator (TMA) +descriptors with cuda.core on Hopper and later GPUs (compute +capability >= 9.0). TMA enables efficient bulk data movement between +global and shared memory using hardware-managed tensor map +descriptors. + +The sample: + + 1. Creates a TMA tiled descriptor from a CuPy device array via + ``StridedMemoryView.from_any_interface(...).as_tensor_map(...)``. + 2. Passes the descriptor by value (as ``__grid_constant__``) to a + kernel that uses libcudacxx TMA/barrier wrappers to bulk-load a + tile into shared memory. + 3. Reuses the same descriptor against a new source tensor with + ``replace_address()`` to avoid rebuilding it. + +On GPUs older than Hopper (sm < 90), the sample prints a diagnostic +and exits cleanly. + +Ported from ``cuda_core/examples/tma_tensor_map.py`` in the +`cuda-python` repository. +""" + +import os +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import cupy as cp + import numpy as np + from cuda.core import ( + Device, + LaunchConfig, + Program, + ProgramOptions, + StridedMemoryView, + launch, + ) + from cuda.pathfinder import find_nvidia_header_directory, get_cuda_path_or_home + from cuda_samples_utils import print_gpu_info +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +TILE_SIZE = 128 # elements per tile, must match the kernel constant + +KERNEL_SRC = r""" +#include + +// Minimal definition of the 128-byte opaque tensor-map struct. +struct __align__(64) TensorMap { unsigned long long opaque[16]; }; + +static constexpr int TILE_SIZE = 128; +using TmaBarrier = cuda::barrier; + +extern "C" +__global__ void tma_copy( + const __grid_constant__ TensorMap tensor_map, + float* output, + int N) +{ + __shared__ __align__(128) float smem[TILE_SIZE]; + __shared__ TmaBarrier bar; + + const int tid = threadIdx.x; + const int tile_start = blockIdx.x * TILE_SIZE; + + if (tid == 0) + { + init(&bar, 1); + } + __syncthreads(); + + if (tid == 0) + { + cuda::device::experimental::cp_async_bulk_tensor_1d_global_to_shared( + smem, + reinterpret_cast(&tensor_map), + tile_start, + bar); + bar.wait(cuda::device::barrier_arrive_tx(bar, 1, TILE_SIZE * sizeof(float))); + } + __syncthreads(); + + if (tid < TILE_SIZE) + { + const int idx = tile_start + tid; + if (idx < N) + output[idx] = smem[tid]; + } +} +""" + + +def _get_cccl_include_paths() -> list: + """Locate the CUDA toolkit and libcudacxx (cccl) include directories. + + ``cuda.pathfinder.find_nvidia_header_directory`` searches pip-installed + CUDA packages, conda environments, and the standard system install + locations, so this works without requiring ``CUDA_PATH`` or + ``CUDA_HOME``. ``get_cuda_path_or_home`` is used as a final fallback. + """ + include_path: list = [] + + # libcudacxx (cccl) - preferred, provides used below. + try: + cccl_dir = find_nvidia_header_directory("cccl") + if cccl_dir and os.path.isdir(cccl_dir): + include_path.append(cccl_dir) + except Exception: # noqa: S110 - fallback probes continue below + pass + + # CUDA runtime headers - needed for the CUtensorMap driver type. + try: + cudart_dir = find_nvidia_header_directory("cudart") + if ( + cudart_dir + and os.path.isdir(cudart_dir) + and cudart_dir not in include_path + ): + include_path.append(cudart_dir) + except Exception: # noqa: S110 - fallback probes continue below + pass + + # Fallback: use CUDA_PATH / CUDA_HOME when pathfinder comes up empty. + if not include_path: + cuda_path = get_cuda_path_or_home() + if cuda_path is not None: + cuda_include = os.path.join(cuda_path, "include") + if os.path.isdir(cuda_include): + include_path.append(cuda_include) + cccl_include = os.path.join(cuda_include, "cccl") + if os.path.isdir(cccl_include): + include_path.insert(0, cccl_include) + + if not include_path: + print( + "Could not locate CUDA toolkit headers.\n" + "Tried cuda.pathfinder (pip/conda/system installs) and " + "CUDA_PATH / CUDA_HOME; none succeeded.\n" + "Set CUDA_HOME to your toolkit root (containing include/cccl " + "and include/cuda_runtime.h) and retry.", + file=sys.stderr, + ) + sys.exit(1) + return include_path + + +def main() -> int: + import argparse + + parser = argparse.ArgumentParser( + description="Use a TMA tensor map to bulk-copy data on Hopper+ GPUs" + ) + parser.add_argument( + "--elements", + type=int, + default=1024, + help="Total number of float32 elements (must be a multiple of 128)", + ) + parser.add_argument("--device", type=int, default=0, help="CUDA device id") + args = parser.parse_args() + + if args.elements % TILE_SIZE != 0: + print(f"--elements must be a multiple of TILE_SIZE={TILE_SIZE}") + return 1 + + dev = Device(args.device) + print_gpu_info(dev) + + arch = dev.compute_capability + if arch < (9, 0): + print( + f"\nTMA requires compute capability >= 9.0 (Hopper or later); " + f"this device is {arch.major}.{arch.minor}. Exiting cleanly." + ) + return 0 + + dev.set_current() + include_path = _get_cccl_include_paths() + + # Compile with the CUBIN code type to target the exact device arch. + prog = Program( + KERNEL_SRC, + code_type="c++", + options=ProgramOptions( + std="c++17", + arch=f"sm_{dev.arch}", + include_path=include_path, + ), + ) + mod = prog.compile("cubin") + kernel = mod.get_kernel("tma_copy") + + # (1) Prepare input data and verify the initial TMA copy. + n = args.elements + src = cp.arange(n, dtype=cp.float32) + output = cp.zeros(n, dtype=cp.float32) + dev.sync() # CuPy uses its own stream + + tensor_map = StridedMemoryView.from_any_interface( + src, stream_ptr=-1 + ).as_tensor_map(box_dim=(TILE_SIZE,)) + + n_tiles = n // TILE_SIZE + config = LaunchConfig(grid=n_tiles, block=TILE_SIZE) + launch( + dev.default_stream, + config, + kernel, + tensor_map, + output.data.ptr, + np.int32(n), + ) + dev.sync() + + if not cp.array_equal(output, src): + print("TMA copy produced incorrect results") + return 1 + print(f"TMA copy verified: {n} elements across {n_tiles} tiles") + + # (2) Demonstrate replace_address() without rebuilding the descriptor. + replacement = cp.full(n, fill_value=42.0, dtype=cp.float32) + dev.sync() + + tensor_map.replace_address(replacement) + + output2 = cp.zeros(n, dtype=cp.float32) + launch( + dev.default_stream, + config, + kernel, + tensor_map, + output2.data.ptr, + np.int32(n), + ) + dev.sync() + + if not cp.array_equal(output2, replacement): + print("replace_address produced incorrect results") + return 1 + print("replace_address verified: descriptor reused with new source tensor") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/3_FrameworkInterop/customPyTorchKernel/README.md b/python/3_FrameworkInterop/customPyTorchKernel/README.md new file mode 100644 index 00000000..01d11e00 --- /dev/null +++ b/python/3_FrameworkInterop/customPyTorchKernel/README.md @@ -0,0 +1,54 @@ +# Sample: PyTorch Custom GPU Operator + +## Description + +This sample demonstrates how to add a custom GPU operation to PyTorch using the `cuda.core` API. It implements a simple square operation (y = x²) to show the complete workflow from CUDA kernel to PyTorch integration with autograd support. + +## Requirements + +- NVIDIA GPU with Compute Capability 7.0+ +- CUDA Toolkit 13.0+ +- Python 3.10+ +- PyTorch 2.0+ +- cuda-python >= 13.0.0 +- cuda-core >= 0.6.0 + +## Installation + +```bash +cd python/3_FrameworkInterop/customPyTorchKernel +pip install -r requirements.txt +``` + +## How to Run + +```bash +# Basic usage +python customPyTorchKernel.py + +# Test with more elements +python customPyTorchKernel.py --size 1000000 + +# Use specific GPU +CUDA_VISIBLE_DEVICES=1 python customPyTorchKernel.py +``` + +## Expected Output + +The sample runs three tests: +1. Forward pass correctness (y = x²) +2. Backward pass correctness (gradient computation) +3. Multi-dimensional tensor support + +All tests should pass, confirming the custom operator works correctly with PyTorch's autograd system. + +## Key Concepts + +The sample demonstrates: +- Writing CUDA kernels with grid-stride loops +- Runtime kernel compilation with cuda.core +- PyTorch autograd integration via `torch.autograd.Function` +- Stream management using PyTorch's current stream +- Kernel caching for performance + +The code is self-documenting with inline comments explaining each step. diff --git a/python/3_FrameworkInterop/customPyTorchKernel/customPyTorchKernel.py b/python/3_FrameworkInterop/customPyTorchKernel/customPyTorchKernel.py new file mode 100644 index 00000000..1c53ead1 --- /dev/null +++ b/python/3_FrameworkInterop/customPyTorchKernel/customPyTorchKernel.py @@ -0,0 +1,390 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +PyTorch Custom GPU Operator using cuda.core + +Question: How do I add a custom GPU op to PyTorch? +Answer: This sample shows the complete workflow. + +This sample implements a custom square operation (y = x²) to demonstrate: +- Writing a CUDA kernel +- Compiling with cuda.core +- Integrating with PyTorch's autograd system +- Proper device and stream management +""" + +import sys + +try: + import torch + from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install: pip install torch cuda-python cuda-core") + sys.exit(1) + + +# ============================================================================ +# Step 1: Define CUDA Kernel +# ============================================================================ +# Simple element-wise square: y = x² +# This kernel is easy to understand and verify + +SQUARE_KERNEL = """ +extern "C" __global__ +void square_kernel(const float* x, float* y, int n) +{ + const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x; + for (int i = tid; i < n; i += gridDim.x * blockDim.x) { + y[i] = x[i] * x[i]; + } +} +""" + + +# ============================================================================ +# PyTorch Stream Wrapper +# ============================================================================ +# cuda.core requires objects with __cuda_stream__ protocol +class PyTorchStreamWrapper: + def __init__(self, pt_stream): + self.pt_stream = pt_stream + + def __cuda_stream__(self): + stream_id = self.pt_stream.cuda_stream + return (0, stream_id) # Return format required by CUDA Python + + +# ============================================================================ +# Step 2: Kernel Compilation and Caching +# ============================================================================ +# Compile kernel once per device and cache it to avoid recompilation overhead +# In real training loops, this avoids paying compilation cost on every forward. + + +_kernel_cache = {} + + +def get_square_kernel(device): + """ + Get or compile the square kernel for a given device. + + Parameters + ---------- + device : Device + CUDA device object + + Returns + ------- + Kernel + Compiled CUDA kernel + """ + # Cache key based on device to avoid recompiling for the same GPU + key = device.pci_bus_id + + if key not in _kernel_cache: + # Compile the kernel with appropriate architecture + opts = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + prog = Program(SQUARE_KERNEL, code_type="c++", options=opts) + mod = prog.compile("cubin") + _kernel_cache[key] = mod.get_kernel("square_kernel") + + return _kernel_cache[key] + + +# ============================================================================ +# Step 3: PyTorch Autograd Function +# ============================================================================ +# This integrates the CUDA kernel with PyTorch's automatic differentiation + + +class SquareOp(torch.autograd.Function): + """ + Custom square operation using cuda.core. + + Forward: y = x² (computed with custom CUDA kernel) + Backward: grad_x = 2 * x * grad_y (computed with PyTorch) + """ + + @staticmethod + def forward(ctx, x): + """ + Forward pass: compute y = x² using custom CUDA kernel. + + Parameters + ---------- + ctx : Context + PyTorch context for saving tensors + x : torch.Tensor + Input tensor (must be CUDA, float32, contiguous) + + Returns + ------- + torch.Tensor + Output tensor with y = x² + """ + # Validate input requirements + if not x.is_cuda: + raise RuntimeError("SquareOp only supports CUDA tensors") + if x.dtype != torch.float32: + raise RuntimeError("SquareOp only supports float32 tensors") + + # Ensure contiguous memory layout for efficient kernel access + x = x.contiguous() + + device = Device() + # Use PyTorch's current stream to ensure proper ordering with other PyTorch ops + # Create a cuda.core Stream from PyTorch's stream wrapper + torch_stream = torch.cuda.current_stream(device=x.device) + stream = device.create_stream(PyTorchStreamWrapper(torch_stream)) + + # Create a try/finally block to ensure the stream is properly closed + try: + # Get compiled kernel (cached) + kernel = get_square_kernel(device) + + # Allocate output tensor + y = torch.empty_like(x) + + # Configure kernel launch + n = int(x.numel()) + threads_per_block = 256 + blocks_per_grid = (n + threads_per_block - 1) // threads_per_block + config = LaunchConfig(grid=blocks_per_grid, block=threads_per_block) + + # Launch the kernel + launch(stream, config, kernel, x.data_ptr(), y.data_ptr(), n) + finally: + # Ensure stream is properly closed + stream.close() + + # Save input for backward pass + ctx.save_for_backward(x) + + return y + + @staticmethod + def backward(ctx, grad_output): + """ + Backward pass: compute gradient. + + For y = x², the derivative is dy/dx = 2x + Therefore: grad_x = grad_output * 2x + + Parameters + ---------- + ctx : Context + PyTorch context with saved tensors + grad_output : torch.Tensor + Gradient from upstream + + Returns + ------- + torch.Tensor + Gradient with respect to input + """ + # Retrieve saved input + (x,) = ctx.saved_tensors + + # Note: We assume grad_output has the same dtype and device as x. + # This is guaranteed by PyTorch's autograd system. + + # Compute gradient: d(x²)/dx = 2x + grad_x = 2.0 * x * grad_output + + return grad_x + + +# ============================================================================ +# Step 4: Public API +# ============================================================================ + + +def square(x): + """ + Apply element-wise square operation using custom CUDA kernel. + + Parameters + ---------- + x : torch.Tensor + Input tensor (must be on CUDA device, dtype=float32) + + Returns + ------- + torch.Tensor + Output tensor with y = x² + + Examples + -------- + >>> x = torch.randn(100, device='cuda') + >>> y = square(x) + >>> assert torch.allclose(y, x ** 2) + """ + return SquareOp.apply(x) + + +# ============================================================================ +# Step 5: Testing and Verification +# ============================================================================ + + +def main(): + """Test the custom square operation.""" + import argparse + + parser = argparse.ArgumentParser( + description="Custom PyTorch Square Operator using cuda.core" + ) + parser.add_argument( + "--size", type=int, default=10000, help="Number of elements (default: 10000)" + ) + + args = parser.parse_args() + + # Device info + device = Device() + device.set_current() + major, minor = device.compute_capability + + print("\nDevice Information:") + print(f" Name: {device.name}") + print(f" Compute Capability: sm_{major}{minor}") + + print("\n" + "=" * 70) + print("Custom PyTorch Square Operator Test") + print("=" * 70) + + # ======================================================================== + # Test 1: Forward Pass Correctness + # ======================================================================== + print("\n" + "-" * 70) + print("Test 1: Forward Pass") + print("-" * 70) + + x = torch.randn(args.size, dtype=torch.float32, device="cuda") + + # Custom square operation + y_custom = square(x) + + # PyTorch reference + y_reference = x**2 + + # Check correctness + max_error = torch.max(torch.abs(y_custom - y_reference)).item() + + print(f"Max absolute error: {max_error:.2e}") + + if torch.allclose(y_custom, y_reference, rtol=1e-5, atol=1e-6): + print("[PASS] Forward pass PASSED") + else: + print("[FAIL] Forward pass FAILED") + return 1 + + # ======================================================================== + # Test 2: Backward Pass (Gradient) Correctness + # ======================================================================== + print("\n" + "-" * 70) + print("Test 2: Backward Pass") + print("-" * 70) + + # Test with requires_grad + x_custom = torch.randn( + args.size, dtype=torch.float32, device="cuda", requires_grad=True + ) + x_reference = x_custom.clone().detach().requires_grad_(True) + + # Forward pass + y_custom = square(x_custom) + y_reference = x_reference**2 + + # Create upstream gradient + grad_output = torch.randn_like(y_custom) + + # Backward pass + y_custom.backward(grad_output) + y_reference.backward(grad_output) + + # Check gradients + max_grad_error = torch.max(torch.abs(x_custom.grad - x_reference.grad)).item() + + print(f"Max gradient error: {max_grad_error:.2e}") + + if torch.allclose(x_custom.grad, x_reference.grad, rtol=1e-5, atol=1e-6): + print("[PASS] Backward pass PASSED") + else: + print("[FAIL] Backward pass FAILED") + return 1 + + # ======================================================================== + # Test 3: Multi-dimensional Tensors + # ======================================================================== + print("\n" + "-" * 70) + print("Test 3: Multi-dimensional Tensors") + print("-" * 70) + + # Test with 2D tensor + x_2d = torch.randn(100, 100, dtype=torch.float32, device="cuda") + y_2d_custom = square(x_2d) + y_2d_reference = x_2d**2 + + if torch.allclose(y_2d_custom, y_2d_reference, rtol=1e-5, atol=1e-6): + print("[PASS] 2D tensor test PASSED") + else: + print("[FAIL] 2D tensor test FAILED") + return 1 + + # Test with 3D tensor + x_3d = torch.randn(10, 20, 30, dtype=torch.float32, device="cuda") + y_3d_custom = square(x_3d) + y_3d_reference = x_3d**2 + + if torch.allclose(y_3d_custom, y_3d_reference, rtol=1e-5, atol=1e-6): + print("[PASS] 3D tensor test PASSED") + else: + print("[FAIL] 3D tensor test FAILED") + return 1 + + # ======================================================================== + # Summary + # ======================================================================== + print("\n" + "=" * 70) + print("All tests PASSED!") + print("=" * 70) + print("\nYour custom GPU operator is working correctly!") + print("You can now use it in your PyTorch models like any built-in op.") + print("\nExample usage:") + print(" x = torch.randn(100, device='cuda')") + print(" y = square(x) # Uses your custom CUDA kernel") + print(" loss = y.sum()") + print(" loss.backward() # Gradients computed automatically") + print("=" * 70 + "\n") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/3_FrameworkInterop/customPyTorchKernel/requirements.txt b/python/3_FrameworkInterop/customPyTorchKernel/requirements.txt new file mode 100644 index 00000000..6c4cc859 --- /dev/null +++ b/python/3_FrameworkInterop/customPyTorchKernel/requirements.txt @@ -0,0 +1,5 @@ +# Custom PyTorch Kernel Sample Requirements + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +torch>=2.0.0 diff --git a/python/3_FrameworkInterop/customTensorFlowKernel/README.md b/python/3_FrameworkInterop/customTensorFlowKernel/README.md new file mode 100644 index 00000000..86c934a2 --- /dev/null +++ b/python/3_FrameworkInterop/customTensorFlowKernel/README.md @@ -0,0 +1,81 @@ +# Sample: TensorFlow Custom GPU Operator + +## Description + +Learn how to add a custom GPU operation to TensorFlow using `cuda.core` with `tf.py_function`. This sample implements a custom **ReLU operation** (y = max(0, x)) for rapid prototyping of GPU operations. + +## Key Question Answered + +**Q: How do I add a custom GPU op to TensorFlow?** + +**A:** Use `tf.py_function` to wrap cuda.core kernels: +1. Write CUDA kernels (forward + backward) with grid-stride loops +2. Compile them with cuda.core +3. Wrap in Python functions +4. Use `tf.py_function` to call from TensorFlow +5. Register gradients with `@tf.custom_gradient` + +## Requirements + +- NVIDIA GPU with Compute Capability 7.0+ +- CUDA Toolkit 13.0+ +- Python 3.10+ +- TensorFlow 2.10+ +- cuda-python >= 13.0.0 +- cuda-core >= 0.6.0 (required for LEGACY_DEFAULT_STREAM) +- numpy >= 2.3.2 +- CuPy (for device pointer access) + +## Installation + +```bash +cd python/3_FrameworkInterop/customTensorFlowKernel +pip install -r requirements.txt +``` + +## How to Run + +```bash +python customTensorFlowKernel.py +python customTensorFlowKernel.py --size 1000000 +``` + +## Usage Example + +```python +import tensorflow as tf +from customTensorFlowKernel import custom_relu + +# Simple usage +x = tf.random.normal([100], dtype=tf.float32) +y = custom_relu(x) + +# In a Keras model +model = tf.keras.Sequential([ + tf.keras.layers.Dense(128), + tf.keras.layers.Lambda(custom_relu), + tf.keras.layers.Dense(10) +]) +``` + +## Key Concepts + +- **tf.py_function**: Bridges TensorFlow and Python code using cuda.core (has overhead, not XLA-compatible) +- **@tf.custom_gradient**: Registers custom backward pass +- **cuda.core**: Primary GPU manager (device, stream, kernel compilation) +- **CuPy**: Internal helper for device pointer access only + +## Production Alternatives + +This sample is for rapid prototyping. For production: +- **TensorFlow C++ Custom Op**: Full performance, XLA compatible +- **XLA Custom Calls**: For XLA-compiled models +- See TensorFlow documentation for details + +## See Also + +- [cuda.core Documentation](https://nvidia.github.io/cuda-python/cuda-core/latest/) +- [TensorFlow tf.py_function](https://www.tensorflow.org/api_docs/python/tf/py_function) +- [TensorFlow @custom_gradient](https://www.tensorflow.org/api_docs/python/tf/custom_gradient) +- [TensorFlow C++ Custom Op Guide](https://www.tensorflow.org/guide/create_op) +- [CuPy Documentation](https://docs.cupy.dev/) diff --git a/python/3_FrameworkInterop/customTensorFlowKernel/customTensorFlowKernel.py b/python/3_FrameworkInterop/customTensorFlowKernel/customTensorFlowKernel.py new file mode 100644 index 00000000..837d873d --- /dev/null +++ b/python/3_FrameworkInterop/customTensorFlowKernel/customTensorFlowKernel.py @@ -0,0 +1,430 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +TensorFlow Custom GPU Operator using cuda.core + +Question: How do I add a custom GPU op to TensorFlow? +Answer: This sample shows rapid prototyping with cuda.core + tf.py_function. + +This sample implements a custom ReLU operation (y = max(0, x)) to demonstrate: +- Writing CUDA kernels (forward + backward) with grid-stride loops +- Compiling with cuda.core +- Integrating with TensorFlow via tf.py_function +- Proper gradient registration + +Dependencies: +- tensorflow: Deep learning framework +- cuda-core: GPU kernel compilation and launch + (requires >=0.6.0 for LEGACY_DEFAULT_STREAM) +- cuda-python: CUDA driver API bindings +- cupy: Array operations and device pointer access + +Note: This approach uses tf.py_function for rapid prototyping. For production +TensorFlow applications, use TensorFlow's C++ Custom Op API. +""" + +import sys + +try: + # CuPy is required for array operations and device pointer access + import cupy as cp + import tensorflow as tf + from cuda.core import ( + LEGACY_DEFAULT_STREAM, + Device, + LaunchConfig, + Program, + ProgramOptions, + launch, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install: pip install tensorflow cupy cuda-python cuda-core") + sys.exit(1) + + +# ============================================================================ +# Step 1: Define CUDA Kernels +# ============================================================================ +# Simple element-wise ReLU: y = max(0, x) + +RELU_KERNEL = """ +extern "C" __global__ +void relu_forward_kernel(const float* x, float* y, int n) +{ + // Grid-stride loop: each thread processes multiple elements + int idx = blockIdx.x * blockDim.x + threadIdx.x; + int stride = gridDim.x * blockDim.x; + for (int i = idx; i < n; i += stride) { + y[i] = x[i] > 0.0f ? x[i] : 0.0f; + } +} + +extern "C" __global__ +void relu_backward_kernel(const float* x, const float* grad_y, float* grad_x, int n) +{ + // Grid-stride loop: each thread processes multiple elements + int idx = blockIdx.x * blockDim.x + threadIdx.x; + int stride = gridDim.x * blockDim.x; + for (int i = idx; i < n; i += stride) { + grad_x[i] = x[i] > 0.0f ? grad_y[i] : 0.0f; + } +} +""" + + +# ============================================================================ +# Step 2: Kernel Compilation and Caching +# ============================================================================ +# Compile kernel once per device and cache it to avoid recompilation overhead +# In real training loops, this avoids paying compilation cost on every forward. + +_kernel_cache = {} + + +def _get_relu_kernels(device): + """ + Get or compile the ReLU kernels for a given device. + + Parameters + ---------- + device : Device + CUDA device object + + Returns + ------- + tuple + (forward_kernel, backward_kernel) compiled CUDA kernels + """ + # Cache key based on device to avoid recompiling for the same GPU + key = device.pci_bus_id + + if key not in _kernel_cache: + # Compile the kernel with appropriate architecture + opts = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + prog = Program(RELU_KERNEL, code_type="c++", options=opts) + mod = prog.compile("cubin") + forward_kernel = mod.get_kernel("relu_forward_kernel") + backward_kernel = mod.get_kernel("relu_backward_kernel") + _kernel_cache[key] = (forward_kernel, backward_kernel) + + return _kernel_cache[key] + + +def _launch_relu_forward(x_np): + """ + Internal function: Launch forward CUDA kernel. + + Takes numpy array, returns numpy array. + Uses CuPy for array operations and device pointer access, cuda.core for + device/stream management. + + Note: LEGACY_DEFAULT_STREAM doesn't require explicit cleanup, but kernel + launch failures should be handled by the caller. CuPy arrays are + automatically cleaned up when they go out of scope. + """ + device = Device() + + # Ensure this device is current (TensorFlow usually does this already) + device.set_current() + + # Get compiled kernel (cached) + forward_kernel, _ = _get_relu_kernels(device) + + # Convert numpy to CuPy (CPU-to-GPU copy) + # CuPy is used for array operations and getting device pointers + x_cp = cp.asarray(x_np) + y_cp = cp.empty_like(x_cp) + + # Configure kernel launch + n = int(x_cp.size) + threads_per_block = 256 + blocks_per_grid = (n + threads_per_block - 1) // threads_per_block + config = LaunchConfig(grid=blocks_per_grid, block=threads_per_block) + + # Launch on the legacy default stream (stream 0) for TensorFlow interop + launch( + LEGACY_DEFAULT_STREAM, config, forward_kernel, x_cp.data.ptr, y_cp.data.ptr, n + ) + + # Return as numpy array (GPU-to-CPU copy via cp.asnumpy) + return cp.asnumpy(y_cp) + + +def _launch_relu_backward(x_np, grad_y_np): + """ + Internal function: Launch backward CUDA kernel. + + Takes numpy arrays, returns numpy array. + Uses CuPy for array operations and device pointer access, cuda.core for + device/stream management. + + Note: LEGACY_DEFAULT_STREAM doesn't require explicit cleanup, but kernel + launch failures should be handled by the caller. CuPy arrays are + automatically cleaned up when they go out of scope. + """ + device = Device() + + # Ensure this device is current (TensorFlow usually does this already) + device.set_current() + + # Get compiled kernel (cached) + _, backward_kernel = _get_relu_kernels(device) + + # Convert numpy to CuPy (CPU-to-GPU copy) + # CuPy is used for array operations and getting device pointers + x_cp = cp.asarray(x_np) + grad_y_cp = cp.asarray(grad_y_np) + grad_x_cp = cp.empty_like(x_cp) + + # Configure kernel launch + n = int(x_cp.size) + threads_per_block = 256 + blocks_per_grid = (n + threads_per_block - 1) // threads_per_block + config = LaunchConfig(grid=blocks_per_grid, block=threads_per_block) + + # Launch on the legacy default stream (stream 0) for TensorFlow interop + launch( + LEGACY_DEFAULT_STREAM, + config, + backward_kernel, + x_cp.data.ptr, + grad_y_cp.data.ptr, + grad_x_cp.data.ptr, + n, + ) + + # Return as numpy array (GPU-to-CPU copy via cp.asnumpy) + return cp.asnumpy(grad_x_cp) + + +# ============================================================================ +# Step 3: TensorFlow Integration via tf.py_function +# ============================================================================ + + +@tf.custom_gradient +def custom_relu(x): + """ + Custom ReLU operation using cuda.core. + + This function provides a TensorFlow-native interface to custom CUDA kernels + compiled with cuda.core. The implementation uses tf.py_function internally + to bridge TensorFlow and cuda.core. + + Parameters + ---------- + x : tf.Tensor + Input tensor (must be float32 on GPU) + + Returns + ------- + tf.Tensor + Output tensor with ReLU applied + + Examples + -------- + >>> x = tf.random.normal([100], dtype=tf.float32) + >>> y = custom_relu(x) + >>> # Use in models + >>> model = tf.keras.Sequential([ + ... tf.keras.layers.Dense(128), + ... tf.keras.layers.Lambda(custom_relu), # Custom ReLU + ... tf.keras.layers.Dense(10) + ... ]) + """ + # Validate input + if x.dtype != tf.float32: + raise ValueError("custom_relu only supports float32 tensors") + + # Forward pass using tf.py_function + # py_function allows us to call arbitrary Python code (including cuda.core) + y = tf.py_function(func=_launch_relu_forward, inp=[x], Tout=tf.float32) + + # Restore shape information (py_function loses shape) + y.set_shape(x.shape) + + # Define gradient function + def grad_fn(grad_y): + """Backward pass using custom CUDA kernel""" + grad_x = tf.py_function( + func=_launch_relu_backward, inp=[x, grad_y], Tout=tf.float32 + ) + grad_x.set_shape(x.shape) + return grad_x + + return y, grad_fn + + +# ============================================================================ +# Step 4: Testing and Verification +# ============================================================================ + + +def main(): + """Test the custom ReLU operation.""" + import argparse + + parser = argparse.ArgumentParser( + description="Custom TensorFlow ReLU Operator using cuda.core" + ) + parser.add_argument( + "--size", type=int, default=10000, help="Number of elements (default: 10000)" + ) + + args = parser.parse_args() + + # Device info + device = Device() + device.set_current() + major, minor = device.compute_capability + + print("\nDevice Information:") + print(f" Name: {device.name}") + print(f" Compute Capability: sm_{major}.{minor}") + + print("\n" + "=" * 70) + print("Custom TensorFlow ReLU Operator Test") + print("=" * 70) + + # ======================================================================== + # Test 1: Forward Pass Correctness + # ======================================================================== + print("\n" + "-" * 70) + print("Test 1: Forward Pass") + print("-" * 70) + + # Run on the first visible GPU (respects CUDA_VISIBLE_DEVICES), + # aligning with cuda.core Device(). + with tf.device("/GPU:0"): + x = tf.random.normal([args.size], dtype=tf.float32) + + # Custom ReLU operation + y_custom = custom_relu(x) + + # TensorFlow reference + y_reference = tf.nn.relu(x) + + # Check correctness + max_error = tf.reduce_max(tf.abs(y_custom - y_reference)).numpy() + + print(f"Max absolute error: {max_error:.2e}") + + if tf.reduce_all(tf.abs(y_custom - y_reference) < 1e-5): + print("[PASS] Forward pass PASSED") + else: + print("[FAIL] Forward pass FAILED") + return 1 + + # ======================================================================== + # Test 2: Backward Pass (Gradient) Correctness + # ======================================================================== + print("\n" + "-" * 70) + print("Test 2: Backward Pass") + print("-" * 70) + + with tf.device("/GPU:0"): + x_custom = tf.random.normal([args.size], dtype=tf.float32) + x_reference = tf.identity(x_custom) + + # Compute gradients with GradientTape + with tf.GradientTape() as tape_custom: + tape_custom.watch(x_custom) + y_custom = custom_relu(x_custom) + grad_custom = tape_custom.gradient(y_custom, x_custom) + + with tf.GradientTape() as tape_reference: + tape_reference.watch(x_reference) + y_reference = tf.nn.relu(x_reference) + grad_reference = tape_reference.gradient(y_reference, x_reference) + + # Check gradients + max_grad_error = tf.reduce_max(tf.abs(grad_custom - grad_reference)).numpy() + + print(f"Max gradient error: {max_grad_error:.2e}") + + if tf.reduce_all(tf.abs(grad_custom - grad_reference) < 1e-5): + print("[PASS] Backward pass PASSED") + else: + print("[FAIL] Backward pass FAILED") + return 1 + + # ======================================================================== + # Test 3: Multi-dimensional Tensors + # ======================================================================== + print("\n" + "-" * 70) + print("Test 3: Multi-dimensional Tensors") + print("-" * 70) + + with tf.device("/GPU:0"): + # Test with 2D tensor + x_2d = tf.random.normal([100, 100], dtype=tf.float32) + y_2d_custom = custom_relu(x_2d) + y_2d_reference = tf.nn.relu(x_2d) + + if tf.reduce_all(tf.abs(y_2d_custom - y_2d_reference) < 1e-5): + print("[PASS] 2D tensor test PASSED") + else: + print("[FAIL] 2D tensor test FAILED") + return 1 + + # Test with 3D tensor + x_3d = tf.random.normal([10, 20, 30], dtype=tf.float32) + y_3d_custom = custom_relu(x_3d) + y_3d_reference = tf.nn.relu(x_3d) + + if tf.reduce_all(tf.abs(y_3d_custom - y_3d_reference) < 1e-5): + print("[PASS] 3D tensor test PASSED") + else: + print("[FAIL] 3D tensor test FAILED") + return 1 + + # ======================================================================== + # Summary + # ======================================================================== + print("\n" + "=" * 70) + print("All tests PASSED!") + print("=" * 70) + print("\nYour custom GPU operator is working correctly!") + print("You can now use it in your TensorFlow models.") + print("\nExample usage:") + print(" x = tf.random.normal([100], dtype=tf.float32)") + print(" y = custom_relu(x) # Uses your custom CUDA kernel") + print(" ") + print(" # In a model:") + print(" model = tf.keras.Sequential([") + print(" tf.keras.layers.Dense(128),") + print(" tf.keras.layers.Lambda(custom_relu),") + print(" tf.keras.layers.Dense(10)") + print(" ])") + print("=" * 70 + "\n") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/3_FrameworkInterop/customTensorFlowKernel/requirements.txt b/python/3_FrameworkInterop/customTensorFlowKernel/requirements.txt new file mode 100644 index 00000000..e11eca93 --- /dev/null +++ b/python/3_FrameworkInterop/customTensorFlowKernel/requirements.txt @@ -0,0 +1,14 @@ +# TensorFlow Custom GPU Operator using cuda.core +# +# This sample demonstrates a cuda.core-first approach: +# - cuda-python and cuda-core: Primary GPU management (device, stream, kernel) +# - TensorFlow 2.10+: Deep learning framework (tf.py_function, tf.custom_gradient) +# - CuPy: Internal helper for device pointer access only +# +# Note: cuda-core>=0.6.0 is required for LEGACY_DEFAULT_STREAM constant + +numpy>=2.3.2 +tensorflow>=2.10.0 +cupy-cuda13x>=13.0.0 +cuda-python>=13.0.0 +cuda-core>=0.6.0 diff --git a/python/4_DistributedComputing/ipcMemoryPool/README.md b/python/4_DistributedComputing/ipcMemoryPool/README.md new file mode 100644 index 00000000..6ba35aeb --- /dev/null +++ b/python/4_DistributedComputing/ipcMemoryPool/README.md @@ -0,0 +1,140 @@ +# ipcMemoryPool (Python) + +## Description + +This sample demonstrates how to share GPU memory between Python +processes using CUDA Inter-Process Communication (IPC) and +`cuda.core`'s IPC-enabled memory pools. + +By default each process has its own CUDA virtual address space and +cannot see allocations made by another process. With an IPC-enabled +`DeviceMemoryResource` the parent allocates once, and the child +process maps that same physical GPU memory into its own address space +so both read and write the same bytes. The sample performs a +round-trip test: + +1. Parent creates an IPC-enabled `DeviceMemoryResource` and allocates + a `Buffer`. +2. Parent fills the buffer with a known pattern. +3. Parent sends the `Buffer` to a child process through an + `multiprocessing.Queue`. cuda.core's pickle reducers re-create the + memory resource and map the buffer in the child. +4. Child verifies the parent's pattern, writes a new pattern, and + signals completion. +5. Parent verifies the child's writes. + +## What You'll Learn + +- Enabling IPC on a `DeviceMemoryResource` with `ipc_enabled=True` +- Sending `Buffer` objects across process boundaries via `mp.Queue` +- How cuda.core's pickle reducers rebuild the MR and map the buffer + in the receiving process +- Why `multiprocessing` must use the `"spawn"` start method with CUDA +- Detecting IPC support at runtime (POSIX file-descriptor handle + type, memory-pool support, Linux-only) + +## Key Libraries + +- [`cuda.core`](https://nvidia.github.io/cuda-python/cuda-core/latest/) - IPC-enabled memory resources and buffer reducers +- `cupy` - zero-copy views over the shared device memory via DLPack +- `multiprocessing` - standard library process management + +## Key APIs + +### From `cuda.core` + +- `DeviceMemoryResource(device, options=DeviceMemoryResourceOptions(ipc_enabled=True))` - create an IPC-enabled memory pool +- `DeviceMemoryResourceOptions(max_size=..., ipc_enabled=True)` - configure the underlying pool +- `mr.allocate(nbytes)` - allocate a `Buffer` from the IPC pool +- `Buffer.is_mapped` - True when the buffer is usable in the current process +- `Device.properties.memory_pools_supported` - runtime feature check +- `Device.properties.handle_type_posix_file_descriptor_supported` - runtime feature check + +### From `cuda_samples_utils` + +- `print_gpu_info()` - print device name and compute capability + +## Requirements + +### Hardware + +- NVIDIA GPU with Compute Capability 7.0 or higher +- Device that supports CUDA memory pools and POSIX file-descriptor IPC handles (the sample detects and reports this at startup) +- Minimum GPU memory: 512 MB + +### Software + +- Linux x86_64 (POSIX file-descriptor IPC handles are not available on Windows or macOS) +- CUDA Toolkit 13.0 or newer (matches `cuda-python` 13.x) +- Python 3.10 or newer +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## Installation + +Install the required packages from `requirements.txt`: + +```bash +cd /path/to/cuda-samples/python/4_DistributedComputing/ipcMemoryPool +pip install -r requirements.txt +``` + +The `requirements.txt` installs: + +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) + +## How to Run + +### Basic usage + +```bash +cd cuda-samples/python/4_DistributedComputing/ipcMemoryPool +python ipcMemoryPool.py +``` + +### With custom parameters + +```bash +# Larger shared buffer +python ipcMemoryPool.py --elements 65536 + +# Use a specific GPU +python ipcMemoryPool.py --device 1 +``` + +On platforms or devices that do not support CUDA IPC, the sample +prints a diagnostic and exits cleanly with status 0. + +## Expected Output + +``` +Device: +Compute Capability: + +Created IPC-enabled DeviceMemoryResource (is_ipc_enabled=True) +Parent wrote pattern (first 5 values): [100. 101. 102. 103. 104.] +Parent sent buffer to child pid=; waiting... +[child pid=] received buffer: is_mapped=True, size=4096 +Parent sees child's pattern (first 5 values): [-0. -1. -2. -3. -4.] +IPC round-trip: OK +``` + +**Note:** Device name, compute capability, and child PID will vary +based on your system. + +## Files + +- `ipcMemoryPool.py` - Python implementation using `cuda.core` IPC memory pools +- `README.md` - This file +- `requirements.txt` - Sample dependencies +- `../../Utilities/cuda_samples_utils.py` - Common utilities (imported by this sample) + +## See Also + +- [CUDA Python Documentation](https://nvidia.github.io/cuda-python/) +- [`cuda.core` memory API](https://nvidia.github.io/cuda-python/cuda-core/latest/api.html#memory) +- Upstream `cuda.core` IPC tests: [`test_memory_ipc.py`](https://github.com/NVIDIA/cuda-python/blob/main/cuda_core/tests/memory_ipc/test_memory_ipc.py) +- [CUDA IPC programming guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#interprocess-communication) diff --git a/python/4_DistributedComputing/ipcMemoryPool/ipcMemoryPool.py b/python/4_DistributedComputing/ipcMemoryPool/ipcMemoryPool.py new file mode 100644 index 00000000..acac0a90 --- /dev/null +++ b/python/4_DistributedComputing/ipcMemoryPool/ipcMemoryPool.py @@ -0,0 +1,220 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +IPC Memory Pool with cuda.core + +Share GPU memory between Python processes using CUDA Inter-Process +Communication (IPC) and cuda.core's IPC-enabled memory pools. By default +each worker process has its own CUDA virtual address space and cannot see +allocations made by another process. With an IPC-enabled +``DeviceMemoryResource`` the parent can allocate once, and the child +process can map that same physical GPU memory into its own address space +so both read and write the same bytes. + +The sample does a round-trip test: + + 1. Parent creates an IPC-enabled ``DeviceMemoryResource`` and allocates + a ``Buffer``. + 2. Parent fills the buffer with a known pattern. + 3. Parent sends the ``Buffer`` to a child process through an + ``mp.Queue`` - cuda.core's pickle reducers take care of re-creating + the memory resource and mapping the buffer in the child. + 4. Child verifies the parent's pattern, writes a new pattern, and + signals completion. + 5. Parent verifies the child's writes. + +IPC requires Linux (POSIX file-descriptor handles) and device support for +memory pools. On unsupported platforms the sample prints a diagnostic and +exits cleanly. +""" + +import multiprocessing as mp +import platform +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) + +try: + import cupy as cp + import numpy as np + from cuda.core import ( + Device, + DeviceMemoryResource, + DeviceMemoryResourceOptions, + ) + from cuda_samples_utils import print_gpu_info # noqa: E402 +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + + +CHILD_TIMEOUT_SEC = 30 + + +def check_ipc_support(device) -> bool: + """Return True if this device/platform supports CUDA IPC memory pools.""" + if platform.system() != "Linux": + print( + f"IPC via POSIX file descriptors is only supported on Linux " + f"(detected {platform.system()})." + ) + return False + if not device.properties.memory_pools_supported: + print("Device does not support CUDA memory pools.") + return False + if not device.properties.handle_type_posix_file_descriptor_supported: + print("Device/platform does not support POSIX-fd IPC handles.") + return False + return True + + +def child_worker(q_in, q_out, n_elements, parent_seed, child_seed): + """Runs in a separate process. Verifies and modifies the shared buffer.""" + device = Device(0) + device.set_current() + pid = mp.current_process().pid + + # The Buffer (and its MR) are reconstructed and mapped in this process + # when the queued object is unpickled. Both ``is_mapped`` flags are + # True here. + buffer = q_in.get(timeout=CHILD_TIMEOUT_SEC) + print( + f"[child pid={pid}] received buffer: is_mapped={buffer.is_mapped}, " + f"size={buffer.size}" + ) + + # Build a zero-copy CuPy view of the shared device memory. + arr = cp.from_dlpack(buffer).view(dtype=cp.float32) + + # Verify the parent's pattern. + expected_parent = cp.arange(n_elements, dtype=cp.float32) + float(parent_seed) + if not cp.allclose(arr, expected_parent): + print("[child] ERROR: parent's pattern did not match expectation") + buffer.close() + q_out.put("fail") + return + + # Write a new pattern for the parent to verify. + arr[:] = cp.arange(n_elements, dtype=cp.float32) * float(child_seed) + device.sync() + + buffer.close() + q_out.put("done") + + +def main() -> int: + import argparse + + parser = argparse.ArgumentParser( + description="Share a GPU buffer between two processes via CUDA IPC" + ) + parser.add_argument( + "--elements", + type=int, + default=1024, + help="Number of float32 elements in the shared buffer (default: 1024)", + ) + parser.add_argument("--device", type=int, default=0, help="CUDA device id") + args = parser.parse_args() + + # CUDA is incompatible with the ``fork`` start method because forked + # children inherit a corrupt CUDA state. Always use ``spawn``. + mp.set_start_method("spawn", force=True) + + device = Device(args.device) + device.set_current() + print_gpu_info(device) + + if not check_ipc_support(device): + print("\nCUDA IPC is not available on this system; exiting cleanly.") + return 0 + + N = args.elements + nbytes = N * np.dtype(np.float32).itemsize + parent_seed = 100 + child_seed = -1.0 + + # Create an IPC-enabled memory pool. Buffers allocated from this MR + # are picklable and can be shared across processes. + mr = DeviceMemoryResource( + device, + options=DeviceMemoryResourceOptions( + max_size=max(nbytes * 4, 1 << 20), + ipc_enabled=True, + ), + ) + print( + "Created IPC-enabled DeviceMemoryResource " + f"(is_ipc_enabled={mr.is_ipc_enabled})" + ) + + buffer = mr.allocate(nbytes) + try: + # Fill the buffer with a known pattern from the parent side. + arr = cp.from_dlpack(buffer).view(dtype=cp.float32) + arr[:] = cp.arange(N, dtype=cp.float32) + float(parent_seed) + device.sync() + print(f"Parent wrote pattern (first 5 values): {arr[:5].get()}") + + # Launch the child process and hand the buffer over. + q_to_child = mp.Queue() + q_from_child = mp.Queue() + child = mp.Process( + target=child_worker, + args=(q_to_child, q_from_child, N, parent_seed, child_seed), + ) + child.start() + q_to_child.put(buffer) + print(f"Parent sent buffer to child pid={child.pid}; waiting...") + + msg = q_from_child.get(timeout=CHILD_TIMEOUT_SEC) + child.join(timeout=CHILD_TIMEOUT_SEC) + + if msg != "done" or child.exitcode != 0: + print(f"Child failed: msg={msg!r}, exitcode={child.exitcode}") + return 1 + + # Verify the child's writes are visible from the parent. + device.sync() + got = arr[:5].get() + expected = (np.arange(N, dtype=np.float32) * child_seed)[:5] + print(f"Parent sees child's pattern (first 5 values): {got}") + if np.allclose(got, expected): + print("IPC round-trip: OK") + return 0 + print(f"IPC round-trip: FAILED (expected {expected})") + return 1 + finally: + buffer.close() + mr.close() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/4_DistributedComputing/ipcMemoryPool/requirements.txt b/python/4_DistributedComputing/ipcMemoryPool/requirements.txt new file mode 100644 index 00000000..c65cd4fb --- /dev/null +++ b/python/4_DistributedComputing/ipcMemoryPool/requirements.txt @@ -0,0 +1,4 @@ +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 +numpy>=1.24.0 diff --git a/python/4_DistributedComputing/multiGPUGradientAverage/README.md b/python/4_DistributedComputing/multiGPUGradientAverage/README.md new file mode 100644 index 00000000..26aa79d2 --- /dev/null +++ b/python/4_DistributedComputing/multiGPUGradientAverage/README.md @@ -0,0 +1,110 @@ +# Sample: multiGPUGradientAverage (Python) + +## Description + +This sample demonstrates gradient averaging across multiple GPUs using MPI and cuda.core. Each GPU computes local gradients, which are synchronized (averaged) across all GPUs using MPI Allreduce with host-staging (GPU → CPU → MPI → CPU → GPU) for maximum compatibility. + +## What you will learn + +- How to initialize MPI for multi-process GPU communication +- How to map MPI ranks to CUDA devices consistently +- How to integrate cuda.core streams with CuPy using `ExternalStream` +- How to compile and launch custom CUDA kernels using cuda.core +- How to use cuda.core Event for GPU timing measurements +- How to use MPI Allreduce with host-staging for universal compatibility + +## Prerequisites + +- Python 3.10+ +- CUDA Toolkit 13.0+ +- Standard MPI implementation (OpenMPI, MPICH, or Intel MPI) +- Multiple NVIDIA GPUs (tested with 2+ GPUs) + +## Installation + +```bash +pip install mpi4py cupy-cuda13x cuda-python cuda-core +``` + +## Running + +**IMPORTANT:** This sample **MUST** be run with `mpirun` with at least 2 processes. + +```bash +# Single node (2 GPUs) +mpirun -np 2 python multiGPUGradientAverage.py --size 10000 + +# Single node (4 GPUs) +mpirun -np 4 python multiGPUGradientAverage.py --size 10000 + +# With specific GPUs +CUDA_VISIBLE_DEVICES=0,2 mpirun -np 2 python multiGPUGradientAverage.py +``` + +## Sample Output + +``` +[Rank 0] World size = 4 + +====================================================================== +Multi-GPU Gradient Average Demo +====================================================================== +Number of MPI ranks (GPUs): 4 +Gradient vector length per GPU: 10000 +Device: NVIDIA GeForce RTX 4090 +Computation: gradients computed on GPU via cuda.core. +Communication: gradients averaged via MPI_Allreduce on host (CPU) buffers. +====================================================================== + +Sample averaged gradient values (rank 0): + avg_grad[0] = 1.500000 + avg_grad[5000] = 6.500000 + avg_grad[9999] = 11.499000 + +Expected values: + expected[0] = 1.500000 + expected[5000] = 6.500000 + expected[9999] = 11.499000 + +Verifying gradient averaging correctness... +[PASS] Gradient averaging is correct. +[PASS] Gradient averaging is correct on all ranks. + +Performance: + Kernel time (GPU only): 0.123 ms + MPI communication time (host-staging, end-to-end): 0.456 ms + Total time: 0.579 ms + +====================================================================== +Demo complete. +====================================================================== +``` + +## Key Technical Details + +The sample uses cuda.core streams and makes CuPy use them via `ExternalStream`: + +```python +stream = device.create_stream() +cp.cuda.ExternalStream(int(stream.handle)).use() +``` + +GPU timing is measured using cuda.core Event: + +```python +from cuda.core import EventOptions +timing_options = EventOptions(enable_timing=True) +start_event = stream.record(options=timing_options) +# ... GPU work ... +end_event = stream.record(options=timing_options) +end_event.sync() +kernel_time = end_event - start_event # Returns milliseconds +``` + +The host-staging pattern transfers data GPU → CPU → MPI → CPU → GPU for universal MPI compatibility without requiring CUDA-aware MPI. + +## Troubleshooting + +**Error: "This sample requires at least 2 MPI processes!"** + +Solution: Run with `mpirun -np 2 python multiGPUGradientAverage.py` diff --git a/python/4_DistributedComputing/multiGPUGradientAverage/multiGPUGradientAverage.py b/python/4_DistributedComputing/multiGPUGradientAverage/multiGPUGradientAverage.py new file mode 100644 index 00000000..02a47672 --- /dev/null +++ b/python/4_DistributedComputing/multiGPUGradientAverage/multiGPUGradientAverage.py @@ -0,0 +1,416 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Multi-GPU Gradient Average using MPI and cuda.core (Host-staging Allreduce) + +Question: How do I synchronize gradients across GPUs? + +Answer: +Each GPU (each MPI rank) computes local gradients on device via CUDA. +Gradients are then averaged across ranks via an MPI Allreduce over host +(CPU) buffers, following the classic data-parallel training pattern. + +This sample shows how to: +- Initialize MPI for multi-process GPU workloads +- Map MPI ranks to GPUs +- Use cuda.core for kernel compilation and execution +- Integrate cuda.core with CuPy using the stream protocol +- Perform gradient averaging with MPI Allreduce (using host staging) +- Use cuda.core Event for GPU timing measurements +- Verify correctness of distributed gradient synchronization + +Key concepts: Allreduce, NCCL collectives (conceptually), distributed training + +Note: +- All gradient computation and validation happen on GPUs. +- MPI Allreduce is executed on CPU (host) buffers via a simple + GPU -> CPU -> MPI -> CPU -> GPU staging pattern so that the sample + works on any MPI stack, without requiring CUDA-aware MPI. +- In production deep learning frameworks (e.g., PyTorch DDP), NCCL + usually implements the GPU Allreduce directly; the communication + pattern and semantics are the same as demonstrated here. +""" + +import sys +from pathlib import Path + +# Add parent directory to path to import utilities +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result # noqa: E402 + +try: + import cupy as cp + from cuda.core import ( + Device, + EventOptions, + LaunchConfig, + Program, + ProgramOptions, + launch, + system, + ) + from mpi4py import MPI +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install: pip install mpi4py cupy-cuda12x cuda-python cuda-core") + sys.exit(1) + + +# ============================================================================ +# CUDA device selection and stream management +# ============================================================================ + + +def init_device(rank: int): + """ + Initialize CUDA device and stream for this MPI rank. + + For a simple single-node run, we map rank % num_gpus to a device id. + This covers both the common case (world_size == num_gpus) and the case + where multiple ranks share a GPU. + + Returns + ------- + tuple[Device, Stream] + CUDA device object and stream object. + """ + num_gpus = system.get_num_devices() + if num_gpus == 0: + raise RuntimeError("No CUDA devices available") + + dev_id = rank % num_gpus # simple mapping: rank -> GPU in round-robin + + try: + device = Device(dev_id) + except (RuntimeError, ValueError) as e: + if rank == 0: + print(f"Warning: Cannot assign GPU {dev_id}, using GPU 0. Error: {e}") + device = Device(0) + + device.set_current() + # Align CuPy with cuda.core's chosen device ID + cp.cuda.Device(device.device_id).use() + + # Create cuda.core stream and make CuPy use it + stream = device.create_stream() + cp.cuda.ExternalStream(int(stream.handle)).use() + + return device, stream + + +# ============================================================================ +# CUDA kernel definition and compilation +# ============================================================================ + +# Tiny CUDA kernel to initialize local "gradients" +# Uses grid-stride loop to handle arrays larger than grid size +INIT_KERNEL = r""" +extern "C" __global__ +void init_grad_kernel(float* grad, int n, int rank) +{ + // Grid-stride loop: each thread processes multiple elements + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = gridDim.x * blockDim.x; + for (size_t i = tid; i < n; i += stride) { + // Gradient value depends on MPI rank so we can verify reduction: + // grad_i = rank + 0.001 * i + grad[i] = rank + 0.001f * i; + } +} +""" + +_kernel_cache = {} + + +def get_init_kernel(device: Device): + """Compile (or retrieve cached) init_grad_kernel for this device.""" + key = device.pci_bus_id + if key not in _kernel_cache: + opts = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") + prog = Program(INIT_KERNEL, code_type="c++", options=opts) + mod = prog.compile("cubin") + _kernel_cache[key] = mod.get_kernel("init_grad_kernel") + return _kernel_cache[key] + + +# ============================================================================ +# Local gradient computation on each GPU +# ============================================================================ + + +def compute_local_gradients( + num_elements: int, device: Device, stream: object, rank: int +) -> cp.ndarray: + """ + Compute a local "gradient" vector on the current GPU. + + For demo purposes, we initialize: + grad[i] = rank + 0.001 * i + + Parameters + ---------- + num_elements : int + Length of gradient vector. + device : Device + CUDA device object. + stream : Stream + CUDA stream object (created at device initialization). + rank : int + MPI rank ID. + + Returns + ------- + cupy.ndarray + Gradient vector on GPU. + """ + # Create gradient array (CuPy uses the stream set at device initialization) + grad = cp.empty(num_elements, dtype=cp.float32) + + # Use a CUDA kernel compiled with cuda.core to fill the array + kernel = get_init_kernel(device) + + threads_per_block = 256 + blocks_per_grid = (num_elements + threads_per_block - 1) // threads_per_block + config = LaunchConfig(grid=blocks_per_grid, block=threads_per_block) + + # Launch kernel using cuda.core stream + launch(stream, config, kernel, grad.data.ptr, num_elements, rank) + + return grad + + +# ============================================================================ +# MPI Allreduce to average gradients (host-staging) +# ============================================================================ + + +def average_gradients( + local_grad: cp.ndarray, comm: object, world_size: int +) -> cp.ndarray: + """ + Average gradients across all MPI ranks using host-staging Allreduce. + + Steps: + 1. Copy local gradients from GPU to CPU (NumPy). + 2. Perform MPI_Allreduce on host buffers. + 3. Divide by world_size to obtain the average. + 4. Copy the averaged gradients back to GPU. + + This pattern is environment-agnostic and works on any MPI stack. + """ + assert local_grad.dtype == cp.float32 + + # GPU -> CPU + local_host = local_grad.get() # NumPy array on host + avg_host = local_host.copy() + + # Allreduce on host buffers + comm.Allreduce(local_host, avg_host, op=MPI.SUM) + + # Average + avg_host /= world_size + + # CPU -> GPU + avg_grad = cp.asarray(avg_host) + + return avg_grad + + +# ============================================================================ +# Testing and verification +# ============================================================================ + + +def main(): + """Demo: Multi-GPU gradient averaging with MPI (host-staging Allreduce).""" + import argparse + + # Initialize MPI + comm = MPI.COMM_WORLD + world_size = comm.Get_size() + rank = comm.Get_rank() + + parser = argparse.ArgumentParser( + description=( + "Multi-GPU Gradient Average with mpi4py + cuda.core " + "(host-staging Allreduce)" + ) + ) + parser.add_argument( + "--size", + type=int, + default=1024, + help="Number of gradient elements per GPU (default: 1024)", + ) + args = parser.parse_args() + + num_elements = args.size + + # Initialize device and stream + device = None + stream = None + try: + device, stream = init_device(rank) + + if rank == 0: + print(f"[Rank 0] World size = {world_size}") + comm.Barrier() + + # Validate world size + if world_size < 2: + if rank == 0: + print("=" * 70) + print("ERROR: This sample requires at least 2 MPI processes!") + print("=" * 70) + print("\nPlease run with mpirun:") + print(" mpirun -np 2 python multiGPUGradientAverage.py") + print(" mpirun -np 4 python multiGPUGradientAverage.py --size 10000") + print("\nFor multi-GPU systems:") + print(" mpirun -np N python multiGPUGradientAverage.py") + print(" (where N = number of GPUs)") + print("=" * 70) + sys.exit(1) + + # Validate input + if num_elements <= 0: + if rank == 0: + print("Error: --size must be positive") + sys.exit(1) + + if rank == 0: + print("\n" + "=" * 70) + print("Multi-GPU Gradient Average Demo") + print("=" * 70) + print(f"Number of MPI ranks (GPUs): {world_size}") + print(f"Gradient vector length per GPU: {num_elements}") + print(f"Device: {device.name}") + print("Computation: gradients computed on GPU via cuda.core.") + print( + "Communication: gradients averaged via MPI_Allreduce on host " + "(CPU) buffers." + ) + print("=" * 70) + + # Step 1: Compute local gradients on each GPU + # Use cuda.core Event for GPU timing measurements + timing_options = EventOptions(enable_timing=True) + start_event = stream.record(options=timing_options) + + local_grad = compute_local_gradients(num_elements, device, stream, rank) + + # Record end event and synchronize to ensure timing is complete + end_event = stream.record(options=timing_options) + end_event.sync() + + # Calculate elapsed time: Event subtraction returns milliseconds + kernel_time = end_event - start_event + + # Step 2: Average gradients across all ranks (host-staging Allreduce) + # Use CPU timing for MPI communication (host-staging includes GPU↔CPU transfers) + import time + + comm_start = time.time() + avg_grad = average_gradients(local_grad, comm, world_size) + comm_time = (time.time() - comm_start) * 1000 # Convert to ms + + # Step 3: Sanity check on rank 0 + # For each element i: + # local_grad_r[i] = r + 0.001 * i, r = 0..world_size-1 + # Sum over ranks: + # sum[i] = sum_r r + 0.001 * i * world_size + # Average: + # avg[i] = (0 + ... + (world_size-1))/world_size + 0.001 * i + # = (world_size - 1)/2 + 0.001 * i + # + # We verify this formula. + + expected_base = (world_size - 1) / 2.0 + i0 = 0 + i1 = num_elements // 2 + i2 = num_elements - 1 + + # Copy a few sample elements back to host for printing on rank 0 + if rank == 0: + avg_host_samples = avg_grad[[i0, i1, i2]].get() + print("\nSample averaged gradient values (rank 0):") + print(f" avg_grad[{i0}] = {avg_host_samples[0]:.6f}") + print(f" avg_grad[{i1}] = {avg_host_samples[1]:.6f}") + print(f" avg_grad[{i2}] = {avg_host_samples[2]:.6f}") + + expected0 = expected_base + 0.001 * i0 + expected1 = expected_base + 0.001 * i1 + expected2 = expected_base + 0.001 * i2 + print("\nExpected values:") + print(f" expected[{i0}] = {expected0:.6f}") + print(f" expected[{i1}] = {expected1:.6f}") + print(f" expected[{i2}] = {expected2:.6f}") + + # All ranks perform a full-array correctness check on GPU + expected_full = expected_base + 0.001 * cp.arange( + num_elements, dtype=cp.float32 + ) + + # Use utility function to verify results + if rank == 0: + print("\nVerifying gradient averaging correctness...") + ok = verify_array_result( + avg_grad, expected_full, rtol=1e-5, atol=1e-8, verbose=(rank == 0) + ) + + # Ensure all ranks agree on correctness + ok_all = comm.allreduce(ok, op=MPI.LAND) + + if rank == 0: + if ok_all: + print("[PASS] Gradient averaging is correct on all ranks.") + else: + print( + "[FAIL] Gradient averaging mismatch detected on one or more ranks." + ) + + print("\nPerformance:") + print(f" Kernel time (GPU only): {kernel_time:.3f} ms") + print( + " MPI communication time (host-staging, end-to-end): " + f"{comm_time:.3f} ms" + ) + print(f" Total time: {kernel_time + comm_time:.3f} ms") + + print("\n" + "=" * 70) + print("Demo complete.") + print("=" * 70) + + return 0 if ok_all else 1 + finally: + # Clean up stream resources + if stream is not None: + stream.close() + cp.cuda.Stream.null.use() # Reset CuPy's current stream to the null stream + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/python/4_DistributedComputing/multiGPUGradientAverage/requirements.txt b/python/4_DistributedComputing/multiGPUGradientAverage/requirements.txt new file mode 100644 index 00000000..f719cfa4 --- /dev/null +++ b/python/4_DistributedComputing/multiGPUGradientAverage/requirements.txt @@ -0,0 +1,21 @@ +# Multi-GPU Gradient Average Sample Requirements + +# MPI Python bindings for distributed communication +mpi4py>=3.1.4 + +# GPU array library (NumPy-compatible arrays on CUDA) +# Use cupy-cuda11x, cupy-cuda12x, or cupy-cuda13x depending on your CUDA version +cupy-cuda13x>=13.0.0 + +# CUDA Python bindings (low-level CUDA driver API) +cuda-python>=13.0.0 + +# cuda.core - Modern Python interface for CUDA +# Provides Program, LaunchConfig, Device, and launch APIs +cuda-core>=0.6.0 + +# Note: This sample uses host-staging for MPI communication +# Standard MPI installation is sufficient (no CUDA-aware MPI required) +# Install MPI using system package manager: +# Ubuntu/Debian: sudo apt-get install openmpi-bin libopenmpi-dev +# Or build from source: https://www.open-mpi.org/software/ompi/ diff --git a/python/4_DistributedComputing/simpleP2P/README.md b/python/4_DistributedComputing/simpleP2P/README.md new file mode 100644 index 00000000..9f5f01fe --- /dev/null +++ b/python/4_DistributedComputing/simpleP2P/README.md @@ -0,0 +1,190 @@ +# Sample: simpleP2P (Python) + +## Description + +This sample demonstrates peer-to-peer (P2P) memory access between multiple GPUs in CUDA using the cuda.core Python library. P2P allows GPUs to directly access each other's memory without routing data through the host (CPU), enabling efficient multi-GPU applications. This sample detects P2P-capable GPUs, enables peer access, measures bandwidth using CUDA events for accurate GPU-side timing, and launches kernels (using grid-stride loops) that read from one GPU's memory and write to another GPU's memory. + +## What you will learn + +- How to detect multiple CUDA-capable GPUs using `system.get_num_devices()` and `Device(id)` +- How to check P2P capability between GPU pairs using `device.can_access_peer()` +- How to enable and disable peer access using `DeviceMemoryResource.peer_accessible_by` +- How to allocate device memory on specific GPUs using `DeviceMemoryResource` +- How to perform direct GPU-to-GPU memory transfers with explicit event-based synchronization +- How to measure P2P bandwidth using CUDA events for accurate GPU-side timing +- How to use event-based synchronization between streams for sequential bandwidth measurement +- How to launch kernels on one GPU that access memory from another GPU +- How to compile and launch CUDA kernels using cuda.core's `Program` and `launch` APIs with grid-stride loops +- How to validate multi-GPU computation results +- How to properly clean up resources using try/finally blocks + +## Key libraries + +- `numpy` - CPU array operations and data initialization +- `cuda-core` - Modern Python interface to CUDA runtime with full P2P support + +## Key APIs + +**From cuda.core:** +- `system` – Pre-instantiated singleton for system-level CUDA information +- `system.get_num_devices()` – Get number of CUDA-capable devices +- `Device(id)` – Get specific CUDA device handle +- `device.can_access_peer(peer)` – Check if this device can access peer device memory +- `device.set_current()` – Set active device for subsequent operations +- `device.create_stream()` – Create CUDA stream for kernel execution +- `DeviceMemoryResource(device)` – Create memory resource for specific GPU +- `memory_resource.peer_accessible_by` – Get/set which devices can access this memory pool's allocations + - Example: `mr.peer_accessible_by = [1]` grants device 1 access + - Example: `mr.peer_accessible_by = []` revokes all access +- `PinnedMemoryResource()` – Allocate pinned (page-locked) host memory +- `EventOptions(enable_timing=True)` – Create options for CUDA events with timing enabled +- `stream.record(options=event_options)` – Record a CUDA event on a stream +- `event.elapsed_time(start_event)` – Get elapsed time in milliseconds between two events +- `stream.wait_event(event)` – Make a stream wait for an event to complete +- `stream.close()` – Clean up stream resources +- `Program()` – Compile CUDA C++ kernel code +- `LaunchConfig()` – Configure kernel launch parameters (grid, block) +- `launch()` – Launch compiled kernel with arguments +- `buffer.copy_from(src, stream=stream)` – Copy data from source buffer asynchronously +- `buffer.copy_to(dst, stream=stream)` – Copy data to destination buffer asynchronously + +**From DLPack:** +- `numpy.from_dlpack()` – Create NumPy array view of memory buffer + +**Memory Management:** +- Resources (streams, buffers) should be cleaned up using try/finally blocks to ensure proper cleanup even if errors occur +- Streams should be explicitly closed with `stream.close()` in finally blocks + +## Peer-to-Peer (P2P): When to Use + +### Benefits +- **Direct GPU-to-GPU transfers**: Bypass host memory for faster communication +- **Higher bandwidth**: PCIe or NVLink bandwidth between GPUs (up to 600 GB/s with NVLink) +- **Lower latency**: No CPU involvement in data transfers +- **Efficient multi-GPU**: Essential for scaling deep learning, HPC, and simulation workloads +- **Simplified programming**: Kernels can directly access remote GPU memory + +### Requirements +- **Two or more GPUs**: System must have multiple CUDA-capable GPUs +- **P2P support**: GPUs must be P2P-capable (check with `can_access_peer()`) +- **PCIe topology**: Usually requires GPUs on same PCIe root complex +- **Platform support**: Not available on Mac OSX, limited on ARM platforms + +### Best Use Cases +1. Multi-GPU deep learning training (model parallelism, data parallelism) +2. Large-scale scientific simulations across multiple GPUs +3. Real-time rendering with multiple GPUs +4. GPU clusters with direct GPU communication +5. Reducing CPU-GPU traffic in multi-GPU systems + +## Requirements + +1. **Two or more NVIDIA Graphics Cards** with CUDA support and P2P capability +2. **CUDA Drivers** installed on your system +3. **CUDA Toolkit 13.0+** installed on your system +4. **Python 3.10 or newer** +5. **Proper PCIe topology** (GPUs should be on same PCIe root complex for best performance) + +**Note**: This sample will gracefully exit if fewer than 2 GPUs are detected or if P2P is not supported between any GPU pair. + +**Install packages:** +```bash +pip install -r requirements.txt +``` + +Or manually: +```bash +pip install numpy>=2.3.2 cuda-core>=0.6.0 cuda-python>=13.0.0 +``` + +## How to run + +Basic usage: +```bash + +# Run with default parameters (16M elements = 64MB) +python simpleP2P.py +``` + +With custom parameters: +```bash +# Use 32M elements (128MB) +python simpleP2P.py --num_elements 33554432 + +# Show help +python simpleP2P.py --help +``` + +### Command line arguments + +- `--num_elements`: Number of elements in arrays (default: 16777216) + - Each array uses `num_elements * 4 bytes` (float32) + - Default: 64 MB per array + - Sample allocates 2 device buffers + 1 host buffer + +## Expected Output + +``` +====================================================================== +simpleP2P - CUDA Python Sample +====================================================================== + +Starting... + +Checking for multiple GPUs... +CUDA-capable device count: 2 + +Checking GPU(s) for support of peer to peer memory access... +> Peer access from Tesla T10 (GPU0) -> Tesla T10 (GPU1): Yes +> Peer access from Tesla T10 (GPU1) -> Tesla T10 (GPU0): Yes + +Using GPU0 (Tesla T10) and GPU1 (Tesla T10) + +Allocating buffers (64MB on GPU0, GPU1 and CPU Host)... + Peer access enabled: GPU0 <-> GPU1 + Peer access status: MR0 accessible by (1,), MR1 accessible by (0,) + Memory allocated successfully + +Measuring P2P bandwidth... + Performing 100 ping-pong copies between GPUs... + P2P bandwidth: 12.37 GB/s + +Preparing host buffer and memcpy to GPU0... + Data initialized and copied to GPU + +Compiling CUDA kernel... + Kernels compiled successfully + +Run kernel on GPU1, taking source data from GPU0 and writing to GPU1... + Kernel execution complete + +Run kernel on GPU0, taking source data from GPU1 and writing to GPU0... + Kernel execution complete + +Copy data back to host from GPU0 and verify results... + +Checking results... + Comparing 16,777,216 elements... +Test PASSED + [PASS] Validation PASSED + +Disabling peer access... + Peer access revoked: MR0 accessible by (), MR1 accessible by () + +====================================================================== +simpleP2P completed successfully! +====================================================================== + +Shutting down... +``` + +**Note**: P2P bandwidth varies based on: +- PCIe generation +- NVLink +- System topology and configuration + +## Files + +- `simpleP2P.py` – Main Python implementation +- `README.md` – This file +- `requirements.txt` – Python package dependencies diff --git a/python/4_DistributedComputing/simpleP2P/requirements.txt b/python/4_DistributedComputing/simpleP2P/requirements.txt new file mode 100644 index 00000000..d0365796 --- /dev/null +++ b/python/4_DistributedComputing/simpleP2P/requirements.txt @@ -0,0 +1,6 @@ +# simpleP2P - Requirements +# Install with: pip install -r requirements.txt + +numpy>=2.3.2 +cuda-python>=13.0.0 +cuda-core>=0.6.0 diff --git a/python/4_DistributedComputing/simpleP2P/simpleP2P.py b/python/4_DistributedComputing/simpleP2P/simpleP2P.py new file mode 100644 index 00000000..534f357a --- /dev/null +++ b/python/4_DistributedComputing/simpleP2P/simpleP2P.py @@ -0,0 +1,377 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import sys +from pathlib import Path + +try: + import numpy as np + from cuda.core import ( + Device, + DeviceMemoryResource, + EventOptions, + LaunchConfig, + PinnedMemoryResource, + Program, + ProgramOptions, + launch, + system, + ) +except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + sys.exit(1) + +# Add parent directory to path to import utilities +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result # noqa: E402 + +# CUDA kernel for simple P2P operation +SIMPLE_P2P_KERNEL = """ +extern "C" __global__ +void SimpleKernel(float *src, float *dst, int N) { + // Grid-stride loop pattern for canonical CUDA kernel + size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + size_t stride = gridDim.x * blockDim.x; + + for (size_t i = tid; i < N; i += stride) { + dst[i] = src[i] * 2.0f; + } +} +""" + + +def run(num_elements=1024 * 1024 * 16): + """ + Demonstrates peer-to-peer (P2P) memory access between multiple GPUs using cuda.core. + + This function shows how to: + 1. Detect and verify multiple GPUs with P2P capability + 2. Enable peer access between GPUs + 3. Perform direct GPU-to-GPU memory transfers + 4. Launch kernels that access memory from other GPUs + 5. Measure P2P bandwidth + 6. Validate results + + Parameters + ---------- + num_elements : int + Number of elements in arrays (default: 16M elements = 64MB) + """ + + print("\n" + "=" * 70) + print("simpleP2P - CUDA Python Sample") + print("=" * 70) + print("\nStarting...") + + # Check for multiple GPUs + print("\nChecking for multiple GPUs...") + num_devices = system.get_num_devices() + print(f"CUDA-capable device count: {num_devices}") + + if num_devices < 2: + print( + "Two or more GPUs with Peer-to-Peer access capability are " + "required, waiving this sample." + ) + return 2 + + # Get device properties + devices = [Device(i) for i in range(num_devices)] + + # Check for P2P capability + print("\nChecking GPU(s) for support of peer to peer memory access...") + + p2p_capable_gpus = [-1, -1] + + for i in range(num_devices): + p2p_capable_gpus[0] = i + for j in range(num_devices): + if i == j: + continue + + # Check peer access capability using cuda.core + i_access_j = devices[i].can_access_peer(devices[j]) + j_access_i = devices[j].can_access_peer(devices[i]) + + print( + f"> Peer access from {devices[i].name} (GPU{i}) -> " + f"{devices[j].name} (GPU{j}): {'Yes' if i_access_j else 'No'}" + ) + print( + f"> Peer access from {devices[j].name} (GPU{j}) -> " + f"{devices[i].name} (GPU{i}): {'Yes' if j_access_i else 'No'}" + ) + + if i_access_j and j_access_i: + p2p_capable_gpus[1] = j + break + + if p2p_capable_gpus[1] != -1: + break + + if p2p_capable_gpus[0] == -1 or p2p_capable_gpus[1] == -1: + print("\nTwo or more GPUs with Peer-to-Peer access capability are required.") + print( + "Peer to Peer access is not available amongst GPUs in the system, " + "waiving test." + ) + return 2 + + # Use first pair of P2P capable GPUs detected + gpuid = [p2p_capable_gpus[0], p2p_capable_gpus[1]] + dev0 = devices[gpuid[0]] + dev1 = devices[gpuid[1]] + + print(f"\nUsing GPU{gpuid[0]} ({dev0.name}) and GPU{gpuid[1]} ({dev1.name})") + + # Allocate buffers with P2P access + buf_size = num_elements * np.dtype(np.float32).itemsize + print( + f"\nAllocating buffers ({int(buf_size / 1024 / 1024)}MB on " + f"GPU{gpuid[0]}, GPU{gpuid[1]} and CPU Host)..." + ) + + # Allocate on GPU 0 and grant access to GPU 1 + dev0.set_current() + mr0 = DeviceMemoryResource(dev0) + mr0.peer_accessible_by = [gpuid[1]] # Grant GPU 1 access to GPU 0's memory + g0 = mr0.allocate(buf_size) + + # Allocate on GPU 1 and grant access to GPU 0 + dev1.set_current() + mr1 = DeviceMemoryResource(dev1) + mr1.peer_accessible_by = [gpuid[0]] # Grant GPU 0 access to GPU 1's memory + g1 = mr1.allocate(buf_size) + + print(f" Peer access enabled: GPU{gpuid[0]} <-> GPU{gpuid[1]}") + print( + f" Peer access status: MR0 accessible by {mr0.peer_accessible_by}, " + f"MR1 accessible by {mr1.peer_accessible_by}" + ) + + # Allocate pinned host memory + pinned_mr = PinnedMemoryResource() + h0 = pinned_mr.allocate(buf_size) + + print(" Memory allocated successfully") + + # Create streams + stream0 = dev0.create_stream() + stream1 = dev1.create_stream() + + try: + # P2P bandwidth test using CUDA events for accurate GPU-side timing + print("\nMeasuring P2P bandwidth...") + print(" Performing 100 ping-pong copies between GPUs...") + + event_options = EventOptions(enable_timing=True) + sync_event0 = None + sync_event1 = None + + # Record start event on stream0 + start_event = stream0.record(options=event_options) + + for i in range(100): + # Ping-pong copy between GPUs with explicit event-based synchronization + if i % 2 == 0: + # Wait for previous stream1 copy to complete (if any) + if sync_event1 is not None: + stream0.wait(sync_event1) + # Copy g0 -> g1 on stream0 + g1.copy_from(g0, stream=stream0) + # Record event on stream0 to signal completion of this copy + sync_event0 = stream0.record(options=EventOptions(enable_timing=False)) + else: + # Wait for previous stream0 copy to complete + if sync_event0 is not None: + stream1.wait(sync_event0) + # Copy g1 -> g0 on stream1 + g0.copy_from(g1, stream=stream1) + # Record event on stream1 to signal completion of this copy + sync_event1 = stream1.record(options=EventOptions(enable_timing=False)) + + # Wait for last stream1 copy to complete + if sync_event1 is not None: + stream0.wait(sync_event1) + + # Record end event on stream0 after all copies have been enqueued + end_event = stream0.record(options=event_options) + end_event.sync() + + # Elapsed time in milliseconds (using subtraction operator) + time_memcpy = end_event - start_event + + bandwidth = (1.0 / (time_memcpy / 1000.0)) * (100.0 * buf_size) / (1024.0**3) + print(f" P2P bandwidth: {bandwidth:.2f} GB/s") + + # Prepare host buffer and initialize data + print(f"\nPreparing host buffer and memcpy to GPU{gpuid[0]}...") + + # Create numpy view and initialize + h0_array = np.from_dlpack(h0).view(dtype=np.float32) + h0_array[:] = (np.arange(num_elements, dtype=np.float32) % 4096).astype( + np.float32 + ) + + # Copy to GPU 0 + dev0.set_current() + g0.copy_from(h0, stream=stream0) + stream0.sync() + + print(" Data initialized and copied to GPU") + + # Compile kernel for both GPUs + print("\nCompiling CUDA kernel...") + dev0.set_current() + program_options = ProgramOptions(std="c++17", arch=f"sm_{dev0.arch}") + prog = Program(SIMPLE_P2P_KERNEL, code_type="c++", options=program_options) + mod0 = prog.compile("cubin") + kernel0 = mod0.get_kernel("SimpleKernel") + + dev1.set_current() + program_options = ProgramOptions(std="c++17", arch=f"sm_{dev1.arch}") + prog = Program(SIMPLE_P2P_KERNEL, code_type="c++", options=program_options) + mod1 = prog.compile("cubin") + kernel1 = mod1.get_kernel("SimpleKernel") + + print(" Kernels compiled successfully") + + # Launch configuration + threads = 512 + blocks = (num_elements + threads - 1) // threads + config = LaunchConfig(grid=blocks, block=threads) + + # Run kernel on GPU 1, reading from GPU 0, writing to GPU 1 + print( + f"\nRun kernel on GPU{gpuid[1]}, taking source data from " + f"GPU{gpuid[0]} and writing to GPU{gpuid[1]}..." + ) + dev1.set_current() + launch(stream1, config, kernel1, g0, g1, np.int32(num_elements)) + stream1.sync() + print(" Kernel execution complete") + + # Run kernel on GPU 0, reading from GPU 1, writing to GPU 0 + print( + f"\nRun kernel on GPU{gpuid[0]}, taking source data from " + f"GPU{gpuid[1]} and writing to GPU{gpuid[0]}..." + ) + dev0.set_current() + launch(stream0, config, kernel0, g1, g0, np.int32(num_elements)) + stream0.sync() + print(" Kernel execution complete") + + # Copy data back to host and verify + print(f"\nCopy data back to host from GPU{gpuid[0]} and verify results...") + g0.copy_to(h0, stream=stream0) + stream0.sync() + + # Verify results + print("\nChecking results...") + print(f" Comparing {num_elements:,} elements...") + + # Input data goes through two kernels, each multiplying by 2.0. + expected = (np.arange(num_elements, dtype=np.float32) % 4096) * 4.0 + + # Use utility function for verification (handles both numpy and cupy arrays) + if verify_array_result(h0_array, expected, rtol=1e-5, atol=1e-6, verbose=True): + print(" [PASS] Validation PASSED") + success = True + else: + print(" [FAIL] Validation FAILED") + # Show first few errors for debugging + errors = np.where(~np.isclose(h0_array, expected, rtol=1e-5, atol=1e-6))[0] + print(f" Number of mismatches: {len(errors)}") + for idx in errors[:10]: + print( + f" Error @ element {idx}: got {h0_array[idx]}, " + f"expected {expected[idx]}" + ) + success = False + + # Disable peer access + print("\nDisabling peer access...") + mr0.peer_accessible_by = [] # Revoke GPU 1's access to GPU 0's memory + mr1.peer_accessible_by = [] # Revoke GPU 0's access to GPU 1's memory + print( + f" Peer access revoked: MR0 accessible by {mr0.peer_accessible_by}, " + f"MR1 accessible by {mr1.peer_accessible_by}" + ) + + print("\n" + "=" * 70) + if success: + print("simpleP2P completed successfully!") + else: + print("simpleP2P FAILED!") + print("=" * 70 + "\n") + + return 0 if success else 1 + finally: + # Cleanup streams and buffers + print("Shutting down...") + stream0.close() + stream1.close() + + +def main(): + """Main entry point with argument parsing.""" + parser = argparse.ArgumentParser( + description=( + "Demonstrate peer-to-peer (P2P) memory access between " + "multiple GPUs with CUDA" + ) + ) + + parser.add_argument( + "--num_elements", + type=int, + default=1024 * 1024 * 16, # 16M elements = 64MB + help="Number of elements in arrays (default: 16777216 = 64MB)", + ) + + args = parser.parse_args() + + # Validate arguments + if args.num_elements <= 0: + print("Error: num_elements must be positive") + return 1 + + try: + exit_code = run(num_elements=args.num_elements) + sys.exit(exit_code) + except Exception as e: + print(f"\nError: {e}") + import traceback + + traceback.print_exc() + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/Utilities/README.md b/python/Utilities/README.md new file mode 100644 index 00000000..2596ac56 --- /dev/null +++ b/python/Utilities/README.md @@ -0,0 +1,134 @@ +# CUDA Python Utilities + +Common utilities for CUDA Python samples using the `cuda.core` API. + +## Overview + +This module provides reusable utility functions for CUDA samples to reduce code duplication. Samples import from `cuda_samples_utils.py` using simple path-based imports (no package structure needed). + +## Installation Requirements + +Install from the Python samples directory: + +```bash +cd /path/to/cuda-samples/Python +pip install -r requirements.txt +``` + +This installs a common CUDA 13 stack (see `python/requirements.txt`): + +- `cuda-python` (>=13.0.0) +- `cuda-core` (>=0.6.0) +- `cupy-cuda13x` (>=13.0.0) +- `numpy` (>=2.3.2) + +## How to Use in Samples + +Import utilities using path-based import: + +```python +import sys +from pathlib import Path + +# Add Utilities directory to path +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result + +# Use the utility +if verify_array_result(result, expected): + print("Success!") +``` + +## Available Functions + +### Result Verification + +#### `verify_array_result(result, expected, rtol=1e-5, atol=1e-8, verbose=True)` + +Verify computed results match expected values. The helper detects whether both +arguments are NumPy arrays or both are CuPy arrays and uses the matching +library's `allclose` (no unnecessary cross-device transfers). + +**Parameters:** +- `result`: NumPy or CuPy array with computed results +- `expected`: NumPy or CuPy array with expected values (same kind as `result`) +- `rtol`: Relative tolerance (default: 1e-5) +- `atol`: Absolute tolerance (default: 1e-8) +- `verbose`: Print test result (default: True) + +**Returns:** +- `True` if results match within tolerance, `False` otherwise + +**Example:** +```python +expected = a + b +if verify_array_result(c, expected): + print("Computation correct!") +``` + +### Package Check + +#### `check_cuda_requirements()` + +Check if required CUDA packages are available. + +**Returns:** +- `True` if requirements are met, `False` otherwise + +**Example:** +```python +if not check_cuda_requirements(): + sys.exit(1) +``` + +## Design Philosophy + +These utilities focus on common operations that are **not** part of `cuda.core` API: +- Result verification for NumPy or CuPy arrays +- Package requirements checking + +For CUDA operations like device initialization, kernel compilation, and grid size calculations, samples should use `cuda.core` API directly to demonstrate the proper usage patterns. + +## Complete Example + +See `../1_GettingStarted/vectorAdd/vectorAdd.py` for a complete example: + +```python +import sys +from pathlib import Path + +# Import utility +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "Utilities")) +from cuda_samples_utils import verify_array_result + +import cupy as cp +from cuda.core import Device, Program, ProgramOptions, LaunchConfig, launch + +# Use cuda.core directly for device and kernel operations +device = Device(0) +device.set_current() + +program_options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}") +program = Program(kernel_source, code_type="c++", options=program_options) +module = program.compile("cubin", name_expressions=("kernel_name",)) +kernel = module.get_kernel("kernel_name") + +# Calculate grid size inline +threads_per_block = 256 +blocks_per_grid = (num_elements + threads_per_block - 1) // threads_per_block + +# Launch kernel - pass cupy arrays directly +config = LaunchConfig(grid=blocks_per_grid, block=threads_per_block) +launch(stream, config, kernel, a, b, c, cp.int32(num_elements)) + +# Verify results using utility +verify_array_result(c, expected) +``` + +## Benefits + +- **Code Reuse**: Write common functionality once +- **Consistency**: All samples use the same patterns +- **Maintainability**: Bug fixes benefit all samples +- **Transparency**: Samples show cuda.core API usage directly +- **Simplicity**: No complex package structure needed diff --git a/python/Utilities/__init__.py b/python/Utilities/__init__.py new file mode 100644 index 00000000..a84c3bf5 --- /dev/null +++ b/python/Utilities/__init__.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +CUDA Python Samples - Utilities + +Common utilities for CUDA Python samples. + +Provides: +- Package requirements checking +- Result verification +""" + +from .cuda_samples_utils import ( + check_cuda_requirements, + verify_array_result, +) + +__version__ = "1.0.0" + +__all__ = [ + "check_cuda_requirements", + "verify_array_result", +] diff --git a/python/Utilities/cuda_samples_utils.py b/python/Utilities/cuda_samples_utils.py new file mode 100644 index 00000000..678d1ac5 --- /dev/null +++ b/python/Utilities/cuda_samples_utils.py @@ -0,0 +1,144 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# distribution and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +Common CUDA utilities for Python samples. + +This module provides common utility functions for CUDA samples including: +- Package requirements checking +- Result verification +- GPU device information + +Requirements: +- Python 3.10+ +- CUDA Toolkit 13.0+ (recommended; matches cuda-python 13.x) +- cuda-python >= 13.0.0 +- cuda-core >= 0.6.0 +- cupy-cuda13x >= 13.0.0 +- numpy >= 2.3.2 (when used with samples that install it) +""" + + +def check_cuda_requirements() -> bool: + """ + Check if required CUDA packages are available. + + Returns + ------- + bool + True if requirements are met, False otherwise + """ + try: + import cupy as cp # noqa: F401 + from cuda.core import Device # noqa: F401 + + return True + except ImportError as e: + print(f"Error: Required package not found: {e}") + print("Please install from requirements.txt:") + print(" pip install -r requirements.txt") + return False + + +def verify_array_result( + result, expected, rtol: float = 1e-5, atol: float = 1e-8, verbose: bool = True +) -> bool: + """ + Verify that computed result matches expected result. + + Automatically detects whether arrays are NumPy or CuPy and uses the + appropriate library without unnecessary data transfers. + + Parameters + ---------- + result : numpy.ndarray or cupy.ndarray + Computed result array. + expected : numpy.ndarray or cupy.ndarray + Expected result array. + rtol : float + Relative tolerance (default: 1e-5) + atol : float + Absolute tolerance (default: 1e-8) + verbose : bool + Whether to print verification result (default: True). + + Returns + ------- + bool + True if results match, False otherwise. + + Raises + ------ + TypeError + If arrays are not both NumPy or both CuPy, or if CuPy is needed + but not available. + """ + import numpy as np + + is_np = isinstance(result, np.ndarray) and isinstance(expected, np.ndarray) + + if is_np: + allclose = np.allclose + abs_ = np.abs + max_ = np.max + else: + import cupy as cp + + is_cp = isinstance(result, cp.ndarray) and isinstance(expected, cp.ndarray) + + if not is_cp: + raise TypeError( + "verify_array_result expects both arrays to be either " + "numpy.ndarray or cupy.ndarray" + ) + + allclose = cp.allclose + abs_ = cp.abs + max_ = cp.max + + if allclose(result, expected, rtol=rtol, atol=atol): + if verbose: + print("Test PASSED") + return True + else: + max_error = max_(abs_(result - expected)) + if verbose: + print(f"Test FAILED - Max error: {max_error}") + return False + + +def print_gpu_info(device) -> None: + """ + Print GPU device information. + + Parameters + ---------- + device : cuda.core.Device + CUDA device object + """ + print(f"Device: {device.name}") + cc = device.compute_capability + print(f"Compute Capability: {cc.major}.{cc.minor}") diff --git a/python/requirements.txt b/python/requirements.txt new file mode 100644 index 00000000..1895a844 --- /dev/null +++ b/python/requirements.txt @@ -0,0 +1,7 @@ +# CUDA Python Samples — common base for CUDA 13.x +# Install sample-specific extras from each sample's requirements.txt (RAPIDS, TensorFlow, etc.). + +cuda-python>=13.0.0 +cuda-core>=0.6.0 +cupy-cuda13x>=13.0.0 +numpy>=2.3.2