mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 13:19:14 +08:00
Update samples for CUDA 11.8 with correct props
This commit is contained in:
parent
b312abaa07
commit
81992093d2
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
.vscode/*
|
|
@ -1,5 +1,10 @@
|
|||
## Changelog
|
||||
|
||||
### CUDA 11.6
|
||||
* Added new folder structure for samples
|
||||
* Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1).
|
||||
* All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit.
|
||||
|
||||
### CUDA 11.5
|
||||
* Added `cuDLAHybridMode`. Demonstrate usage of cuDLA in hybrid mode.
|
||||
* Added `cuDLAStandaloneMode`. Demonstrate usage of cuDLA in standalone mode.
|
||||
|
|
|
@ -666,6 +666,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
|
|||
{0x80, 64},
|
||||
{0x86, 128},
|
||||
{0x87, 128},
|
||||
{0x90, 128},
|
||||
{-1, -1}};
|
||||
|
||||
int index = 0;
|
||||
|
@ -712,6 +713,8 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
|
|||
{0x75, "Turing"},
|
||||
{0x80, "Ampere"},
|
||||
{0x86, "Ampere"},
|
||||
{0x87, "Ampere"},
|
||||
{0x90, "Hopper"},
|
||||
{-1, "Graphics Device"}};
|
||||
|
||||
int index = 0;
|
||||
|
|
|
@ -114,6 +114,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
|||
{0x80, 64},
|
||||
{0x86, 128},
|
||||
{0x87, 128},
|
||||
{0x90, 128},
|
||||
{-1, -1}};
|
||||
|
||||
int index = 0;
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
# CUDA Samples
|
||||
|
||||
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads).
|
||||
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads).
|
||||
|
||||
## Release Notes
|
||||
|
||||
This section describes the release notes for the CUDA Samples on GitHub only.
|
||||
|
||||
### CUDA 11.6
|
||||
### CUDA 11.8
|
||||
* Added new folder structure for samples
|
||||
* Added support of Visual Studio 2022 to all samples supported on [Windows](#windows-1).
|
||||
* All CUDA samples are now only available on [GitHub](https://github.com/nvidia/cuda-samples). They are no longer available via CUDA toolkit.
|
||||
|
@ -17,7 +17,7 @@ This section describes the release notes for the CUDA Samples on GitHub only.
|
|||
|
||||
### Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
|
||||
|
||||
### Getting the CUDA Samples
|
||||
|
|
|
@ -318,9 +318,9 @@ endif
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -6,11 +6,11 @@
|
|||
<toolkit>cudaStreamDestroy</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMallocManaged</toolkit>
|
||||
<toolkit>cudaStreamCreate</toolkit>
|
||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
||||
<toolkit>cudaStreamAttachMemAsync</toolkit>
|
||||
<toolkit>cudaSetDevice</toolkit>
|
||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
||||
<toolkit>cudaStreamSynchronize</toolkit>
|
||||
<toolkit>cudaStreamCreate</toolkit>
|
||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample demonstrates the use of OpenMP and streams with Unified Memory on a single GPU.]]></description>
|
||||
|
@ -70,6 +70,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, CUBLAS, Multithreading, Unified Memory, CUDA S
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamCreate, cudaDeviceSynchronize, cudaStreamAttachMemAsync, cudaSetDevice, cudaStreamSynchronize, cudaGetDeviceProperties
|
||||
cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSetDevice, cudaDeviceSynchronize, cudaStreamSynchronize, cudaStreamCreate, cudaGetDeviceProperties
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[OpenMP](../../../README.md#openmp), [UVM](../../../README.md#uvm), [CUBLAS](../../../README.md#cublas)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -108,6 +108,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -279,9 +279,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -3,21 +3,21 @@
|
|||
<entry>
|
||||
<name>asyncAPI</name>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaMemset</toolkit>
|
||||
<toolkit>cudaProfilerStop</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaMemcpyAsync</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaEventRecord</toolkit>
|
||||
<toolkit>cudaMallocHost</toolkit>
|
||||
<toolkit>cudaProfilerStart</toolkit>
|
||||
<toolkit>cudaEventCreate</toolkit>
|
||||
<toolkit>cudaEventElapsedTime</toolkit>
|
||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
||||
<toolkit>cudaEventRecord</toolkit>
|
||||
<toolkit>cudaFreeHost</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaEventQuery</toolkit>
|
||||
<toolkit>cudaProfilerStop</toolkit>
|
||||
<toolkit>cudaMemset</toolkit>
|
||||
<toolkit>cudaEventDestroy</toolkit>
|
||||
<toolkit>cudaMemcpyAsync</toolkit>
|
||||
<toolkit>cudaEventQuery</toolkit>
|
||||
<toolkit>cudaEventElapsedTime</toolkit>
|
||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
||||
<toolkit>cudaEventCreate</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample illustrates the usage of CUDA events for both GPU timing and overlapping CPU and GPU execution. Events are inserted into a stream of CUDA calls. Since CUDA stream calls are asynchronous, the CPU can perform computations while GPU is executing (including DMA memcopies between the host and device). CPU can query CUDA events to determine whether GPU has completed tasks.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -59,6 +59,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Asynchronous Data Transfers, CUDA Streams and Events
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaMemset, cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaEventQuery, cudaProfilerStop, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties
|
||||
cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaProfilerStart, cudaDeviceSynchronize, cudaEventRecord, cudaFreeHost, cudaMemset, cudaEventDestroy, cudaEventQuery, cudaEventElapsedTime, cudaGetDeviceProperties, cudaEventCreate
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -310,9 +310,9 @@ endif
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
@ -363,7 +363,6 @@ run: build
|
|||
$(EXEC) ./c++11_cuda
|
||||
|
||||
testrun: build
|
||||
$(EXEC) ./c++11_cuda --dummy-test-param
|
||||
|
||||
clean:
|
||||
rm -f c++11_cuda c++11_cuda.o
|
||||
|
|
|
@ -7,9 +7,9 @@
|
|||
</cflags>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaMemset</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample demonstrates C++11 feature support in CUDA. It scans a input text file and prints no. of occurrences of x, y, z, w characters. ]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -31,9 +31,6 @@
|
|||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>c++11_cuda.cu</primary_file>
|
||||
<qatests>
|
||||
<qatest>--dummy-test-param</qatest>
|
||||
</qatests>
|
||||
<required_dependencies>
|
||||
<dependency>CPP11</dependency>
|
||||
</required_dependencies>
|
||||
|
@ -54,6 +51,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ CPP11 CUDA
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaMalloc, cudaMemset, cudaFree, cudaMemcpy
|
||||
cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[CPP11](../../../README.md#cpp11)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -279,9 +279,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
<name>clock</name>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This example shows how to use the clock function to measure the performance of block of threads of a kernel accurately.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -47,6 +47,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaMalloc, cudaFree, cudaMemcpy
|
||||
cudaMalloc, cudaMemcpy, cudaFree
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies, Runtime Compilation
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,7 +23,7 @@ x86_64, ppc64le, aarch64
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
|
||||
cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
|
||||
cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaBlockSize, cudaGridSize
|
||||
|
@ -33,7 +33,7 @@ cudaBlockSize, cudaGridSize
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -279,9 +279,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -3,22 +3,22 @@
|
|||
<entry>
|
||||
<name>concurrentKernels</name>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaStreamWaitEvent</toolkit>
|
||||
<toolkit>cudaStreamDestroy</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaEventRecord</toolkit>
|
||||
<toolkit>cudaMallocHost</toolkit>
|
||||
<toolkit>cudaStreamCreate</toolkit>
|
||||
<toolkit>cudaEventCreate</toolkit>
|
||||
<toolkit>cudaEventElapsedTime</toolkit>
|
||||
<toolkit>cudaEventSynchronize</toolkit>
|
||||
<toolkit>cudaFreeHost</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaEventCreateWithFlags</toolkit>
|
||||
<toolkit>cudaEventDestroy</toolkit>
|
||||
<toolkit>cudaMemcpyAsync</toolkit>
|
||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMallocHost</toolkit>
|
||||
<toolkit>cudaEventCreateWithFlags</toolkit>
|
||||
<toolkit>cudaEventSynchronize</toolkit>
|
||||
<toolkit>cudaEventRecord</toolkit>
|
||||
<toolkit>cudaFreeHost</toolkit>
|
||||
<toolkit>cudaGetDevice</toolkit>
|
||||
<toolkit>cudaStreamWaitEvent</toolkit>
|
||||
<toolkit>cudaEventDestroy</toolkit>
|
||||
<toolkit>cudaEventElapsedTime</toolkit>
|
||||
<toolkit>cudaStreamCreate</toolkit>
|
||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
||||
<toolkit>cudaEventCreate</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample demonstrates the use of CUDA streams for concurrent execution of several kernels on GPU device. It also illustrates how to introduce dependencies between CUDA streams with the new cudaStreamWaitEvent function.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -57,6 +57,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaStreamWaitEvent, cudaStreamDestroy, cudaFree, cudaEventRecord, cudaMallocHost, cudaStreamCreate, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaEventCreateWithFlags, cudaEventDestroy, cudaMemcpyAsync, cudaGetDeviceProperties, cudaGetDevice
|
||||
cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaStreamWaitEvent, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -279,9 +279,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
<name>cppIntegration</name>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This example demonstrates how to integrate CUDA into an existing C++ application, i.e. the CUDA entry point on host side is only a function which is called from C++ code and only the file containing this function is compiled with nvcc. It also demonstrates that vector types can be used from cpp.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -41,6 +41,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ CPP-CUDA Integration
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaMalloc, cudaFree, cudaMemcpy
|
||||
cudaMalloc, cudaMemcpy, cudaFree
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -109,6 +109,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -279,9 +279,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -3,17 +3,17 @@
|
|||
<entry>
|
||||
<name>cppOverload</name>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaFuncSetCacheConfig</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMallocHost</toolkit>
|
||||
<toolkit>cudaFuncGetAttributes</toolkit>
|
||||
<toolkit>cudaGetDeviceCount</toolkit>
|
||||
<toolkit>cudaSetDevice</toolkit>
|
||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
||||
<toolkit>cudaFreeHost</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaSetDevice</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
||||
<toolkit>cudaFuncGetAttributes</toolkit>
|
||||
<toolkit>cudaGetDeviceCount</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample demonstrates how to use C++ function overloading on the GPU.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -52,6 +52,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ C++ Function Overloading, CUDA Streams and Events
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaFree, cudaFuncSetCacheConfig, cudaMallocHost, cudaFuncGetAttributes, cudaGetDeviceCount, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaSetDevice, cudaMemcpy, cudaGetDeviceProperties
|
||||
cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceCount
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -321,9 +321,9 @@ endif
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Systems Integration, OpenMP, Multithreading
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaMemset, cudaFree, cudaGetDeviceCount, cudaSetDevice, cudaMalloc, cudaGetLastError, cudaMemcpy, cudaGetErrorString, cudaGetDeviceProperties, cudaGetDevice
|
||||
cudaMemcpy, cudaGetErrorString, cudaFree, cudaGetLastError, cudaSetDevice, cudaGetDeviceCount, cudaGetDevice, cudaMemset, cudaMalloc, cudaGetDeviceProperties
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[OpenMP](../../../README.md#openmp)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -108,6 +108,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/cudaOpenMP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -285,9 +285,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 60 61 70 75 80 86
|
||||
SMS ?= 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -3,11 +3,11 @@
|
|||
<entry>
|
||||
<name>fp16ScalarProduct</name>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMallocHost</toolkit>
|
||||
<toolkit>cudaFreeHost</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[Calculates scalar product of two vectors of FP16 numbers.]]></description>
|
||||
|
@ -44,6 +44,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>arm</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Runtime API
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaMemcpy, cudaGetDeviceProperties
|
||||
cudaMemcpy, cudaFree, cudaMallocHost, cudaFreeHost, cudaMalloc, cudaGetDeviceProperties
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[FP16](../../../README.md#fp16)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/fp16ScalarProduct.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -279,9 +279,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -3,20 +3,20 @@
|
|||
<entry>
|
||||
<name>matrixMul</name>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaStreamCreateWithFlags</toolkit>
|
||||
<toolkit>cudaProfilerStop</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaEventRecord</toolkit>
|
||||
<toolkit>cudaMallocHost</toolkit>
|
||||
<toolkit>cudaProfilerStart</toolkit>
|
||||
<toolkit>cudaEventCreate</toolkit>
|
||||
<toolkit>cudaEventElapsedTime</toolkit>
|
||||
<toolkit>cudaEventSynchronize</toolkit>
|
||||
<toolkit>cudaEventRecord</toolkit>
|
||||
<toolkit>cudaFreeHost</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaProfilerStop</toolkit>
|
||||
<toolkit>cudaStreamCreateWithFlags</toolkit>
|
||||
<toolkit>cudaEventDestroy</toolkit>
|
||||
<toolkit>cudaStreamSynchronize</toolkit>
|
||||
<toolkit>cudaEventDestroy</toolkit>
|
||||
<toolkit>cudaEventElapsedTime</toolkit>
|
||||
<toolkit>cudaMemcpyAsync</toolkit>
|
||||
<toolkit>cudaEventCreate</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample implements matrix multiplication and is exactly the same as Chapter 6 of the programming guide. It has been written for clarity of exposition to illustrate various CUDA programming principles, not with the goal of providing the most performant generic kernel for matrix multiplication. To illustrate GPU performance for matrix multiply, this sample also shows how to use the new CUDA 4.0 interface for CUBLAS to demonstrate high-performance performance for matrix multiplication.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -56,6 +56,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaFree, cudaEventRecord, cudaMallocHost, cudaProfilerStart, cudaEventCreate, cudaEventElapsedTime, cudaEventSynchronize, cudaFreeHost, cudaMalloc, cudaProfilerStop, cudaStreamCreateWithFlags, cudaEventDestroy, cudaStreamSynchronize, cudaMemcpyAsync
|
||||
cudaStreamCreateWithFlags, cudaProfilerStop, cudaMalloc, cudaFree, cudaMallocHost, cudaProfilerStart, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaStreamSynchronize, cudaEventDestroy, cudaEventElapsedTime, cudaMemcpyAsync, cudaEventCreate
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMul.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -283,9 +283,9 @@ FATBIN_FILE := matrixMul_kernel${TARGET_SIZE}.fatbin
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Driver API, Matrix Multiply
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l, aarch64
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
|
||||
cuModuleGetFunction, cuMemcpyHtoD, cuModuleLoadData, cuCtxCreate, cuLaunchKernel, cuDeviceGetName, cuMemAlloc, cuOccupancyMaxPotentialBlockSize, cuDeviceTotalMem, cuMemFree, cuMemcpyDtoH, cuCtxDestroy, cuDeviceGetAttribute
|
||||
cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuDeviceGetName, cuDeviceTotalMem, cuDeviceGetAttribute, cuModuleLoadData, cuOccupancyMaxPotentialBlockSize, cuMemAlloc, cuMemFree, cuCtxDestroy, cuModuleGetFunction, cuCtxCreate
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -111,6 +111,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/matrixMulDrv.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Driver API, CUDA Dynamically Linked Library
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
|
||||
cuParamSetv, cuMemFree, cuInit, cuModuleGetFunction, cuCtxDestroy, cuCtxCreate, cuDeviceGetName, cuCtxSynchronize, cuParamSeti, cuModuleLoadDataEx, cuDeviceGet, cuFuncSetSharedSize, cuMemAlloc, cuDeviceComputeCapability, cuFuncSetBlockShape, cuMemcpyHtoD, cuParamSetSize, cuLaunchGrid, cuDeviceGetCount, cuLaunchKernel, cuMemcpyDtoH
|
||||
cuMemcpyDtoH, cuDeviceGetName, cuParamSeti, cuModuleLoadDataEx, cuModuleGetFunction, cuLaunchGrid, cuFuncSetSharedSize, cuMemFree, cuParamSetSize, cuParamSetv, cuInit, cuMemcpyHtoD, cuLaunchKernel, cuDeviceGet, cuFuncSetBlockShape, cuCtxDestroy, cuDeviceGetCount, cuDeviceComputeCapability, cuCtxSynchronize, cuMemAlloc, cuCtxCreate
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -95,6 +95,7 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
|||
{0x80, 64},
|
||||
{0x86, 128},
|
||||
{0x87, 128},
|
||||
{0x90, 128},
|
||||
{-1, -1}};
|
||||
|
||||
int index = 0;
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -116,6 +116,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Runtime API, Linear Algebra, Runtime Compilation
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,14 +23,14 @@ x86_64, ppc64le, aarch64
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
|
||||
cuModuleGetFunction, cuMemAlloc, cuLaunchKernel, cuCtxSynchronize, cuMemFree, cuMemcpyDtoH, cuMemcpyHtoD
|
||||
cuMemcpyDtoH, cuLaunchKernel, cuMemcpyHtoD, cuCtxSynchronize, cuMemAlloc, cuMemFree, cuModuleGetFunction
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[NVRTC](../../../README.md#nvrtc)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -113,6 +113,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -109,6 +109,6 @@ xcopy /y /e /s "$(CudaToolkitDir)include\cooperative_groups" .\cooperative_group
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -279,9 +279,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
<name>mergeSort</name>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample implements a merge sort (also known as Batcher's sort), algorithms belonging to the class of sorting networks. While generally subefficient on large sequences compared to algorithms with better asymptotic algorithmic complexity (i.e. merge sort or radix sort), may be the algorithms of choice for sorting batches of short- to mid-sized (key, value) array pairs. Refer to the excellent tutorial by H. W. Lang http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/networks/indexen.htm]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -46,6 +46,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Data-Parallel Algorithms
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,11 +23,11 @@ x86_64, ppc64le, armv7l
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaMalloc, cudaFree, cudaDeviceSynchronize, cudaMemcpy
|
||||
cudaMalloc, cudaDeviceSynchronize, cudaMemcpy, cudaFree
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -111,6 +111,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/mergeSort.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -316,9 +316,9 @@ endif
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 70 72 75 80 86 87
|
||||
SMS ?= 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 70 75 80 86
|
||||
SMS ?= 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -6,17 +6,17 @@
|
|||
<flag>--std=c++11</flag>
|
||||
</cflags>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaMallocHost</toolkit>
|
||||
<toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
|
||||
<toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
|
||||
<toolkit>cudaDeviceGetAttribute</toolkit>
|
||||
<toolkit>cudaFreeHost</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaStreamCreateWithFlags</toolkit>
|
||||
<toolkit>cudaLaunchCooperativeKernel</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaDeviceGetAttribute</toolkit>
|
||||
<toolkit>cudaMallocHost</toolkit>
|
||||
<toolkit>cudaFreeHost</toolkit>
|
||||
<toolkit>cudaStreamSynchronize</toolkit>
|
||||
<toolkit>cudaLaunchCooperativeKernel</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaOccupancyMaxActiveBlocksPerMultiprocessor</toolkit>
|
||||
<toolkit>cudaMemcpyAsync</toolkit>
|
||||
<toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[A simple demonstration of arrive wait barriers.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
|
@ -53,6 +53,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Arrive Wait Barrier
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -23,14 +23,14 @@ x86_64, ppc64le, armv7l, aarch64
|
|||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaFree, cudaMallocHost, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaOccupancyMaxPotentialBlockSize, cudaDeviceGetAttribute, cudaFreeHost, cudaMalloc, cudaStreamCreateWithFlags, cudaLaunchCooperativeKernel, cudaStreamSynchronize, cudaMemcpyAsync
|
||||
cudaStreamCreateWithFlags, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaFreeHost, cudaStreamSynchronize, cudaLaunchCooperativeKernel, cudaMalloc, cudaOccupancyMaxActiveBlocksPerMultiprocessor, cudaMemcpyAsync, cudaOccupancyMaxPotentialBlockSize
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[CPP11](../../../README.md#cpp11), [MBCG](../../../README.md#mbcg)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAWBarrier.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -285,9 +285,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Assert
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaDeviceSynchronize, cudaGetErrorString
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.6](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/simpleAssert.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_90,sm_90;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
||||
<Include>./;../../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.6.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.8.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user