<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<!-- Sample descriptor for globalToShmemAsyncCopy. The element vocabulary is
     declared in SamplesInfo.dtd, which is not part of this file. -->
<entry>
  <name>globalToShmemAsyncCopy</name>
  <!-- Build flags listed for this sample. -->
  <cflags>
    <flag>--std=c++11</flag>
  </cflags>
  <!-- CUDA toolkit API entry points listed for this sample. -->
  <cuda_api_list>
    <toolkit>cudaEventCreate</toolkit>
    <toolkit>cudaEventRecord</toolkit>
    <toolkit>cudaEventQuery</toolkit>
    <toolkit>cudaEventDestroy</toolkit>
    <toolkit>cudaEventElapsedTime</toolkit>
    <toolkit>cudaEventSynchronize</toolkit>
    <toolkit>cudaMalloc</toolkit>
    <toolkit>cudaFree</toolkit>
    <toolkit>cudaMemcpy</toolkit>
  </cuda_api_list>
  <!-- Free-form description; kept in CDATA exactly as authored. -->
  <description><![CDATA[This sample implements matrix multiplication which uses asynchronous copy of data from global to shared memory when on compute capability 8.0 or higher. Also demonstrates arrive-wait barrier for synchronization.]]></description>
  <devicecompilation>whole</devicecompilation>
  <!-- Include search paths, written relative (presumably to the sample directory; confirm against the consuming tool). -->
  <includepaths>
    <path>./</path>
    <path>../</path>
    <path>../../common/inc</path>
  </includepaths>
  <keyconcepts>
    <concept level="basic">CUDA Runtime API</concept>
    <concept level="basic">Linear Algebra</concept>
    <concept level="basic">CPP11 CUDA</concept>
  </keyconcepts>
  <keywords>
    <keyword>CUDA</keyword>
    <keyword>matrix multiply</keyword>
    <keyword>Async copy</keyword>
    <keyword>CPP11</keyword>
    <keyword>GCC 5.0.0</keyword>
  </keywords>
  <!-- No extra libraries or library paths are listed for this sample. -->
  <libraries>
  </libraries>
  <librarypaths>
  </librarypaths>
  <nsight_eclipse>true</nsight_eclipse>
  <primary_file>globalToShmemAsyncCopy.cu</primary_file>
  <required_dependencies>
    <dependency>CPP11</dependency>
  </required_dependencies>
  <scopes>
    <scope>1:CUDA Basic Topics</scope>
    <scope>3:Linear Algebra</scope>
  </scopes>
  <!-- SM architectures listed for this sample, one element per architecture. -->
  <sm-arch>sm35</sm-arch>
  <sm-arch>sm37</sm-arch>
  <sm-arch>sm50</sm-arch>
  <sm-arch>sm52</sm-arch>
  <sm-arch>sm60</sm-arch>
  <sm-arch>sm61</sm-arch>
  <sm-arch>sm70</sm-arch>
  <sm-arch>sm72</sm-arch>
  <sm-arch>sm75</sm-arch>
  <sm-arch>sm80</sm-arch>
  <sm-arch>sm86</sm-arch>
  <!-- Supported environments. Each env gives an arch, a platform, or both;
       the arm entry has no platform and the windows7 entry has no arch. -->
  <supported_envs>
    <env>
      <arch>x86_64</arch>
      <platform>linux</platform>
    </env>
    <env>
      <arch>x86_64</arch>
      <platform>macosx</platform>
    </env>
    <env>
      <arch>arm</arch>
    </env>
    <env>
      <arch>ppc64le</arch>
      <platform>linux</platform>
    </env>
    <env>
      <arch>aarch64</arch>
      <platform>linux</platform>
    </env>
    <env>
      <arch>aarch64</arch>
      <platform>qnx</platform>
    </env>
    <env>
      <platform>windows7</platform>
    </env>
  </supported_envs>
  <supported_sm_architectures>
    <include>all</include>
  </supported_sm_architectures>
  <title>Global Memory to Shared Memory Async Copy</title>
</entry>