<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry >
<!-- Name of this entry. -->
<name> simpleMultiCopy</name>
<!-- CUDA API names associated with this entry, one <toolkit> element per function.
     Stray revision-timestamp text that had been interleaved between the children
     was removed so that this container holds only <toolkit> elements. -->
<cuda_api_list>
  <toolkit> cudaHostAlloc</toolkit>
  <toolkit> cudaStreamDestroy</toolkit>
  <toolkit> cudaMalloc</toolkit>
  <toolkit> cudaMemcpyAsync</toolkit>
  <toolkit> cudaFree</toolkit>
  <toolkit> cudaSetDevice</toolkit>
  <toolkit> cudaEventSynchronize</toolkit>
  <toolkit> cudaDeviceSynchronize</toolkit>
  <toolkit> cudaEventRecord</toolkit>
  <toolkit> cudaFreeHost</toolkit>
  <toolkit> cudaMemset</toolkit>
  <toolkit> cudaEventDestroy</toolkit>
  <toolkit> cudaEventElapsedTime</toolkit>
  <toolkit> cudaStreamCreate</toolkit>
  <toolkit> cudaGetDeviceProperties</toolkit>
  <toolkit> cudaEventCreate</toolkit>
</cuda_api_list>
<!-- Free-text summary of the sample; the text is carried in a CDATA section. -->
<description> <![CDATA[Supported in GPUs with Compute Capability 1.1, overlapping compute with one memcopy is possible from the host system. For Quadro and Tesla GPUs with Compute Capability 2.0, a second overlapped copy operation in either direction at full speed is possible (PCI-e is symmetric). This sample illustrates the usage of CUDA streams to achieve overlapping of kernel execution with data copies to and from the device.]]> </description>
<!-- NOTE(review): the allowed values and meaning of "whole" are defined outside this file; confirm against SamplesInfo.dtd. -->
<devicecompilation> whole</devicecompilation>
<!-- Extra files listed for this entry: the doc directory and four files beneath it. -->
<files>
  <file> doc</file>
  <file> doc\C1060_CopyOverlap.cpj</file>
  <file> doc\C1060_CopyOverlap_Session1_Context_0.csv</file>
  <file> doc\GTX480_CopyOverlap.cpj</file>
  <file> doc\GTX480_CopyOverlap_Session1_Context_0.csv</file>
</files>
<!-- Include search paths, one <path> element each.
     Stray revision-timestamp text between the children was removed so that this
     container holds only <path> elements. -->
<includepaths>
  <path> ./</path>
  <path> ../</path>
  <path> ../../../Common</path>
</includepaths>
<!-- Concepts covered by the sample; every concept here carries level="basic". -->
<keyconcepts>
  <concept level="basic"> CUDA Streams and Events</concept>
  <concept level="basic"> Asynchronous Data Transfers</concept>
  <concept level="basic"> Overlap Compute and Copy</concept>
  <concept level="basic"> GPU Performance</concept>
</keyconcepts>
<!-- Keywords for this entry. -->
<keywords>
  <keyword> GPGPU</keyword>
</keywords>
<!-- No libraries or library search paths are listed for this entry. -->
<libraries>
</libraries>
<librarypaths>
</librarypaths>
<!-- NOTE(review): presumably flags Nsight Eclipse support; confirm against SamplesInfo.dtd. -->
<nsight_eclipse> true</nsight_eclipse>
<!-- Primary source file of the entry. -->
<primary_file> simpleMultiCopy.cu</primary_file>
<!-- Scope labels; each value has the form "<number>:<label>". -->
<scopes>
  <scope> 1:CUDA Advanced Topics</scope>
  <scope> 1:Performance Strategies</scope>
</scopes>
<!-- SM architectures listed for this entry, in ascending order (sm50 .. sm90).
     Stray revision-timestamp text that had been interleaved between these sibling
     elements was removed; it was bare character data directly inside <entry>. -->
<sm-arch> sm50</sm-arch>
<sm-arch> sm52</sm-arch>
<sm-arch> sm53</sm-arch>
<sm-arch> sm60</sm-arch>
<sm-arch> sm61</sm-arch>
<sm-arch> sm70</sm-arch>
<sm-arch> sm72</sm-arch>
<sm-arch> sm75</sm-arch>
<sm-arch> sm80</sm-arch>
<sm-arch> sm86</sm-arch>
<sm-arch> sm87</sm-arch>
<sm-arch> sm90</sm-arch>
<!-- Supported environments. Each <env> gives an <arch>, a <platform>, or both.
     Stray revision-timestamp text between the <env> children was removed so that
     this container holds only <env> elements. -->
<supported_envs>
  <env>
    <arch> x86_64</arch>
    <platform> linux</platform>
  </env>
  <env>
    <platform> windows7</platform>
  </env>
  <env>
    <arch> x86_64</arch>
    <platform> macosx</platform>
  </env>
  <env>
    <arch> arm</arch>
  </env>
  <env>
    <arch> sbsa</arch>
  </env>
  <env>
    <arch> ppc64le</arch>
    <platform> linux</platform>
  </env>
</supported_envs>
<!-- SM architecture filter; this entry includes "all". -->
<supported_sm_architectures>
  <include> all</include>
</supported_sm_architectures>
<!-- Human-readable title of the entry. -->
<title> Simple Multi Copy and Compute</title>
<!-- NOTE(review): "exe" presumably marks an executable sample; confirm against SamplesInfo.dtd. -->
<type> exe</type>
</entry>