mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-12-01 09:19:16 +08:00
88 lines
2.7 KiB
XML
88 lines
2.7 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<!--
  Sample metadata entry for "UnifiedMemoryPerf".

  Records, for this one sample: the CUDA runtime API calls it lists, a
  human-readable description, include paths, key concepts and keywords,
  required dependencies, scopes, supported environments, the supported SM
  architecture range, the primary source file, and the output type.

  The allowed elements and their ordering are defined by SamplesInfo.dtd,
  which is not part of this file; verify any structural change against it.
-->
<entry>
  <name>UnifiedMemoryPerf</name>

  <!-- CUDA runtime API entry points listed for this sample. -->
  <cuda_api_list>
    <toolkit>cudaMemcpy</toolkit>
    <toolkit>cudaStreamDestroy</toolkit>
    <toolkit>cudaMemPrefetchAsync</toolkit>
    <toolkit>cudaFree</toolkit>
    <toolkit>cudaMallocHost</toolkit>
    <toolkit>cudaMallocManaged</toolkit>
    <toolkit>cudaStreamAttachMemAsync</toolkit>
    <toolkit>cudaHostGetDevicePointer</toolkit>
    <toolkit>cudaFreeHost</toolkit>
    <toolkit>cudaStreamSynchronize</toolkit>
    <toolkit>cudaMalloc</toolkit>
    <toolkit>cudaMemcpyAsync</toolkit>
    <toolkit>cudaStreamCreate</toolkit>
    <toolkit>cudaGetDeviceProperties</toolkit>
  </cuda_api_list>

  <!-- Free-text summary; kept on one line because whitespace inside CDATA is data. -->
  <description><![CDATA[This sample demonstrates the performance comparison using matrix multiplication kernel of Unified Memory with/without hints and other types of memory like zero copy buffers, pageable, pagelocked memory performing synchronous and Asynchronous transfers on a single GPU.]]></description>

  <devicecompilation>whole</devicecompilation>

  <!-- Include search paths; presumably relative to the sample directory (confirm). -->
  <includepaths>
    <path>./</path>
    <path>../</path>
    <path>../../../Common</path>
  </includepaths>

  <keyconcepts>
    <concept level="basic">CUDA Systems Integration</concept>
    <concept level="basic">Unified Memory</concept>
    <concept level="basic">CUDA Streams and Events</concept>
    <concept level="basic">Pinned System Paged Memory</concept>
  </keyconcepts>

  <keywords>
    <keyword>CUDA</keyword>
    <keyword>Unified Memory</keyword>
    <keyword>Pinned Memory</keyword>
    <keyword>Zero copy buffer</keyword>
    <keyword>UVM</keyword>
    <keyword>Streams</keyword>
  </keywords>

  <!-- No extra libraries or library paths are listed for this sample. -->
  <libraries>
  </libraries>
  <librarypaths>
  </librarypaths>

  <nsight_eclipse>true</nsight_eclipse>
  <primary_file>matrixMultiplyPerf.cu</primary_file>

  <required_dependencies>
    <dependency>UVM</dependency>
  </required_dependencies>

  <scopes>
    <scope>1:CUDA Basic Topics</scope>
    <scope>1:CUDA Systems Integration</scope>
    <scope>1:Unified Memory</scope>
  </scopes>

  <!--
    NOTE(review): some <env> entries give only <arch> or only <platform>.
    Presumably an omitted field means "any", but that is defined by the
    consumer / SamplesInfo.dtd, not by this file; confirm before editing.
  -->
  <supported_envs>
    <env>
      <arch>x86_64</arch>
      <platform>linux</platform>
    </env>
    <env>
      <arch>x86_64</arch>
      <platform>macosx</platform>
    </env>
    <env>
      <platform>windows7</platform>
    </env>
    <env>
      <arch>arm</arch>
    </env>
    <env>
      <arch>sbsa</arch>
    </env>
    <env>
      <arch>aarch64</arch>
    </env>
    <env>
      <arch>ppc64le</arch>
      <platform>linux</platform>
    </env>
  </supported_envs>

  <!-- Presumably the lowest supported compute capability; no upper bound is given. -->
  <supported_sm_architectures>
    <from>3.5</from>
  </supported_sm_architectures>

  <title>Unified and other CUDA Memories Performance</title>
  <type>exe</type>
</entry>