mirror of
				https://github.com/NVIDIA/cuda-samples.git
				synced 2025-10-31 03:07:49 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			88 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			XML
		
	
	
	
	
	
			
		
		
	
	
			88 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			XML
		
	
	
	
	
	
| <?xml version="1.0" encoding="UTF-8"?> 
 | |
| <!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
 | |
| <entry>
 | |
|   <name>UnifiedMemoryPerf</name>
 | |
|   <cuda_api_list>
 | |
|     <toolkit>cudaMemcpy</toolkit>
 | |
|     <toolkit>cudaStreamDestroy</toolkit>
 | |
|     <toolkit>cudaMemPrefetchAsync</toolkit>
 | |
|     <toolkit>cudaFree</toolkit>
 | |
|     <toolkit>cudaMallocHost</toolkit>
 | |
|     <toolkit>cudaMallocManaged</toolkit>
 | |
|     <toolkit>cudaStreamAttachMemAsync</toolkit>
 | |
|     <toolkit>cudaHostGetDevicePointer</toolkit>
 | |
|     <toolkit>cudaFreeHost</toolkit>
 | |
|     <toolkit>cudaStreamSynchronize</toolkit>
 | |
|     <toolkit>cudaMalloc</toolkit>
 | |
|     <toolkit>cudaMemcpyAsync</toolkit>
 | |
|     <toolkit>cudaStreamCreate</toolkit>
 | |
|     <toolkit>cudaGetDeviceProperties</toolkit>
 | |
|   </cuda_api_list>
 | |
|   <description><![CDATA[This sample demonstrates a performance comparison, using a matrix multiplication kernel, of Unified Memory with/without hints and other types of memory such as zero copy buffers, pageable, and pagelocked memory, performing synchronous and asynchronous transfers on a single GPU.]]></description>
 | |
|   <devicecompilation>whole</devicecompilation>
 | |
|   <includepaths>
 | |
|     <path>./</path>
 | |
|     <path>../</path>
 | |
|     <path>../../../Common</path>
 | |
|   </includepaths>
 | |
|   <keyconcepts>
 | |
|     <concept level="basic">CUDA Systems Integration</concept>
 | |
|     <concept level="basic">Unified Memory</concept>
 | |
|     <concept level="basic">CUDA Streams and Events</concept>
 | |
|     <concept level="basic">Pinned System Paged Memory</concept>
 | |
|   </keyconcepts>
 | |
|   <keywords>
 | |
|     <keyword>CUDA</keyword>
 | |
|     <keyword>Unified Memory</keyword>
 | |
|     <keyword>Pinned Memory</keyword>
 | |
|     <keyword>Zero copy buffer</keyword>
 | |
|     <keyword>UVM</keyword>
 | |
|     <keyword>Streams</keyword>
 | |
|   </keywords>
 | |
|   <libraries>
 | |
|   </libraries>
 | |
|   <librarypaths>
 | |
|   </librarypaths>
 | |
|   <nsight_eclipse>true</nsight_eclipse>
 | |
|   <primary_file>matrixMultiplyPerf.cu</primary_file>
 | |
|   <required_dependencies>
 | |
|     <dependency>UVM</dependency>
 | |
|   </required_dependencies>
 | |
|   <scopes>
 | |
|     <scope>1:CUDA Basic Topics</scope>
 | |
|     <scope>1:CUDA Systems Integration</scope>
 | |
|     <scope>1:Unified Memory</scope>
 | |
|   </scopes>
 | |
|   <supported_envs>
 | |
|     <env>
 | |
|       <arch>x86_64</arch>
 | |
|       <platform>linux</platform>
 | |
|     </env>
 | |
|     <env>
 | |
|       <arch>x86_64</arch>
 | |
|       <platform>macosx</platform>
 | |
|     </env>
 | |
|     <env>
 | |
|       <platform>windows7</platform>
 | |
|     </env>
 | |
|     <env>
 | |
|       <arch>arm</arch>
 | |
|     </env>
 | |
|     <env>
 | |
|       <arch>sbsa</arch>
 | |
|     </env>
 | |
|     <env>
 | |
|       <arch>aarch64</arch>
 | |
|     </env>
 | |
|     <env>
 | |
|       <arch>ppc64le</arch>
 | |
|       <platform>linux</platform>
 | |
|     </env>
 | |
|   </supported_envs>
 | |
|   <supported_sm_architectures>
 | |
|     <from>3.5</from>
 | |
|   </supported_sm_architectures>
 | |
|   <title>Unified and other CUDA Memories Performance</title>
 | |
|   <type>exe</type>
 | |
| </entry>
 |