<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<!--
  Sample metadata entry for p2pBandwidthLatencyTest (document type: SamplesInfo.dtd).

  Layout notes for maintainers:
    * One element per line; whitespace between elements is formatting only.
    * The text inside the description CDATA section is data, including its
      internal line break, so it is intentionally not re-wrapped or indented.
    * libraries and librarypaths currently list no entries.
    * Some env entries give only an arch or only a platform; they are kept
      exactly as authored.
-->
<entry>
  <name>p2pBandwidthLatencyTest</name>
  <cuda_api_list>
    <toolkit>cudaSetDevice</toolkit>
    <toolkit>cudaEventDestroy</toolkit>
    <toolkit>cudaOccupancyMaxPotentialBlockSize</toolkit>
    <toolkit>cudaCheckError</toolkit>
    <toolkit>cudaFreeHost</toolkit>
    <toolkit>cudaGetDeviceCount</toolkit>
    <toolkit>cudaDeviceCanAccessPeer</toolkit>
    <toolkit>cudaStreamCreateWithFlags</toolkit>
    <toolkit>cudaStreamDestroy</toolkit>
    <toolkit>cudaGetLastError</toolkit>
    <toolkit>cudaMemset</toolkit>
    <toolkit>cudaStreamWaitEvent</toolkit>
    <toolkit>cudaEventElapsedTime</toolkit>
    <toolkit>cudaEventCreate</toolkit>
    <toolkit>cudaHostAlloc</toolkit>
    <toolkit>cudaFree</toolkit>
    <toolkit>cudaGetErrorString</toolkit>
    <toolkit>cudaMemcpyPeerAsync</toolkit>
    <toolkit>cudaDeviceDisablePeerAccess</toolkit>
    <toolkit>cudaEventRecord</toolkit>
    <toolkit>cudaStreamSynchronize</toolkit>
    <toolkit>cudaDeviceEnablePeerAccess</toolkit>
    <toolkit>cudaMalloc</toolkit>
    <toolkit>cudaGetDeviceProperties</toolkit>
  </cuda_api_list>
  <description><![CDATA[This application demonstrates the CUDA Peer-To-Peer (P2P) data transfers between pairs of GPUs and computes latency and bandwidth. 
Tests on GPU pairs using P2P and without P2P are tested.]]></description>
  <devicecompilation>whole</devicecompilation>
  <includepaths>
    <path>./</path>
    <path>../</path>
    <path>../../../Common</path>
  </includepaths>
  <keyconcepts>
    <concept level="basic">Performance Strategies</concept>
    <concept level="basic">Asynchronous Data Transfers</concept>
    <concept level="basic">Unified Virtual Address Space</concept>
    <concept level="basic">Peer to Peer Data Transfers</concept>
    <concept level="basic">Multi-GPU</concept>
  </keyconcepts>
  <keywords>
    <keyword>CUDA</keyword>
    <keyword>Performance</keyword>
    <keyword>multi-GPU support</keyword>
    <keyword>peer to peer</keyword>
  </keywords>
  <libraries> </libraries>
  <librarypaths> </librarypaths>
  <nsight_eclipse>true</nsight_eclipse>
  <primary_file>p2pBandwidthLatencyTest.cu</primary_file>
  <scopes>
    <scope>1:CUDA Basic Topics</scope>
    <scope>1:Performance Strategies</scope>
  </scopes>
  <sm-arch>sm50</sm-arch>
  <sm-arch>sm52</sm-arch>
  <sm-arch>sm53</sm-arch>
  <sm-arch>sm60</sm-arch>
  <sm-arch>sm61</sm-arch>
  <sm-arch>sm70</sm-arch>
  <sm-arch>sm72</sm-arch>
  <sm-arch>sm75</sm-arch>
  <sm-arch>sm80</sm-arch>
  <sm-arch>sm86</sm-arch>
  <sm-arch>sm87</sm-arch>
  <sm-arch>sm89</sm-arch>
  <sm-arch>sm90</sm-arch>
  <supported_envs>
    <env>
      <arch>x86_64</arch>
      <platform>linux</platform>
    </env>
    <env>
      <platform>windows7</platform>
    </env>
    <env>
      <arch>x86_64</arch>
      <platform>macosx</platform>
    </env>
    <env>
      <arch>arm</arch>
    </env>
    <env>
      <arch>sbsa</arch>
    </env>
    <env>
      <arch>ppc64le</arch>
      <platform>linux</platform>
    </env>
  </supported_envs>
  <supported_sm_architectures>
    <include>all</include>
  </supported_sm_architectures>
  <title>Peer-to-Peer Bandwidth Latency Test with Multi-GPUs</title>
  <type>exe</type>
</entry>