cuda-samples/Samples/2_Concepts_and_Techniques/shfl_scan/NsightEclipse.xml

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<!-- Metadata entry for the shfl_scan sample: build flags, include paths,
     classification tags, target GPU architectures and supported environments.
     The structure is governed by the SamplesInfo.dtd named in the DOCTYPE. -->
<entry>
  <!-- Sample identifier; the primary_file element below uses the same stem. -->
  <name>shfl_scan</name>
  <!-- Extra compiler flags for this sample: a C++11 language-standard switch
       and an O3 optimisation switch. -->
  <cflags>
    <flag>--std=c++11</flag>
    <flag>-O3</flag>
  </cflags>
  <!-- CUDA API function names associated with the sample.
       NOTE(review): presumably the calls made by the sample source; the
       source file is not visible here, so verify against shfl_scan.cu. -->
  <cuda_api_list>
    <toolkit>cudaMemset</toolkit>
    <toolkit>cudaFree</toolkit>
    <toolkit>cudaEventRecord</toolkit>
    <toolkit>cudaMallocHost</toolkit>
    <toolkit>cudaEventCreate</toolkit>
    <toolkit>cudaEventElapsedTime</toolkit>
    <toolkit>cudaEventSynchronize</toolkit>
    <toolkit>cudaFreeHost</toolkit>
    <toolkit>cudaMalloc</toolkit>
    <toolkit>cudaMemcpy</toolkit>
    <toolkit>cudaGetDeviceProperties</toolkit>
    <toolkit>cudaGetDevice</toolkit>
  </cuda_api_list>
  <!-- One-sentence summary of the sample. The text sits in a CDATA section,
       so it is taken literally, including the trailing space before the
       closing marker. -->
  <description><![CDATA[This example demonstrates how to use the shuffle intrinsic __shfl_up_sync to perform a scan operation across a thread block. ]]></description>
  <!-- NOTE(review): "whole" presumably selects whole-program device
       compilation as opposed to separate compilation; confirm against the
       DTD or the tooling that reads this file. -->
  <devicecompilation>whole</devicecompilation>
  <!-- Header search paths, written as relative paths.
       NOTE(review): presumably resolved relative to the sample directory. -->
  <includepaths>
    <path>./</path>
    <path>../</path>
    <path>../../../Common</path>
  </includepaths>
  <!-- Concepts illustrated by the sample; both are tagged level="advanced". -->
  <keyconcepts>
    <concept level="advanced">Data-Parallel Algorithms</concept>
    <concept level="advanced">Performance Strategies</concept>
  </keyconcepts>
  <!-- Free-form search keywords. "Data-Parallel Algorithms" also appears as a
       key concept above and as a scope below. -->
  <keywords>
    <keyword>GPGPU</keyword>
    <keyword>CPP11</keyword>
    <keyword>CUDA</keyword>
    <keyword>scan</keyword>
    <keyword>parallel prefix sum</keyword>
    <keyword>Data-Parallel Algorithms</keyword>
  </keywords>
  <!-- No additional libraries or library search paths are listed: both
       elements are present but contain no children. -->
  <libraries>
  </libraries>
  <librarypaths>
  </librarypaths>
  <nsight_eclipse>true</nsight_eclipse>
  <!-- Main source file of the sample. -->
  <primary_file>shfl_scan.cu</primary_file>
  <!-- CPP11 is the only declared dependency; it pairs with the C++11 flag in
       cflags above. -->
  <required_dependencies>
    <dependency>CPP11</dependency>
  </required_dependencies>
  <!-- Category tags for the sample.
       NOTE(review): the meaning of the "1:" prefix is not visible in this
       file; confirm with the consuming tool before editing it. -->
  <scopes>
    <scope>1:CUDA Advanced Topics</scope>
    <scope>1:Data-Parallel Algorithms</scope>
    <scope>1:Performance Strategies</scope>
  </scopes>
  <!-- Target GPU architectures, one sm-arch element each, from sm35 up to
       sm87. The lowest entry appears to correspond to the 3.5 lower bound in
       supported_sm_architectures further down; keep the two in step. -->
  <sm-arch>sm35</sm-arch>
  <sm-arch>sm37</sm-arch>
  <sm-arch>sm50</sm-arch>
  <sm-arch>sm52</sm-arch>
  <sm-arch>sm53</sm-arch>
  <sm-arch>sm60</sm-arch>
  <sm-arch>sm61</sm-arch>
  <sm-arch>sm70</sm-arch>
  <sm-arch>sm72</sm-arch>
  <sm-arch>sm75</sm-arch>
  <sm-arch>sm80</sm-arch>
  <sm-arch>sm86</sm-arch>
  <sm-arch>sm87</sm-arch>
  <!-- Supported environments. Each env gives an arch, a platform, or both.
       NOTE(review): an omitted arch or platform presumably means "any", but
       that rule is not stated in this file; verify before relying on it. -->
  <supported_envs>
    <env>
      <arch>x86_64</arch>
      <platform>linux</platform>
    </env>
    <env>
      <platform>windows7</platform>
    </env>
    <env>
      <arch>x86_64</arch>
      <platform>macosx</platform>
    </env>
    <env>
      <arch>arm</arch>
    </env>
    <env>
      <arch>aarch64</arch>
    </env>
    <env>
      <arch>sbsa</arch>
    </env>
    <env>
      <arch>ppc64le</arch>
      <platform>linux</platform>
    </env>
  </supported_envs>
  <!-- Lower bound of the supported architecture range; only a "from" value is
       given, with no upper bound. -->
  <supported_sm_architectures>
    <from>3.5</from>
  </supported_sm_architectures>
  <!-- Human-readable title of the sample. -->
  <title>CUDA Parallel Prefix Sum with Shuffle Intrinsics (SHFL_Scan)</title>
  <!-- NOTE(review): "exe" presumably marks the build output as an executable. -->
  <type>exe</type>
</entry>