cuda-samples/Samples/2_Concepts_and_Techniques/reduction/NsightEclipse.xml

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
  <name>reduction</name>
  <cflags>
    <flag>--std=c++11</flag>
  </cflags>
  <cuda_api_list>
    <toolkit>cudaFree</toolkit>
    <toolkit>cudaDeviceSynchronize</toolkit>
    <toolkit>cudaMalloc</toolkit>
    <toolkit>cudaSetDevice</toolkit>
    <toolkit>cudaMemcpy</toolkit>
    <toolkit>cudaGetDeviceProperties</toolkit>
    <toolkit>cudaGetDevice</toolkit>
  </cuda_api_list>
  <description><![CDATA[A parallel sum reduction that computes the sum of a large array of values. This sample demonstrates several important optimization strategies for Data-Parallel Algorithms like reduction using shared memory, __shfl_down_sync, __reduce_add_sync and cooperative_groups reduce.]]></description>
  <devicecompilation>whole</devicecompilation>
  <includepaths>
    <path>./</path>
    <path>../</path>
    <path>../../../Common</path>
  </includepaths>
  <keyconcepts>
    <concept level="advanced">Data-Parallel Algorithms</concept>
    <concept level="advanced">Performance Strategies</concept>
  </keyconcepts>
  <keywords>
    <keyword>CUDA</keyword>
    <keyword>GPGPU</keyword>
    <keyword>Parallel Reduction</keyword>
    <keyword>CPP11</keyword>
  </keywords>
  <libraries>
  </libraries>
  <librarypaths>
  </librarypaths>
  <nsight_eclipse>true</nsight_eclipse>
  <primary_file>reduction.cpp</primary_file>
  <qatests>
    <qatest>-kernel 0</qatest>
    <qatest>-kernel 1</qatest>
    <qatest>-kernel 2</qatest>
    <qatest>-kernel 3</qatest>
    <qatest>-kernel 4</qatest>
    <qatest>-kernel 5</qatest>
    <qatest>-kernel 6</qatest>
  </qatests>
  <required_dependencies>
    <dependency>CPP11</dependency>
  </required_dependencies>
  <scopes>
    <scope>1:CUDA Advanced Topics</scope>
    <scope>1:Data-Parallel Algorithms</scope>
    <scope>1:Performance Strategies</scope>
  </scopes>
  <sm-arch>sm35</sm-arch>
  <sm-arch>sm37</sm-arch>
  <sm-arch>sm50</sm-arch>
  <sm-arch>sm52</sm-arch>
  <sm-arch>sm53</sm-arch>
  <sm-arch>sm60</sm-arch>
  <sm-arch>sm61</sm-arch>
  <sm-arch>sm70</sm-arch>
  <sm-arch>sm72</sm-arch>
  <sm-arch>sm75</sm-arch>
  <sm-arch>sm80</sm-arch>
  <sm-arch>sm86</sm-arch>
  <sm-arch>sm87</sm-arch>
  <supported_envs>
    <env>
      <arch>x86_64</arch>
      <platform>linux</platform>
    </env>
    <env>
      <platform>windows7</platform>
    </env>
    <env>
      <arch>x86_64</arch>
      <platform>macosx</platform>
    </env>
    <env>
      <arch>arm</arch>
    </env>
    <env>
      <arch>sbsa</arch>
    </env>
    <env>
      <arch>ppc64le</arch>
      <platform>linux</platform>
    </env>
  </supported_envs>
  <supported_sm_architectures>
    <include>all</include>
  </supported_sm_architectures>
  <title>CUDA Parallel Reduction</title>
  <type>exe</type>
</entry>