diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/c_cpp_properties.json b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/c_cpp_properties.json new file mode 100644 index 00000000..f0066b0f --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**", + "${workspaceFolder}/../../../Common" + ], + "defines": [], + "compilerPath": "/usr/local/cuda/bin/nvcc", + "cStandard": "gnu17", + "cppStandard": "gnu++14", + "intelliSenseMode": "linux-gcc-x64", + "configurationProvider": "ms-vscode.makefile-tools" + } + ], + "version": 4 +} diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/extensions.json b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/extensions.json new file mode 100644 index 00000000..c7eb54dc --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "nvidia.nsight-vscode-edition", + "ms-vscode.cpptools", + "ms-vscode.makefile-tools" + ] +} diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/launch.json b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/launch.json new file mode 100644 index 00000000..0c68d127 --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/launch.json @@ -0,0 +1,10 @@ +{ + "configurations": [ + { + "name": "CUDA C++: Launch", + "type": "cuda-gdb", + "request": "launch", + "program": "${workspaceFolder}/graphConditionalNodes" + } + ] +} diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/tasks.json b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/tasks.json new file mode 100644 index 00000000..4509aeb1 --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/.vscode/tasks.json @@ -0,0 +1,15 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "sample", + "type": "shell", + "command": "make dbg=1", + 
"problemMatcher": ["$nvcc"], + "group": { + "kind": "build", + "isDefault": true + } + } + ] +} diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/Makefile b/Samples/3_CUDA_Features/graphConditionalNodes/Makefile new file mode 100644 index 00000000..d32d3432 --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/Makefile @@ -0,0 +1,350 @@ +################################################################################ +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#
+################################################################################
+#
+# Makefile project (only supported on Mac OS X and Linux Platforms)
+#
+################################################################################
+
+# Location of the CUDA Toolkit
+CUDA_PATH ?= /usr/local/cuda
+
+##############################
+# start deprecated interface #
+##############################
+ifeq ($(x86_64),1)
+    $(info WARNING - x86_64 variable has been deprecated)
+    $(info WARNING - please use TARGET_ARCH=x86_64 instead)
+    TARGET_ARCH ?= x86_64
+endif
+ifeq ($(ARMv7),1)
+    $(info WARNING - ARMv7 variable has been deprecated)
+    $(info WARNING - please use TARGET_ARCH=armv7l instead)
+    TARGET_ARCH ?= armv7l
+endif
+ifeq ($(aarch64),1)
+    $(info WARNING - aarch64 variable has been deprecated)
+    $(info WARNING - please use TARGET_ARCH=aarch64 instead)
+    TARGET_ARCH ?= aarch64
+endif
+ifeq ($(ppc64le),1)
+    $(info WARNING - ppc64le variable has been deprecated)
+    $(info WARNING - please use TARGET_ARCH=ppc64le instead)
+    TARGET_ARCH ?= ppc64le
+endif
+ifneq ($(GCC),)
+    $(info WARNING - GCC variable has been deprecated)
+    $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
+    HOST_COMPILER ?= $(GCC)
+endif
+ifneq ($(abi),)
+    $(error ERROR - abi variable has been removed)
+endif
+############################
+# end deprecated interface #
+############################
+
+# architecture
+HOST_ARCH   := $(shell uname -m)
+TARGET_ARCH ?= $(HOST_ARCH)
+ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
+    ifneq ($(TARGET_ARCH),$(HOST_ARCH))
+        ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
+            TARGET_SIZE := 64
+        else ifneq (,$(filter $(TARGET_ARCH),armv7l))
+            TARGET_SIZE := 32
+        endif
+    else
+        TARGET_SIZE := $(shell getconf LONG_BIT)
+    endif
+else
+    $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
+endif
+
+# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
+ifeq ($(HOST_ARCH),aarch64)
+    ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
+        HOST_ARCH := sbsa
+        TARGET_ARCH := sbsa
+    endif
+endif
+
+ifneq ($(TARGET_ARCH),$(HOST_ARCH))
+    ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
+        $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
+    endif
+endif
+
+# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
+ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
+    TARGET_ARCH = armv7l
+endif
+
+# operating system
+HOST_OS   := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
+TARGET_OS ?= $(HOST_OS)
+ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
+    $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
+endif
+
+# host compiler
+ifdef HOST_COMPILER
+    CUSTOM_HOST_COMPILER = 1
+endif
+
+ifeq ($(TARGET_OS),darwin)
+    ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
+        HOST_COMPILER ?= clang++
+    endif
+else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
+    ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
+        ifeq ($(TARGET_OS),linux)
+            HOST_COMPILER ?= arm-linux-gnueabihf-g++
+        else ifeq ($(TARGET_OS),qnx)
+            ifeq ($(QNX_HOST),)
+                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
+            endif
+            ifeq ($(QNX_TARGET),)
+                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
+            endif
+            export QNX_HOST
+            export QNX_TARGET
+            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
+        else ifeq ($(TARGET_OS),android)
+            HOST_COMPILER ?= arm-linux-androideabi-g++
+        endif
+    else ifeq ($(TARGET_ARCH),aarch64)
+        ifeq ($(TARGET_OS), linux)
+            HOST_COMPILER ?= aarch64-linux-gnu-g++
+        else ifeq ($(TARGET_OS),qnx)
+            ifeq ($(QNX_HOST),)
+                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
+            endif
+            ifeq ($(QNX_TARGET),)
+                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
+            endif
+            export QNX_HOST
+            export QNX_TARGET
+            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
+        else ifeq ($(TARGET_OS), android)
+            HOST_COMPILER ?= aarch64-linux-android-clang++
+        endif
+    else ifeq ($(TARGET_ARCH),sbsa)
+        HOST_COMPILER ?= aarch64-linux-gnu-g++
+    else ifeq ($(TARGET_ARCH),ppc64le)
+        HOST_COMPILER ?= powerpc64le-linux-gnu-g++
+    endif
+endif
+HOST_COMPILER ?= g++
+NVCC          := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
+
+# internal flags
+NVCCFLAGS   := -m${TARGET_SIZE}
+CCFLAGS     :=
+LDFLAGS     :=
+
+# build flags
+ifeq ($(TARGET_OS),darwin)
+    LDFLAGS += -rpath $(CUDA_PATH)/lib
+    CCFLAGS += -arch $(HOST_ARCH)
+else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
+    LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
+    CCFLAGS += -mfloat-abi=hard
+else ifeq ($(TARGET_OS),android)
+    LDFLAGS += -pie
+    CCFLAGS += -fpie -fpic -fexceptions
+endif
+
+ifneq ($(TARGET_ARCH),$(HOST_ARCH))
+    ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
+        ifneq ($(TARGET_FS),)
+            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
+            ifeq ($(GCCVERSIONLTEQ46),1)
+                CCFLAGS += --sysroot=$(TARGET_FS)
+            endif
+            LDFLAGS += --sysroot=$(TARGET_FS)
+            LDFLAGS += -rpath-link=$(TARGET_FS)/lib
+            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
+            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
+        endif
+    endif
+    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
+        ifneq ($(TARGET_FS),)
+            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
+            ifeq ($(GCCVERSIONLTEQ46),1)
+                CCFLAGS += --sysroot=$(TARGET_FS)
+            endif
+            LDFLAGS += --sysroot=$(TARGET_FS)
+            LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
+            LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
+            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
+            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
+            LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
+            CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
+            CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
+        endif
+    endif
+    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
+        NVCCFLAGS += -D_QNX_SOURCE
+        NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
+        CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
+        LDFLAGS += -lsocket
+        LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
+        CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
+        ifdef TARGET_OVERRIDE
+            LDFLAGS += -lslog2
+        endif
+
+        ifneq ($(TARGET_FS),)
+            LDFLAGS += -L$(TARGET_FS)/usr/lib
+            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
+            LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
+            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
+            CCFLAGS += -I$(TARGET_FS)/../include
+        endif
+    endif
+endif
+
+ifdef TARGET_OVERRIDE # cuda toolkit targets override
+    NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
+endif
+
+# Install directory of different arch
+CUDA_INSTALL_TARGET_DIR :=
+ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
+    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
+    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
+    CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
+    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
+    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
+    CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
+else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
+    CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
+else ifeq ($(TARGET_ARCH),ppc64le)
+    CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
+endif
+
+# Debug build flags
+ifeq ($(dbg),1)
+      NVCCFLAGS += -g -G
+      BUILD_TYPE := debug
+else
+      BUILD_TYPE := release
+endif
+
+ALL_CCFLAGS :=
+ALL_CCFLAGS += $(NVCCFLAGS)
+ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
+ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
+
+SAMPLE_ENABLED := 1
+
+# This sample is not supported on Mac OSX
+ifeq ($(TARGET_OS),darwin)
+  $(info >>> WARNING - graphConditionalNodes is not supported on Mac OSX - waiving sample <<<)
+  SAMPLE_ENABLED := 0
+endif
+
+ALL_LDFLAGS :=
+ALL_LDFLAGS += $(ALL_CCFLAGS)
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
+ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
+
+# Common includes and paths for CUDA
+INCLUDES  := -I../../../Common
+LIBRARIES :=
+
+################################################################################
+
+# Gencode arguments
+ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
+SMS ?= 53 61 70 72 75 80 86 87 90
+else
+SMS ?= 50 52 60 61 70 75 80 86 89 90
+endif
+
+ifeq ($(SMS),)
+$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
+SAMPLE_ENABLED := 0
+endif
+
+ifeq ($(GENCODE_FLAGS),)
+# Generate SASS code for each SM architecture listed in $(SMS)
+$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
+
+# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
+HIGHEST_SM := $(lastword $(sort $(SMS)))
+ifneq ($(HIGHEST_SM),)
+GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
+endif
+endif
+
+ALL_CCFLAGS += --threads 0 --std=c++11
+
+# When the sample is waived, $(EXEC) turns each recipe line into an echo of the command instead of running it
+ifeq ($(SAMPLE_ENABLED),0)
+EXEC ?= @echo "[@]"
+endif
+
+################################################################################
+
+# Target rules
+# These targets are commands, not files; keep them working even if a file with the same name exists
+.PHONY: all build check.deps run testrun clean clobber
+all: build
+
+build: graphConditionalNodes
+
+check.deps:
+ifeq ($(SAMPLE_ENABLED),0)
+	@echo "Sample will be waived due to the above missing dependencies"
+else
+	@echo "Sample is ready - all dependencies have been met"
+endif
+
+graphConditionalNodes.o:graphConditionalNodes.cu
+	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
+
+graphConditionalNodes: graphConditionalNodes.o
+	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
+	$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
+	$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
+
+run: build
+	$(EXEC) ./graphConditionalNodes
+
+testrun: build
+
+clean:
+	rm -f graphConditionalNodes graphConditionalNodes.o
+	rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/graphConditionalNodes
+
+clobber: clean
diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/NsightEclipse.xml b/Samples/3_CUDA_Features/graphConditionalNodes/NsightEclipse.xml
new file mode 100644
index 00000000..9736cedb
--- /dev/null
+++ b/Samples/3_CUDA_Features/graphConditionalNodes/NsightEclipse.xml
@@ -0,0 +1,86 @@
+
+
+
+ graphConditionalNodes
+
+ cudaDeviceSynchronize
+ cudaDriverGetVersion
+ cudaFree
+ cudaGraphAddNode
+ cudaGraphConditionalHandleCreate
+
cudaGraphCreate + cudaGraphDestroy + cudaGraphExecDestroy + cudaGraphInstantiate + cudaGraphLaunch + cudaGraphSetConditional + cudaMalloc + cudaMemset + cudaStreamBeginCapture + cudaStreamBeginCaptureToGraph + cudaStreamCreate + cudaStreamDestroy + cudaStreamEndCapture + cudaStreamGetCaptureInfo + + + whole + + ./ + ../ + ../../../Common + + + CUDA Graphs + + + CUDA + CUDA Graphs + Streams + GPGPU + Graph Conditional Nodes + + + + + + true + graphConditionalNodes.cu + + 1:CUDA Basic Topics + + sm60 + sm61 + sm70 + sm72 + sm75 + sm80 + sm86 + sm87 + sm89 + sm90 + + + x86_64 + linux + + + windows7 + + + arm + + + sbsa + + + ppc64le + linux + + + + 6.0 + + Graph Conditional Nodes + exe + diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/README.md b/Samples/3_CUDA_Features/graphConditionalNodes/README.md new file mode 100644 index 00000000..a67268b6 --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/README.md @@ -0,0 +1,70 @@ +# graphConditionalNodes - Graph Conditional Nodes + +## Description + +A demonstration of CUDA graphs conditional nodes. 
+ +## Key Concepts + +CUDA Graphs + +## Supported SM Architectures + +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) + +## Supported OSes + +Linux, Windows + +## Supported CPU Architecture + +x86_64, ppc64le, armv7l + +## CUDA APIs involved + +### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) +cudaDeviceSynchronize, cudaDriverGetVersion, cudaFree, cudaGraphAddNode, cudaGraphConditionalHandleCreate, cudaGraphCreate, cudaGraphDestroy, cudaGraphExecDestroy, cudaGraphInstantiate, cudaGraphLaunch, cudaGraphSetConditional, cudaMalloc, cudaMemset, cudaStreamBeginCapture, cudaStreamBeginCaptureToGraph, cudaStreamCreate, cudaStreamDestroy, cudaStreamEndCapture, cudaStreamGetCaptureInfo + +## Prerequisites + +Download and install the [CUDA Toolkit 12.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. + +## Build and Run + +### Windows +The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format: +``` +*_vs<version>.sln - for Visual Studio <version> +``` +Each individual sample has its own set of solution files in its directory. + +To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used. +> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). 
Check DirectX Dependencies section for details. + +### Linux +The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make: +``` +$ cd <sample_dir> +$ make +``` +The samples makefiles can take advantage of certain options: +* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l. + By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
+`$ make TARGET_ARCH=x86_64` <br/>
+`$ make TARGET_ARCH=ppc64le` <br/>
+`$ make TARGET_ARCH=armv7l` <br/>
+ See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details. +* **dbg=1** - build with debug symbols + ``` + $ make dbg=1 + ``` +* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`. + ``` + $ make SMS="50 60" + ``` + +* **HOST_COMPILER=** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers. +``` + $ make HOST_COMPILER=g++ +``` + +## References (for more details) + diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu new file mode 100644 index 00000000..55990f04 --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes.cu @@ -0,0 +1,356 @@ +/* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This file demonstrates the usage of conditional graph nodes with + * a series of *simple* example graphs. + * + * For more information on conditional nodes, see the programming guide: + * + * https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#conditional-graph-nodes + * + */ + +// System includes +#include +#include + +// CUDA runtime +#include + +// helper functions and utilities to work with CUDA +#include +#include + +/* + * Create a graph containing two nodes. + * The first node, A, is a kernel and the second node, B, is a conditional IF node. + * The kernel sets the condition variable to true if a device memory location + * contains an odd number. Otherwise the condition variable is set to false. + * There is a single kernel, C, within the conditional body which prints a message. 
+ *
+ * A -> B [ C ]
+ *
+ */
+
+// Node A: derives the condition value from the byte at dPtr and stores it in
+// the conditional handle that is later attached to the IF node.
+__global__ void ifGraphKernelA(char *dPtr, cudaGraphConditionalHandle handle)
+{
+    // In this example, condition is set if *dPtr is odd
+    unsigned int value = *dPtr & 0x01;
+    cudaGraphSetConditional(handle, value);
+    printf("GPU: Handle set to %d\n", value);
+}
+
+// This kernel will only be executed if the condition is true
+__global__ void ifGraphKernelC(void)
+{
+    printf("GPU: Hello from the GPU!\n");
+}
+
+// Setup and launch the graph.
+// Builds A -> B [ C ] with explicit cudaGraphAddNode calls, then launches it
+// twice: once with the input byte even (body skipped) and once with it odd.
+void simpleIfGraph(void)
+{
+    cudaGraph_t graph;
+    cudaGraphExec_t graphExec;
+    cudaGraphNode_t node;
+
+    void *kernelArgs[2];
+
+    // Allocate a byte of device memory to use as input
+    char *dPtr;
+    checkCudaErrors(cudaMalloc((void**)&dPtr, 1));
+
+    printf("simpleIfGraph: Building graph...\n");
+    // Checked like every other runtime call in this file
+    checkCudaErrors(cudaGraphCreate(&graph, 0));
+
+    // Create conditional handle.
+    cudaGraphConditionalHandle handle;
+    checkCudaErrors(cudaGraphConditionalHandleCreate(&handle, graph));
+
+    // Use a kernel upstream of the conditional to set the handle value
+    cudaGraphNodeParams params = { cudaGraphNodeTypeKernel };
+    params.kernel.func = (void *)ifGraphKernelA;
+    params.kernel.gridDim.x = params.kernel.gridDim.y = params.kernel.gridDim.z = 1;
+    params.kernel.blockDim.x = params.kernel.blockDim.y = params.kernel.blockDim.z = 1;
+    params.kernel.kernelParams = kernelArgs;
+    kernelArgs[0] = &dPtr;
+    kernelArgs[1] = &handle;
+    checkCudaErrors(cudaGraphAddNode(&node, graph, NULL, 0, &params));
+
+    // Conditional IF node B depends on kernel node A (passed as the single dependency)
+    cudaGraphNodeParams cParams = { cudaGraphNodeTypeConditional };
+    cParams.conditional.handle = handle;
+    cParams.conditional.type = cudaGraphCondTypeIf;
+    cParams.conditional.size = 1;
+    checkCudaErrors(cudaGraphAddNode(&node, graph, &node, 1, &cParams));
+
+    // cudaGraphAddNode filled in the body graph for the conditional node
+    cudaGraph_t bodyGraph = cParams.conditional.phGraph_out[0];
+
+    // Populate the body of the conditional node
+    // (params is reused: same launch geometry, different kernel, no arguments)
+    cudaGraphNode_t bodyNode;
+    params.kernel.func = (void *)ifGraphKernelC;
+    params.kernel.kernelParams = nullptr;
+    checkCudaErrors(cudaGraphAddNode(&bodyNode, bodyGraph, NULL, 0, &params));
+
+    checkCudaErrors(cudaGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
+
+    // Initialize device memory and launch the graph
+    checkCudaErrors(cudaMemset(dPtr, 0, 1)); // Set dPtr to 0
+    printf("Host: Launching graph with conditional value set to false\n");
+    checkCudaErrors(cudaGraphLaunch(graphExec, 0));
+    checkCudaErrors(cudaDeviceSynchronize());
+
+    // Initialize device memory and launch the graph
+    checkCudaErrors(cudaMemset(dPtr, 1, 1)); // Set dPtr to 1
+    printf("Host: Launching graph with conditional value set to true\n");
+    checkCudaErrors(cudaGraphLaunch(graphExec, 0));
+    checkCudaErrors(cudaDeviceSynchronize());
+
+    // Cleanup
+    checkCudaErrors(cudaGraphExecDestroy(graphExec));
+    checkCudaErrors(cudaGraphDestroy(graph));
+    checkCudaErrors(cudaFree(dPtr));
+
+    printf("simpleIfGraph: Complete\n\n");
+}
+
+/*
+ * Create a graph containing a single conditional while node.
+ * The default value of the conditional variable is set to true, so this
+ * effectively becomes a do-while loop as the conditional body will always
+ * execute at least once. The body of the conditional contains 3 kernel nodes:
+ * A [ B -> C -> D ]
+ * Nodes B and C are just dummy nodes for demonstrative purposes. Node D
+ * will decrement a device memory location and set the condition value to false
+ * when the value reaches zero, terminating the loop.
+ * In this example, stream capture is used to populate the conditional body.
+ */
+
+// This kernel will only be executed if the condition is true
+__global__ void doWhileEmptyKernel(void)
+{
+    printf("GPU: doWhileEmptyKernel()\n");
+}
+
+// Last node of the loop body: counts the byte at dPtr down by one and clears
+// the loop condition once it reaches zero.
+__global__ void doWhileLoopKernel(char *dPtr, cudaGraphConditionalHandle handle)
+{
+    *dPtr -= 1;
+    if (*dPtr == 0) {
+        cudaGraphSetConditional(handle, 0);
+    }
+    printf("GPU: counter = %d\n", *dPtr);
+}
+
+// Builds a graph holding one WHILE conditional node, fills its body through
+// stream capture, and launches it with the counter preset to 10.
+void simpleDoWhileGraph(void)
+{
+    cudaGraph_t graph;
+    cudaGraphExec_t graphExec;
+    cudaGraphNode_t whileNode;
+
+    // A single byte of device memory serves as the loop counter
+    char *dPtr;
+    checkCudaErrors(cudaMalloc((void**)&dPtr, 1));
+
+    printf("simpleDoWhileGraph: Building graph...\n");
+    checkCudaErrors(cudaGraphCreate(&graph, 0));
+
+    // Default value 1 (true) is requested via cudaGraphCondAssignDefault,
+    // which is what makes the body run at least once
+    cudaGraphConditionalHandle handle;
+    checkCudaErrors(cudaGraphConditionalHandleCreate(&handle, graph, 1, cudaGraphCondAssignDefault));
+
+    cudaGraphNodeParams whileParams = { cudaGraphNodeTypeConditional };
+    whileParams.conditional.handle = handle;
+    whileParams.conditional.type = cudaGraphCondTypeWhile;
+    whileParams.conditional.size = 1;
+    checkCudaErrors(cudaGraphAddNode(&whileNode, graph, NULL, 0, &whileParams));
+
+    // Body graph handed back by cudaGraphAddNode for the conditional node
+    cudaGraph_t loopBody = whileParams.conditional.phGraph_out[0];
+
+    // Record the three body kernels straight into the body graph
+    cudaStream_t bodyStream;
+    checkCudaErrors(cudaStreamCreate(&bodyStream));
+
+    checkCudaErrors(cudaStreamBeginCaptureToGraph(bodyStream, loopBody, nullptr, nullptr, 0, cudaStreamCaptureModeRelaxed));
+    for (int dummy = 0; dummy < 2; ++dummy) {
+        doWhileEmptyKernel<<<1, 1, 0, bodyStream>>>();
+    }
+    doWhileLoopKernel<<<1, 1, 0, bodyStream>>>(dPtr, handle);
+    checkCudaErrors(cudaStreamEndCapture(bodyStream, nullptr));
+    checkCudaErrors(cudaStreamDestroy(bodyStream));
+
+    checkCudaErrors(cudaGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
+
+    // Preset the counter, then run the graph to completion
+    checkCudaErrors(cudaMemset(dPtr, 10, 1)); // Set dPtr to 10
+    printf("Host: Launching graph with loop counter set to 10\n");
+    checkCudaErrors(cudaGraphLaunch(graphExec, 0));
+    checkCudaErrors(cudaDeviceSynchronize());
+
+    // Release everything created above
+    checkCudaErrors(cudaGraphExecDestroy(graphExec));
+    checkCudaErrors(cudaGraphDestroy(graph));
+    checkCudaErrors(cudaFree(dPtr));
+
+    printf("simpleDoWhileGraph: Complete\n\n");
+}
+
+
+/*
+ * Create a graph containing a conditional while loop using stream capture.
+ * This demonstrates how to insert a conditional node into a stream which is
+ * being captured. The graph consists of a kernel node followed by a conditional
+ * while node which contains a single kernel node:
+ *
+ * A -> B [ C ]
+ *
+ * The same kernel will be used for both nodes A and C. This kernel will test
+ * a device memory location and set the condition when the location is non-zero.
+ * We must run the kernel before the loop as well as inside the loop in order
+ * to behave like a while loop. We need to evaluate the device memory location
+ * before the conditional node is evaluated in order to set the condition variable
+ * properly. Because we're using a kernel upstream of the conditional node,
+ * there is no need to use the handle default value to initialize the conditional
+ * value.
+ */
+
+// Used for node A and for body node C: prints the counter, decrements it when
+// non-zero, then publishes the resulting value as the loop condition.
+__global__ void capturedWhileKernel(char *dPtr, cudaGraphConditionalHandle handle)
+{
+    printf("GPU: counter = %d\n", *dPtr);
+    if (*dPtr) {
+        (*dPtr)--;
+    }
+    cudaGraphSetConditional(handle, *dPtr);
+}
+
+// Placeholder kernel captured after the conditional node
+__global__ void capturedWhileEmptyKernel(void)
+{
+    printf("GPU: capturedWhileEmptyKernel()\n");
+    return;
+}
+
+// Builds the graph through stream capture, adding the conditional WHILE node
+// to the graph under capture by hand, then launches it with the counter set
+// to 0 and to 10.
+void capturedWhileGraph(void)
+{
+    cudaGraph_t graph;
+    cudaGraphExec_t graphExec;
+
+    cudaStreamCaptureStatus status;
+    const cudaGraphNode_t *dependencies;
+    size_t numDependencies;
+
+    // Allocate a byte of device memory to use as input
+    char *dPtr;
+    checkCudaErrors(cudaMalloc((void**)&dPtr, 1));
+
+    printf("capturedWhileGraph: Building graph...\n");
+    cudaStream_t captureStream;
+    checkCudaErrors(cudaStreamCreate(&captureStream));
+
+    checkCudaErrors(cudaStreamBeginCapture(captureStream, cudaStreamCaptureModeRelaxed));
+
+    // Obtain the handle of the graph
+    checkCudaErrors(cudaStreamGetCaptureInfo(captureStream, &status, NULL, &graph, &dependencies, &numDependencies));
+
+    // Create the conditional handle
+    cudaGraphConditionalHandle handle;
+    checkCudaErrors(cudaGraphConditionalHandleCreate(&handle, graph));
+
+    // Insert kernel node A
+    capturedWhileKernel<<<1, 1, 0, captureStream>>>(dPtr, handle);
+
+    // Obtain the handle for node A
+    checkCudaErrors(cudaStreamGetCaptureInfo(captureStream, &status, NULL, &graph, &dependencies, &numDependencies));
+
+    // Insert conditional node B
+    cudaGraphNode_t node;
+    cudaGraphNodeParams cParams = { cudaGraphNodeTypeConditional };
+    cParams.conditional.handle = handle;
+    cParams.conditional.type = cudaGraphCondTypeWhile;
+    cParams.conditional.size = 1;
+    checkCudaErrors(cudaGraphAddNode(&node, graph, dependencies, numDependencies, &cParams));
+
+    cudaGraph_t bodyGraph = cParams.conditional.phGraph_out[0];
+
+    // Update stream capture dependencies to account for the node we manually added
+    checkCudaErrors(cudaStreamUpdateCaptureDependencies(captureStream, &node, 1, cudaStreamSetCaptureDependencies));
+
+    // Insert a trailing kernel node that is captured after conditional node B
+    capturedWhileEmptyKernel<<<1, 1, 0, captureStream>>>();
+
+    checkCudaErrors(cudaStreamEndCapture(captureStream, &graph));
+    checkCudaErrors(cudaStreamDestroy(captureStream));
+
+    // Populate conditional body graph using stream capture
+    cudaStream_t bodyStream;
+    checkCudaErrors(cudaStreamCreate(&bodyStream));
+
+    checkCudaErrors(cudaStreamBeginCaptureToGraph(bodyStream, bodyGraph, nullptr, nullptr, 0, cudaStreamCaptureModeRelaxed));
+
+    // Insert kernel node C
+    capturedWhileKernel<<<1, 1, 0, bodyStream>>>(dPtr, handle);
+    checkCudaErrors(cudaStreamEndCapture(bodyStream, nullptr));
+    checkCudaErrors(cudaStreamDestroy(bodyStream));
+
+    checkCudaErrors(cudaGraphInstantiate(&graphExec, graph, NULL, NULL, 0));
+
+    // Initialize device memory and launch the graph
+    // Device memory is zero, so the conditional node will not execute
+    checkCudaErrors(cudaMemset(dPtr, 0, 1)); // Set dPtr to 0
+    printf("Host: Launching graph with loop counter set to 0\n");
+    checkCudaErrors(cudaGraphLaunch(graphExec, 0));
+    checkCudaErrors(cudaDeviceSynchronize());
+
+    // Initialize device memory and launch the graph
+    checkCudaErrors(cudaMemset(dPtr, 10, 1)); // Set dPtr to 10
+    printf("Host: Launching graph with loop counter set to 10\n");
+    checkCudaErrors(cudaGraphLaunch(graphExec, 0));
+    checkCudaErrors(cudaDeviceSynchronize());
+
+    // Cleanup
+    checkCudaErrors(cudaGraphExecDestroy(graphExec));
+    checkCudaErrors(cudaGraphDestroy(graph));
+    checkCudaErrors(cudaFree(dPtr));
+
+    printf("capturedWhileGraph: Complete\n\n");
+}
+
+
+// Entry point: checks the driver version, then runs the three example graphs in order.
+int main(int argc, char **argv) {
+    // Called for its effect only; the returned device id was never used
+    findCudaDevice(argc, (const char **)argv);
+
+    int driverVersion = 0;
+
+    // A failed query must not be mistaken for an old driver
+    checkCudaErrors(cudaDriverGetVersion(&driverVersion));
+    printf("Driver version is: %d.%d\n", driverVersion / 1000,
+           (driverVersion % 100) / 10);
+
+    // 12030 corresponds to 12.3 under the major/minor split printed above
+    if (driverVersion < 12030) {
+        printf("Waiving execution as driver does not support Graph Conditional Nodes\n");
+        exit(EXIT_WAIVED);
+    }
+
+    simpleIfGraph();
+    simpleDoWhileGraph();
+    capturedWhileGraph();
+
+    return 0;
+}
diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2017.sln b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2017.sln
new file mode 100644
index 00000000..857f3194
--- /dev/null
+++ b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2017.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2017
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "graphConditionalNodes", "graphConditionalNodes_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
+		{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2017.vcxproj b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2017.vcxproj
new file mode 100644
index 00000000..67ba0b34
--- /dev/null
+++ b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2017.vcxproj
@@ -0,0 +1,112 @@
+
+
+
+ $(VCTargetsPath)\BuildCustomizations
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ {997E0757-EA74-4A4E-A0FC-47D8C8831A15}
+ graphConditionalNodes_vs2017
+ graphConditionalNodes
+
+
+
+ $([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))
+ $(LatestTargetPlatformVersion)
+
$(WindowsTargetPlatformVersion) + + + + Application + MultiByte + v141 + + + true + + + true + + + + + + + + + + + $(Platform)/$(Configuration)/ + $(IncludePath) + AllRules.ruleset + + + + + ../../../bin/win64/$(Configuration)/ + + + + Level3 + WIN32;_MBCS;%(PreprocessorDefinitions) + ./;$(CudaToolkitDir)/include;../../../Common; + + + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(CudaToolkitLibDir); + $(OutDir)/graphConditionalNodes.exe + + + compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90; + -Xcompiler "/wd 4819" --threads 0 + ./;../../../Common + WIN32 + + + + + Disabled + MultiThreadedDebug + + + true + Default + + + MTd + 64 + + + + + MaxSpeed + MultiThreaded + + + false + UseLinkTimeCodeGeneration + + + MT + 64 + + + + + + + + + + + diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2019.sln b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2019.sln new file mode 100644 index 00000000..5a2d4cdb --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2019.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2019 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "graphConditionalNodes", "graphConditionalNodes_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64 + 
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2019.vcxproj b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2019.vcxproj new file mode 100644 index 00000000..63d2f354 --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2019.vcxproj @@ -0,0 +1,108 @@ + + + + $(VCTargetsPath)\BuildCustomizations + + + + Debug + x64 + + + Release + x64 + + + + {997E0757-EA74-4A4E-A0FC-47D8C8831A15} + graphConditionalNodes_vs2019 + graphConditionalNodes + + + + + Application + MultiByte + v142 + 10.0 + + + true + + + true + + + + + + + + + + + $(Platform)/$(Configuration)/ + $(IncludePath) + AllRules.ruleset + + + + + ../../../bin/win64/$(Configuration)/ + + + + Level3 + WIN32;_MBCS;%(PreprocessorDefinitions) + ./;$(CudaToolkitDir)/include;../../../Common; + + + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(CudaToolkitLibDir); + $(OutDir)/graphConditionalNodes.exe + + + compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90; + -Xcompiler "/wd 4819" --threads 0 + ./;../../../Common + WIN32 + + + + + Disabled + MultiThreadedDebug + + + true + Default + + + MTd + 64 + + + + + MaxSpeed + MultiThreaded + + + false + UseLinkTimeCodeGeneration + + + MT + 64 + + + + + + + + + + + diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2022.sln b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2022.sln new file mode 
100644 index 00000000..2245b3b5 --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2022.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2022 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "graphConditionalNodes", "graphConditionalNodes_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64 + {997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2022.vcxproj b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2022.vcxproj new file mode 100644 index 00000000..06c23e44 --- /dev/null +++ b/Samples/3_CUDA_Features/graphConditionalNodes/graphConditionalNodes_vs2022.vcxproj @@ -0,0 +1,108 @@ + + + + $(VCTargetsPath)\BuildCustomizations + + + + Debug + x64 + + + Release + x64 + + + + {997E0757-EA74-4A4E-A0FC-47D8C8831A15} + graphConditionalNodes_vs2022 + graphConditionalNodes + + + + + Application + MultiByte + v143 + 10.0 + + + true + + + true + + + + + + + + + + + $(Platform)/$(Configuration)/ + $(IncludePath) + AllRules.ruleset + + + + + ../../../bin/win64/$(Configuration)/ + + + + Level3 + WIN32;_MBCS;%(PreprocessorDefinitions) + ./;$(CudaToolkitDir)/include;../../../Common; + + + Console + 
cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(CudaToolkitLibDir); + $(OutDir)/graphConditionalNodes.exe + + + compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90; + -Xcompiler "/wd 4819" --threads 0 + ./;../../../Common + WIN32 + + + + + Disabled + MultiThreadedDebug + + + true + Default + + + MTd + 64 + + + + + MaxSpeed + MultiThreaded + + + false + UseLinkTimeCodeGeneration + + + MT + 64 + + + + + + + + + + +