diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb4ff376..661987b3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,22 @@
* `simpleD3D9Texture`
* `SLID3D10Texture`
* `VFlockingD3D10`
+ * `8_Platform_Specific/Tegra`
+ * Temporarily removed the following two samples pending updates:
+ * `nbody_screen` demonstrating the nbody sample in QNX
+ * `simpleGLES_screen` demonstrating GLES interop in QNX
+* Moved the following Tegra-specific samples to a dedicated subdirectory: `8_Platform_Specific/Tegra`
+ * `EGLSync_CUDAEvent_Interop`
+ * `cuDLAErrorReporting`
+ * `cuDLAHybridMode`
+ * `cuDLALayerwiseStatsHybrid`
+ * `cuDLALayerwiseStatsStandalone`
+ * `cuDLAStandaloneMode`
+ * `cudaNvSciNvMedia`
+ * `fluidsGLES`
+ * `nbody_opengles`
+ * `simpleGLES`
+ * `simpleGLES_EGLOutput`
diff --git a/Samples/0_Introduction/vectorAddMMAP/Makefile b/Samples/0_Introduction/vectorAddMMAP/Makefile
deleted file mode 100644
index 8bddeb07..00000000
--- a/Samples/0_Introduction/vectorAddMMAP/Makefile
+++ /dev/null
@@ -1,460 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
- $(info WARNING - x86_64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=x86_64 instead)
- TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
- $(info WARNING - ARMv7 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=armv7l instead)
- TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
- $(info WARNING - aarch64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=aarch64 instead)
- TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
- $(info WARNING - ppc64le variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=ppc64le instead)
- TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
- $(info WARNING - GCC variable has been deprecated)
- $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
- HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
- $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
- ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
- TARGET_SIZE := 64
- else ifneq (,$(filter $(TARGET_ARCH),armv7l))
- TARGET_SIZE := 32
- endif
- else
- TARGET_SIZE := $(shell getconf LONG_BIT)
- endif
-else
- $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
- ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
- HOST_ARCH := sbsa
- TARGET_ARCH := sbsa
- endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
- $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
- endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
- TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
- $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
- ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
- HOST_COMPILER ?= clang++
- endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- ifeq ($(TARGET_OS),linux)
- HOST_COMPILER ?= arm-linux-gnueabihf-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
- else ifeq ($(TARGET_OS),android)
- HOST_COMPILER ?= arm-linux-androideabi-g++
- endif
- else ifeq ($(TARGET_ARCH),aarch64)
- ifeq ($(TARGET_OS), linux)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
- else ifeq ($(TARGET_OS), android)
- HOST_COMPILER ?= aarch64-linux-android-clang++
- endif
- else ifeq ($(TARGET_ARCH),sbsa)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_ARCH),ppc64le)
- HOST_COMPILER ?= powerpc64le-linux-gnu-g++
- endif
-endif
-HOST_COMPILER ?= g++
-NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS := -m${TARGET_SIZE}
-CCFLAGS :=
-LDFLAGS :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
- ifneq ($(filter /%,$(HOST_COMPILER)),)
- ifneq ($(findstring gcc,$(HOST_COMPILER)),)
- ifneq ($(GCC_PATH),$(HOST_COMPILER))
- LDFLAGS += -lstdc++
- endif
- endif
- endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
- LDFLAGS += -rpath $(CUDA_PATH)/lib
- CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
- CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
- LDFLAGS += -pie
- CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- NVCCFLAGS += -D_QNX_SOURCE
- NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
- CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
- LDFLAGS += -lsocket
- LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
- CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
- ifdef TARGET_OVERRIDE
- LDFLAGS += -lslog2
- endif
-
- ifneq ($(TARGET_FS),)
- LDFLAGS += -L$(TARGET_FS)/usr/lib
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
- LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
- CCFLAGS += -I$(TARGET_FS)/../include
- endif
- endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
- NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
- CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
- CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
- NVCCFLAGS += -g -G
- BUILD_TYPE := debug
-else
- BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
-
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on Mac OSX
-ifeq ($(TARGET_OS),darwin)
- $(info >>> WARNING - vectorAddMMAP is not supported on Mac OSX - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on ARMv7
-ifeq ($(TARGET_ARCH),armv7l)
- $(info >>> WARNING - vectorAddMMAP is not supported on ARMv7 - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on aarch64
-ifeq ($(TARGET_ARCH),aarch64)
- $(info >>> WARNING - vectorAddMMAP is not supported on aarch64 - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on sbsa
-ifeq ($(TARGET_ARCH),sbsa)
- $(info >>> WARNING - vectorAddMMAP is not supported on sbsa - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-FATBIN_FILE := vectorAdd_kernel${TARGET_SIZE}.fatbin
-
-# Gencode arguments
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87 90
-else
-SMS ?= 50 52 60 61 70 75 80 86 89 90
-endif
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-ifeq ($(SMS),)
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-# Generate PTX code from SM 53
-GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53
-else
-# Generate PTX code from SM 50
-GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50
-endif
-endif
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
- ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
-else
- ifeq ($(TARGET_ARCH),x86_64)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
- CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
- CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
- endif
-
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
- endif
-
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
- endif
-
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
- endif
-
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
- endif
-
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
- endif
-
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
- endif
-
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
- ifdef TARGET_OVERRIDE
- CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
- endif
- endif
-
- ifeq ($(TARGET_ARCH),ppc64le)
- CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
- endif
-
- ifeq ($(HOST_ARCH),ppc64le)
- CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
- endif
-
- CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
- ifeq ("$(CUDALIB)","")
- $(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
- SAMPLE_ENABLED := 0
- else
- CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
- LIBRARIES += -L$(CUDALIB) -lcuda
- endif
-endif
-
-ALL_CCFLAGS += --threads 0 --std=c++11
-
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
-################################################################################
-
-# Target rules
-all: build
-
-build: vectorAddMMAP $(FATBIN_FILE)
-
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
- @echo "Sample will be waived due to the above missing dependencies"
-else
- @echo "Sample is ready - all dependencies have been met"
-endif
-
-$(FATBIN_FILE): vectorAdd_kernel.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -fatbin $<
- $(EXEC) mkdir -p data
- $(EXEC) cp -f $@ ./data
- $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
- $(EXEC) cp -f $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-multidevicealloc_memmap.o:multidevicealloc_memmap.cpp
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-vectorAddMMAP.o:vectorAddMMAP.cpp
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-vectorAddMMAP: multidevicealloc_memmap.o vectorAddMMAP.o
- $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
- $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
- $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
- $(EXEC) ./vectorAddMMAP
-
-testrun: build
-
-clean:
- rm -f vectorAddMMAP multidevicealloc_memmap.o vectorAddMMAP.o data/$(FATBIN_FILE) $(FATBIN_FILE)
- rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/vectorAddMMAP
-
- rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/$(FATBIN_FILE)
-
-clobber: clean
diff --git a/Samples/2_Concepts_and_Techniques/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/CMakeLists.txt
index 69caf48d..63cceff7 100644
--- a/Samples/2_Concepts_and_Techniques/CMakeLists.txt
+++ b/Samples/2_Concepts_and_Techniques/CMakeLists.txt
@@ -1,6 +1,5 @@
add_subdirectory(EGLStream_CUDA_CrossGPU)
add_subdirectory(EGLStream_CUDA_Interop)
-#add_subdirectory(EGLSync_CUDAEvent_Interop)
add_subdirectory(FunctionPointers)
add_subdirectory(MC_EstimatePiInlineP)
add_subdirectory(MC_EstimatePiInlineQ)
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile b/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile
deleted file mode 100644
index 58a1f676..00000000
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/Makefile
+++ /dev/null
@@ -1,357 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
- $(info WARNING - x86_64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=x86_64 instead)
- TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
- $(info WARNING - ARMv7 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=armv7l instead)
- TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
- $(info WARNING - aarch64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=aarch64 instead)
- TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
- $(info WARNING - ppc64le variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=ppc64le instead)
- TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
- $(info WARNING - GCC variable has been deprecated)
- $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
- HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
- $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
- ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
- TARGET_SIZE := 64
- else ifneq (,$(filter $(TARGET_ARCH),armv7l))
- TARGET_SIZE := 32
- endif
- else
- TARGET_SIZE := $(shell getconf LONG_BIT)
- endif
-else
- $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
- ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
- HOST_ARCH := sbsa
- TARGET_ARCH := sbsa
- endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
- $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
- endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
- TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
- $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
- ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
- HOST_COMPILER ?= clang++
- endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- ifeq ($(TARGET_OS),linux)
- HOST_COMPILER ?= arm-linux-gnueabihf-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
- else ifeq ($(TARGET_OS),android)
- HOST_COMPILER ?= arm-linux-androideabi-g++
- endif
- else ifeq ($(TARGET_ARCH),aarch64)
- ifeq ($(TARGET_OS), linux)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
- else ifeq ($(TARGET_OS), android)
- HOST_COMPILER ?= aarch64-linux-android-clang++
- endif
- else ifeq ($(TARGET_ARCH),sbsa)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_ARCH),ppc64le)
- HOST_COMPILER ?= powerpc64le-linux-gnu-g++
- endif
-endif
-HOST_COMPILER ?= g++
-NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS := -m${TARGET_SIZE}
-CCFLAGS :=
-LDFLAGS :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
- ifneq ($(filter /%,$(HOST_COMPILER)),)
- ifneq ($(findstring gcc,$(HOST_COMPILER)),)
- ifneq ($(GCC_PATH),$(HOST_COMPILER))
- LDFLAGS += -lstdc++
- endif
- endif
- endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
- LDFLAGS += -rpath $(CUDA_PATH)/lib
- CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
- CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
- LDFLAGS += -pie
- CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- NVCCFLAGS += -D_QNX_SOURCE
- NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
- CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
- LDFLAGS += -lsocket
- LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
- CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
- ifdef TARGET_OVERRIDE
- LDFLAGS += -lslog2
- endif
-
- ifneq ($(TARGET_FS),)
- LDFLAGS += -L$(TARGET_FS)/usr/lib
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
- LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
- CCFLAGS += -I$(TARGET_FS)/../include
- endif
- endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
- NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
- CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
- CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
- NVCCFLAGS += -g -G
- BUILD_TYPE := debug
-else
- BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-SAMPLE_ENABLED := 1
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Gencode arguments
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87 90
-else
-SMS ?= 50 52 60 61 70 75 80 86 89 90
-endif
-
-ifeq ($(SMS),)
-$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
-SAMPLE_ENABLED := 0
-endif
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += --threads 0 --std=c++11
-
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
-################################################################################
-
-# Target rules
-all: build
-
-build: threadFenceReduction
-
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
- @echo "Sample will be waived due to the above missing dependencies"
-else
- @echo "Sample is ready - all dependencies have been met"
-endif
-
-threadFenceReduction.o:threadFenceReduction.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-threadFenceReduction: threadFenceReduction.o
- $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
- $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
- $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
- $(EXEC) ./threadFenceReduction
-
-testrun: build
-
-clean:
- rm -f threadFenceReduction threadFenceReduction.o
- rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/threadFenceReduction
-
-clobber: clean
diff --git a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml b/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml
deleted file mode 100644
index 860bc4b9..00000000
--- a/Samples/2_Concepts_and_Techniques/threadFenceReduction/NsightEclipse.xml
+++ /dev/null
@@ -1,79 +0,0 @@
-
-
-
- threadFenceReduction
-
- cudaMemcpy
- cudaFree
- cudaDeviceSynchronize
- cudaMalloc
- cudaGetDeviceProperties
-
-
- whole
-
- ./
- ../
- ../../../Common
-
-
- Cooperative Groups
- Data-Parallel Algorithms
- Performance Strategies
-
-
- reduction
-
-
-
-
-
- true
- threadFenceReduction.cu
-
- 1:CUDA Advanced Topics
- 1:Data-Parallel Algorithms
- 1:Performance Strategies
-
- sm50
- sm52
- sm53
- sm60
- sm61
- sm70
- sm72
- sm75
- sm80
- sm86
- sm87
- sm89
- sm90
-
-
- x86_64
- linux
-
-
- windows7
-
-
- x86_64
- macosx
-
-
- arm
-
-
- sbsa
-
-
- ppc64le
- linux
-
-
-
- all
-
- threadFenceReduction
- exe
-
diff --git a/Samples/7_libNVVM/Makefile b/Samples/7_libNVVM/Makefile
deleted file mode 100644
index 9c890257..00000000
--- a/Samples/7_libNVVM/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-all: build
-
-.PHONY: build
-build:
- @mkdir -p build ; \
- cd build && \
- cmake -DCMAKE_INSTALL_PREFIX=../install .. && $(MAKE) install
-
-.PHONY: testrun
-testrun: build
- +@$(MAKE) -s -C build test
-
-# The cuda-samples toplevel Makefile will invoke the CMake generated 'clean'
-# target for each libNVVM samples.
-clean:
-
-.PHONY: clobber
-clobber:
- $(RM) -r install
- $(RM) -r build
diff --git a/Samples/8_Platform_Specific/Tegra/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/CMakeLists.txt
index e71a0a04..0872a885 100644
--- a/Samples/8_Platform_Specific/Tegra/CMakeLists.txt
+++ b/Samples/8_Platform_Specific/Tegra/CMakeLists.txt
@@ -4,6 +4,7 @@ add_subdirectory(cuDLAHybridMode)
add_subdirectory(cuDLALayerwiseStatsHybrid)
add_subdirectory(cuDLALayerwiseStatsStandalone)
add_subdirectory(cuDLAStandaloneMode)
+#add_subdirectory(EGLSync_CUDAEvent_Interop)
add_subdirectory(fluidsGLES)
add_subdirectory(nbody_opengles)
add_subdirectory(simpleGLES)
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/c_cpp_properties.json
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/.vscode/extensions.json b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/extensions.json
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/.vscode/extensions.json
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/extensions.json
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/launch.json
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/.vscode/launch.json
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/launch.json
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/.vscode/tasks.json b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/tasks.json
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/.vscode/tasks.json
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/.vscode/tasks.json
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/CMakeLists.txt
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/CMakeLists.txt
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/EGLSync_CUDAEvent_Interop.cu
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/Makefile
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/Makefile
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/Makefile
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/NsightEclipse.xml
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/README.md
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/README.md
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/README.md
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/egl_common.h b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/egl_common.h
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/egl_common.h
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/egl_common.h
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/findegl.mk
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/findegl.mk
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/findegl.mk
diff --git a/Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/graphics_interface.h b/Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/graphics_interface.h
similarity index 100%
rename from Samples/2_Concepts_and_Techniques/EGLSync_CUDAEvent_Interop/graphics_interface.h
rename to Samples/8_Platform_Specific/Tegra/EGLSync_CUDAEvent_Interop/graphics_interface.h
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/c_cpp_properties.json b/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/c_cpp_properties.json
deleted file mode 100644
index f0066b0f..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/c_cpp_properties.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
- "configurations": [
- {
- "name": "Linux",
- "includePath": [
- "${workspaceFolder}/**",
- "${workspaceFolder}/../../../Common"
- ],
- "defines": [],
- "compilerPath": "/usr/local/cuda/bin/nvcc",
- "cStandard": "gnu17",
- "cppStandard": "gnu++14",
- "intelliSenseMode": "linux-gcc-x64",
- "configurationProvider": "ms-vscode.makefile-tools"
- }
- ],
- "version": 4
-}
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/extensions.json b/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/extensions.json
deleted file mode 100644
index c7eb54dc..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/extensions.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
- "recommendations": [
- "nvidia.nsight-vscode-edition",
- "ms-vscode.cpptools",
- "ms-vscode.makefile-tools"
- ]
-}
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/launch.json
deleted file mode 100644
index 65b45c08..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/launch.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
- "configurations": [
- {
- "name": "CUDA C++: Launch",
- "type": "cuda-gdb",
- "request": "launch",
- "program": "${workspaceFolder}/nbody_screen"
- }
- ]
-}
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/tasks.json b/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/tasks.json
deleted file mode 100644
index 4509aeb1..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/tasks.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
- "version": "2.0.0",
- "tasks": [
- {
- "label": "sample",
- "type": "shell",
- "command": "make dbg=1",
- "problemMatcher": ["$nvcc"],
- "group": {
- "kind": "build",
- "isDefault": true
- }
- }
- ]
-}
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/nbody_screen/CMakeLists.txt
deleted file mode 100644
index 73e9b2dd..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/CMakeLists.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-cmake_minimum_required(VERSION 3.20)
-
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules")
-
-project(nbody_screen LANGUAGES C CXX CUDA)
-
-find_package(CUDAToolkit REQUIRED)
-
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-
-set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
- # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
-endif()
-
-# Include directories and libraries
-include_directories(../../../Common)
-
-# Source file
-# Add target for nbody_screen
-add_executable(nbody_screen nbody_screen.cu)
-
-target_compile_options(nbody_screen PRIVATE $<$:--extended-lambda>)
-
-target_compile_features(nbody_screen PRIVATE cxx_std_17 cuda_std_17)
-
-set_target_properties(nbody_screen PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/Makefile b/Samples/8_Platform_Specific/Tegra/nbody_screen/Makefile
deleted file mode 100644
index 74278b08..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/Makefile
+++ /dev/null
@@ -1,405 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
- $(info WARNING - x86_64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=x86_64 instead)
- TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
- $(info WARNING - ARMv7 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=armv7l instead)
- TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
- $(info WARNING - aarch64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=aarch64 instead)
- TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
- $(info WARNING - ppc64le variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=ppc64le instead)
- TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
- $(info WARNING - GCC variable has been deprecated)
- $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
- HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
- $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
- ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
- TARGET_SIZE := 64
- else ifneq (,$(filter $(TARGET_ARCH),armv7l))
- TARGET_SIZE := 32
- endif
- else
- TARGET_SIZE := $(shell getconf LONG_BIT)
- endif
-else
- $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
- ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
- HOST_ARCH := sbsa
- TARGET_ARCH := sbsa
- endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
- $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
- endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
- TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
- $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
- ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
- HOST_COMPILER ?= clang++
- endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- ifeq ($(TARGET_OS),linux)
- HOST_COMPILER ?= arm-linux-gnueabihf-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
- else ifeq ($(TARGET_OS),android)
- HOST_COMPILER ?= arm-linux-androideabi-g++
- endif
- else ifeq ($(TARGET_ARCH),aarch64)
- ifeq ($(TARGET_OS), linux)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
- else ifeq ($(TARGET_OS), android)
- HOST_COMPILER ?= aarch64-linux-android-clang++
- endif
- else ifeq ($(TARGET_ARCH),sbsa)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_ARCH),ppc64le)
- HOST_COMPILER ?= powerpc64le-linux-gnu-g++
- endif
-endif
-HOST_COMPILER ?= g++
-NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS := -m${TARGET_SIZE}
-CCFLAGS :=
-LDFLAGS :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
- ifneq ($(filter /%,$(HOST_COMPILER)),)
- ifneq ($(findstring gcc,$(HOST_COMPILER)),)
- ifneq ($(GCC_PATH),$(HOST_COMPILER))
- LDFLAGS += -lstdc++
- endif
- endif
- endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
- LDFLAGS += -rpath $(CUDA_PATH)/lib
- CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
- CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
- LDFLAGS += -pie
- CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- NVCCFLAGS += -D_QNX_SOURCE
- NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
- CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
- LDFLAGS += -lsocket
- LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
- CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
- ifdef TARGET_OVERRIDE
- LDFLAGS += -lslog2
- endif
-
- ifneq ($(TARGET_FS),)
- LDFLAGS += -L$(TARGET_FS)/usr/lib
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
- LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
- CCFLAGS += -I$(TARGET_FS)/../include
- endif
- endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
- NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
- CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
- CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
- NVCCFLAGS += -g -G
- BUILD_TYPE := debug
-else
- BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on Linux x86_64
-ifeq ($(TARGET_OS),linux)
- ifeq ($(TARGET_ARCH),x86_64)
- $(info >>> WARNING - nbody_screen is not supported on Linux x86_64 - waiving sample <<<)
- SAMPLE_ENABLED := 0
- endif
-endif
-
-# This sample is not supported on Mac OSX
-ifeq ($(TARGET_OS),darwin)
- $(info >>> WARNING - nbody_screen is not supported on Mac OSX - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on ARMv7
-ifeq ($(TARGET_ARCH),armv7l)
- $(info >>> WARNING - nbody_screen is not supported on ARMv7 - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on aarch64
-ifeq ($(TARGET_ARCH),aarch64)
- ifneq ($(TARGET_OS),qnx)
- $(info >>> WARNING - nbody_screen is not supported on aarch64-$(TARGET_OS) - waiving sample <<<)
- SAMPLE_ENABLED := 0
- endif
-endif
-# This sample is not supported on sbsa
-ifeq ($(TARGET_ARCH),sbsa)
- $(info >>> WARNING - nbody_screen is not supported on sbsa - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Makefile include to help find GLES Libraries
-include ./findgleslib.mk
-
-# OpenGLES specific libraries
-ifneq ($(TARGET_OS),darwin)
- LIBRARIES += $(GLESLINK) -lGLESv2 -lEGL -lscreen
-endif
-
-# Gencode arguments
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87 90
-else
-SMS ?= 50 52 60 61 70 75 80 86 89 90
-endif
-
-ifeq ($(SMS),)
-$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
-SAMPLE_ENABLED := 0
-endif
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += -ftz=true --threads 0 --std=c++11
-
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
-################################################################################
-
-# Target rules
-all: build
-
-build: nbody_screen
-
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
- @echo "Sample will be waived due to the above missing dependencies"
-else
- @echo "Sample is ready - all dependencies have been met"
-endif
-
-bodysystemcuda.o:bodysystemcuda.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-nbody_screen.o:nbody_screen.cpp
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-render_particles.o:render_particles.cpp
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-nbody_screen: bodysystemcuda.o nbody_screen.o render_particles.o
- $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
- $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
- $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
- $(EXEC) ./nbody_screen
-
-testrun: build
- $(EXEC) ./nbody_screen -benchmark -compare -cpu
-
-clean:
- rm -f nbody_screen bodysystemcuda.o nbody_screen.o render_particles.o
- rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/nbody_screen
-
-clobber: clean
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/NsightEclipse.xml b/Samples/8_Platform_Specific/Tegra/nbody_screen/NsightEclipse.xml
deleted file mode 100644
index 28d52f31..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/NsightEclipse.xml
+++ /dev/null
@@ -1,92 +0,0 @@
-
-
-
- nbody_screen
-
- -ftz=true
-
-
- cudaGraphicsUnmapResources
- cudaSetDeviceFlags
- cudaGraphicsResourceSetMapFlags
- cudaGraphicsResourceGetMappedPointer
- cudaGraphicsMapResources
- cudaSetDevice
- cudaEventSynchronize
- cudaGetDeviceProperties
- cudaDeviceSynchronize
- cudaEventRecord
- cudaGetDevice
- cudaMemcpyToSymbol
- cudaStreamQuery
- cudaEventDestroy
- cudaEventElapsedTime
- cudaGetDeviceCount
- cudaEventCreate
-
-
- whole
-
- ./galaxy_20K.bin
-
-
- ./
- ../
- ../../../Common
-
-
- Graphics Interop
- Data Parallel Algorithms
- Physically-Based Simulation
-
-
- CUDA
- GPGPU
- n-body
- simulation
- astrophysics
- OpenGL ES
-
-
-
-
-
- true
- nbody.cpp
-
- -benchmark -compare -cpu
-
-
- screen
- GLES
-
-
- 2:Graphics Interop
- 1:CUDA Advanced Topics
- 1:Data-Parallel Algorithms
- 3:Physically-Based Simulation
-
- sm50
- sm52
- sm53
- sm60
- sm61
- sm70
- sm72
- sm75
- sm80
- sm86
- sm87
- sm89
- sm90
-
-
- qnx
-
-
-
- all
-
- CUDA N-Body Simulation on Screen
- exe
-
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/README.md b/Samples/8_Platform_Specific/Tegra/nbody_screen/README.md
deleted file mode 100644
index 3d8a057a..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/README.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# nbody_screen - CUDA N-Body Simulation on Screen
-
-## Description
-
-This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.
-
-## Key Concepts
-
-Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
-
-## Supported SM Architectures
-
-[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
-
-## Supported OSes
-
-QNX
-
-## Supported CPU Architecture
-
-aarch64
-
-## CUDA APIs involved
-
-### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
-
-## Dependencies needed to build/run
-[screen](../../../README.md#screen), [GLES](../../../README.md#gles)
-
-## Prerequisites
-
-Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
-Make sure the dependencies mentioned in [Dependencies]() section above are installed.
-
-## References (for more details)
-
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystem.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystem.h
deleted file mode 100644
index 2d11c1ed..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystem.h
+++ /dev/null
@@ -1,286 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __BODYSYSTEM_H__
-#define __BODYSYSTEM_H__
-
-#include
-
-enum NBodyConfig {
- NBODY_CONFIG_RANDOM,
- NBODY_CONFIG_SHELL,
- NBODY_CONFIG_EXPAND,
- NBODY_NUM_CONFIGS
-};
-
-enum BodyArray {
- BODYSYSTEM_POSITION,
- BODYSYSTEM_VELOCITY,
-};
-
-template
-struct vec3 {
- typedef float Type;
-}; // dummy
-template <>
-struct vec3 {
- typedef float3 Type;
-};
-template <>
-struct vec3 {
- typedef double3 Type;
-};
-
-template
-struct vec4 {
- typedef float Type;
-}; // dummy
-template <>
-struct vec4 {
- typedef float4 Type;
-};
-template <>
-struct vec4 {
- typedef double4 Type;
-};
-
-class string;
-
-// BodySystem abstract base class
-template
-class BodySystem {
- public: // methods
- BodySystem(int numBodies) {}
- virtual ~BodySystem() {}
-
- virtual void loadTipsyFile(const std::string &filename) = 0;
-
- virtual void update(T deltaTime) = 0;
-
- virtual void setSoftening(T softening) = 0;
- virtual void setDamping(T damping) = 0;
-
- virtual T *getArray(BodyArray array) = 0;
- virtual void setArray(BodyArray array, const T *data) = 0;
-
- virtual unsigned int getCurrentReadBuffer() const = 0;
-
- virtual unsigned int getNumBodies() const = 0;
-
- virtual void synchronizeThreads() const {};
-
- protected: // methods
- BodySystem() {} // default constructor
-
- virtual void _initialize(int numBodies) = 0;
- virtual void _finalize() = 0;
-};
-
-inline float3 scalevec(float3 &vector, float scalar) {
- float3 rt = vector;
- rt.x *= scalar;
- rt.y *= scalar;
- rt.z *= scalar;
- return rt;
-}
-
-inline float normalize(float3 &vector) {
- float dist =
- sqrtf(vector.x * vector.x + vector.y * vector.y + vector.z * vector.z);
-
- if (dist > 1e-6) {
- vector.x /= dist;
- vector.y /= dist;
- vector.z /= dist;
- }
-
- return dist;
-}
-
-inline float dot(float3 v0, float3 v1) {
- return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
-}
-
-inline float3 cross(float3 v0, float3 v1) {
- float3 rt;
- rt.x = v0.y * v1.z - v0.z * v1.y;
- rt.y = v0.z * v1.x - v0.x * v1.z;
- rt.z = v0.x * v1.y - v0.y * v1.x;
- return rt;
-}
-
-// utility function
-template
-void randomizeBodies(NBodyConfig config, T *pos, T *vel, float *color,
- float clusterScale, float velocityScale, int numBodies,
- bool vec4vel) {
- switch (config) {
- default:
- case NBODY_CONFIG_RANDOM: {
- float scale = clusterScale * std::max(1.0f, numBodies / (1024.0f));
- float vscale = velocityScale * scale;
-
- int p = 0, v = 0;
- int i = 0;
-
- while (i < numBodies) {
- float3 point;
- // const int scale = 16;
- point.x = rand() / (float)RAND_MAX * 2 - 1;
- point.y = rand() / (float)RAND_MAX * 2 - 1;
- point.z = rand() / (float)RAND_MAX * 2 - 1;
- float lenSqr = dot(point, point);
-
- if (lenSqr > 1) continue;
-
- float3 velocity;
- velocity.x = rand() / (float)RAND_MAX * 2 - 1;
- velocity.y = rand() / (float)RAND_MAX * 2 - 1;
- velocity.z = rand() / (float)RAND_MAX * 2 - 1;
- lenSqr = dot(velocity, velocity);
-
- if (lenSqr > 1) continue;
-
- pos[p++] = point.x * scale; // pos.x
- pos[p++] = point.y * scale; // pos.y
- pos[p++] = point.z * scale; // pos.z
- pos[p++] = 1.0f; // mass
-
- vel[v++] = velocity.x * vscale; // pos.x
- vel[v++] = velocity.y * vscale; // pos.x
- vel[v++] = velocity.z * vscale; // pos.x
-
- if (vec4vel) vel[v++] = 1.0f; // inverse mass
-
- i++;
- }
- } break;
-
- case NBODY_CONFIG_SHELL: {
- float scale = clusterScale;
- float vscale = scale * velocityScale;
- float inner = 2.5f * scale;
- float outer = 4.0f * scale;
-
- int p = 0, v = 0;
- int i = 0;
-
- while (i < numBodies) // for(int i=0; i < numBodies; i++)
- {
- float x, y, z;
- x = rand() / (float)RAND_MAX * 2 - 1;
- y = rand() / (float)RAND_MAX * 2 - 1;
- z = rand() / (float)RAND_MAX * 2 - 1;
-
- float3 point = {x, y, z};
- float len = normalize(point);
-
- if (len > 1) continue;
-
- pos[p++] =
- point.x * (inner + (outer - inner) * rand() / (float)RAND_MAX);
- pos[p++] =
- point.y * (inner + (outer - inner) * rand() / (float)RAND_MAX);
- pos[p++] =
- point.z * (inner + (outer - inner) * rand() / (float)RAND_MAX);
- pos[p++] = 1.0f;
-
- x = 0.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
- y = 0.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
- z = 1.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
- float3 axis = {x, y, z};
- normalize(axis);
-
- if (1 - dot(point, axis) < 1e-6) {
- axis.x = point.y;
- axis.y = point.x;
- normalize(axis);
- }
-
- // if (point.y < 0) axis = scalevec(axis, -1);
- float3 vv = {(float)pos[4 * i], (float)pos[4 * i + 1],
- (float)pos[4 * i + 2]};
- vv = cross(vv, axis);
- vel[v++] = vv.x * vscale;
- vel[v++] = vv.y * vscale;
- vel[v++] = vv.z * vscale;
-
- if (vec4vel) vel[v++] = 1.0f;
-
- i++;
- }
- } break;
-
- case NBODY_CONFIG_EXPAND: {
- float scale = clusterScale * numBodies / (1024.f);
-
- if (scale < 1.0f) scale = clusterScale;
-
- float vscale = scale * velocityScale;
-
- int p = 0, v = 0;
-
- for (int i = 0; i < numBodies;) {
- float3 point;
-
- point.x = rand() / (float)RAND_MAX * 2 - 1;
- point.y = rand() / (float)RAND_MAX * 2 - 1;
- point.z = rand() / (float)RAND_MAX * 2 - 1;
-
- float lenSqr = dot(point, point);
-
- if (lenSqr > 1) continue;
-
- pos[p++] = point.x * scale; // pos.x
- pos[p++] = point.y * scale; // pos.y
- pos[p++] = point.z * scale; // pos.z
- pos[p++] = 1.0f; // mass
- vel[v++] = point.x * vscale; // pos.x
- vel[v++] = point.y * vscale; // pos.x
- vel[v++] = point.z * vscale; // pos.x
-
- if (vec4vel) vel[v++] = 1.0f; // inverse mass
-
- i++;
- }
- } break;
- }
-
- if (color) {
- int v = 0;
-
- for (int i = 0; i < numBodies; i++) {
- // const int scale = 16;
- color[v++] = rand() / (float)RAND_MAX;
- color[v++] = rand() / (float)RAND_MAX;
- color[v++] = rand() / (float)RAND_MAX;
- color[v++] = 1.0f;
- }
- }
-}
-
-#endif // __BODYSYSTEM_H__
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu.h
deleted file mode 100644
index 700e385a..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __BODYSYSTEMCPU_H__
-#define __BODYSYSTEMCPU_H__
-
-#include "bodysystem.h"
-
-// CPU Body System
-template
-class BodySystemCPU : public BodySystem {
- public:
- BodySystemCPU(int numBodies);
- virtual ~BodySystemCPU();
-
- virtual void loadTipsyFile(const std::string &filename);
-
- virtual void update(T deltaTime);
-
- virtual void setSoftening(T softening) {
- m_softeningSquared = softening * softening;
- }
- virtual void setDamping(T damping) { m_damping = damping; }
-
- virtual T *getArray(BodyArray array);
- virtual void setArray(BodyArray array, const T *data);
-
- virtual unsigned int getCurrentReadBuffer() const { return 0; }
-
- virtual unsigned int getNumBodies() const { return m_numBodies; }
-
- protected: // methods
- BodySystemCPU() {} // default constructor
-
- virtual void _initialize(int numBodies);
- virtual void _finalize();
-
- void _computeNBodyGravitation();
- void _integrateNBodySystem(T deltaTime);
-
- protected: // data
- int m_numBodies;
- bool m_bInitialized;
-
- T *m_pos;
- T *m_vel;
- T *m_force;
-
- T m_softeningSquared;
- T m_damping;
-};
-
-#include "bodysystemcpu_impl.h"
-
-#endif // __BODYSYSTEMCPU_H__
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu_impl.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu_impl.h
deleted file mode 100644
index 14130064..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu_impl.h
+++ /dev/null
@@ -1,280 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "bodysystemcpu.h"
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include "tipsy.h"
-
-#ifdef OPENMP
-#include
-#endif
-
-template
-BodySystemCPU::BodySystemCPU(int numBodies)
- : m_numBodies(numBodies),
- m_bInitialized(false),
- m_force(0),
- m_softeningSquared(.00125f),
- m_damping(0.995f) {
- m_pos = 0;
- m_vel = 0;
-
- _initialize(numBodies);
-}
-
-template
-BodySystemCPU::~BodySystemCPU() {
- _finalize();
- m_numBodies = 0;
-}
-
-template
-void BodySystemCPU::_initialize(int numBodies) {
- assert(!m_bInitialized);
-
- m_numBodies = numBodies;
-
- m_pos = new T[m_numBodies * 4];
- m_vel = new T[m_numBodies * 4];
- m_force = new T[m_numBodies * 3];
-
- memset(m_pos, 0, m_numBodies * 4 * sizeof(T));
- memset(m_vel, 0, m_numBodies * 4 * sizeof(T));
- memset(m_force, 0, m_numBodies * 3 * sizeof(T));
-
- m_bInitialized = true;
-}
-
-template
-void BodySystemCPU::_finalize() {
- assert(m_bInitialized);
-
- delete[] m_pos;
- delete[] m_vel;
- delete[] m_force;
-
- m_bInitialized = false;
-}
-
-template
-void BodySystemCPU::loadTipsyFile(const std::string &filename) {
- if (m_bInitialized) _finalize();
-
- vector::Type> positions;
- vector::Type> velocities;
- vector ids;
-
- int nBodies = 0;
- int nFirst = 0, nSecond = 0, nThird = 0;
-
- read_tipsy_file(positions, velocities, ids, filename, nBodies, nFirst,
- nSecond, nThird);
-
- _initialize(nBodies);
-
- memcpy(m_pos, &positions[0], sizeof(vec4) * nBodies);
- memcpy(m_vel, &velocities[0], sizeof(vec4) * nBodies);
-}
-
-template
-void BodySystemCPU::update(T deltaTime) {
- assert(m_bInitialized);
-
- _integrateNBodySystem(deltaTime);
-
- // std::swap(m_currentRead, m_currentWrite);
-}
-
-template
-T *BodySystemCPU::getArray(BodyArray array) {
- assert(m_bInitialized);
-
- T *data = 0;
-
- switch (array) {
- default:
- case BODYSYSTEM_POSITION:
- data = m_pos;
- break;
-
- case BODYSYSTEM_VELOCITY:
- data = m_vel;
- break;
- }
-
- return data;
-}
-
-template
-void BodySystemCPU::setArray(BodyArray array, const T *data) {
- assert(m_bInitialized);
-
- T *target = 0;
-
- switch (array) {
- default:
- case BODYSYSTEM_POSITION:
- target = m_pos;
- break;
-
- case BODYSYSTEM_VELOCITY:
- target = m_vel;
- break;
- }
-
- memcpy(target, data, m_numBodies * 4 * sizeof(T));
-}
-
-template
-T sqrt_T(T x) {
- return sqrt(x);
-}
-
-template <>
-float sqrt_T(float x) {
- return sqrtf(x);
-}
-
-template
-void bodyBodyInteraction(T accel[3], T posMass0[4], T posMass1[4],
- T softeningSquared) {
- T r[3];
-
- // r_01 [3 FLOPS]
- r[0] = posMass1[0] - posMass0[0];
- r[1] = posMass1[1] - posMass0[1];
- r[2] = posMass1[2] - posMass0[2];
-
- // d^2 + e^2 [6 FLOPS]
- T distSqr = r[0] * r[0] + r[1] * r[1] + r[2] * r[2];
- distSqr += softeningSquared;
-
- // invDistCube =1/distSqr^(3/2) [4 FLOPS (2 mul, 1 sqrt, 1 inv)]
- T invDist = (T)1.0 / (T)sqrt((double)distSqr);
- T invDistCube = invDist * invDist * invDist;
-
- // s = m_j * invDistCube [1 FLOP]
- T s = posMass1[3] * invDistCube;
-
- // (m_1 * r_01) / (d^2 + e^2)^(3/2) [6 FLOPS]
- accel[0] += r[0] * s;
- accel[1] += r[1] * s;
- accel[2] += r[2] * s;
-}
-
-template
-void BodySystemCPU::_computeNBodyGravitation() {
-#ifdef OPENMP
-#pragma omp parallel for
-#endif
-
- for (int i = 0; i < m_numBodies; i++) {
- int indexForce = 3 * i;
-
- T acc[3] = {0, 0, 0};
-
- // We unroll this loop 4X for a small performance boost.
- int j = 0;
-
- while (j < m_numBodies) {
- bodyBodyInteraction(acc, &m_pos[4 * i], &m_pos[4 * j],
- m_softeningSquared);
- j++;
- bodyBodyInteraction(acc, &m_pos[4 * i], &m_pos[4 * j],
- m_softeningSquared);
- j++;
- bodyBodyInteraction(acc, &m_pos[4 * i], &m_pos[4 * j],
- m_softeningSquared);
- j++;
- bodyBodyInteraction(acc, &m_pos[4 * i], &m_pos[4 * j],
- m_softeningSquared);
- j++;
- }
-
- m_force[indexForce] = acc[0];
- m_force[indexForce + 1] = acc[1];
- m_force[indexForce + 2] = acc[2];
- }
-}
-
-template
-void BodySystemCPU::_integrateNBodySystem(T deltaTime) {
- _computeNBodyGravitation();
-
-#ifdef OPENMP
-#pragma omp parallel for
-#endif
-
- for (int i = 0; i < m_numBodies; ++i) {
- int index = 4 * i;
- int indexForce = 3 * i;
-
- T pos[3], vel[3], force[3];
- pos[0] = m_pos[index + 0];
- pos[1] = m_pos[index + 1];
- pos[2] = m_pos[index + 2];
- T invMass = m_pos[index + 3];
-
- vel[0] = m_vel[index + 0];
- vel[1] = m_vel[index + 1];
- vel[2] = m_vel[index + 2];
-
- force[0] = m_force[indexForce + 0];
- force[1] = m_force[indexForce + 1];
- force[2] = m_force[indexForce + 2];
-
- // acceleration = force / mass;
- // new velocity = old velocity + acceleration * deltaTime
- vel[0] += (force[0] * invMass) * deltaTime;
- vel[1] += (force[1] * invMass) * deltaTime;
- vel[2] += (force[2] * invMass) * deltaTime;
-
- vel[0] *= m_damping;
- vel[1] *= m_damping;
- vel[2] *= m_damping;
-
- // new position = old position + velocity * deltaTime
- pos[0] += vel[0] * deltaTime;
- pos[1] += vel[1] * deltaTime;
- pos[2] += vel[2] * deltaTime;
-
- m_pos[index + 0] = pos[0];
- m_pos[index + 1] = pos[1];
- m_pos[index + 2] = pos[2];
-
- m_vel[index + 0] = vel[0];
- m_vel[index + 1] = vel[1];
- m_vel[index + 2] = vel[2];
- }
-}
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.cu b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.cu
deleted file mode 100644
index 1c95980e..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.cu
+++ /dev/null
@@ -1,276 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include
-#include
-
-//#include
-//#include
-
-// CUDA standard includes
-#include
-//#include
-
-#include "bodysystem.h"
-
-__constant__ float softeningSquared;
-__constant__ double softeningSquared_fp64;
-
-cudaError_t setSofteningSquared(float softeningSq) {
- return cudaMemcpyToSymbol(softeningSquared, &softeningSq, sizeof(float), 0,
- cudaMemcpyHostToDevice);
-}
-
-cudaError_t setSofteningSquared(double softeningSq) {
- return cudaMemcpyToSymbol(softeningSquared_fp64, &softeningSq, sizeof(double),
- 0, cudaMemcpyHostToDevice);
-}
-
-template
-struct SharedMemory {
- __device__ inline operator T *() {
- extern __shared__ int __smem[];
- return (T *)__smem;
- }
-
- __device__ inline operator const T *() const {
- extern __shared__ int __smem[];
- return (T *)__smem;
- }
-};
-
-template
-__device__ T rsqrt_T(T x) {
- return rsqrt(x);
-}
-
-template <>
-__device__ float rsqrt_T(float x) {
- return rsqrtf(x);
-}
-
-template <>
-__device__ double rsqrt_T(double x) {
- return rsqrt(x);
-}
-
-// Macros to simplify shared memory addressing
-#define SX(i) sharedPos[i + blockDim.x * threadIdx.y]
-// This macro is only used when multithreadBodies is true (below)
-#define SX_SUM(i, j) sharedPos[i + blockDim.x * j]
-
-template
-__device__ T getSofteningSquared() {
- return softeningSquared;
-}
-template <>
-__device__ double getSofteningSquared() {
- return softeningSquared_fp64;
-}
-
-template
-struct DeviceData {
- T *dPos[2]; // mapped host pointers
- T *dVel;
- cudaEvent_t event;
- unsigned int offset;
- unsigned int numBodies;
-};
-
-template
-__device__ typename vec3::Type bodyBodyInteraction(
- typename vec3::Type ai, typename vec4::Type bi,
- typename vec4::Type bj) {
- typename vec3::Type r;
-
- // r_ij [3 FLOPS]
- r.x = bj.x - bi.x;
- r.y = bj.y - bi.y;
- r.z = bj.z - bi.z;
-
- // distSqr = dot(r_ij, r_ij) + EPS^2 [6 FLOPS]
- T distSqr = r.x * r.x + r.y * r.y + r.z * r.z;
- distSqr += getSofteningSquared();
-
- // invDistCube =1/distSqr^(3/2) [4 FLOPS (2 mul, 1 sqrt, 1 inv)]
- T invDist = rsqrt_T(distSqr);
- T invDistCube = invDist * invDist * invDist;
-
- // s = m_j * invDistCube [1 FLOP]
- T s = bj.w * invDistCube;
-
- // a_i = a_i + s * r_ij [6 FLOPS]
- ai.x += r.x * s;
- ai.y += r.y * s;
- ai.z += r.z * s;
-
- return ai;
-}
-
-template
-__device__ typename vec3::Type computeBodyAccel(
- typename vec4::Type bodyPos, typename vec4::Type *positions,
- int numTiles) {
- typename vec4::Type *sharedPos = SharedMemory::Type>();
-
- typename vec3::Type acc = {0.0f, 0.0f, 0.0f};
-
- for (int tile = 0; tile < numTiles; tile++) {
- sharedPos[threadIdx.x] = positions[tile * blockDim.x + threadIdx.x];
-
- __syncthreads();
-
- // This is the "tile_calculation" from the GPUG3 article.
-#pragma unroll 128
-
- for (unsigned int counter = 0; counter < blockDim.x; counter++) {
- acc = bodyBodyInteraction(acc, bodyPos, sharedPos[counter]);
- }
-
- __syncthreads();
- }
-
- return acc;
-}
-
-template
-__global__ void integrateBodies(typename vec4::Type *__restrict__ newPos,
- typename vec4::Type *__restrict__ oldPos,
- typename vec4::Type *vel,
- unsigned int deviceOffset,
- unsigned int deviceNumBodies, float deltaTime,
- float damping, int numTiles) {
- int index = blockIdx.x * blockDim.x + threadIdx.x;
-
- if (index >= deviceNumBodies) {
- return;
- }
-
- typename vec4::Type position = oldPos[deviceOffset + index];
-
- typename vec3::Type accel =
- computeBodyAccel(position, oldPos, numTiles);
-
- // acceleration = force / mass;
- // new velocity = old velocity + acceleration * deltaTime
- // note we factor out the body's mass from the equation, here and in
- // bodyBodyInteraction (because they cancel out). Thus here force ==
- // acceleration
- typename vec4::Type velocity = vel[deviceOffset + index];
-
- velocity.x += accel.x * deltaTime;
- velocity.y += accel.y * deltaTime;
- velocity.z += accel.z * deltaTime;
-
- velocity.x *= damping;
- velocity.y *= damping;
- velocity.z *= damping;
-
- // new position = old position + velocity * deltaTime
- position.x += velocity.x * deltaTime;
- position.y += velocity.y * deltaTime;
- position.z += velocity.z * deltaTime;
-
- // store new position and velocity
- newPos[deviceOffset + index] = position;
- vel[deviceOffset + index] = velocity;
-}
-
-template
-void integrateNbodySystem(DeviceData *deviceData,
- cudaGraphicsResource **pgres,
- unsigned int currentRead, float deltaTime,
- float damping, unsigned int numBodies,
- unsigned int numDevices, int blockSize,
- bool bUsePBO) {
- if (bUsePBO) {
- checkCudaErrors(cudaGraphicsResourceSetMapFlags(
- pgres[currentRead], cudaGraphicsMapFlagsReadOnly));
- checkCudaErrors(cudaGraphicsResourceSetMapFlags(
- pgres[1 - currentRead], cudaGraphicsMapFlagsWriteDiscard));
- checkCudaErrors(cudaGraphicsMapResources(2, pgres, 0));
- size_t bytes;
- checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
- (void **)&(deviceData[0].dPos[currentRead]), &bytes,
- pgres[currentRead]));
- checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
- (void **)&(deviceData[0].dPos[1 - currentRead]), &bytes,
- pgres[1 - currentRead]));
- }
-
- for (unsigned int dev = 0; dev != numDevices; dev++) {
- if (numDevices > 1) {
- cudaSetDevice(dev);
- }
-
- int numBlocks = (deviceData[dev].numBodies + blockSize - 1) / blockSize;
- int numTiles = (numBodies + blockSize - 1) / blockSize;
- int sharedMemSize = blockSize * 4 * sizeof(T); // 4 floats for pos
-
- integrateBodies<<>>(
- (typename vec4::Type *)deviceData[dev].dPos[1 - currentRead],
- (typename vec4::Type *)deviceData[dev].dPos[currentRead],
- (typename vec4::Type *)deviceData[dev].dVel, deviceData[dev].offset,
- deviceData[dev].numBodies, deltaTime, damping, numTiles);
-
- if (numDevices > 1) {
- checkCudaErrors(cudaEventRecord(deviceData[dev].event));
- // MJH: Hack on older driver versions to force kernel launches to flush!
- cudaStreamQuery(0);
- }
-
- // check if kernel invocation generated an error
- getLastCudaError("Kernel execution failed");
- }
-
- if (numDevices > 1) {
- for (unsigned int dev = 0; dev < numDevices; dev++) {
- checkCudaErrors(cudaEventSynchronize(deviceData[dev].event));
- }
- }
-
- if (bUsePBO) {
- checkCudaErrors(cudaGraphicsUnmapResources(2, pgres, 0));
- }
-}
-
-// Explicit specializations needed to generate code
-template void integrateNbodySystem(DeviceData *deviceData,
- cudaGraphicsResource **pgres,
- unsigned int currentRead,
- float deltaTime, float damping,
- unsigned int numBodies,
- unsigned int numDevices,
- int blockSize, bool bUsePBO);
-
-template void integrateNbodySystem(DeviceData *deviceData,
- cudaGraphicsResource **pgres,
- unsigned int currentRead,
- float deltaTime, float damping,
- unsigned int numBodies,
- unsigned int numDevices,
- int blockSize, bool bUsePBO);
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.h
deleted file mode 100644
index 977d4856..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __BODYSYSTEMCUDA_H__
-#define __BODYSYSTEMCUDA_H__
-
-#include "bodysystem.h"
-
-template
-struct DeviceData {
- T *dPos[2]; // mapped host pointers
- T *dVel;
- cudaEvent_t event;
- unsigned int offset;
- unsigned int numBodies;
-};
-
-// CUDA BodySystem: runs on the GPU
-template
-class BodySystemCUDA : public BodySystem {
- public:
- BodySystemCUDA(unsigned int numBodies, unsigned int numDevices,
- unsigned int blockSize, bool usePBO, bool useSysMem = false);
- virtual ~BodySystemCUDA();
-
- virtual void loadTipsyFile(const std::string &filename);
-
- virtual void update(T deltaTime);
-
- virtual void setSoftening(T softening);
- virtual void setDamping(T damping);
-
- virtual T *getArray(BodyArray array);
- virtual void setArray(BodyArray array, const T *data);
-
- virtual unsigned int getCurrentReadBuffer() const {
- return m_pbo[m_currentRead];
- }
-
- virtual unsigned int getNumBodies() const { return m_numBodies; }
-
- protected: // methods
- BodySystemCUDA() {}
-
- virtual void _initialize(int numBodies);
- virtual void _finalize();
-
- protected: // data
- unsigned int m_numBodies;
- unsigned int m_numDevices;
- bool m_bInitialized;
-
- // Host data
- T *m_hPos[2];
- T *m_hVel;
-
- DeviceData *m_deviceData;
-
- bool m_bUsePBO;
- bool m_bUseSysMem;
- unsigned int m_SMVersion;
-
- T m_damping;
-
- unsigned int m_pbo[2];
- cudaGraphicsResource *m_pGRes[2];
- unsigned int m_currentRead;
- unsigned int m_currentWrite;
-
- unsigned int m_blockSize;
-};
-
-#include "bodysystemcuda_impl.h"
-
-#endif // __BODYSYSTEMCUDA_H__
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda_impl.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda_impl.h
deleted file mode 100644
index 3e4c85d6..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda_impl.h
+++ /dev/null
@@ -1,373 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-//#include
-
-#include
-
-template
-void integrateNbodySystem(DeviceData *deviceData,
- cudaGraphicsResource **pgres,
- unsigned int currentRead, float deltaTime,
- float damping, unsigned int numBodies,
- unsigned int numDevices, int blockSize, bool bUsePBO);
-
-cudaError_t setSofteningSquared(float softeningSq);
-cudaError_t setSofteningSquared(double softeningSq);
-
-template
-BodySystemCUDA::BodySystemCUDA(unsigned int numBodies,
- unsigned int numDevices,
- unsigned int blockSize, bool usePBO,
- bool useSysMem)
- : m_numBodies(numBodies),
- m_numDevices(numDevices),
- m_bInitialized(false),
- m_bUsePBO(usePBO),
- m_bUseSysMem(useSysMem),
- m_currentRead(0),
- m_currentWrite(1),
- m_blockSize(blockSize) {
- m_hPos[0] = m_hPos[1] = 0;
- m_hVel = 0;
-
- m_deviceData = 0;
-
- _initialize(numBodies);
- setSoftening(0.00125f);
- setDamping(0.995f);
-}
-
-template
-BodySystemCUDA::~BodySystemCUDA() {
- _finalize();
- m_numBodies = 0;
-}
-
-template
-void BodySystemCUDA::_initialize(int numBodies) {
- assert(!m_bInitialized);
-
- m_numBodies = numBodies;
-
- unsigned int memSize = sizeof(T) * 4 * numBodies;
-
- m_deviceData = new DeviceData[m_numDevices];
-
- // divide up the workload amongst Devices
- float *weights = new float[m_numDevices];
- int *numSms = new int[m_numDevices];
- float total = 0;
-
- for (unsigned int i = 0; i < m_numDevices; i++) {
- cudaDeviceProp props;
- checkCudaErrors(cudaGetDeviceProperties(&props, i));
-
- // Choose the weight based on the Compute Capability
- // We estimate that a CC2.0 SM is about 4.0x faster than a CC 1.x SM for
- // this application (since a 15-SM GF100 is about 2X faster than a 30-SM
- // GT200).
- numSms[i] = props.multiProcessorCount;
- weights[i] = numSms[i] * (props.major >= 2 ? 4.f : 1.f);
- total += weights[i];
- }
-
- unsigned int offset = 0;
- unsigned int remaining = m_numBodies;
-
- for (unsigned int i = 0; i < m_numDevices; i++) {
- unsigned int count = (int)((weights[i] / total) * m_numBodies);
- unsigned int round = numSms[i] * 256;
- count = round * ((count + round - 1) / round);
-
- if (count > remaining) {
- count = remaining;
- }
-
- remaining -= count;
- m_deviceData[i].offset = offset;
- m_deviceData[i].numBodies = count;
- offset += count;
-
- if ((i == m_numDevices - 1) && (offset < m_numBodies - 1)) {
- m_deviceData[i].numBodies += m_numBodies - offset;
- }
- }
-
- delete[] weights;
- delete[] numSms;
-
- if (m_bUseSysMem) {
- checkCudaErrors(cudaHostAlloc((void **)&m_hPos[0], memSize,
- cudaHostAllocMapped | cudaHostAllocPortable));
- checkCudaErrors(cudaHostAlloc((void **)&m_hPos[1], memSize,
- cudaHostAllocMapped | cudaHostAllocPortable));
- checkCudaErrors(cudaHostAlloc((void **)&m_hVel, memSize,
- cudaHostAllocMapped | cudaHostAllocPortable));
-
- memset(m_hPos[0], 0, memSize);
- memset(m_hPos[1], 0, memSize);
- memset(m_hVel, 0, memSize);
-
- for (unsigned int i = 0; i < m_numDevices; i++) {
- if (m_numDevices > 1) {
- checkCudaErrors(cudaSetDevice(i));
- }
-
- checkCudaErrors(cudaEventCreate(&m_deviceData[i].event));
- checkCudaErrors(cudaHostGetDevicePointer(
- (void **)&m_deviceData[i].dPos[0], (void *)m_hPos[0], 0));
- checkCudaErrors(cudaHostGetDevicePointer(
- (void **)&m_deviceData[i].dPos[1], (void *)m_hPos[1], 0));
- checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dVel,
- (void *)m_hVel, 0));
- }
- } else {
- m_hPos[0] = new T[m_numBodies * 4];
- m_hVel = new T[m_numBodies * 4];
-
- memset(m_hPos[0], 0, memSize);
- memset(m_hVel, 0, memSize);
-
- checkCudaErrors(cudaEventCreate(&m_deviceData[0].event));
-
- if (m_bUsePBO) {
- // create the position pixel buffer objects for rendering
- // we will actually compute directly from this memory in CUDA too
- glGenBuffers(2, (GLuint *)m_pbo);
-
- for (int i = 0; i < 2; ++i) {
- glBindBuffer(GL_ARRAY_BUFFER, m_pbo[i]);
- glBufferData(GL_ARRAY_BUFFER, memSize, m_hPos[0], GL_DYNAMIC_DRAW);
-
- int size = 0;
- glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size);
-
- if ((unsigned)size != memSize) {
- fprintf(stderr, "WARNING: Pixel Buffer Object allocation failed!n");
- }
-
- glBindBuffer(GL_ARRAY_BUFFER, 0);
- checkCudaErrors(cudaGraphicsGLRegisterBuffer(&m_pGRes[i], m_pbo[i],
- cudaGraphicsMapFlagsNone));
- }
- } else {
- checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[0], memSize));
- checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[1], memSize));
- }
-
- checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dVel, memSize));
- }
-
- m_bInitialized = true;
-}
-
-template
-void BodySystemCUDA::_finalize() {
- assert(m_bInitialized);
-
- if (m_bUseSysMem) {
- checkCudaErrors(cudaFreeHost(m_hPos[0]));
- checkCudaErrors(cudaFreeHost(m_hPos[1]));
- checkCudaErrors(cudaFreeHost(m_hVel));
-
- for (unsigned int i = 0; i < m_numDevices; i++) {
- cudaEventDestroy(m_deviceData[i].event);
- }
- } else {
- delete[] m_hPos[0];
- delete[] m_hPos[1];
- delete[] m_hVel;
-
- checkCudaErrors(cudaFree((void **)m_deviceData[0].dVel));
-
- if (m_bUsePBO) {
- checkCudaErrors(cudaGraphicsUnregisterResource(m_pGRes[0]));
- checkCudaErrors(cudaGraphicsUnregisterResource(m_pGRes[1]));
- glDeleteBuffers(2, (const GLuint *)m_pbo);
- } else {
- checkCudaErrors(cudaFree((void **)m_deviceData[0].dPos[0]));
- checkCudaErrors(cudaFree((void **)m_deviceData[0].dPos[1]));
- }
- }
-
- delete[] m_deviceData;
-
- m_bInitialized = false;
-}
-
-template
-void BodySystemCUDA::loadTipsyFile(const std::string &filename) {
- if (m_bInitialized) _finalize();
-
- std::vector::Type> positions;
- std::vector::Type> velocities;
- std::vector ids;
-
- int nBodies = 0;
- int nFirst = 0, nSecond = 0, nThird = 0;
-
- read_tipsy_file(positions, velocities, ids, filename, nBodies, nFirst,
- nSecond, nThird);
-
- _initialize(nBodies);
-
- setArray(BODYSYSTEM_POSITION, (T *)&positions[0]);
- setArray(BODYSYSTEM_VELOCITY, (T *)&velocities[0]);
-}
-
-template
-void BodySystemCUDA::setSoftening(T softening) {
- T softeningSq = softening * softening;
-
- for (unsigned int i = 0; i < m_numDevices; i++) {
- if (m_numDevices > 1) {
- checkCudaErrors(cudaSetDevice(i));
- }
-
- checkCudaErrors(setSofteningSquared(softeningSq));
- }
-}
-
-template
-void BodySystemCUDA::setDamping(T damping) {
- m_damping = damping;
-}
-
-template
-void BodySystemCUDA::update(T deltaTime) {
- assert(m_bInitialized);
-
- integrateNbodySystem(m_deviceData, m_pGRes, m_currentRead,
- (float)deltaTime, (float)m_damping, m_numBodies,
- m_numDevices, m_blockSize, m_bUsePBO);
-
- std::swap(m_currentRead, m_currentWrite);
-}
-
-template
-T *BodySystemCUDA::getArray(BodyArray array) {
- assert(m_bInitialized);
-
- T *hdata = 0;
- T *ddata = 0;
-
- cudaGraphicsResource *pgres = NULL;
-
- int currentReadHost = m_bUseSysMem ? m_currentRead : 0;
-
- switch (array) {
- default:
- case BODYSYSTEM_POSITION:
- hdata = m_hPos[currentReadHost];
- ddata = m_deviceData[0].dPos[m_currentRead];
-
- if (m_bUsePBO) {
- pgres = m_pGRes[m_currentRead];
- }
-
- break;
-
- case BODYSYSTEM_VELOCITY:
- hdata = m_hVel;
- ddata = m_deviceData[0].dVel;
- break;
- }
-
- if (!m_bUseSysMem) {
- if (pgres) {
- checkCudaErrors(
- cudaGraphicsResourceSetMapFlags(pgres, cudaGraphicsMapFlagsReadOnly));
- checkCudaErrors(cudaGraphicsMapResources(1, &pgres, 0));
- size_t bytes;
- checkCudaErrors(
- cudaGraphicsResourceGetMappedPointer((void **)&ddata, &bytes, pgres));
- }
-
- checkCudaErrors(cudaMemcpy(hdata, ddata, m_numBodies * 4 * sizeof(T),
- cudaMemcpyDeviceToHost));
-
- if (pgres) {
- checkCudaErrors(cudaGraphicsUnmapResources(1, &pgres, 0));
- }
- }
-
- return hdata;
-}
-
-template
-void BodySystemCUDA::setArray(BodyArray array, const T *data) {
- assert(m_bInitialized);
-
- m_currentRead = 0;
- m_currentWrite = 1;
-
- switch (array) {
- default:
- case BODYSYSTEM_POSITION: {
- if (m_bUsePBO) {
- glBindBuffer(GL_ARRAY_BUFFER, m_pbo[m_currentRead]);
- glBufferSubData(GL_ARRAY_BUFFER, 0, 4 * sizeof(T) * m_numBodies, data);
-
- int size = 0;
- glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size);
-
- if ((unsigned)size != 4 * (sizeof(T) * m_numBodies)) {
- fprintf(stderr, "WARNING: Pixel Buffer Object download failed!n");
- }
-
- glBindBuffer(GL_ARRAY_BUFFER, 0);
- } else {
- if (m_bUseSysMem) {
- memcpy(m_hPos[m_currentRead], data, m_numBodies * 4 * sizeof(T));
- } else
- checkCudaErrors(cudaMemcpy(m_deviceData[0].dPos[m_currentRead], data,
- m_numBodies * 4 * sizeof(T),
- cudaMemcpyHostToDevice));
- }
- } break;
-
- case BODYSYSTEM_VELOCITY:
- if (m_bUseSysMem) {
- memcpy(m_hVel, data, m_numBodies * 4 * sizeof(T));
- } else
- checkCudaErrors(cudaMemcpy(m_deviceData[0].dVel, data,
- m_numBodies * 4 * sizeof(T),
- cudaMemcpyHostToDevice));
-
- break;
- }
-}
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/findgleslib.mk b/Samples/8_Platform_Specific/Tegra/nbody_screen/findgleslib.mk
deleted file mode 100644
index 6da2f078..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/findgleslib.mk
+++ /dev/null
@@ -1,149 +0,0 @@
-################################################################################
-#
-# Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
-#
-# NOTICE TO USER:
-#
-# This source code is subject to NVIDIA ownership rights under U.S. and
-# international Copyright laws.
-#
-# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
-# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
-# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
-# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
-# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
-# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-# OR PERFORMANCE OF THIS SOURCE CODE.
-#
-# U.S. Government End Users. This source code is a "commercial item" as
-# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
-# "commercial computer software" and "commercial computer software
-# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
-# and is provided to the U.S. Government only as a commercial end item.
-# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
-# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
-# source code with only those rights set forth herein.
-#
-################################################################################
-#
-# findgleslib.mk is used to find the necessary GLES Libraries for specific distributions
-# this is supported on Linux
-#
-################################################################################
-
-# Determine OS platform and unix distribution
-ifeq ("$(TARGET_OS)","linux")
- # first search lsb_release
- DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
- ifeq ("$(DISTRO)","")
- # second search and parse /etc/issue
- DISTRO := $(shell awk '{print $$1}' /etc/issue | tr -d "[:space:]" | sed -e "/^$$/d" | tr "[:upper:]" "[:lower:]")
- # ensure data from /etc/issue is valid
- ifneq (,$(filter-out $(DISTRO),ubuntu fedora red rhel centos suse))
- DISTRO :=
- endif
- ifeq ("$(DISTRO)","")
- # third, we can search in /etc/os-release or /etc/{distro}-release
- DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
- endif
- endif
-endif
-
-ifeq ("$(TARGET_OS)","linux")
- # $(info) >> findgllib.mk -> LINUX path <<<)
- # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
- UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?)
- FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?)
- RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
- CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?)
- SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
- KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?)
- ifeq ("$(UBUNTU)","0")
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- GLPATH := /usr/arm-linux-gnueabihf/lib
- GLLINK := -L/usr/arm-linux-gnueabihf/lib
- ifneq ($(TARGET_FS),)
- GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64)
- GLPATH := /usr/aarch64-linux-gnu/lib
- GLLINK := -L/usr/aarch64-linux-gnu/lib
- ifneq ($(TARGET_FS),)
- GLPATH += $(TARGET_FS)/usr/lib
- GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu
- GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- endif
- else
- UBUNTU_PKG_NAME = $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1)
- ifneq ("$(UBUNTU_PKG_NAME)","")
- GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME)
- GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME)
- endif
-
- DFLT_PATH ?= /usr/lib
- endif
- endif
-
- ifeq ("$(SUSE)","0")
- GLPATH ?= /usr/X11R6/lib64
- GLLINK ?= -L/usr/X11R6/lib64
- DFLT_PATH ?= /usr/lib64
- else
- GLPATH ?= /usr/lib64/nvidia
- GLLINK ?= -L/usr/lib64/nvidia
- DFLT_PATH ?= /usr/lib64
- endif
-
- # find libGL, libGLU, libXi,
- EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null)
- GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
- X11LIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libX11.so -print 2>/dev/null)
-
- ifeq ("$(EGLLIB)","")
- $(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(GLESLIB)","")
- $(info >>> WARNING - libGLES.so not found, please install libGLES.so <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(X11LIB)","")
- $(info >>> WARNING - libX11.so not found, please install libX11.so <<<)
- SAMPLE_ENABLED := 0
- endif
-
- HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include
- else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux)
- HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include
- endif
-
- EGLHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null)
- EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null)
- GL31HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name gl31.h -print 2>/dev/null)
- X11HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name Xlib.h -print 2>/dev/null)
-
- ifeq ("$(EGLHEADER)","")
- $(info >>> WARNING - egl.h not found, please install egl.h <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(EGLEXTHEADER)","")
- $(info >>> WARNING - eglext.h not found, please install eglext.h <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(GL31HEADER)","")
- $(info >>> WARNING - gl31.h not found, please install gl31.h <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(X11HEADER)","")
- $(info >>> WARNING - Xlib.h not found, refer to CUDA Samples release notes for how to find and install them. <<<)
- SAMPLE_ENABLED := 0
- endif
-else
-endif
-
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/galaxy_20K.bin b/Samples/8_Platform_Specific/Tegra/nbody_screen/galaxy_20K.bin
deleted file mode 100644
index 193029b5..00000000
Binary files a/Samples/8_Platform_Specific/Tegra/nbody_screen/galaxy_20K.bin and /dev/null differ
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/nbody_screen.cpp b/Samples/8_Platform_Specific/Tegra/nbody_screen/nbody_screen.cpp
deleted file mode 100644
index 0b2ab770..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/nbody_screen.cpp
+++ /dev/null
@@ -1,1223 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "render_particles.h"
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-#include
-
-#include "bodysystemcuda.h"
-#include "bodysystemcpu.h"
-#include "cuda_runtime.h"
-
-int screen;
-screen_window_t screen_window;
-screen_context_t screen_context;
-
-EGLDisplay eglDisplay = EGL_NO_DISPLAY;
-EGLSurface eglSurface = EGL_NO_SURFACE;
-EGLContext eglContext = EGL_NO_CONTEXT;
-
-// view params
-int ox = 0, oy = 0;
-int buttonState = 0;
-float camera_trans[] = {0, -2, -150};
-float camera_rot[] = {0, 0, 0};
-float camera_trans_lag[] = {0, -2, -150};
-float camera_rot_lag[] = {0, 0, 0};
-const float inertia = 0.1f;
-
-bool benchmark = false;
-bool compareToCPU = false;
-bool QATest = false;
-int blockSize = 256;
-bool useHostMem = false;
-bool fp64 = false;
-bool useCpu = false;
-int numDevsRequested = 1;
-bool displayEnabled = true;
-unsigned int dispno = 0;
-unsigned int window_width = 720;
-unsigned int window_height = 480;
-bool bPause = false;
-bool bFullscreen = false;
-bool bDispInteractions = false;
-bool bSupportDouble = false;
-int flopsPerInteraction = 20;
-
-char deviceName[100];
-
-enum { M_VIEW = 0, M_MOVE };
-
-int numBodies = 16384;
-
-std::string tipsyFile = "";
-
-int numIterations = 0; // run until exit
-
-void computePerfStats(double &interactionsPerSecond, double &gflops,
- float milliseconds, int iterations) {
- // double precision uses intrinsic operation followed by refinement,
- // resulting in higher operation count per interaction.
- // (Note Astrophysicists use 38 flops per interaction no matter what,
- // based on "historical precedent", but they are using FLOP/s as a
- // measure of "science throughput". We are using it as a measure of
- // hardware throughput. They should really use interactions/s...
- // const int flopsPerInteraction = fp64 ? 30 : 20;
- interactionsPerSecond = (float)numBodies * (float)numBodies;
- interactionsPerSecond *= 1e-9 * iterations * 1000 / milliseconds;
- gflops = interactionsPerSecond * (float)flopsPerInteraction;
-}
-
-////////////////////////////////////////
-// Demo Parameters
-////////////////////////////////////////
-struct NBodyParams {
- float m_timestep;
- float m_clusterScale;
- float m_velocityScale;
- float m_softening;
- float m_damping;
- float m_pointSize;
- float m_x, m_y, m_z;
-
- void print() {
- printf("{ %f, %f, %f, %f, %f, %f, %f, %f, %f },\n", m_timestep,
- m_clusterScale, m_velocityScale, m_softening, m_damping, m_pointSize,
- m_x, m_y, m_z);
- }
-};
-
-NBodyParams demoParams[] = {
- {0.016f, 1.54f, 8.0f, 0.1f, 1.0f, 1.0f, 0, -2, -100},
- {0.016f, 0.68f, 20.0f, 0.1f, 1.0f, 0.8f, 0, -2, -30},
- {0.0006f, 0.16f, 1000.0f, 1.0f, 1.0f, 0.07f, 0, 0, -1.5f},
- {0.0006f, 0.16f, 1000.0f, 1.0f, 1.0f, 0.07f, 0, 0, -1.5f},
- {0.0019f, 0.32f, 276.0f, 1.0f, 1.0f, 0.07f, 0, 0, -5},
- {0.0016f, 0.32f, 272.0f, 0.145f, 1.0f, 0.08f, 0, 0, -5},
- {0.016000f, 6.040000f, 0.000000f, 1.000000f, 1.000000f, 0.760000f, 0, 0,
- -50},
-};
-
-int numDemos = sizeof(demoParams) / sizeof(NBodyParams);
-bool cycleDemo = true;
-int activeDemo = 0;
-float demoTime = 10000.0f; // ms
-StopWatchInterface *demoTimer = NULL, *timer = NULL;
-
-// run multiple iterations to compute an average sort time
-
-NBodyParams activeParams = demoParams[activeDemo];
-
-// The UI.
-bool bShowSliders = true;
-
-// fps
-static int fpsCount = 0;
-static int fpsLimit = 5;
-cudaEvent_t startEvent, stopEvent;
-cudaEvent_t hostMemSyncEvent;
-
-template
-class NBodyDemo {
- public:
- static void Create() { m_singleton = new NBodyDemo; }
- static void Destroy() { delete m_singleton; }
-
- static void init(int numBodies, int numDevices, int blockSize, bool usePBO,
- bool useHostMem, bool useCpu) {
- m_singleton->_init(numBodies, numDevices, blockSize, usePBO, useHostMem,
- useCpu);
- }
-
- static void reset(int numBodies, NBodyConfig config) {
- m_singleton->_reset(numBodies, config);
- }
-
- static void selectDemo(int index) { m_singleton->_selectDemo(index); }
-
- static bool compareResults(int numBodies) {
- return m_singleton->_compareResults(numBodies);
- }
-
- static void runBenchmark(int iterations) {
- m_singleton->_runBenchmark(iterations);
- }
-
- static void updateParams() {
- m_singleton->m_nbody->setSoftening(activeParams.m_softening);
- m_singleton->m_nbody->setDamping(activeParams.m_damping);
- }
-
- static void updateSimulation() {
- m_singleton->m_nbody->update(activeParams.m_timestep);
- }
-
- static void display() {
- m_singleton->m_renderer->setSpriteSize(activeParams.m_pointSize);
-
- if (useHostMem) {
- // This event sync is required because we are rendering from the host
- // memory that CUDA is writing. If we don't wait until CUDA is done
- // updating it, we will render partially updated data, resulting in a
- // jerky frame rate.
- if (!useCpu) {
- cudaEventSynchronize(hostMemSyncEvent);
- }
-
- m_singleton->m_renderer->setPositions(
- m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION),
- m_singleton->m_nbody->getNumBodies());
- } else {
- m_singleton->m_renderer->setPBO(
- m_singleton->m_nbody->getCurrentReadBuffer(),
- m_singleton->m_nbody->getNumBodies(), (sizeof(T) > 4));
- }
-
- // display particles
- m_singleton->m_renderer->display();
- }
-
- static void getArrays(T *pos, T *vel) {
- T *_pos = m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION);
- T *_vel = m_singleton->m_nbody->getArray(BODYSYSTEM_VELOCITY);
- memcpy(pos, _pos, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T));
- memcpy(vel, _vel, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T));
- }
-
- static void setArrays(const T *pos, const T *vel) {
- if (pos != m_singleton->m_hPos) {
- memcpy(m_singleton->m_hPos, pos, numBodies * 4 * sizeof(T));
- }
-
- if (vel != m_singleton->m_hVel) {
- memcpy(m_singleton->m_hVel, vel, numBodies * 4 * sizeof(T));
- }
-
- m_singleton->m_nbody->setArray(BODYSYSTEM_POSITION, m_singleton->m_hPos);
- m_singleton->m_nbody->setArray(BODYSYSTEM_VELOCITY, m_singleton->m_hVel);
-
- if (!benchmark && !useCpu && !compareToCPU) {
- m_singleton->_resetRenderer();
- }
- }
-
- private:
- static NBodyDemo *m_singleton;
-
- BodySystem *m_nbody;
- BodySystemCUDA *m_nbodyCuda;
- BodySystemCPU *m_nbodyCpu;
-
- ParticleRenderer *m_renderer;
-
- T *m_hPos;
- T *m_hVel;
- float *m_hColor;
-
- private:
- NBodyDemo()
- : m_nbody(0),
- m_nbodyCuda(0),
- m_nbodyCpu(0),
- m_renderer(0),
- m_hPos(0),
- m_hVel(0),
- m_hColor(0) {}
-
- ~NBodyDemo() {
- if (m_nbodyCpu) {
- delete m_nbodyCpu;
- }
-
- if (m_nbodyCuda) {
- delete m_nbodyCuda;
- }
-
- if (m_hPos) {
- delete[] m_hPos;
- }
-
- if (m_hVel) {
- delete[] m_hVel;
- }
-
- if (m_hColor) {
- delete[] m_hColor;
- }
-
- sdkDeleteTimer(&demoTimer);
-
- if (!benchmark && !compareToCPU) delete m_renderer;
- }
-
- void _init(int numBodies, int numDevices, int blockSize, bool bUsePBO,
- bool useHostMem, bool useCpu) {
- if (useCpu) {
- m_nbodyCpu = new BodySystemCPU(numBodies);
- m_nbody = m_nbodyCpu;
- m_nbodyCuda = 0;
- } else {
- m_nbodyCuda = new BodySystemCUDA(numBodies, numDevices, blockSize,
- bUsePBO, useHostMem);
- m_nbody = m_nbodyCuda;
- m_nbodyCpu = 0;
- }
-
- // allocate host memory
- m_hPos = new T[numBodies * 4];
- m_hVel = new T[numBodies * 4];
- m_hColor = new float[numBodies * 4];
-
- m_nbody->setSoftening(activeParams.m_softening);
- m_nbody->setDamping(activeParams.m_damping);
-
- if (useCpu) {
- sdkCreateTimer(&timer);
- sdkStartTimer(&timer);
- } else {
- checkCudaErrors(cudaEventCreate(&startEvent));
- checkCudaErrors(cudaEventCreate(&stopEvent));
- checkCudaErrors(cudaEventCreate(&hostMemSyncEvent));
- }
-
- if (!benchmark && !compareToCPU) {
- m_renderer = new ParticleRenderer(window_width, window_height);
- _resetRenderer();
- }
-
- sdkCreateTimer(&demoTimer);
- sdkStartTimer(&demoTimer);
- }
-
- void _reset(int numBodies, NBodyConfig config) {
- if (tipsyFile == "") {
- randomizeBodies(config, m_hPos, m_hVel, m_hColor,
- activeParams.m_clusterScale, activeParams.m_velocityScale,
- numBodies, true);
- setArrays(m_hPos, m_hVel);
- } else {
- m_nbody->loadTipsyFile(tipsyFile);
- ::numBodies = m_nbody->getNumBodies();
- }
- }
-
- void _resetRenderer() {
- if (fp64) {
- float color[4] = {0.4f, 0.8f, 0.1f, 1.0f};
- m_renderer->setBaseColor(color);
- } else {
- float color[4] = {1.0f, 0.6f, 0.3f, 1.0f};
- m_renderer->setBaseColor(color);
- }
-
- m_renderer->setColors(m_hColor, m_nbody->getNumBodies());
- m_renderer->setSpriteSize(activeParams.m_pointSize);
- m_renderer->setCameraPos(camera_trans);
- }
-
- void _selectDemo(int index) {
- assert(index < numDemos);
-
- activeParams = demoParams[index];
- camera_trans[0] = camera_trans_lag[0] = activeParams.m_x;
- camera_trans[1] = camera_trans_lag[1] = activeParams.m_y;
- camera_trans[2] = camera_trans_lag[2] = activeParams.m_z;
- reset(numBodies, NBODY_CONFIG_SHELL);
- sdkResetTimer(&demoTimer);
-
- m_singleton->m_renderer->setCameraPos(camera_trans);
- }
-
- bool _compareResults(int numBodies) {
- assert(m_nbodyCuda);
-
- bool passed = true;
-
- m_nbody->update(0.001f);
-
- {
- m_nbodyCpu = new BodySystemCPU(numBodies);
-
- m_nbodyCpu->setArray(BODYSYSTEM_POSITION, m_hPos);
- m_nbodyCpu->setArray(BODYSYSTEM_VELOCITY, m_hVel);
-
- m_nbodyCpu->update(0.001f);
-
- T *cudaPos = m_nbodyCuda->getArray(BODYSYSTEM_POSITION);
- T *cpuPos = m_nbodyCpu->getArray(BODYSYSTEM_POSITION);
-
- T tolerance = 0.0005f;
-
- for (int i = 0; i < numBodies; i++) {
- if (fabs(cpuPos[i] - cudaPos[i]) > tolerance) {
- passed = false;
- printf("Error: (host)%f != (device)%f\n", cpuPos[i], cudaPos[i]);
- }
- }
- }
- return passed;
- }
-
- void _runBenchmark(int iterations) {
- // once without timing to prime the device
- if (!useCpu) {
- m_nbody->update(activeParams.m_timestep);
- }
-
- if (useCpu) {
- sdkCreateTimer(&timer);
- sdkStartTimer(&timer);
- } else {
- checkCudaErrors(cudaEventRecord(startEvent, 0));
- }
-
- for (int i = 0; i < iterations; ++i) {
- m_nbody->update(activeParams.m_timestep);
- }
-
- float milliseconds = 0;
-
- if (useCpu) {
- sdkStopTimer(&timer);
- milliseconds = sdkGetTimerValue(&timer);
- sdkStartTimer(&timer);
- } else {
- checkCudaErrors(cudaEventRecord(stopEvent, 0));
- checkCudaErrors(cudaEventSynchronize(stopEvent));
- checkCudaErrors(
- cudaEventElapsedTime(&milliseconds, startEvent, stopEvent));
- }
-
- double interactionsPerSecond = 0;
- double gflops = 0;
- computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations);
-
- printf("%d bodies, total time for %d iterations: %.3f ms\n", numBodies,
- iterations, milliseconds);
- printf("= %.3f billion interactions per second\n", interactionsPerSecond);
- printf("= %.3f %s-precision GFLOP/s at %d flops per interaction\n", gflops,
- (sizeof(T) > 4) ? "double" : "single", flopsPerInteraction);
- }
-};
-
-void finalize() {
- if (!useCpu) {
- checkCudaErrors(cudaEventDestroy(startEvent));
- checkCudaErrors(cudaEventDestroy(stopEvent));
- checkCudaErrors(cudaEventDestroy(hostMemSyncEvent));
- }
-
- NBodyDemo::Destroy();
-
- if (bSupportDouble) NBodyDemo::Destroy();
-}
-
-template <>
-NBodyDemo *NBodyDemo::m_singleton = 0;
-template <>
-NBodyDemo *NBodyDemo::m_singleton = 0;
-
-template
-void switchDemoPrecision() {
- cudaDeviceSynchronize();
-
- fp64 = !fp64;
- flopsPerInteraction = fp64 ? 30 : 20;
-
- T_old *oldPos = new T_old[numBodies * 4];
- T_old *oldVel = new T_old[numBodies * 4];
-
- NBodyDemo::getArrays(oldPos, oldVel);
-
- // convert float to double
- T_new *newPos = new T_new[numBodies * 4];
- T_new *newVel = new T_new[numBodies * 4];
-
- for (int i = 0; i < numBodies * 4; i++) {
- newPos[i] = (T_new)oldPos[i];
- newVel[i] = (T_new)oldVel[i];
- }
-
- NBodyDemo::setArrays(newPos, newVel);
-
- cudaDeviceSynchronize();
-
- delete[] oldPos;
- delete[] oldVel;
- delete[] newPos;
- delete[] newVel;
-}
-
-void initGL(int *argc, char **argv) {
- EGLint configAttrs[] = {EGL_RED_SIZE,
- 1,
- EGL_GREEN_SIZE,
- 1,
- EGL_BLUE_SIZE,
- 1,
- EGL_DEPTH_SIZE,
- 16,
- EGL_SAMPLE_BUFFERS,
- 0,
- EGL_SAMPLES,
- 0,
- EGL_RENDERABLE_TYPE,
- EGL_OPENGL_ES2_BIT,
- EGL_ALPHA_SIZE,
- 1,
- EGL_NONE};
-
- EGLint contextAttrs[] = {EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE};
-
- EGLint windowAttrs[] = {EGL_NONE};
- EGLConfig *configList = NULL;
- EGLint configCount;
-
- screen_context = 0;
-
- screen_display_t *screenDisplayHandle = NULL;
-
- if (screen_create_context(&screen_context, 0)) {
- printf("Error creating screen context.\n");
- exit(EXIT_FAILURE);
- }
-
- screen = 0;
-
- eglDisplay = eglGetDisplay(0);
-
- if (eglDisplay == EGL_NO_DISPLAY) {
- printf("EGL failed to obtain display\n");
- exit(EXIT_FAILURE);
- }
-
- if (!eglInitialize(eglDisplay, 0, 0)) {
- printf("EGL failed to initialize\n");
- exit(EXIT_FAILURE);
- }
-
- if (!eglChooseConfig(eglDisplay, configAttrs, NULL, 0, &configCount) ||
- !configCount) {
- printf("EGL failed to return matching configs\n");
- exit(EXIT_FAILURE);
- }
-
- configList = (EGLConfig *)malloc(configCount * sizeof(EGLConfig));
-
- if (!eglChooseConfig(eglDisplay, configAttrs, configList, configCount,
- &configCount) ||
- !configCount) {
- printf("EGL failed to populate config list\n");
- exit(EXIT_FAILURE);
- }
-
- screen_window = 0;
- if (screen_create_window(&screen_window, screen_context)) {
- printf("Error creating screen window\n");
- exit(EXIT_FAILURE);
- }
-
- // query the total no of display available from QNX CAR2 screen
- int displayCount = 0;
- if (screen_get_context_property_iv(
- screen_context, SCREEN_PROPERTY_DISPLAY_COUNT, &displayCount)) {
- printf("Error getting context property\n");
- exit(EXIT_FAILURE);
- }
-
- screenDisplayHandle =
- (screen_display_t *)malloc(displayCount * sizeof(screen_display_t));
- if (!screenDisplayHandle) {
- printf("Error allocating screen display handle\n");
- exit(EXIT_FAILURE);
- }
-
- // query the display handle from QNX CAR2 screen
- if (screen_get_context_property_pv(screen_context, SCREEN_PROPERTY_DISPLAYS,
- (void **)screenDisplayHandle)) {
- printf("Error getting display handle\n");
- exit(EXIT_FAILURE);
- }
-
- int dispno_it;
- for (dispno_it = 0; dispno_it < displayCount; dispno_it++) {
- int active = 0;
- // Query the connected status from QNX CAR2 screen
- screen_get_display_property_iv(screenDisplayHandle[dispno_it],
- SCREEN_PROPERTY_ATTACHED, &active);
- if (active) {
- if (dispno == dispno_it) {
- // Map the window buffer to user requested display port
- screen_set_window_property_pv(screen_window, SCREEN_PROPERTY_DISPLAY,
- (void **)&screenDisplayHandle[dispno]);
- break;
- }
- }
- }
-
- if (dispno_it == displayCount) {
- printf("Failed to set the requested display\n");
- exit(EXIT_FAILURE);
- }
-
- free(screenDisplayHandle);
-
- int format = SCREEN_FORMAT_RGBA8888;
- if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_FORMAT,
- &format)) {
- printf("Error setting SCREEN_PROPERTY_FORMAT\n");
- exit(EXIT_FAILURE);
- }
-
- int usage = (1 << 11);
- if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_USAGE,
- &usage)) {
- printf("Error setting SCREEN_PROPERTY_USAGE\n");
- exit(EXIT_FAILURE);
- }
-
- EGLint interval = 1;
- if (screen_set_window_property_iv(screen_window,
- SCREEN_PROPERTY_SWAP_INTERVAL, &interval)) {
- printf("Error setting SCREEN_PROPERTY_SWAP_INTERVAL\n");
- exit(EXIT_FAILURE);
- }
-
- if (bFullscreen) {
- // QNX screen will use the full screen resolution by default
- int windowSize[2];
- if (screen_get_window_property_iv(screen_window, SCREEN_PROPERTY_SIZE,
- windowSize)) {
- printf("Error getting default SCREEN_PROPERTY_SIZE\n");
- exit(EXIT_FAILURE);
- }
- } else {
- int windowSize[2];
- windowSize[0] = window_width;
- windowSize[1] = window_height;
- if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_SIZE,
- windowSize)) {
- printf("Error setting SCREEN_PROPERTY_SIZE\n");
- exit(EXIT_FAILURE);
- }
- }
-
- int windowOffset[2];
- windowOffset[0] = 0;
- windowOffset[1] = 0;
- if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_POSITION,
- windowOffset)) {
- printf("Error setting SCREEN_PROPERTY_POSITION\n");
- exit(EXIT_FAILURE);
- }
-
- if (screen_create_window_buffers(screen_window, 2)) {
- printf("Error creating two window buffers.\n");
- exit(EXIT_FAILURE);
- }
-
- eglSurface =
- eglCreateWindowSurface(eglDisplay, configList[0],
- (EGLNativeWindowType)screen_window, windowAttrs);
- if (!eglSurface) {
- printf("EGL couldn't create window\n");
- exit(EXIT_FAILURE);
- }
-
- eglBindAPI(EGL_OPENGL_ES_API);
-
- eglContext = eglCreateContext(eglDisplay, configList[0], NULL, contextAttrs);
- if (!eglContext) {
- printf("EGL couldn't create context\n");
- exit(EXIT_FAILURE);
- }
-
- if (!eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext)) {
- printf("EGL couldn't make context/surface current\n");
- exit(EXIT_FAILURE);
- }
-
- EGLint contextRendererType;
- eglQueryContext(eglDisplay, eglContext, EGL_CONTEXT_CLIENT_TYPE,
- &contextRendererType);
-
- switch (contextRendererType) {
- case EGL_OPENGL_ES_API:
- printf("Using OpenGL ES API\n");
- break;
- case EGL_OPENGL_API:
- printf("Using OpenGL API - this is unsupported\n");
- exit(EXIT_FAILURE);
- case EGL_OPENVG_API:
- printf("Using OpenVG API - this is unsupported\n");
- exit(EXIT_FAILURE);
- default:
- printf("Unknown context type\n");
- exit(EXIT_FAILURE);
- }
-}
-
-void selectDemo(int activeDemo) {
- if (fp64) {
- NBodyDemo::selectDemo(activeDemo);
- } else {
- NBodyDemo::selectDemo(activeDemo);
- }
-}
-
-void updateSimulation() {
- if (fp64) {
- NBodyDemo::updateSimulation();
- } else {
- NBodyDemo::updateSimulation();
- }
-}
-
-void displayNBodySystem() {
- if (fp64) {
- NBodyDemo::display();
- } else {
- NBodyDemo::display();
- }
-}
-
-void display() {
- static double gflops = 0;
- static double ifps = 0;
- static double interactionsPerSecond = 0;
-
- // update the simulation
- if (!bPause) {
- if (cycleDemo && (sdkGetTimerValue(&demoTimer) > demoTime)) {
- activeDemo = (activeDemo + 1) % numDemos;
- selectDemo(activeDemo);
- }
-
- updateSimulation();
-
- if (!useCpu) {
- cudaEventRecord(hostMemSyncEvent,
- 0); // insert an event to wait on before rendering
- }
- }
-
- glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
-
- if (displayEnabled) {
- // view transform
- for (int c = 0; c < 3; ++c) {
- camera_trans_lag[c] += (camera_trans[c] - camera_trans_lag[c]) * inertia;
- camera_rot_lag[c] += (camera_rot[c] - camera_rot_lag[c]) * inertia;
- }
-
- displayNBodySystem();
- }
-
- fpsCount++;
-
- // this displays the frame rate updated every second (independent of frame
- // rate)
- if (fpsCount >= fpsLimit) {
- char fps[256];
-
- float milliseconds = 1;
-
- // stop timer
- if (useCpu) {
- milliseconds = sdkGetTimerValue(&timer);
- sdkResetTimer(&timer);
- } else {
- checkCudaErrors(cudaEventRecord(stopEvent, 0));
- checkCudaErrors(cudaEventSynchronize(stopEvent));
- }
-
- milliseconds /= (float)fpsCount;
- computePerfStats(interactionsPerSecond, gflops, milliseconds, 1);
-
- ifps = 1.f / (milliseconds / 1000.f);
- sprintf(fps,
- "CUDA N-Body (%d bodies): "
- "%0.1f fps | %0.1f BIPS | %0.1f GFLOP/s | %s",
- numBodies, ifps, interactionsPerSecond, gflops,
- fp64 ? "double precision" : "single precision");
-
- fpsCount = 0;
- fpsLimit = (ifps > 1.f) ? (int)ifps : 1;
-
- if (bPause) {
- fpsLimit = 0;
- }
-
- // restart timer
- if (!useCpu) {
- checkCudaErrors(cudaEventRecord(startEvent, 0));
- }
- }
-}
-
-void updateParams() {
- if (fp64) {
- NBodyDemo::updateParams();
- } else {
- NBodyDemo::updateParams();
- }
-}
-
-// commented out to remove unused parameter warnings in Linux
-void key(unsigned char key, int /*x*/, int /*y*/) {
- switch (key) {
- case ' ':
- bPause = !bPause;
- break;
-
- case 27: // escape
- case 'q':
- case 'Q':
- finalize();
-
- exit(EXIT_SUCCESS);
- break;
-
- case 13: // return
- if (bSupportDouble) {
- if (fp64) {
- switchDemoPrecision();
- } else {
- switchDemoPrecision();
- }
-
- printf("> %s precision floating point simulation\n",
- fp64 ? "Double" : "Single");
- }
-
- break;
-
- case '`':
- bShowSliders = !bShowSliders;
- break;
-
- case 'g':
- case 'G':
- bDispInteractions = !bDispInteractions;
- break;
-
- case 'c':
- case 'C':
- cycleDemo = !cycleDemo;
- printf("Cycle Demo Parameters: %s\n", cycleDemo ? "ON" : "OFF");
- break;
-
- case '[':
- activeDemo =
- (activeDemo == 0) ? numDemos - 1 : (activeDemo - 1) % numDemos;
- selectDemo(activeDemo);
- break;
-
- case ']':
- activeDemo = (activeDemo + 1) % numDemos;
- selectDemo(activeDemo);
- break;
-
- case 'd':
- case 'D':
- displayEnabled = !displayEnabled;
- break;
-
- case 'o':
- case 'O':
- activeParams.print();
- break;
-
- case '1':
- if (fp64) {
- NBodyDemo::reset(numBodies, NBODY_CONFIG_SHELL);
- } else {
- NBodyDemo::reset(numBodies, NBODY_CONFIG_SHELL);
- }
-
- break;
-
- case '2':
- if (fp64) {
- NBodyDemo::reset(numBodies, NBODY_CONFIG_RANDOM);
- } else {
- NBodyDemo::reset(numBodies, NBODY_CONFIG_RANDOM);
- }
-
- break;
-
- case '3':
- if (fp64) {
- NBodyDemo::reset(numBodies, NBODY_CONFIG_EXPAND);
- } else {
- NBodyDemo::reset(numBodies, NBODY_CONFIG_EXPAND);
- }
-
- break;
- }
-}
-
-void showHelp() {
- printf("\t-fullscreen (run n-body simulation in fullscreen mode)\n");
- printf(
- "\t-fp64 (use double precision floating point values for "
- "simulation)\n");
- printf("\t-hostmem (stores simulation data in host memory)\n");
- printf("\t-benchmark (run benchmark to measure performance) \n");
- printf(
- "\t-numbodies= (number of bodies (>= 1) to run in simulation) \n");
- printf(
- "\t-device= (where d=0,1,2.... for the CUDA device to use)\n");
- printf("\t-dispno= (where n represents the display to use)\n");
- printf(
- "\t-width= (where w represents the width of the window to "
- "open)\n");
- printf(
- "\t-width= (where h represents the height of the window to "
- "open)\n");
- printf(
- "\t-numdevices= (where i=(number of CUDA devices > 0) to use for "
- "simulation)\n");
- printf(
- "\t-compare (compares simulation results running once on the "
- "default GPU and once on the CPU)\n");
- printf("\t-cpu (run n-body simulation on the CPU)\n");
- printf("\t-tipsy= (load a tipsy model file for simulation)\n\n");
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Program main
-//////////////////////////////////////////////////////////////////////////////
-int main(int argc, char **argv) {
- bool bTestResults = true;
-
-#if defined(__linux__)
- setenv("DISPLAY", ":0", 0);
-#endif
-
- if (checkCmdLineFlag(argc, (const char **)argv, "help")) {
- printf("\n> Command line options\n");
- showHelp();
- return 0;
- }
-
- printf(
- "Run \"nbody_screen -benchmark [-numbodies=]\" to measure "
- "performance.\n");
- showHelp();
-
- bFullscreen =
- (checkCmdLineFlag(argc, (const char **)argv, "fullscreen") != 0);
-
- if (bFullscreen) {
- bShowSliders = false;
- }
-
- benchmark = (checkCmdLineFlag(argc, (const char **)argv, "benchmark") != 0);
-
- compareToCPU =
- ((checkCmdLineFlag(argc, (const char **)argv, "compare") != 0) ||
- (checkCmdLineFlag(argc, (const char **)argv, "qatest") != 0));
-
- QATest = (checkCmdLineFlag(argc, (const char **)argv, "qatest") != 0);
- useHostMem = (checkCmdLineFlag(argc, (const char **)argv, "hostmem") != 0);
- fp64 = (checkCmdLineFlag(argc, (const char **)argv, "fp64") != 0);
-
- flopsPerInteraction = fp64 ? 30 : 20;
-
- useCpu = (checkCmdLineFlag(argc, (const char **)argv, "cpu") != 0);
-
- if (checkCmdLineFlag(argc, (const char **)argv, "numdevices")) {
- numDevsRequested =
- getCmdLineArgumentInt(argc, (const char **)argv, "numdevices");
-
- if (numDevsRequested < 1) {
- printf(
- "Error: \"number of CUDA devices\" specified %d is invalid. Value "
- "should be >= 1\n",
- numDevsRequested);
- exit(bTestResults ? EXIT_SUCCESS : EXIT_FAILURE);
- } else {
- printf("number of CUDA devices = %d\n", numDevsRequested);
- }
- }
-
- if (checkCmdLineFlag(argc, (const char **)argv, "dispno")) {
- dispno = getCmdLineArgumentInt(argc, (const char **)argv, "dispno");
- }
-
- if (checkCmdLineFlag(argc, (const char **)argv, "width")) {
- window_width = getCmdLineArgumentInt(argc, (const char **)argv, "width");
- }
-
- if (checkCmdLineFlag(argc, (const char **)argv, "height")) {
- window_height = getCmdLineArgumentInt(argc, (const char **)argv, "height");
- }
-
- // for multi-device we currently require using host memory -- the devices
- // share data via the host
- if (numDevsRequested > 1) {
- useHostMem = true;
- }
-
- int numDevsAvailable = 0;
- bool customGPU = false;
- cudaGetDeviceCount(&numDevsAvailable);
-
- if (numDevsAvailable < numDevsRequested) {
- printf("Error: only %d Devices available, %d requested. Exiting.\n",
- numDevsAvailable, numDevsRequested);
- exit(EXIT_SUCCESS);
- }
-
- printf("> %s mode\n", bFullscreen ? "Fullscreen" : "Windowed");
- printf("> Simulation data stored in %s memory\n",
- useHostMem ? "system" : "video");
- printf("> %s precision floating point simulation\n",
- fp64 ? "Double" : "Single");
- printf("> %d Devices used for simulation\n", numDevsRequested);
-
- int devID;
- cudaDeviceProp props;
-
- if (useCpu) {
- useHostMem = true;
- compareToCPU = false;
- bSupportDouble = true;
-
-#ifdef OPENMP
- printf("> Simulation with CPU using OpenMP\n");
-#else
- printf("> Simulation with CPU\n");
-#endif
- }
-
- if (!benchmark && !compareToCPU) {
- initGL(&argc, argv);
- }
-
- if (!useCpu) {
- if (checkCmdLineFlag(argc, (const char **)argv, "device")) {
- customGPU = true;
- }
-
- devID = findCudaDevice(argc, (const char **)argv);
-
- checkCudaErrors(cudaGetDevice(&devID));
- checkCudaErrors(cudaGetDeviceProperties(&props, devID));
-
- bSupportDouble = true;
-
- // Initialize devices
- if (numDevsRequested > 1 && customGPU) {
- printf("You can't use --numdevices and --device at the same time.\n");
- exit(EXIT_SUCCESS);
- }
-
- if (customGPU || numDevsRequested == 1) {
- cudaDeviceProp props;
- checkCudaErrors(cudaGetDeviceProperties(&props, devID));
- printf("> Compute %d.%d CUDA device: [%s]\n", props.major, props.minor,
- props.name);
- } else {
- for (int i = 0; i < numDevsRequested; i++) {
- cudaDeviceProp props;
- checkCudaErrors(cudaGetDeviceProperties(&props, i));
-
- printf("> Compute %d.%d CUDA device: [%s]\n", props.major, props.minor,
- props.name);
-
- if (useHostMem) {
- if (!props.canMapHostMemory) {
- fprintf(stderr, "Device %d cannot map host memory!\n", devID);
- exit(EXIT_SUCCESS);
- }
-
- if (numDevsRequested > 1) {
- checkCudaErrors(cudaSetDevice(i));
- }
-
- checkCudaErrors(cudaSetDeviceFlags(cudaDeviceMapHost));
- }
- }
-
- // CC 1.2 and earlier do not support double precision
- if (props.major * 10 + props.minor <= 12) {
- bSupportDouble = false;
- }
- }
-
- // if(numDevsRequested > 1)
- // checkCudaErrors(cudaSetDevice(devID));
-
- if (fp64 && !bSupportDouble) {
- fprintf(stderr,
- "One or more of the requested devices does not support double "
- "precision floating-point\n");
- exit(EXIT_SUCCESS);
- }
- }
-
- numIterations = 0;
- blockSize = 0;
-
- if (checkCmdLineFlag(argc, (const char **)argv, "i")) {
- numIterations = getCmdLineArgumentInt(argc, (const char **)argv, "i");
- }
-
- if (checkCmdLineFlag(argc, (const char **)argv, "blockSize")) {
- blockSize = getCmdLineArgumentInt(argc, (const char **)argv, "blockSize");
- }
-
- if (blockSize == 0) // blockSize not set on command line
- blockSize = 256;
-
- // default number of bodies is #SMs * 4 * CTA size
- if (useCpu)
-#ifdef OPENMP
- numBodies = 8192;
-
-#else
- numBodies = 4096;
-#endif
- else if (numDevsRequested == 1) {
- numBodies = compareToCPU ? 4096 : blockSize * 4 * props.multiProcessorCount;
- } else {
- numBodies = 0;
-
- for (int i = 0; i < numDevsRequested; i++) {
- cudaDeviceProp props;
- checkCudaErrors(cudaGetDeviceProperties(&props, i));
- numBodies +=
- blockSize * (props.major >= 2 ? 4 : 1) * props.multiProcessorCount;
- }
- }
-
- if (checkCmdLineFlag(argc, (const char **)argv, "numbodies")) {
- numBodies = getCmdLineArgumentInt(argc, (const char **)argv, "numbodies");
-
- if (numBodies < 1) {
- printf(
- "Error: \"number of bodies\" specified %d is invalid. Value should "
- "be >= 1\n",
- numBodies);
- exit(bTestResults ? EXIT_SUCCESS : EXIT_FAILURE);
- } else if (numBodies % blockSize) {
- int newNumBodies = ((numBodies / blockSize) + 1) * blockSize;
- printf(
- "Warning: \"number of bodies\" specified %d is not a multiple of "
- "%d.\n",
- numBodies, blockSize);
- printf("Rounding up to the nearest multiple: %d.\n", newNumBodies);
- numBodies = newNumBodies;
- } else {
- printf("number of bodies = %d\n", numBodies);
- }
- }
-
- char *fname;
-
- if (getCmdLineArgumentString(argc, (const char **)argv, "tipsy", &fname)) {
- tipsyFile.assign(fname, strlen(fname));
- cycleDemo = false;
- bShowSliders = false;
- }
-
- if (numBodies <= 1024) {
- activeParams.m_clusterScale = 1.52f;
- activeParams.m_velocityScale = 2.f;
- } else if (numBodies <= 2048) {
- activeParams.m_clusterScale = 1.56f;
- activeParams.m_velocityScale = 2.64f;
- } else if (numBodies <= 4096) {
- activeParams.m_clusterScale = 1.68f;
- activeParams.m_velocityScale = 2.98f;
- } else if (numBodies <= 8192) {
- activeParams.m_clusterScale = 1.98f;
- activeParams.m_velocityScale = 2.9f;
- } else if (numBodies <= 16384) {
- activeParams.m_clusterScale = 1.54f;
- activeParams.m_velocityScale = 8.f;
- } else if (numBodies <= 32768) {
- activeParams.m_clusterScale = 1.44f;
- activeParams.m_velocityScale = 11.f;
- }
-
- NBodyDemo::Create();
-
- NBodyDemo::init(numBodies, numDevsRequested, blockSize,
- !(benchmark || compareToCPU || useHostMem), useHostMem,
- useCpu);
- NBodyDemo::reset(numBodies, NBODY_CONFIG_SHELL);
-
- if (bSupportDouble) {
- NBodyDemo::Create();
- NBodyDemo::init(numBodies, numDevsRequested, blockSize,
- !(benchmark || compareToCPU || useHostMem),
- useHostMem, useCpu);
- NBodyDemo::reset(numBodies, NBODY_CONFIG_SHELL);
- }
-
- if (benchmark) {
- if (numIterations <= 0) {
- numIterations = 10;
- }
-
- NBodyDemo::runBenchmark(numIterations);
- } else if (compareToCPU) {
- bTestResults = NBodyDemo::compareResults(numBodies);
- } else {
- glClear(GL_COLOR_BUFFER_BIT);
-
- eglSwapBuffers(eglDisplay, eglSurface);
-
- while (1) {
- display();
- usleep(1000);
- eglSwapBuffers(eglDisplay, eglSurface);
- }
-
- if (!useCpu) {
- checkCudaErrors(cudaEventRecord(startEvent, 0));
- }
- }
-
- finalize();
- exit(bTestResults ? EXIT_SUCCESS : EXIT_FAILURE);
-}
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.cpp b/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.cpp
deleted file mode 100644
index 3af7c32a..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.cpp
+++ /dev/null
@@ -1,374 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "render_particles.h"
-
-#include
-#include
-
-#include
-
-#include
-#include
-
-void mat_identity(matrix4 m) {
- m[0][1] = m[0][2] = m[0][3] = m[1][0] = m[1][2] = m[1][3] = m[2][0] =
- m[2][1] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f;
- m[0][0] = m[1][1] = m[2][2] = m[3][3] = 1.0f;
-}
-
-void mat_multiply(matrix4 m0, matrix4 m1) {
- float m[4];
-
- for (int r = 0; r < 4; r++) {
- m[0] = m[1] = m[2] = m[3] = 0.0f;
-
- for (int c = 0; c < 4; c++) {
- for (int i = 0; i < 4; i++) {
- m[c] += m0[i][r] * m1[c][i];
- }
- }
-
- for (int c = 0; c < 4; c++) {
- m0[c][r] = m[c];
- }
- }
-}
-
-void mat_translate(matrix4 m, vector3 v) {
- matrix4 m2;
- m2[0][0] = m2[1][1] = m2[2][2] = m2[3][3] = 1.0f;
- m2[0][1] = m2[0][2] = m2[0][3] = m2[1][0] = m2[1][2] = m2[1][3] = m2[2][0] =
- m2[2][1] = m2[2][3] = 0.0f;
- m2[3][0] = v[0];
- m2[3][1] = v[1];
- m2[3][2] = v[2];
- mat_multiply(m, m2);
-}
-
-void mat_perspective(matrix4 m, GLfloat fovy, GLfloat aspect, GLfloat znear,
- GLfloat zfar) {
- matrix4 m2;
- m2[1][0] = m2[2][0] = m2[3][0] = m2[0][1] = m2[2][1] = m2[3][1] = m2[0][2] =
- m2[1][2] = m2[0][3] = m2[1][3] = m2[3][3] = 0.0f;
- m2[2][3] = -1.0f;
-
- float f = 1 / tan((fovy * M_PI / 180) / 2);
- m2[0][0] = f / aspect;
- m2[1][1] = f;
-
- m2[2][2] = ((znear + zfar) / (znear - zfar));
- m2[3][2] = ((2 * znear * zfar) / (znear - zfar));
-
- mat_multiply(m, m2);
-}
-
-ParticleRenderer::ParticleRenderer(unsigned int windowWidth,
- unsigned int windowHeight)
- : m_pos(0),
- m_numParticles(0),
- m_pointSize(1.0f),
- m_spriteSize(2.0f),
- m_vertexShader(0),
- m_vertexShaderPoints(0),
- m_fragmentShader(0),
- m_programPoints(0),
- m_programSprites(0),
- m_texture(0),
- m_pbo(0),
- m_vboColor(0),
- m_windowWidth(windowWidth),
- m_windowHeight(windowHeight),
- m_bFp64Positions(false) {
- m_camera[0] = 0;
- m_camera[1] = 0;
- m_camera[2] = 0;
- _initGL();
-}
-
-ParticleRenderer::~ParticleRenderer() { m_pos = 0; }
-
-void ParticleRenderer::resetPBO() { glDeleteBuffers(1, (GLuint *)&m_pbo); }
-
-void ParticleRenderer::setPositions(float *pos, int numParticles) {
- m_pos = pos;
- m_numParticles = numParticles;
-
- if (!m_pbo) {
- glGenBuffers(1, (GLuint *)&m_pbo);
- }
-
- glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
- glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(float), pos,
- GL_STATIC_DRAW);
- glBindBuffer(GL_ARRAY_BUFFER, 0);
- checkGLErrors("Setting particle float position");
-}
-
-void ParticleRenderer::setPositions(double *pos, int numParticles) {
- m_bFp64Positions = true;
- m_pos_fp64 = pos;
- m_numParticles = numParticles;
-
- if (!m_pbo) {
- glGenBuffers(1, (GLuint *)&m_pbo);
- }
-
- glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
- glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(double), pos,
- GL_STATIC_DRAW);
- glBindBuffer(GL_ARRAY_BUFFER, 0);
- checkGLErrors("Setting particle double position");
-}
-
-void ParticleRenderer::setColors(float *color, int numParticles) {
- glBindBuffer(GL_ARRAY_BUFFER, m_vboColor);
- glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(float), color,
- GL_STATIC_DRAW);
- glBindBuffer(GL_ARRAY_BUFFER, 0);
-}
-
-void ParticleRenderer::setBaseColor(float color[4]) {
- for (int i = 0; i < 4; i++) m_baseColor[i] = color[i];
-}
-
-void ParticleRenderer::setPBO(unsigned int pbo, int numParticles, bool fp64) {
- m_pbo = pbo;
- m_numParticles = numParticles;
-
- if (fp64) m_bFp64Positions = true;
-}
-
-void ParticleRenderer::display() {
- glEnable(GL_BLEND);
- glBlendFunc(GL_SRC_ALPHA, GL_ONE);
- glDepthMask(GL_FALSE);
-
- glUseProgram(m_programSprites);
-
- // Set modelview and projection matrices
- GLint h_ModelViewMatrix = glGetUniformLocation(m_programSprites, "modelview");
- GLint h_ProjectionMatrix =
- glGetUniformLocation(m_programSprites, "projection");
- matrix4 modelview;
- matrix4 projection;
- mat_identity(modelview);
- mat_identity(projection);
- mat_translate(modelview, m_camera);
- mat_perspective(projection, 60, (float)m_windowWidth / (float)m_windowHeight,
- 0.1, 1000.0);
- glUniformMatrix4fv(h_ModelViewMatrix, 1, GL_FALSE, (GLfloat *)modelview);
- glUniformMatrix4fv(h_ProjectionMatrix, 1, GL_FALSE, (GLfloat *)projection);
-
- // Set point size
- GLint h_PointSize = glGetUniformLocation(m_programSprites, "size");
- glUniform1f(h_PointSize, m_spriteSize);
-
- // Set base and secondary colors
- GLint h_BaseColor = glGetUniformLocation(m_programSprites, "baseColor");
- GLint h_SecondaryColor =
- glGetUniformLocation(m_programSprites, "secondaryColor");
- glUniform4f(h_BaseColor, 1.0, 1.0, 1.0, 1.0);
- glUniform4f(h_SecondaryColor, m_baseColor[0], m_baseColor[1], m_baseColor[2],
- m_baseColor[3]);
-
- // Set position coords
- GLint h_position = glGetAttribLocation(m_programSprites, "a_position");
- glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
- glEnableVertexAttribArray(h_position);
- glVertexAttribPointer(h_position, 4, GL_FLOAT, GL_FALSE, 0, 0);
-
- GLuint texLoc = glGetUniformLocation(m_programSprites, "splatTexture");
- glUniform1i(texLoc, 0);
- glActiveTexture(GL_TEXTURE0);
- glBindTexture(GL_TEXTURE_2D, m_texture);
-
- glDrawArrays(GL_POINTS, 0, m_numParticles);
-
- glDisableVertexAttribArray(h_position);
-
- glUseProgram(0);
-
- glDisable(GL_BLEND);
- glDepthMask(GL_TRUE);
-}
-
-const char vertexShader[] = {
- "attribute vec4 a_position;"
-
- "uniform mat4 projection;"
- "uniform mat4 modelview;"
- "uniform float size;"
-
- "void main()"
- "{"
- "float pointSize = 500.0 * size;"
- "vec4 vert = a_position;"
- "vert.w = 1.0;"
- "vec3 pos_eye = vec3(modelview * vert);"
- "gl_PointSize = max(1.0, pointSize / (1.0 - pos_eye.z));"
- "gl_Position = projection * modelview * a_position;"
- "}"};
-
-const char fragmentShader[] = {
- "uniform sampler2D splatTexture;"
- "uniform lowp vec4 baseColor;"
- "uniform lowp vec4 secondaryColor;"
-
- "void main()"
- "{"
- "lowp vec4 textureColor = (0.6 + 0.4 * baseColor) * "
- "texture2D(splatTexture, gl_PointCoord);"
- "gl_FragColor = textureColor * secondaryColor;"
- "}"};
-
-// Checks if the shader is compiled.
-static int CheckCompiled(GLuint shader) {
- GLint isCompiled = 0;
- glGetShaderiv(shader, GL_COMPILE_STATUS, &isCompiled);
-
- if (!isCompiled) {
- GLint infoLen = 0;
- glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLen);
-
- if (infoLen > 1) {
- char *infoLog = (char *)malloc(sizeof(char) * infoLen);
-
- glGetShaderInfoLog(shader, infoLen, NULL, infoLog);
- printf("Error compiling program:\n%s\n", infoLog);
- free(infoLog);
- }
-
- return 0;
- }
-
- return 1;
-}
-
-void ParticleRenderer::_initGL() {
- m_vertexShader = glCreateShader(GL_VERTEX_SHADER);
- m_fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
-
- const char *v = vertexShader;
- const char *f = fragmentShader;
- glShaderSource(m_vertexShader, 1, &v, 0);
- glShaderSource(m_fragmentShader, 1, &f, 0);
-
- checkGLErrors("Shader Source");
-
- glCompileShader(m_vertexShader);
- glCompileShader(m_fragmentShader);
-
- if (!CheckCompiled(m_vertexShader) || !CheckCompiled(m_fragmentShader)) {
- printf("A shader failed to compile.\n");
- exit(1);
- }
-
- m_programSprites = glCreateProgram();
-
- checkGLErrors("create program");
-
- glAttachShader(m_programSprites, m_vertexShader);
- glAttachShader(m_programSprites, m_fragmentShader);
-
- checkGLErrors("attaching shaders");
-
- glLinkProgram(m_programSprites);
-
- checkGLErrors("linking program");
-
- EGLint linked;
- glGetProgramiv(m_programSprites, GL_LINK_STATUS, &linked);
- if (!linked) {
- printf("A shader failed to link.\n");
- exit(1);
- }
-
- _createTexture(32);
-
- glGenBuffers(1, (GLuint *)&m_vboColor);
- glBindBuffer(GL_ARRAY_BUFFER, m_vboColor);
- glBufferData(GL_ARRAY_BUFFER, m_numParticles * 4 * sizeof(float), 0,
- GL_STATIC_DRAW);
- glBindBuffer(GL_ARRAY_BUFFER, 0);
-}
-
-//------------------------------------------------------------------------------
-// Function : EvalHermite
-// Description :
-//------------------------------------------------------------------------------
-/**
- * EvalHermite(float pA, float pB, float vA, float vB, float u)
- * @brief Evaluates Hermite basis functions for the specified coefficients.
- */
-inline float evalHermite(float pA, float pB, float vA, float vB, float u) {
- float u2 = (u * u), u3 = u2 * u;
- float B0 = 2 * u3 - 3 * u2 + 1;
- float B1 = -2 * u3 + 3 * u2;
- float B2 = u3 - 2 * u2 + u;
- float B3 = u3 - u;
- return (B0 * pA + B1 * pB + B2 * vA + B3 * vB);
-}
-
-unsigned char *createGaussianMap(int N) {
- float *M = new float[2 * N * N];
- unsigned char *B = new unsigned char[4 * N * N];
- float X, Y, Y2, Dist;
- float Incr = 2.0f / N;
- int i = 0;
- int j = 0;
- Y = -1.0f;
-
- // float mmax = 0;
- for (int y = 0; y < N; y++, Y += Incr) {
- Y2 = Y * Y;
- X = -1.0f;
-
- for (int x = 0; x < N; x++, X += Incr, i += 2, j += 4) {
- Dist = (float)sqrtf(X * X + Y2);
-
- if (Dist > 1) Dist = 1;
-
- M[i + 1] = M[i] = evalHermite(1.0f, 0, 0, 0, Dist);
- B[j + 3] = B[j + 2] = B[j + 1] = B[j] = (unsigned char)(M[i] * 255);
- }
- }
-
- delete[] M;
- return (B);
-}
-
-void ParticleRenderer::_createTexture(int resolution) {
- unsigned char *data = createGaussianMap(resolution);
- glGenTextures(1, (GLuint *)&m_texture);
- glBindTexture(GL_TEXTURE_2D, m_texture);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
- GL_LINEAR); //_MIPMAP_LINEAR);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, resolution, resolution, 0, GL_RGBA,
- GL_UNSIGNED_BYTE, data);
-}
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.h
deleted file mode 100644
index e028c8e5..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __RENDER_PARTICLES__
-#define __RENDER_PARTICLES__
-
-#include
-#include
-#include
-
-#include
-
-typedef float matrix4[4][4];
-typedef float vector3[3];
-
-// check for OpenGL errors
-inline void checkGLErrors(const char *s) {
- EGLenum error;
-
- while ((error = glGetError()) != GL_NO_ERROR) {
- fprintf(stderr, "%s: error - %d\n", s, error);
- }
-}
-
-class ParticleRenderer {
- public:
- ParticleRenderer(unsigned int windowWidth = 720,
- unsigned int windowHeight = 480);
- ~ParticleRenderer();
-
- void setPositions(float *pos, int numParticles);
- void setPositions(double *pos, int numParticles);
- void setBaseColor(float color[4]);
- void setColors(float *color, int numParticles);
- void setPBO(unsigned int pbo, int numParticles, bool fp64);
-
- enum DisplayMode {
- PARTICLE_POINTS,
- PARTICLE_SPRITES,
- PARTICLE_SPRITES_COLOR,
- PARTICLE_NUM_MODES
- };
-
- void display();
-
- void setPointSize(float size) { m_pointSize = size; }
- void setSpriteSize(float size) { m_spriteSize = size; }
-
- void setCameraPos(vector3 camera_pos) {
- m_camera[0] = camera_pos[0];
- m_camera[1] = camera_pos[1];
- m_camera[2] = camera_pos[2];
- }
-
- void resetPBO();
-
- protected: // methods
- void _initGL();
- void _createTexture(int resolution);
-
- protected: // data
- float *m_pos;
- double *m_pos_fp64;
- int m_numParticles;
-
- float m_pointSize;
- float m_spriteSize;
- vector3 m_camera;
-
- unsigned int m_vertexShader;
- unsigned int m_vertexShaderPoints;
- unsigned int m_fragmentShader;
- unsigned int m_programPoints;
- unsigned int m_programSprites;
- unsigned int m_texture;
- unsigned int m_pbo;
- unsigned int m_vboColor;
- unsigned int m_windowWidth;
- unsigned int m_windowHeight;
-
- float m_baseColor[4];
-
- bool m_bFp64Positions;
-};
-
-#endif //__ RENDER_PARTICLES__
diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/tipsy.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/tipsy.h
deleted file mode 100644
index 99692a9c..00000000
--- a/Samples/8_Platform_Specific/Tegra/nbody_screen/tipsy.h
+++ /dev/null
@@ -1,172 +0,0 @@
-#ifndef __TIPSY_H__
-#define __TIPSY_H__
-
-#include
-
-using namespace std;
-
-#define MAXDIM 3
-
-typedef float Real;
-
-struct gas_particle
-{
- Real mass;
- Real pos[MAXDIM];
- Real vel[MAXDIM];
- Real rho;
- Real temp;
- Real hsmooth;
- Real metals ;
- Real phi ;
-} ;
-
-//struct gas_particle *gas_particles;
-
-struct dark_particle
-{
- Real mass;
- Real pos[MAXDIM];
- Real vel[MAXDIM];
- Real eps;
- int phi ;
-} ;
-
-//struct dark_particle *dark_particles;
-
-struct star_particle
-{
- Real mass;
- Real pos[MAXDIM];
- Real vel[MAXDIM];
- Real metals ;
- Real tform ;
- Real eps;
- int phi ;
-} ;
-
-//struct star_particle *star_particles;
-
-struct dump
-{
- double time ;
- int nbodies ;
- int ndim ;
- int nsph ;
- int ndark ;
- int nstar ;
-} ;
-
-typedef struct dump header ;
-
-template
-void read_tipsy_file(vector &bodyPositions,
- vector &bodyVelocities,
- vector &bodiesIDs,
- const std::string &fileName,
- int &NTotal,
- int &NFirst,
- int &NSecond,
- int &NThird)
-{
- /*
- Read in our custom version of the tipsy file format written by
- Jeroen Bedorf. Most important change is that we store particle id on the
- location where previously the potential was stored.
- */
-
- char fullFileName[256];
- sprintf(fullFileName, "%s", fileName.c_str());
-
- cout << "Trying to read file: " << fullFileName << endl;
-
- ifstream inputFile(fullFileName, ios::in | ios::binary);
-
- if (!inputFile.is_open())
- {
- cout << "Can't open input file \n";
- exit(EXIT_SUCCESS);
- }
-
- dump h;
- inputFile.read((char *)&h, sizeof(h));
-
- int idummy;
- real4 positions;
- real4 velocity;
-
-
- //Read tipsy header
- NTotal = h.nbodies;
- NFirst = h.ndark;
- NSecond = h.nstar;
- NThird = h.nsph;
-
- //Start reading
- int particleCount = 0;
-
- dark_particle d;
- star_particle s;
-
- for (int i=0; i < NTotal; i++)
- {
- if (i < NFirst)
- {
- inputFile.read((char *)&d, sizeof(d));
- velocity.w = d.eps;
- positions.w = d.mass;
- positions.x = d.pos[0];
- positions.y = d.pos[1];
- positions.z = d.pos[2];
- velocity.x = d.vel[0];
- velocity.y = d.vel[1];
- velocity.z = d.vel[2];
- idummy = d.phi;
- }
- else
- {
- inputFile.read((char *)&s, sizeof(s));
- velocity.w = s.eps;
- positions.w = s.mass;
- positions.x = s.pos[0];
- positions.y = s.pos[1];
- positions.z = s.pos[2];
- velocity.x = s.vel[0];
- velocity.y = s.vel[1];
- velocity.z = s.vel[2];
- idummy = s.phi;
- }
-
- bodyPositions.push_back(positions);
- bodyVelocities.push_back(velocity);
- bodiesIDs.push_back(idummy);
-
- particleCount++;
- }//end for
-
- // round up to a multiple of 256 bodies since our kernel only supports that...
- int newTotal = NTotal;
-
- if (NTotal % 256)
- {
- newTotal = ((NTotal / 256) + 1) * 256;
- }
-
- for (int i = NTotal; i < newTotal; i++)
- {
- positions.w = positions.x = positions.y = positions.z = 0;
- velocity.x = velocity.y = velocity.z = 0;
- bodyPositions.push_back(positions);
- bodyVelocities.push_back(velocity);
- bodiesIDs.push_back(i);
- NFirst++;
- }
-
- NTotal = newTotal;
-
- inputFile.close();
-
- cerr << "Read " << NTotal << " bodies" << endl;
-}
-
-#endif //__TIPSY_H__
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/c_cpp_properties.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/c_cpp_properties.json
deleted file mode 100644
index f0066b0f..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/c_cpp_properties.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
- "configurations": [
- {
- "name": "Linux",
- "includePath": [
- "${workspaceFolder}/**",
- "${workspaceFolder}/../../../Common"
- ],
- "defines": [],
- "compilerPath": "/usr/local/cuda/bin/nvcc",
- "cStandard": "gnu17",
- "cppStandard": "gnu++14",
- "intelliSenseMode": "linux-gcc-x64",
- "configurationProvider": "ms-vscode.makefile-tools"
- }
- ],
- "version": 4
-}
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/extensions.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/extensions.json
deleted file mode 100644
index c7eb54dc..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/extensions.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
- "recommendations": [
- "nvidia.nsight-vscode-edition",
- "ms-vscode.cpptools",
- "ms-vscode.makefile-tools"
- ]
-}
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/launch.json
deleted file mode 100644
index 117dccd9..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/launch.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
- "configurations": [
- {
- "name": "CUDA C++: Launch",
- "type": "cuda-gdb",
- "request": "launch",
- "program": "${workspaceFolder}/simpleGLES_screen"
- }
- ]
-}
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/tasks.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/tasks.json
deleted file mode 100644
index 4509aeb1..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/tasks.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
- "version": "2.0.0",
- "tasks": [
- {
- "label": "sample",
- "type": "shell",
- "command": "make dbg=1",
- "problemMatcher": ["$nvcc"],
- "group": {
- "kind": "build",
- "isDefault": true
- }
- }
- ]
-}
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/CMakeLists.txt
deleted file mode 100644
index 6036abff..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/CMakeLists.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-cmake_minimum_required(VERSION 3.20)
-
-list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules")
-
-project(simpleGLES_screen LANGUAGES C CXX CUDA)
-
-find_package(CUDAToolkit REQUIRED)
-
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-
-set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
-if(CMAKE_BUILD_TYPE STREQUAL "Debug")
- # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
-endif()
-
-# Include directories and libraries
-include_directories(../../../Common)
-
-# Source file
-# Add target for simpleGLES_screen
-add_executable(simpleGLES_screen simpleGLES_screen.cu)
-
-target_compile_options(simpleGLES_screen PRIVATE $<$:--extended-lambda>)
-
-target_compile_features(simpleGLES_screen PRIVATE cxx_std_17 cuda_std_17)
-
-set_target_properties(simpleGLES_screen PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/Makefile b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/Makefile
deleted file mode 100644
index f2672bd8..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/Makefile
+++ /dev/null
@@ -1,399 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
- $(info WARNING - x86_64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=x86_64 instead)
- TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
- $(info WARNING - ARMv7 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=armv7l instead)
- TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
- $(info WARNING - aarch64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=aarch64 instead)
- TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
- $(info WARNING - ppc64le variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=ppc64le instead)
- TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
- $(info WARNING - GCC variable has been deprecated)
- $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
- HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
- $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
- ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
- TARGET_SIZE := 64
- else ifneq (,$(filter $(TARGET_ARCH),armv7l))
- TARGET_SIZE := 32
- endif
- else
- TARGET_SIZE := $(shell getconf LONG_BIT)
- endif
-else
- $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
- ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
- HOST_ARCH := sbsa
- TARGET_ARCH := sbsa
- endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
- $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
- endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
- TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
- $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
- ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
- HOST_COMPILER ?= clang++
- endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- ifeq ($(TARGET_OS),linux)
- HOST_COMPILER ?= arm-linux-gnueabihf-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
- else ifeq ($(TARGET_OS),android)
- HOST_COMPILER ?= arm-linux-androideabi-g++
- endif
- else ifeq ($(TARGET_ARCH),aarch64)
- ifeq ($(TARGET_OS), linux)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
- else ifeq ($(TARGET_OS), android)
- HOST_COMPILER ?= aarch64-linux-android-clang++
- endif
- else ifeq ($(TARGET_ARCH),sbsa)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_ARCH),ppc64le)
- HOST_COMPILER ?= powerpc64le-linux-gnu-g++
- endif
-endif
-HOST_COMPILER ?= g++
-NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS := -m${TARGET_SIZE}
-CCFLAGS :=
-LDFLAGS :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
- ifneq ($(filter /%,$(HOST_COMPILER)),)
- ifneq ($(findstring gcc,$(HOST_COMPILER)),)
- ifneq ($(GCC_PATH),$(HOST_COMPILER))
- LDFLAGS += -lstdc++
- endif
- endif
- endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
- LDFLAGS += -rpath $(CUDA_PATH)/lib
- CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
- CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
- LDFLAGS += -pie
- CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- NVCCFLAGS += -D_QNX_SOURCE
- NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
- CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
- LDFLAGS += -lsocket
- LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
- CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
- ifdef TARGET_OVERRIDE
- LDFLAGS += -lslog2
- endif
-
- ifneq ($(TARGET_FS),)
- LDFLAGS += -L$(TARGET_FS)/usr/lib
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
- LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
- CCFLAGS += -I$(TARGET_FS)/../include
- endif
- endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
- NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
- CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
- CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
- NVCCFLAGS += -g -G
- BUILD_TYPE := debug
-else
- BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on Linux x86_64
-ifeq ($(TARGET_OS),linux)
- ifeq ($(TARGET_ARCH),x86_64)
- $(info >>> WARNING - simpleGLES_screen is not supported on Linux x86_64 - waiving sample <<<)
- SAMPLE_ENABLED := 0
- endif
-endif
-
-# This sample is not supported on Mac OSX
-ifeq ($(TARGET_OS),darwin)
- $(info >>> WARNING - simpleGLES_screen is not supported on Mac OSX - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on ARMv7
-ifeq ($(TARGET_ARCH),armv7l)
- $(info >>> WARNING - simpleGLES_screen is not supported on ARMv7 - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on aarch64
-ifeq ($(TARGET_ARCH),aarch64)
- ifneq ($(TARGET_OS),qnx)
- $(info >>> WARNING - simpleGLES_screen is not supported on aarch64-$(TARGET_OS) - waiving sample <<<)
- SAMPLE_ENABLED := 0
- endif
-endif
-# This sample is not supported on sbsa
-ifeq ($(TARGET_ARCH),sbsa)
- $(info >>> WARNING - simpleGLES_screen is not supported on sbsa - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Makefile include to help find GLES Libraries
-include ./findgleslib.mk
-
-# OpenGLES specific libraries
-ifneq ($(TARGET_OS),darwin)
- LIBRARIES += $(GLESLINK) -lGLESv2 -lEGL -lscreen
-endif
-
-# Gencode arguments
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-SMS ?= 53 61 70 72 75 80 86 87 90
-else
-SMS ?= 50 52 60 61 70 75 80 86 89 90
-endif
-
-ifeq ($(SMS),)
-$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
-SAMPLE_ENABLED := 0
-endif
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += -DUSE_CUDAINTEROP -DGRAPHICS_SETUP_EGL -DUSE_GLES -DWIN_INTERFACE_CUSTOM --threads 0 --std=c++11
-
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
-################################################################################
-
-# Target rules
-all: build
-
-build: simpleGLES_screen
-
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
- @echo "Sample will be waived due to the above missing dependencies"
-else
- @echo "Sample is ready - all dependencies have been met"
-endif
-
-simpleGLES_screen.o:simpleGLES_screen.cu
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-simpleGLES_screen: simpleGLES_screen.o
- $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
- $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
- $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
- $(EXEC) ./simpleGLES_screen
-
-testrun: build
- $(EXEC) ./simpleGLES_screen -file=ref_simpleGLES_screen.bin
-
-clean:
- rm -f simpleGLES_screen simpleGLES_screen.o
- rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/simpleGLES_screen
-
-clobber: clean
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/NsightEclipse.xml b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/NsightEclipse.xml
deleted file mode 100644
index 325ca176..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/NsightEclipse.xml
+++ /dev/null
@@ -1,83 +0,0 @@
-
-
-
- simpleGLES_screen
-
- -DUSE_CUDAINTEROP
- -DGRAPHICS_SETUP_EGL
- -DUSE_GLES
- -DWIN_INTERFACE_CUSTOM
-
-
- cudaGraphicsUnmapResources
- cudaMemcpy
- cudaFree
- cudaGraphicsResourceGetMappedPointer
- cudaGraphicsMapResources
- cudaDeviceSynchronize
- cudaGraphicsUnregisterResource
- cudaMalloc
- cudaGraphicsGLRegisterBuffer
-
-
- whole
-
- data\ref_simpleGLES_screen.bin
-
-
- ./
- ../
- ../../../Common
-
-
- Graphics Interop
- Vertex Buffers
- 3D Graphics
-
-
- OpenGL ES
-
-
-
-
-
- true
- simpleGLES_screen.cu
-
- -file=ref_simpleGLES_screen.bin
-
-
- screen
- GLES
-
-
- 1:CUDA Basic Topics
- 2:Graphics Interop
-
- sm50
- sm52
- sm53
- sm60
- sm61
- sm70
- sm72
- sm75
- sm80
- sm86
- sm87
- sm89
- sm90
-
- graphics_interface.c
-
-
-
- qnx
-
-
-
- all
-
- Simple OpenGLES on Screen
- exe
-
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/README.md b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/README.md
deleted file mode 100644
index b1f72fa1..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/README.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# simpleGLES_screen - Simple OpenGLES on Screen
-
-## Description
-
-Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.
-
-## Key Concepts
-
-Graphics Interop, Vertex Buffers, 3D Graphics
-
-## Supported SM Architectures
-
-[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
-
-## Supported OSes
-
-QNX
-
-## Supported CPU Architecture
-
-aarch64
-
-## CUDA APIs involved
-
-### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
-
-## Dependencies needed to build/run
-[screen](../../../README.md#screen), [GLES](../../../README.md#gles)
-
-## Prerequisites
-
-Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
-Make sure the dependencies mentioned in [Dependencies]() section above are installed.
-
-## References (for more details)
-
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/data/ref_simpleGLES_screen.bin b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/data/ref_simpleGLES_screen.bin
deleted file mode 100644
index 3a307293..00000000
Binary files a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/data/ref_simpleGLES_screen.bin and /dev/null differ
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/findgleslib.mk b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/findgleslib.mk
deleted file mode 100644
index 6da2f078..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/findgleslib.mk
+++ /dev/null
@@ -1,149 +0,0 @@
-################################################################################
-#
-# Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
-#
-# NOTICE TO USER:
-#
-# This source code is subject to NVIDIA ownership rights under U.S. and
-# international Copyright laws.
-#
-# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
-# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
-# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
-# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
-# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
-# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-# OR PERFORMANCE OF THIS SOURCE CODE.
-#
-# U.S. Government End Users. This source code is a "commercial item" as
-# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
-# "commercial computer software" and "commercial computer software
-# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
-# and is provided to the U.S. Government only as a commercial end item.
-# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
-# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
-# source code with only those rights set forth herein.
-#
-################################################################################
-#
-# findgleslib.mk is used to find the necessary GLES Libraries for specific distributions
-# this is supported on Linux
-#
-################################################################################
-
-# Determine OS platform and unix distribution
-ifeq ("$(TARGET_OS)","linux")
- # first search lsb_release
- DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
- ifeq ("$(DISTRO)","")
- # second search and parse /etc/issue
- DISTRO := $(shell awk '{print $$1}' /etc/issue | tr -d "[:space:]" | sed -e "/^$$/d" | tr "[:upper:]" "[:lower:]")
- # ensure data from /etc/issue is valid
- ifneq (,$(filter-out $(DISTRO),ubuntu fedora red rhel centos suse))
- DISTRO :=
- endif
- ifeq ("$(DISTRO)","")
- # third, we can search in /etc/os-release or /etc/{distro}-release
- DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
- endif
- endif
-endif
-
-ifeq ("$(TARGET_OS)","linux")
- # $(info) >> findgllib.mk -> LINUX path <<<)
- # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
- UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?)
- FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?)
- RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
- CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?)
- SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
- KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?)
- ifeq ("$(UBUNTU)","0")
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- GLPATH := /usr/arm-linux-gnueabihf/lib
- GLLINK := -L/usr/arm-linux-gnueabihf/lib
- ifneq ($(TARGET_FS),)
- GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64)
- GLPATH := /usr/aarch64-linux-gnu/lib
- GLLINK := -L/usr/aarch64-linux-gnu/lib
- ifneq ($(TARGET_FS),)
- GLPATH += $(TARGET_FS)/usr/lib
- GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu
- GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- endif
- else
- UBUNTU_PKG_NAME = $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1)
- ifneq ("$(UBUNTU_PKG_NAME)","")
- GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME)
- GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME)
- endif
-
- DFLT_PATH ?= /usr/lib
- endif
- endif
-
- ifeq ("$(SUSE)","0")
- GLPATH ?= /usr/X11R6/lib64
- GLLINK ?= -L/usr/X11R6/lib64
- DFLT_PATH ?= /usr/lib64
- else
- GLPATH ?= /usr/lib64/nvidia
- GLLINK ?= -L/usr/lib64/nvidia
- DFLT_PATH ?= /usr/lib64
- endif
-
- # find libGL, libGLU, libXi,
- EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null)
- GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
- X11LIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libX11.so -print 2>/dev/null)
-
- ifeq ("$(EGLLIB)","")
- $(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(GLESLIB)","")
- $(info >>> WARNING - libGLES.so not found, please install libGLES.so <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(X11LIB)","")
- $(info >>> WARNING - libX11.so not found, please install libX11.so <<<)
- SAMPLE_ENABLED := 0
- endif
-
- HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include
- else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux)
- HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include
- endif
-
- EGLHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null)
- EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null)
- GL31HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name gl31.h -print 2>/dev/null)
- X11HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name Xlib.h -print 2>/dev/null)
-
- ifeq ("$(EGLHEADER)","")
- $(info >>> WARNING - egl.h not found, please install egl.h <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(EGLEXTHEADER)","")
- $(info >>> WARNING - eglext.h not found, please install eglext.h <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(GL31HEADER)","")
- $(info >>> WARNING - gl31.h not found, please install gl31.h <<<)
- SAMPLE_ENABLED := 0
- endif
- ifeq ("$(X11HEADER)","")
- $(info >>> WARNING - Xlib.h not found, refer to CUDA Samples release notes for how to find and install them. <<<)
- SAMPLE_ENABLED := 0
- endif
-else
-endif
-
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/graphics_interface.c b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/graphics_interface.c
deleted file mode 100644
index 530375c7..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/graphics_interface.c
+++ /dev/null
@@ -1,379 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include
-#include
-#include
-#include
-
-screen_window_t screen_window;
-screen_context_t screen_context;
-screen_event_t screen_ev;
-
-EGLDisplay eglDisplay = EGL_NO_DISPLAY;
-EGLSurface eglSurface = EGL_NO_SURFACE;
-EGLContext eglContext = EGL_NO_CONTEXT;
-
-void error_exit(const char *format, ...) {
- va_list args;
- va_start(args, format);
- vfprintf(stderr, format, args);
- va_end(args);
- exit(1);
-}
-
-enum { NvGlDemoKeyCode_Escape = 27 };
-
-typedef void (*GlCloseCB)(void);
-typedef void (*GlKeyCB)(char key, int state);
-GlCloseCB closeCB = NULL;
-GlKeyCB keyCB = NULL;
-
-void CHECK_GLERROR() {
- GLenum err = glGetError();
-
- if (err != GL_NO_ERROR) {
- fprintf(stderr, "[%s line %d] OpenGL Error: 0x%x ", __FILE__, __LINE__,
- err);
-
- switch (err) {
- case GL_INVALID_ENUM:
- fprintf(stderr, "(GL_INVALID_ENUM)\n");
- break;
- case GL_INVALID_VALUE:
- fprintf(stderr, "(GL_INVALID_VALUE)\n");
- break;
- case GL_INVALID_OPERATION:
- fprintf(stderr, "(GL_INVALID_OPERATION)\n");
- break;
- case GL_OUT_OF_MEMORY:
- fprintf(stderr, "(GL_OUT_OF_MEMORY)\n");
- break;
- case GL_INVALID_FRAMEBUFFER_OPERATION:
- fprintf(stderr, "(GL_INVALID_FRAMEBUFFER_OPERATION)\n");
- break;
- default:
- break;
- }
-
- fflush(stderr);
- }
-}
-
-static void UpdateEventMask(void) {
- static int rc = 1;
-
- if (rc) {
- rc = screen_create_event(&screen_ev);
- }
-}
-
-void SetCloseCB(GlCloseCB cb) {
- // Call the eglQnxScreenConsumer module if option is enabled
- closeCB = cb;
- UpdateEventMask();
-}
-
-void SetKeyCB(GlKeyCB cb) {
- keyCB = cb;
- UpdateEventMask();
-}
-
-// Add keys here, that are used in demo apps.
-static unsigned char GetKeyPress(int *screenKey) {
- unsigned char key = '\0';
-
- switch (*screenKey) {
- case KEYCODE_ESCAPE:
- key = NvGlDemoKeyCode_Escape;
- break;
- default:
- /* For "normal" keys, Screen KEYCODE is just ASCII. */
- if (*screenKey <= 127) {
- key = *screenKey;
- }
- break;
- }
-
- return key;
-}
-
-void CheckEvents(void) {
- static int vis = 1, val = 1;
- int rc;
-
- /**
- ** We start the loop by processing any events that might be in our
- ** queue. The only event that is of interest to us are the resize
- ** and close events. The timeout variable is set to 0 (no wait) or
- ** forever depending if the window is visible or invisible.
- **/
-
- while (!screen_get_event(screen_context, screen_ev, vis ? 0ull : ~0ull)) {
- // Get QNX CAR 2.1 event property
- rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_TYPE, &val);
- if (rc || val == SCREEN_EVENT_NONE) {
- break;
- }
-
- switch (val) {
- case SCREEN_EVENT_CLOSE:
- /**
- ** All we have to do when we receive the close event is
- ** exit the application loop.
- **/
- if (closeCB) {
- closeCB();
- }
- break;
-
- case SCREEN_EVENT_KEYBOARD:
- rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_FLAGS,
- &val);
- if (rc || val == SCREEN_EVENT_NONE) {
- break;
- }
- if (val & KEY_DOWN) {
- rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_SYM,
- &val);
- if (rc || val == SCREEN_EVENT_NONE) {
- break;
- }
- unsigned char key;
- key = GetKeyPress(&val);
- if (key != '\0') {
- keyCB(key, 1);
- }
- }
- break;
-
- default:
- break;
- }
- }
-}
-
-int graphics_setup_window(int xpos, int ypos, int width, int height,
- const char *windowname, int reqdispno) {
- EGLint configAttrs[] = {
- EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8,
- EGL_BLUE_SIZE, 8, EGL_ALPHA_SIZE, 8,
- EGL_DEPTH_SIZE, 16, EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
- EGL_NONE};
-
- EGLint contextAttrs[] = {EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE};
-
- EGLint windowAttrs[] = {EGL_NONE};
- EGLConfig *configList = NULL;
- EGLint configCount;
-
- int displayCount = 0;
- int dispno;
-
- screen_context = 0;
-
- screen_display_t *screenDisplayHandle = NULL;
-
- if (screen_create_context(&screen_context, 0)) {
- error_exit("Error creating screen context.\n");
- }
-
- eglDisplay = eglGetDisplay(0);
-
- if (eglDisplay == EGL_NO_DISPLAY) {
- error_exit("EGL failed to obtain display\n");
- }
-
- if (!eglInitialize(eglDisplay, 0, 0)) {
- error_exit("EGL failed to initialize\n");
- }
-
- if (!eglChooseConfig(eglDisplay, configAttrs, NULL, 0, &configCount) ||
- !configCount) {
- error_exit("EGL failed to return any matching configurations\n");
- }
-
- configList = (EGLConfig *)malloc(configCount * sizeof(EGLConfig));
-
- if (!eglChooseConfig(eglDisplay, configAttrs, configList, configCount,
- &configCount) ||
- !configCount) {
- error_exit("EGL failed to populate configuration list\n");
- }
-
- screen_window = 0;
- if (screen_create_window(&screen_window, screen_context)) {
- error_exit("Error creating screen window.\n");
- }
-
- // query the total no of display avaibale from QNX CAR2 screen
- if (screen_get_context_property_iv(
- screen_context, SCREEN_PROPERTY_DISPLAY_COUNT, &displayCount)) {
- error_exit("Error getting context property\n");
- }
-
- screenDisplayHandle =
- (screen_display_t *)malloc(displayCount * sizeof(screen_display_t));
- if (!screenDisplayHandle) {
- error_exit("Error allocating screen memory handle is getting failed\n");
- }
-
- // query the display handle from QNX CAR2 screen
- if (screen_get_context_property_pv(screen_context, SCREEN_PROPERTY_DISPLAYS,
- (void **)screenDisplayHandle)) {
- error_exit("Error getting display handle\n");
- }
-
- for (dispno = 0; dispno < displayCount; dispno++) {
- int active = 0;
- // Query the connected status from QNX CAR2 screen
- screen_get_display_property_iv(screenDisplayHandle[dispno],
- SCREEN_PROPERTY_ATTACHED, &active);
- if (active) {
- if (reqdispno == dispno) {
- // Map the window buffer to user requested display port
- screen_set_window_property_pv(screen_window, SCREEN_PROPERTY_DISPLAY,
- (void **)&screenDisplayHandle[reqdispno]);
- break;
- }
- }
- }
-
- if (dispno == displayCount) {
- error_exit("Failed to set the requested display\n");
- }
-
- free(screenDisplayHandle);
-
- int format = SCREEN_FORMAT_RGBA8888;
- if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_FORMAT,
- &format)) {
- error_exit("Error setting SCREEN_PROPERTY_FORMAT\n");
- }
-
- int usage = SCREEN_USAGE_OPENGL_ES2;
- if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_USAGE,
- &usage)) {
- error_exit("Error setting SCREEN_PROPERTY_USAGE\n");
- }
-
- EGLint interval = 1;
- if (screen_set_window_property_iv(screen_window,
- SCREEN_PROPERTY_SWAP_INTERVAL, &interval)) {
- error_exit("Error setting SCREEN_PROPERTY_SWAP_INTERVAL\n");
- }
-
- int windowSize[2];
- windowSize[0] = width;
- windowSize[1] = height;
- if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_SIZE,
- windowSize)) {
- error_exit("Error setting SCREEN_PROPERTY_SIZE\n");
- }
-
- int windowOffset[2];
- windowOffset[0] = xpos;
- windowOffset[1] = ypos;
- if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_POSITION,
- windowOffset)) {
- error_exit("Error setting SCREEN_PROPERTY_POSITION\n");
- }
-
- if (screen_create_window_buffers(screen_window, 2)) {
- error_exit("Error creating two window buffers.\n");
- }
-
- eglSurface =
- eglCreateWindowSurface(eglDisplay, configList[0],
- (EGLNativeWindowType)screen_window, windowAttrs);
- if (!eglSurface) {
- error_exit("EGL couldn't create window\n");
- }
-
- eglBindAPI(EGL_OPENGL_ES_API);
-
- eglContext = eglCreateContext(eglDisplay, configList[0], NULL, contextAttrs);
- if (!eglContext) {
- error_exit("EGL couldn't create context\n");
- }
-
- if (!eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext)) {
- error_exit("EGL couldn't make context/surface current\n");
- }
-
- EGLint Context_RendererType;
- eglQueryContext(eglDisplay, eglContext, EGL_CONTEXT_CLIENT_TYPE,
- &Context_RendererType);
-
- switch (Context_RendererType) {
- case EGL_OPENGL_API:
- printf("Using OpenGL API\n");
- break;
- case EGL_OPENGL_ES_API:
- printf("Using OpenGL ES API\n");
- break;
- case EGL_OPENVG_API:
- error_exit("Context Query Returned OpenVG. This is Unsupported\n");
- default:
- error_exit("Unknown Context Type. %04X\n", Context_RendererType);
- }
-
- return 1;
-}
-
-void graphics_set_windowtitle(const char *windowname) {
- // Do nothing on screen
-}
-
-void graphics_swap_buffers() { eglSwapBuffers(eglDisplay, eglSurface); }
-
-void graphics_close_window() {
- if (eglDisplay != EGL_NO_DISPLAY) {
- eglMakeCurrent(eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
-
- if (eglContext != EGL_NO_CONTEXT) {
- eglDestroyContext(eglDisplay, eglContext);
- }
-
- if (eglSurface != EGL_NO_SURFACE) {
- eglDestroySurface(eglDisplay, eglSurface);
- }
-
- eglTerminate(eglDisplay);
- }
-
- if (screen_window) {
- screen_destroy_window(screen_window);
- screen_window = NULL;
- }
- if (screen_context) {
- screen_destroy_context(screen_context);
- }
- if (screen_ev) {
- screen_destroy_event(screen_ev);
- }
-}
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.frag.glsl b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.frag.glsl
deleted file mode 100644
index e5864bee..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.frag.glsl
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-void main()
-{
- gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0);
-}
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.vert.glsl b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.vert.glsl
deleted file mode 100644
index 1de61b17..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.vert.glsl
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-attribute vec4 position;
-
-void main()
-{
- gl_Position = position;
-}
diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/simpleGLES_screen.cu b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/simpleGLES_screen.cu
deleted file mode 100644
index c0dd9053..00000000
--- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/simpleGLES_screen.cu
+++ /dev/null
@@ -1,601 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- This example demonstrates how to use the CUDA C bindings to OpenGL ES to
- dynamically modify a vertex buffer using a CUDA C kernel.
-
- The steps are:
- 1. Create an empty vertex buffer object (VBO)
- 2. Register the VBO with CUDA C
- 3. Map the VBO for writing from CUDA C
- 4. Run CUDA C kernel to modify the vertex positions
- 5. Unmap the VBO
- 6. Render the results using OpenGL ES
-
- Host code
- */
-
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-#include "graphics_interface.c"
-
-// includes, cuda
-#include
-#include
-
-// Utilities and timing functions
-#include // includes cuda.h and cuda_runtime_api.h
-
-// CUDA helper functions
-#include // helper functions for CUDA error check
-
-#include
-
-#define MAX_EPSILON_ERROR 0.0f
-#define THRESHOLD 0.0f
-#define REFRESH_DELAY 1 // ms
-
-#define GUI_IDLE 0x100
-#define GUI_ROTATE 0x101
-#define GUI_TRANSLATE 0x102
-
-int gui_mode;
-
-////////////////////////////////////////////////////////////////////////////////
-// Default configuration
-unsigned int window_width = 512;
-unsigned int window_height = 512;
-unsigned int dispno = 0;
-
-// constants
-const unsigned int mesh_width = 256;
-const unsigned int mesh_height = 256;
-
-// OpenGL ES variables and interop with CUDA C
-GLuint mesh_vao, mesh_vbo;
-struct cudaGraphicsResource *cuda_vbo_resource;
-void *d_vbo_buffer = NULL;
-
-float g_fAnim = 0.0;
-
-// UI / mouse controls
-int mouse_old_x, mouse_old_y;
-int mouse_buttons = 0;
-float rotate_x = 0.0, rotate_y = 0.0;
-float translate_z = -3.0;
-
-StopWatchInterface *timer = NULL;
-
-// Frame statistics
-int frame;
-int fpsCount = 0; // FPS count for averaging
-int fpsLimit = 1; // FPS limit for sampling
-int g_Index = 0;
-float avgFPS = 0.0f;
-unsigned int frameCount = 0;
-unsigned int g_TotalErrors = 0;
-
-// The default number of seconds after which the test will end.
-#define TIME_LIMIT 10.0 // 10 secs
-
-// Flag indicating it is time to shut down
-static GLboolean shutdown = GL_FALSE;
-
-// Callback to close window
-static void closeCB_app(void) { shutdown = GL_TRUE; }
-
-// Callback to handle key presses
-static void keyCB_app(char key, int state) {
- // Ignoring releases
- if (!state) return;
-
- if ((key == 'q') || (key == 'Q') || (key == NvGlDemoKeyCode_Escape))
- shutdown = GL_TRUE;
-}
-
-// Auto-Verification Code
-bool g_bQAReadback = false;
-
-int *pArgc = NULL;
-char **pArgv = NULL;
-
-#define MAX(a, b) ((a > b) ? a : b)
-
-////////////////////////////////////////////////////////////////////////////////
-// declaration, forward
-
-// CUDA functionality
-void runCuda(struct cudaGraphicsResource **vbo_resource);
-void runAutoTest(int devID, char **argv, char *ref_file);
-void checkResultCuda(int argc, char **argv, const GLuint &vbo);
-
-const char *sSDKsample = "simpleGLES on Screen (VBO)";
-
-void computeFPS() {
- frameCount++;
- fpsCount++;
-
- if (fpsCount == fpsLimit) {
- avgFPS = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
- fpsCount = 0;
- fpsLimit = (int)MAX(avgFPS, 1.f);
-
- sdkResetTimer(&timer);
- }
-
- char fps[256];
- sprintf(fps, "Cuda/OpenGL ES Interop (VBO): %3.1f fps (Max 1000 fps)",
- avgFPS);
- graphics_set_windowtitle(fps);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-//! Simple kernel to modify vertex positions in sine wave pattern
-//! @param data data in global memory
-///////////////////////////////////////////////////////////////////////////////
-__global__ void simple_vbo_kernel(float4 *pos, unsigned int width,
- unsigned int height, float time) {
- unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
- unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
-
- // calculate uv coordinates
- float u = x / (float)width;
- float v = y / (float)height;
- u = u * 2.0f - 1.0f;
- v = v * 2.0f - 1.0f;
-
- // calculate simple sine wave pattern
- float freq = 4.0f;
- float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;
-
- // write output vertex
- pos[y * width + x] = make_float4(u, w, v, 1.0f);
-}
-
-void launch_kernel(float4 *pos, unsigned int mesh_width,
- unsigned int mesh_height, float time) {
- // execute the kernel
- dim3 block(8, 8, 1);
- dim3 grid(mesh_width / block.x, mesh_height / block.y, 1);
- simple_vbo_kernel<<>>(pos, mesh_width, mesh_height, time);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Run the Cuda part of the computation
-////////////////////////////////////////////////////////////////////////////////
-void runCuda(struct cudaGraphicsResource **vbo_resource) {
- // map OpenGL buffer object for writing from CUDA
- float4 *dptr;
- cudaGraphicsMapResources(1, vbo_resource, 0);
- size_t num_bytes;
- cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes,
- *vbo_resource);
-
- launch_kernel(dptr, mesh_width, mesh_height, g_fAnim);
-
- // unmap buffer object
- cudaGraphicsUnmapResources(1, vbo_resource, 0);
-}
-
-#ifndef FOPEN
-#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
-#endif
-
-void sdkDumpBin2(void *data, unsigned int bytes, const char *filename) {
- printf("sdkDumpBin: <%s>\n", filename);
- FILE *fp;
- FOPEN(fp, filename, "wb");
- fwrite(data, bytes, 1, fp);
- fflush(fp);
- fclose(fp);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Run the Cuda part of the computation
-////////////////////////////////////////////////////////////////////////////////
-void runAutoTest(int devID, char **argv, char *ref_file) {
- char *reference_file = NULL;
- void *imageData = malloc(mesh_width * mesh_height * sizeof(float));
-
- // execute the kernel
- launch_kernel((float4 *)d_vbo_buffer, mesh_width, mesh_height, g_fAnim);
-
- cudaDeviceSynchronize();
- getLastCudaError("launch_kernel failed");
-
- cudaMemcpy(imageData, d_vbo_buffer, mesh_width * mesh_height * sizeof(float),
- cudaMemcpyDeviceToHost);
-
- sdkDumpBin2(imageData, mesh_width * mesh_height * sizeof(float),
- "simpleGLES_screen.bin");
- reference_file = sdkFindFilePath(ref_file, argv[0]);
-
- if (reference_file &&
- !sdkCompareBin2BinFloat("simpleGLES_screen.bin", reference_file,
- mesh_width * mesh_height * sizeof(float),
- MAX_EPSILON_ERROR, THRESHOLD, pArgv[0])) {
- g_TotalErrors++;
- }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Display callback
-////////////////////////////////////////////////////////////////////////////////
-void display_thisframe(float time_delta) {
- sdkStartTimer(&timer);
-
- // run CUDA kernel to generate vertex positions
- runCuda(&cuda_vbo_resource);
-
- glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
-
- glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
-
- glFinish();
-
- g_fAnim += time_delta;
-
- sdkStopTimer(&timer);
- computeFPS();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Check if the result is correct or write data to file for external
-//! regression testing
-////////////////////////////////////////////////////////////////////////////////
-void checkResultCuda(int argc, char **argv, const GLuint &vbo) {
- if (!d_vbo_buffer) {
- printf("%s: Mapping result buffer from OpenGL ES\n", __FUNCTION__);
-
- cudaGraphicsUnregisterResource(cuda_vbo_resource);
-
- // map buffer object
- glBindBuffer(GL_ARRAY_BUFFER, vbo);
- float *data = (float *)glMapBufferRange(
- GL_ARRAY_BUFFER, 0, mesh_width * mesh_height * 4 * sizeof(float),
- GL_READ_ONLY);
-
- // check result
- if (checkCmdLineFlag(argc, (const char **)argv, "regression")) {
- // write file for regression test
- sdkWriteFile("./data/regression.dat", data,
- mesh_width * mesh_height * 3, 0.0, false);
- }
-
- // unmap GL buffer object
- if (!glUnmapBuffer(GL_ARRAY_BUFFER)) {
- fprintf(stderr, "Unmap buffer failed.\n");
- fflush(stderr);
- }
-
- checkCudaErrors(cudaGraphicsGLRegisterBuffer(
- &cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard));
-
- CHECK_GLERROR();
- }
-}
-
-GLuint mesh_shader = 0;
-
-void readAndCompileShaderFromGLSLFile(GLuint new_shaderprogram,
- const char *filename, GLenum shaderType) {
- FILE *file = fopen(filename, "rb"); // open shader text file
- if (!file) {
- error_exit("Filename %s does not exist\n", filename);
- }
-
- // get the size of the file and read it
- fseek(file, 0, SEEK_END);
- GLint size = ftell(file);
- char *data = (char *)malloc(sizeof(char) * (size + 1));
- memset(data, 0, sizeof(char) * (size + 1));
- fseek(file, 0, SEEK_SET);
- size_t res = fread(data, 1, size, file);
- fclose(file);
-
- GLuint shader = glCreateShader(shaderType);
- glShaderSource(shader, 1, (const GLchar **)&data, &size);
- glCompileShader(shader);
-
- CHECK_GLERROR();
- GLint compile_success = 0;
- glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_success);
- CHECK_GLERROR();
-
- if (compile_success == GL_FALSE) {
- printf("Compilation of %s failed!\n Reason:\n", filename);
-
- GLint maxLength = 0;
- glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &maxLength);
-
- char errorLog[maxLength];
- glGetShaderInfoLog(shader, maxLength, &maxLength, &errorLog[0]);
-
- printf("%s", errorLog);
-
- glDeleteShader(shader);
- exit(1);
- }
-
- glAttachShader(new_shaderprogram, shader);
- glDeleteShader(shader);
-
- free(data);
-}
-
-GLuint ShaderCreate(const char *vshader_filename,
- const char *fshader_filename) {
- printf("Loading GLSL shaders %s %s\n", vshader_filename, fshader_filename);
-
- GLuint new_shaderprogram = glCreateProgram();
-
- CHECK_GLERROR();
- if (vshader_filename) {
- readAndCompileShaderFromGLSLFile(new_shaderprogram, vshader_filename,
- GL_VERTEX_SHADER);
- }
-
- CHECK_GLERROR();
- if (fshader_filename) {
- readAndCompileShaderFromGLSLFile(new_shaderprogram, fshader_filename,
- GL_FRAGMENT_SHADER);
- }
-
- CHECK_GLERROR();
-
- glLinkProgram(new_shaderprogram);
-
- CHECK_GLERROR();
- GLint link_success;
- glGetProgramiv(new_shaderprogram, GL_LINK_STATUS, &link_success);
-
- if (link_success == GL_FALSE) {
- printf("Linking of %s with %s failed!\n Reason:\n", vshader_filename,
- fshader_filename);
-
- GLint maxLength = 0;
- glGetShaderiv(new_shaderprogram, GL_INFO_LOG_LENGTH, &maxLength);
-
- char errorLog[maxLength];
- glGetShaderInfoLog(new_shaderprogram, maxLength, &maxLength, &errorLog[0]);
-
- printf("%s", errorLog);
-
- exit(EXIT_FAILURE);
- }
-
- return new_shaderprogram;
-}
-
-//===========================================================================
-// InitGraphicsState() - initialize OpenGL
-//===========================================================================
-static void InitGraphicsState(void) {
- char *GL_version = (char *)glGetString(GL_VERSION);
- char *GL_vendor = (char *)glGetString(GL_VENDOR);
- char *GL_renderer = (char *)glGetString(GL_RENDERER);
-
- printf("Version: %s\n", GL_version);
- printf("Vendor: %s\n", GL_vendor);
- printf("Renderer: %s\n", GL_renderer);
-
- // RENDERING SETUP (OpenGL ES or OpenGL Core Profile!)
- glGenVertexArrays(1, &mesh_vao); // Features' Vertex Array Object allocation
- glBindVertexArray(mesh_vao); // bind VAO
-
- // initialize buffer object
- glGenBuffers(1, &mesh_vbo);
- glBindBuffer(GL_ARRAY_BUFFER, mesh_vbo);
-
- unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
- glBufferData(GL_ARRAY_BUFFER, size, NULL, GL_DYNAMIC_DRAW);
- glVertexAttribPointer((GLuint)0, 4, GL_FLOAT, GL_FALSE, 0, 0);
- glEnableVertexAttribArray(0);
-
- checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, mesh_vbo,
- cudaGraphicsMapFlagsNone));
-
- // GLSL stuff
- char *vertex_shader_path = sdkFindFilePath("mesh.vert.glsl", pArgv[0]);
- char *fragment_shader_path = sdkFindFilePath("mesh.frag.glsl", pArgv[0]);
-
- if (vertex_shader_path == NULL || fragment_shader_path == NULL) {
- printf("Error finding shader file\n");
- exit(EXIT_FAILURE);
- }
-
- mesh_shader = ShaderCreate(vertex_shader_path, fragment_shader_path);
- CHECK_GLERROR();
-
- free(vertex_shader_path);
- free(fragment_shader_path);
-
- glUseProgram(mesh_shader);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Run a simple test for CUDA
-////////////////////////////////////////////////////////////////////////////////
-bool runTest(int argc, char **argv, char *ref_file) {
- // command line mode only
- if (ref_file != NULL) {
- // This will pick the best possible CUDA capable device
- // int devID = findCudaDevice(argc, (const char **)argv);
-#if defined(__aarch64__) || defined(__arm__)
- // find iGPU on the system which is compute capable which will perform
- // GLES-CUDA interop
- int devID = findIntegratedGPU();
-#else
- // use command-line specified CUDA device, otherwise use device with highest
- // Gflops/s
- int devID = findCudaDevice(argc, (const char **)argv);
-#endif
-
- // create VBO
- checkCudaErrors(cudaMalloc((void **)&d_vbo_buffer,
- mesh_width * mesh_height * 4 * sizeof(float)));
-
- // run the cuda part
- runAutoTest(devID, argv, ref_file);
-
- // check result of Cuda step
- checkResultCuda(argc, argv, mesh_vbo);
-
- cudaFree(d_vbo_buffer);
- d_vbo_buffer = NULL;
- } else {
- double endTime = TIME_LIMIT;
-
- // this would use command-line specified CUDA device, note that CUDA
- // defaults to highest Gflops/s device
- if (checkCmdLineFlag(argc, (const char **)argv, "device")) {
- error_exit("Device setting not yet implemented!\n");
- }
-
- // display selection
- if (checkCmdLineFlag(argc, (const char **)argv, "dispno")) {
- dispno = getCmdLineArgumentInt(argc, (const char **)argv, "dispno");
- }
-
- // Window width
- if (checkCmdLineFlag(argc, (const char **)argv, "width")) {
- window_width = getCmdLineArgumentInt(argc, (const char **)argv, "width");
- }
-
- // Window Height
- if (checkCmdLineFlag(argc, (const char **)argv, "height")) {
- window_height =
- getCmdLineArgumentInt(argc, (const char **)argv, "height");
- }
-
- // Determine how long to run for in secs: default is 10s
- if (checkCmdLineFlag(argc, (const char **)argv, "runtime")) {
- endTime = getCmdLineArgumentInt(argc, (const char **)argv, "runtime");
- }
-
- SetCloseCB(closeCB_app);
- SetKeyCB(keyCB_app);
-
- // create QNX screen window and set up associated OpenGL ES context
- graphics_setup_window(0, 0, window_width, window_height, sSDKsample,
- dispno);
-
-#if defined(__aarch64__) || defined(__arm__)
- // find iGPU on the system which is compute capable which will perform
- // GLES-CUDA interop
- int devID = findIntegratedGPU();
-#else
- // use command-line specified CUDA device, otherwise use device with highest
- // Gflops/s
- int devID = findCudaDevice(argc, (const char **)argv);
-#endif
- InitGraphicsState(); // set up GLES stuff
-
- glClearColor(0, 0.5, 1, 1); // blue-ish background
- glClear(GL_COLOR_BUFFER_BIT);
-
- graphics_swap_buffers();
-
- int frame = 0;
-
- struct timeval begin, end;
- gettimeofday(&begin, NULL);
-
- // Print runtime
- if (endTime < 0.0) {
- endTime = TIME_LIMIT;
- printf(" running forever...\n");
- } else {
- printf(" running for %f seconds...\n", endTime);
- }
-
- while (!shutdown) {
- frame++;
- display_thisframe(0.010);
- usleep(1000);
- graphics_swap_buffers();
- CheckEvents();
-
- gettimeofday(&end, 0);
- double elapsed = (end.tv_sec - begin.tv_sec) +
- ((end.tv_usec - begin.tv_usec) / 1000000.0);
-
- // Check whether time limit has been exceeded
- if (!shutdown) shutdown = (endTime <= elapsed);
- }
-
- // NOTE: Before destroying OpenGL ES context, must unregister all shared
- // resources from CUDA !
- checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource));
-
- graphics_close_window(); // close window and destroy OpenGL ES context
- }
-
- return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Program main
-////////////////////////////////////////////////////////////////////////////////
-int main(int argc, char **argv) {
- char *ref_file = NULL;
-
- pArgc = &argc;
- pArgv = argv;
-
-#if defined(__linux__)
- setenv("DISPLAY", ":0", 0);
-#endif
-
- printf("%s starting...\n", sSDKsample);
-
- if (argc > 1) {
- if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
- // In this mode, we run without OpenGL and see if VBO is generated
- // correctly
- getCmdLineArgumentString(argc, (const char **)argv, "file",
- (char **)&ref_file);
- }
- }
-
- printf("\n");
-
- runTest(argc, argv, ref_file);
-
- printf("%s completed, returned %s\n", sSDKsample,
- (g_TotalErrors == 0) ? "OK" : "ERROR!");
-
- exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
-}