mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-07-01 20:20:29 +08:00
Update CUDA Samples for CTK 12.8 release and migrate build system to CMake
Update CUDA Samples for CTK 12.8 release and migrate build system to CMake
This commit is contained in:
commit
db3eea2394
4
.gitignore
vendored
4
.gitignore
vendored
@ -1 +1,3 @@
|
|||||||
.vscode/*
|
build
|
||||||
|
.vs
|
||||||
|
.clangd
|
||||||
|
46
CHANGELOG.md
46
CHANGELOG.md
@ -1,5 +1,49 @@
|
|||||||
## Changelog
|
## Changelog
|
||||||
|
|
||||||
|
### CUDA 12.8
|
||||||
|
* Updated build system across the repository to CMake. Removed Visual Studio project files and Makefiles.
|
||||||
|
* Removed the following outdated samples:
|
||||||
|
* `0_Introduction`
|
||||||
|
* `c++11_cuda` demonstrating CUDA and C++ 11 interoperability (reason: obsolete)
|
||||||
|
* `concurrentKernels` demonstrating the ability to run multiple kernels simultaneously (reason: obsolete)
|
||||||
|
* `cppIntegration` demonstrating calling between .cu and .cpp files (reason: obsolete)
|
||||||
|
* `cppOverload` demonstrating C++ function overloading (reason: obsolete)
|
||||||
|
* `simpleSeparateCompilation` demonstrating NVCC compilation to a static library (reason: trivial)
|
||||||
|
* `simpleTemplates_nvrtc` demonstrating NVRTC usage for `simpleTemplates` sample (reason: redundant)
|
||||||
|
* `simpleVoteIntrinsics_nvrtc` demonstrating NVRTC usage for `simpleVoteIntrinsics` sample (reason: redundant)
|
||||||
|
* `2_Concepts_and_Techniques`
|
||||||
|
* `cuHook` demonstrating dlsym hooks (reason: incompatible with modern `glibc`)
|
||||||
|
* `4_CUDA_Libraries`
|
||||||
|
* `batchedLabelMarkersAndLabelCompressionNPP` demonstrating NPP features (reason: some functionality removed from library)
|
||||||
|
* `5_Domain_Specific`
|
||||||
|
* Legacy Direct3D 9 and 10 interoperability samples:
|
||||||
|
* `fluidsD3D9`
|
||||||
|
* `simpleD3D10`
|
||||||
|
* `simpleD3D10RenderTarget`
|
||||||
|
* `simpleD3D10Texture`
|
||||||
|
* `simpleD3D9`
|
||||||
|
* `simpleD3D9Texture`
|
||||||
|
* `SLID3D10Texture`
|
||||||
|
* `VFlockingD3D10`
|
||||||
|
* `8_Platform_Specific/Tegra`
|
||||||
|
* Temporarily removed the following two samples pending updates:
|
||||||
|
* `nbody_screen` demonstrating the nbody sample in QNX
|
||||||
|
* `simpleGLES_screen` demonstrating GLES interop in QNX
|
||||||
|
* Moved the following Tegra-specific samples to a dedicated subdirectory: `8_Platform_Specific/Tegra`
|
||||||
|
* `EGLSync_CUDAEvent_Interop`
|
||||||
|
* `cuDLAErrorReporting`
|
||||||
|
* `cuDLAHybridMode`
|
||||||
|
* `cuDLALayerwiseStatsHybrid`
|
||||||
|
* `cuDLALayerwiseStatsStandalone`
|
||||||
|
* `cuDLAStandaloneMode`
|
||||||
|
* `cudaNvSciNvMedia`
|
||||||
|
* `fluidsGLES`
|
||||||
|
* `nbody_opengles`
|
||||||
|
* `simpleGLES`
|
||||||
|
* `simpleGLES_EGLOutput`
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### CUDA 12.5
|
### CUDA 12.5
|
||||||
|
|
||||||
### CUDA 12.4
|
### CUDA 12.4
|
||||||
@ -18,7 +62,7 @@
|
|||||||
* Added new sample for Large Kernels
|
* Added new sample for Large Kernels
|
||||||
|
|
||||||
### CUDA 12.0
|
### CUDA 12.0
|
||||||
* Added new flags for JIT compiling
|
* Added new flags for JIT compiling
|
||||||
* Removed deprecated APIs in Hopper Architecture
|
* Removed deprecated APIs in Hopper Architecture
|
||||||
|
|
||||||
### CUDA 11.6
|
### CUDA 11.6
|
||||||
|
25
CMakeLists.txt
Normal file
25
CMakeLists.txt
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.20)
|
||||||
|
|
||||||
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Modules")
|
||||||
|
|
||||||
|
project(cuda-samples LANGUAGES C CXX CUDA)
|
||||||
|
|
||||||
|
find_package(CUDAToolkit REQUIRED)
|
||||||
|
|
||||||
|
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
set(CMAKE_CUDA_STANDARD 17)
|
||||||
|
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
|
||||||
|
|
||||||
|
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
|
||||||
|
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
|
||||||
|
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||||
|
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda")
|
||||||
|
|
||||||
|
add_subdirectory(Samples)
|
@ -1,294 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------------------------
|
|
||||||
// File: dynlink_d3d10.h
|
|
||||||
//
|
|
||||||
// Shortcut macros and functions for using DX objects
|
|
||||||
//
|
|
||||||
// Copyright (c) Microsoft Corporation. All rights reserved
|
|
||||||
//--------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
#ifndef _DYNLINK_D3D10_H_
|
|
||||||
#define _DYNLINK_D3D10_H_
|
|
||||||
|
|
||||||
// Standard Windows includes
|
|
||||||
#include <windows.h>
|
|
||||||
#include <initguid.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <wchar.h>
|
|
||||||
#include <mmsystem.h>
|
|
||||||
#include <commctrl.h> // for InitCommonControls()
|
|
||||||
#include <shellapi.h> // for ExtractIcon()
|
|
||||||
#include <new.h> // for placement new
|
|
||||||
#include <shlobj.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
// CRT's memory leak detection
|
|
||||||
#if defined(DEBUG) || defined(_DEBUG)
|
|
||||||
#include <crtdbg.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Direct3D9 includes
|
|
||||||
#include <d3d9.h>
|
|
||||||
|
|
||||||
// Direct3D10 includes
|
|
||||||
#include <dxgi.h>
|
|
||||||
#include <d3d10_1.h>
|
|
||||||
#include <d3d10.h>
|
|
||||||
|
|
||||||
// XInput includes
|
|
||||||
#include <xinput.h>
|
|
||||||
|
|
||||||
// strsafe.h deprecates old insecure string functions. If you
|
|
||||||
// really do not want it to (not recommended), then uncomment the next line
|
|
||||||
//#define STRSAFE_NO_DEPRECATE
|
|
||||||
|
|
||||||
#ifndef STRSAFE_NO_DEPRECATE
|
|
||||||
#pragma deprecated("strncpy")
|
|
||||||
#pragma deprecated("wcsncpy")
|
|
||||||
#pragma deprecated("_tcsncpy")
|
|
||||||
#pragma deprecated("wcsncat")
|
|
||||||
#pragma deprecated("strncat")
|
|
||||||
#pragma deprecated("_tcsncat")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#pragma warning( disable : 4996 ) // disable deprecated warning
|
|
||||||
#include <strsafe.h>
|
|
||||||
#pragma warning( default : 4996 )
|
|
||||||
|
|
||||||
#include <DirectXMath.h>
|
|
||||||
|
|
||||||
using namespace DirectX;
|
|
||||||
//--------------------------------------------------------------------------------------
|
|
||||||
// Structs
|
|
||||||
//--------------------------------------------------------------------------------------
|
|
||||||
struct DXUTD3D9DeviceSettings
|
|
||||||
{
|
|
||||||
UINT AdapterOrdinal;
|
|
||||||
D3DDEVTYPE DeviceType;
|
|
||||||
D3DFORMAT AdapterFormat;
|
|
||||||
DWORD BehaviorFlags;
|
|
||||||
D3DPRESENT_PARAMETERS pp;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct DXUTD3D10DeviceSettings
|
|
||||||
{
|
|
||||||
UINT AdapterOrdinal;
|
|
||||||
D3D10_DRIVER_TYPE DriverType;
|
|
||||||
UINT Output;
|
|
||||||
DXGI_SWAP_CHAIN_DESC sd;
|
|
||||||
UINT32 CreateFlags;
|
|
||||||
UINT32 SyncInterval;
|
|
||||||
DWORD PresentFlags;
|
|
||||||
bool AutoCreateDepthStencil; // DXUT will create a depth stencil resource and view if true
|
|
||||||
DXGI_FORMAT AutoDepthStencilFormat;
|
|
||||||
};
|
|
||||||
|
|
||||||
enum DXUTDeviceVersion { DXUT_D3D9_DEVICE, DXUT_D3D10_DEVICE };
|
|
||||||
struct DXUTDeviceSettings
|
|
||||||
{
|
|
||||||
DXUTDeviceVersion ver;
|
|
||||||
union
|
|
||||||
{
|
|
||||||
DXUTD3D9DeviceSettings d3d9; // only valid if ver == DXUT_D3D9_DEVICE
|
|
||||||
DXUTD3D10DeviceSettings d3d10; // only valid if ver == DXUT_D3D10_DEVICE
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
//--------------------------------------------------------------------------------------
|
|
||||||
// Error codes
|
|
||||||
//--------------------------------------------------------------------------------------
|
|
||||||
#define DXUTERR_NODIRECT3D MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0901)
|
|
||||||
#define DXUTERR_NOCOMPATIBLEDEVICES MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0902)
|
|
||||||
#define DXUTERR_MEDIANOTFOUND MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0903)
|
|
||||||
#define DXUTERR_NONZEROREFCOUNT MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0904)
|
|
||||||
#define DXUTERR_CREATINGDEVICE MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0905)
|
|
||||||
#define DXUTERR_RESETTINGDEVICE MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0906)
|
|
||||||
#define DXUTERR_CREATINGDEVICEOBJECTS MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0907)
|
|
||||||
#define DXUTERR_RESETTINGDEVICEOBJECTS MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0908)
|
|
||||||
#define DXUTERR_DEVICEREMOVED MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x090A)
|
|
||||||
|
|
||||||
|
|
||||||
typedef HRESULT(WINAPI *LPCREATEDXGIFACTORY)(REFIID, void **);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICE)(IDXGIAdapter *, D3D10_DRIVER_TYPE, HMODULE, UINT, UINT32,
|
|
||||||
ID3D10Device **);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICE1)(IDXGIAdapter *, D3D10_DRIVER_TYPE, HMODULE, UINT,
|
|
||||||
D3D10_FEATURE_LEVEL1, UINT, ID3D10Device1 **);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10CREATESTATEBLOCK)(ID3D10Device *pDevice, D3D10_STATE_BLOCK_MASK *pStateBlockMask,
|
|
||||||
ID3D10StateBlock **ppStateBlock);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKUNION)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
|
|
||||||
D3D10_STATE_BLOCK_MASK *pResult);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKINTERSECT)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
|
|
||||||
D3D10_STATE_BLOCK_MASK *pResult);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDIFFERENCE)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
|
|
||||||
D3D10_STATE_BLOCK_MASK *pResult);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKENABLECAPTURE)(D3D10_STATE_BLOCK_MASK *pMask,
|
|
||||||
D3D10_DEVICE_STATE_TYPES StateType, UINT RangeStart,
|
|
||||||
UINT RangeLength);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDISABLECAPTURE)(D3D10_STATE_BLOCK_MASK *pMask,
|
|
||||||
D3D10_DEVICE_STATE_TYPES StateType, UINT RangeStart,
|
|
||||||
UINT RangeLength);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKENABLEALL)(D3D10_STATE_BLOCK_MASK *pMask);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDISABLEALL)(D3D10_STATE_BLOCK_MASK *pMask);
|
|
||||||
typedef BOOL (WINAPI *LPD3D10STATEBLOCKMASKGETSETTING)(D3D10_STATE_BLOCK_MASK *pMask,
|
|
||||||
D3D10_DEVICE_STATE_TYPES StateType, UINT Entry);
|
|
||||||
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10COMPILEEFFECTFROMMEMORY)(void *pData, SIZE_T DataLength, LPCSTR pSrcFileName,
|
|
||||||
CONST D3D10_SHADER_MACRO *pDefines,
|
|
||||||
ID3D10Include *pInclude, UINT HLSLFlags, UINT FXFlags,
|
|
||||||
ID3D10Blob **ppCompiledEffect, ID3D10Blob **ppErrors);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10CREATEEFFECTFROMMEMORY)(void *pData, SIZE_T DataLength, UINT FXFlags,
|
|
||||||
ID3D10Device *pDevice,
|
|
||||||
ID3D10EffectPool *pEffectPool,
|
|
||||||
ID3D10Effect **ppEffect);
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10CREATEEFFECTPOOLFROMMEMORY)(void *pData, SIZE_T DataLength, UINT FXFlags,
|
|
||||||
ID3D10Device *pDevice, ID3D10EffectPool **ppEffectPool);
|
|
||||||
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICEANDSWAPCHAIN)(IDXGIAdapter *pAdapter,
|
|
||||||
D3D10_DRIVER_TYPE DriverType,
|
|
||||||
HMODULE Software,
|
|
||||||
UINT Flags,
|
|
||||||
UINT SDKVersion,
|
|
||||||
DXGI_SWAP_CHAIN_DESC *pSwapChainDesc,
|
|
||||||
IDXGISwapChain **ppSwapChain,
|
|
||||||
ID3D10Device **ppDevice);
|
|
||||||
|
|
||||||
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICEANDSWAPCHAIN1)(IDXGIAdapter *pAdapter,
|
|
||||||
D3D10_DRIVER_TYPE DriverType,
|
|
||||||
HMODULE Software,
|
|
||||||
UINT Flags,
|
|
||||||
D3D10_FEATURE_LEVEL1 HardwareLevel,
|
|
||||||
UINT SDKVersion,
|
|
||||||
DXGI_SWAP_CHAIN_DESC *pSwapChainDesc,
|
|
||||||
IDXGISwapChain **ppSwapChain,
|
|
||||||
ID3D10Device1 **ppDevice);
|
|
||||||
|
|
||||||
// Module and function pointers
|
|
||||||
static HMODULE g_hModDXGI = NULL;
|
|
||||||
static HMODULE g_hModD3D10 = NULL;
|
|
||||||
static HMODULE g_hModD3D101 = NULL;
|
|
||||||
static LPCREATEDXGIFACTORY sFnPtr_CreateDXGIFactory = NULL;
|
|
||||||
static LPD3D10CREATESTATEBLOCK sFnPtr_D3D10CreateStateBlock = NULL;
|
|
||||||
static LPD3D10CREATEDEVICE sFnPtr_D3D10CreateDevice = NULL;
|
|
||||||
static LPD3D10CREATEDEVICE1 sFnPtr_D3D10CreateDevice1 = NULL;
|
|
||||||
static LPD3D10STATEBLOCKMASKUNION sFnPtr_D3D10StateBlockMaskUnion = NULL;
|
|
||||||
static LPD3D10STATEBLOCKMASKINTERSECT sFnPtr_D3D10StateBlockMaskIntersect = NULL;
|
|
||||||
static LPD3D10STATEBLOCKMASKDIFFERENCE sFnPtr_D3D10StateBlockMaskDifference = NULL;
|
|
||||||
static LPD3D10STATEBLOCKMASKENABLECAPTURE sFnPtr_D3D10StateBlockMaskEnableCapture = NULL;
|
|
||||||
static LPD3D10STATEBLOCKMASKDISABLECAPTURE sFnPtr_D3D10StateBlockMaskDisableCapture = NULL;
|
|
||||||
static LPD3D10STATEBLOCKMASKENABLEALL sFnPtr_D3D10StateBlockMaskEnableAll = NULL;
|
|
||||||
static LPD3D10STATEBLOCKMASKDISABLEALL sFnPtr_D3D10StateBlockMaskDisableAll = NULL;
|
|
||||||
static LPD3D10STATEBLOCKMASKGETSETTING sFnPtr_D3D10StateBlockMaskGetSetting = NULL;
|
|
||||||
static LPD3D10COMPILEEFFECTFROMMEMORY sFnPtr_D3D10CompileEffectFromMemory = NULL;
|
|
||||||
static LPD3D10CREATEEFFECTFROMMEMORY sFnPtr_D3D10CreateEffectFromMemory = NULL;
|
|
||||||
static LPD3D10CREATEEFFECTPOOLFROMMEMORY sFnPtr_D3D10CreateEffectPoolFromMemory = NULL;
|
|
||||||
static LPD3D10CREATEDEVICEANDSWAPCHAIN sFnPtr_D3D10CreateDeviceAndSwapChain = NULL;
|
|
||||||
static LPD3D10CREATEDEVICEANDSWAPCHAIN1 sFnPtr_D3D10CreateDeviceAndSwapChain1 = NULL;
|
|
||||||
|
|
||||||
// unload the D3D10 DLLs
|
|
||||||
static bool dynlinkUnloadD3D10API(void)
|
|
||||||
{
|
|
||||||
if (g_hModD3D10)
|
|
||||||
{
|
|
||||||
FreeLibrary(g_hModD3D10);
|
|
||||||
g_hModD3D10 = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (g_hModDXGI)
|
|
||||||
{
|
|
||||||
FreeLibrary(g_hModDXGI);
|
|
||||||
g_hModDXGI = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (g_hModD3D101)
|
|
||||||
{
|
|
||||||
FreeLibrary(g_hModD3D101);
|
|
||||||
g_hModD3D101 = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dynamically load the D3D10 DLLs and map the function pointers
|
|
||||||
static bool dynlinkLoadD3D10API(void)
|
|
||||||
{
|
|
||||||
// First check to see if the D3D10 Library is present.
|
|
||||||
// if it succeeds, then we can call GetProcAddress to grab all of the DX10 functions
|
|
||||||
g_hModD3D10 = LoadLibrary("d3d10.dll");
|
|
||||||
|
|
||||||
if (g_hModD3D10 != NULL)
|
|
||||||
{
|
|
||||||
sFnPtr_D3D10CreateStateBlock = (LPD3D10CREATESTATEBLOCK) GetProcAddress(g_hModD3D10, "D3D10CreateStateBlock");
|
|
||||||
sFnPtr_D3D10CreateDevice = (LPD3D10CREATEDEVICE) GetProcAddress(g_hModD3D10, "D3D10CreateDevice");
|
|
||||||
|
|
||||||
sFnPtr_D3D10StateBlockMaskUnion = (LPD3D10STATEBLOCKMASKUNION) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskUnion");
|
|
||||||
sFnPtr_D3D10StateBlockMaskIntersect = (LPD3D10STATEBLOCKMASKINTERSECT) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskIntersect");
|
|
||||||
sFnPtr_D3D10StateBlockMaskDifference = (LPD3D10STATEBLOCKMASKDIFFERENCE) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDifference");
|
|
||||||
sFnPtr_D3D10StateBlockMaskEnableCapture = (LPD3D10STATEBLOCKMASKENABLECAPTURE) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskEnableCapture");
|
|
||||||
sFnPtr_D3D10StateBlockMaskDisableCapture = (LPD3D10STATEBLOCKMASKDISABLECAPTURE)GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDisableCapture");
|
|
||||||
|
|
||||||
sFnPtr_D3D10StateBlockMaskEnableAll = (LPD3D10STATEBLOCKMASKENABLEALL) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskEnableAll");
|
|
||||||
sFnPtr_D3D10StateBlockMaskDisableAll = (LPD3D10STATEBLOCKMASKDISABLEALL) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDisableAll");
|
|
||||||
sFnPtr_D3D10StateBlockMaskGetSetting = (LPD3D10STATEBLOCKMASKGETSETTING) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskGetSetting");
|
|
||||||
|
|
||||||
sFnPtr_D3D10CompileEffectFromMemory = (LPD3D10COMPILEEFFECTFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CompileEffectFromMemory");
|
|
||||||
sFnPtr_D3D10CreateEffectFromMemory = (LPD3D10CREATEEFFECTFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CreateEffectFromMemory");
|
|
||||||
sFnPtr_D3D10CreateEffectPoolFromMemory = (LPD3D10CREATEEFFECTPOOLFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CreateEffectPoolFromMemory");
|
|
||||||
|
|
||||||
sFnPtr_D3D10CreateDeviceAndSwapChain = (LPD3D10CREATEDEVICEANDSWAPCHAIN) GetProcAddress(g_hModD3D10, "D3D10CreateDeviceAndSwapChain");
|
|
||||||
}
|
|
||||||
|
|
||||||
g_hModDXGI = LoadLibrary("dxgi.dll");
|
|
||||||
|
|
||||||
if (g_hModDXGI)
|
|
||||||
{
|
|
||||||
sFnPtr_CreateDXGIFactory = (LPCREATEDXGIFACTORY) GetProcAddress(g_hModDXGI , "CreateDXGIFactory");
|
|
||||||
}
|
|
||||||
|
|
||||||
// This may fail if this machine isn't Windows Vista SP1 or later
|
|
||||||
g_hModD3D101 = LoadLibrary("d3d10_1.dll");
|
|
||||||
|
|
||||||
if (g_hModD3D101 != NULL)
|
|
||||||
{
|
|
||||||
sFnPtr_D3D10CreateDevice1 = (LPD3D10CREATEDEVICE1) GetProcAddress(g_hModD3D101, "D3D10CreateDevice1");
|
|
||||||
sFnPtr_D3D10CreateDeviceAndSwapChain1 = (LPD3D10CREATEDEVICEANDSWAPCHAIN1) GetProcAddress(g_hModD3D101, "D3D10CreateDeviceAndSwapChain1");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (g_hModD3D10 == NULL || g_hModDXGI == NULL || g_hModD3D101 == NULL)
|
|
||||||
{
|
|
||||||
dynlinkUnloadD3D10API();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
@ -668,6 +668,9 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
|
|||||||
{0x87, 128},
|
{0x87, 128},
|
||||||
{0x89, 128},
|
{0x89, 128},
|
||||||
{0x90, 128},
|
{0x90, 128},
|
||||||
|
{0xa0, 128},
|
||||||
|
{0xa1, 128},
|
||||||
|
{0xc0, 128},
|
||||||
{-1, -1}};
|
{-1, -1}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
@ -717,6 +720,9 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
|
|||||||
{0x87, "Ampere"},
|
{0x87, "Ampere"},
|
||||||
{0x89, "Ada"},
|
{0x89, "Ada"},
|
||||||
{0x90, "Hopper"},
|
{0x90, "Hopper"},
|
||||||
|
{0xa0, "Blackwell"},
|
||||||
|
{0xa1, "Blackwell"},
|
||||||
|
{0xc0, "Blackwell"},
|
||||||
{-1, "Graphics Device"}};
|
{-1, "Graphics Device"}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
@ -116,6 +116,9 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
|||||||
{0x87, 128},
|
{0x87, 128},
|
||||||
{0x89, 128},
|
{0x89, 128},
|
||||||
{0x90, 128},
|
{0x90, 128},
|
||||||
|
{0xa0, 128},
|
||||||
|
{0xa1, 128},
|
||||||
|
{0xc0, 128},
|
||||||
{-1, -1}};
|
{-1, -1}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
@ -405,4 +408,3 @@ bool inline findFatbinPath(const char *module_file, std::string &module_path, ch
|
|||||||
// end of CUDA Helper Functions
|
// end of CUDA Helper Functions
|
||||||
|
|
||||||
#endif // COMMON_HELPER_CUDA_DRVAPI_H_
|
#endif // COMMON_HELPER_CUDA_DRVAPI_H_
|
||||||
|
|
||||||
|
@ -1,128 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// Utility funcs to wrap up saving a surface or the back buffer as a PPM file
|
|
||||||
// In addition, wraps up a threshold comparison of two PPMs.
|
|
||||||
//
|
|
||||||
// These functions are designed to be used to implement an automated QA testing
|
|
||||||
// for SDK samples.
|
|
||||||
//
|
|
||||||
// Author: Bryan Dudash
|
|
||||||
// Email: sdkfeedback@nvidia.com
|
|
||||||
//
|
|
||||||
// Copyright (c) NVIDIA Corporation. All rights reserved.
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <helper_functions.h>
|
|
||||||
#include <rendercheck_d3d10.h>
|
|
||||||
|
|
||||||
HRESULT CheckRenderD3D10::ActiveRenderTargetToPPM(ID3D10Device *pDevice,
|
|
||||||
const char *zFileName) {
|
|
||||||
ID3D10RenderTargetView *pRTV = NULL;
|
|
||||||
pDevice->OMGetRenderTargets(1, &pRTV, NULL);
|
|
||||||
|
|
||||||
ID3D10Resource *pSourceResource = NULL;
|
|
||||||
pRTV->GetResource(&pSourceResource);
|
|
||||||
|
|
||||||
return ResourceToPPM(pDevice, pSourceResource, zFileName);
|
|
||||||
}
|
|
||||||
|
|
||||||
HRESULT CheckRenderD3D10::ResourceToPPM(ID3D10Device *pDevice,
|
|
||||||
ID3D10Resource *pResource,
|
|
||||||
const char *zFileName) {
|
|
||||||
D3D10_RESOURCE_DIMENSION rType;
|
|
||||||
pResource->GetType(&rType);
|
|
||||||
|
|
||||||
if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE2D) {
|
|
||||||
printf("SurfaceToPPM: pResource is not a 2D texture! Aborting...\n");
|
|
||||||
return E_FAIL;
|
|
||||||
}
|
|
||||||
|
|
||||||
ID3D10Texture2D *pSourceTexture = (ID3D10Texture2D *)pResource;
|
|
||||||
ID3D10Texture2D *pTargetTexture = NULL;
|
|
||||||
|
|
||||||
D3D10_TEXTURE2D_DESC desc;
|
|
||||||
pSourceTexture->GetDesc(&desc);
|
|
||||||
desc.BindFlags = 0;
|
|
||||||
desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ;
|
|
||||||
desc.Usage = D3D10_USAGE_STAGING;
|
|
||||||
|
|
||||||
if (FAILED(pDevice->CreateTexture2D(&desc, NULL, &pTargetTexture))) {
|
|
||||||
printf(
|
|
||||||
"SurfaceToPPM: Unable to create target Texture resoruce! Aborting... "
|
|
||||||
"\n");
|
|
||||||
return E_FAIL;
|
|
||||||
}
|
|
||||||
|
|
||||||
pDevice->CopyResource(pTargetTexture, pSourceTexture);
|
|
||||||
|
|
||||||
D3D10_MAPPED_TEXTURE2D mappedTex2D;
|
|
||||||
pTargetTexture->Map(0, D3D10_MAP_READ, 0, &mappedTex2D);
|
|
||||||
|
|
||||||
// Need to convert from dx pitch to pitch=width
|
|
||||||
unsigned char *pPPMData = new unsigned char[desc.Width * desc.Height * 4];
|
|
||||||
|
|
||||||
for (unsigned int iHeight = 0; iHeight < desc.Height; iHeight++) {
|
|
||||||
memcpy(
|
|
||||||
&(pPPMData[iHeight * desc.Width * 4]),
|
|
||||||
(unsigned char *)(mappedTex2D.pData) + iHeight * mappedTex2D.RowPitch,
|
|
||||||
desc.Width * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
pTargetTexture->Unmap(0);
|
|
||||||
|
|
||||||
// Prepends the PPM header info and bumps byte data afterwards
|
|
||||||
sdkSavePPM4ub(zFileName, pPPMData, desc.Width, desc.Height);
|
|
||||||
|
|
||||||
delete[] pPPMData;
|
|
||||||
pTargetTexture->Release();
|
|
||||||
|
|
||||||
return S_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CheckRenderD3D10::PPMvsPPM(const char *src_file, const char *ref_file,
|
|
||||||
const char *exec_path, const float epsilon,
|
|
||||||
const float threshold) {
|
|
||||||
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
|
|
||||||
|
|
||||||
if (ref_file_path == NULL) {
|
|
||||||
printf(
|
|
||||||
"CheckRenderD3D10::PPMvsPPM unable to find <%s> in <%s> Aborting "
|
|
||||||
"comparison!\n",
|
|
||||||
ref_file, exec_path);
|
|
||||||
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
|
|
||||||
ref_file);
|
|
||||||
printf("Aborting comparison!\n");
|
|
||||||
printf(" FAILURE!\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (sdkComparePPM(src_file, ref_file_path, epsilon, threshold, true) ==
|
|
||||||
true);
|
|
||||||
}
|
|
@ -1,53 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#ifndef _RENDERCHECK_D3D10_H_
|
|
||||||
#define _RENDERCHECK_D3D10_H_
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <d3d10.h>
|
|
||||||
|
|
||||||
class CheckRenderD3D10 {
|
|
||||||
public:
|
|
||||||
CheckRenderD3D10() {}
|
|
||||||
|
|
||||||
static HRESULT ActiveRenderTargetToPPM(ID3D10Device *pDevice,
|
|
||||||
const char *zFileName);
|
|
||||||
static HRESULT ResourceToPPM(ID3D10Device *pDevice, ID3D10Resource *pResource,
|
|
||||||
const char *zFileName);
|
|
||||||
|
|
||||||
static bool PPMvsPPM(const char *src_file, const char *ref_file,
|
|
||||||
const char *exec_path, const float epsilon,
|
|
||||||
const float threshold = 0.0f);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,167 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// Utility funcs to wrap up saving a surface or the back buffer as a PPM file
|
|
||||||
// In addition, wraps up a threshold comparison of two PPMs.
|
|
||||||
//
|
|
||||||
// These functions are designed to be used to implement an automated QA testing
|
|
||||||
// for SDK samples.
|
|
||||||
//
|
|
||||||
// Author: Bryan Dudash
|
|
||||||
// Email: sdkfeedback@nvidia.com
|
|
||||||
//
|
|
||||||
// Copyright (c) NVIDIA Corporation. All rights reserved.
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
#include <helper_functions.h>
|
|
||||||
#include <rendercheck_d3d9.h>
|
|
||||||
|
|
||||||
// originally copied from checkrender_gl.cpp and slightly modified
|
|
||||||
bool CheckRenderD3D9::PPMvsPPM(const char *src_file, const char *ref_file,
|
|
||||||
const char *exec_path, const float epsilon,
|
|
||||||
const float threshold) {
|
|
||||||
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
|
|
||||||
|
|
||||||
if (ref_file_path == NULL) {
|
|
||||||
printf(
|
|
||||||
"CheckRenderD3D9::PPMvsPPM unable to find <%s> in <%s> Aborting "
|
|
||||||
"comparison!\n",
|
|
||||||
ref_file, exec_path);
|
|
||||||
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
|
|
||||||
ref_file);
|
|
||||||
printf("Aborting comparison!\n");
|
|
||||||
printf(" FAILURE!\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (sdkComparePPM(src_file, ref_file_path, epsilon, threshold, true) ==
|
|
||||||
true);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Fetches back buffer 0 of swap chain 0 from pDevice and writes it to
// zFileName through SurfaceToPPM().
//
// Returns E_FAIL when the back buffer cannot be obtained; otherwise returns
// whatever SurfaceToPPM() reported. The back-buffer reference acquired here is
// released before returning.
HRESULT CheckRenderD3D9::BackbufferToPPM(IDirect3DDevice9 *pDevice,
                                         const char *zFileName) {
  IDirect3DSurface9 *pBackBuffer = NULL;

  const HRESULT hrAcquire =
      pDevice->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &pBackBuffer);

  if (FAILED(hrAcquire)) {
    printf("Unable to get the back buffer. Aborting...\n");
    return E_FAIL;
  }

  const HRESULT hrDump = SurfaceToPPM(pDevice, pBackBuffer, zFileName);

  // GetBackBuffer handed us a reference; give it back regardless of hrDump.
  pBackBuffer->Release();

  return hrDump;
}
|
|
||||||
|
|
||||||
// Copies the contents of pSurface into system memory and saves them to
// zFileName with sdkSavePPM4ub().
//
// Only D3DFMT_A8R8G8B8 and D3DFMT_X8R8G8B8 surfaces are supported; any other
// format yields E_INVALIDARG. Every other failure (querying the description,
// creating the staging texture, copying the render target, locking the
// staging surface) prints a diagnostic and yields E_FAIL. All COM references
// acquired inside this function are released on every exit path.
//
// Returns S_OK once the pixel data has been handed to sdkSavePPM4ub().
// NOTE(review): the result of sdkSavePPM4ub() is not inspected (as before);
// confirm whether a failed write should be reported to the caller.
HRESULT CheckRenderD3D9::SurfaceToPPM(IDirect3DDevice9 *pDevice,
                                      IDirect3DSurface9 *pSurface,
                                      const char *zFileName) {
  D3DSURFACE_DESC desc;

  if (FAILED(pSurface->GetDesc(&desc))) {
    printf("Unable to query the surface description! Aborting...\n");
    return E_FAIL;
  }

  // $$ For now only support common 8bit formats.  TODO: support for more
  // complex formats via conversion?
  if (!(desc.Format == D3DFMT_A8R8G8B8 || desc.Format == D3DFMT_X8R8G8B8)) {
    return E_INVALIDARG;
  }

  HRESULT result = E_FAIL;
  IDirect3DTexture9 *pTargetTex = NULL;
  IDirect3DSurface9 *pTargetSurface = NULL;
  D3DLOCKED_RECT lockedRect;

  // The source surface is not locked directly; its contents are first copied
  // into a system-memory texture with GetRenderTargetData() and that copy is
  // locked instead.
  if (FAILED(pDevice->CreateTexture(desc.Width, desc.Height, 1,
                                    D3DUSAGE_DYNAMIC, desc.Format,
                                    D3DPOOL_SYSTEMMEM, &pTargetTex, NULL))) {
    printf("Unable to create texture for surface transfer! Aborting...\n");
  } else if (FAILED(pTargetTex->GetSurfaceLevel(0, &pTargetSurface))) {
    printf("Unable to get surface for surface transfer! Aborting...\n");
  } else if (FAILED(pDevice->GetRenderTargetData(pSurface, pTargetSurface))) {
    printf(
        "Unable to GetRenderTargetData() for surface transfer! Aborting...\n");
  } else if (FAILED(pTargetSurface->LockRect(&lockedRect, NULL, 0))) {
    // Without a successful lock, lockedRect.pBits must not be read.
    printf("Unable to lock surface for surface transfer! Aborting...\n");
  } else {
    // Work in size_t so width * height * 4 cannot wrap in 32-bit arithmetic.
    const size_t width = desc.Width;
    const size_t height = desc.Height;
    const size_t srcPitch = static_cast<size_t>(lockedRect.Pitch);
    const unsigned char *pSrcBits =
        static_cast<const unsigned char *>(lockedRect.pBits);

    // Tightly packed destination: 4 bytes per pixel, row pitch == width * 4.
    unsigned char *pPPMData = new unsigned char[width * height * 4];

    for (size_t row = 0; row < height; ++row) {
      const unsigned char *pSrcRow = pSrcBits + row * srcPitch;
      unsigned char *pDstRow = pPPMData + row * width * 4;

      for (size_t col = 0; col < width; ++col) {
        DWORD color;
        memcpy(&color, pSrcRow + col * 4, sizeof(color));

        // Swap bits [7:0] and [23:16] (the R and B channels of the supported
        // formats) so the saved image is not written with red and blue
        // exchanged; bits [15:8] and [31:24] are kept in place.
        color = ((color & 0xFF) << 16) | (color & 0xFF00) |
                ((color & 0xFF0000) >> 16) | (color & 0xFF000000);

        memcpy(pDstRow + col * 4, &color, sizeof(color));
      }
    }

    pTargetSurface->UnlockRect();

    // Prepends the PPM header info and dumps the byte data afterwards.
    sdkSavePPM4ub(zFileName, pPPMData, desc.Width, desc.Height);

    delete[] pPPMData;
    result = S_OK;
  }

  // Single clean-up point: release whatever was acquired before the failure
  // (or after success).
  if (pTargetSurface != NULL) {
    pTargetSurface->Release();
  }

  if (pTargetTex != NULL) {
    pTargetTex->Release();
  }

  return result;
}
|
|
@ -1,54 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#ifndef _RENDERCHECK_D3D9_H_
|
|
||||||
#define _RENDERCHECK_D3D9_H_
|
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <d3d9.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
class CheckRenderD3D9 {
|
|
||||||
public:
|
|
||||||
CheckRenderD3D9() {}
|
|
||||||
|
|
||||||
static HRESULT BackbufferToPPM(IDirect3DDevice9 *pDevice,
|
|
||||||
const char *zFileName);
|
|
||||||
static HRESULT SurfaceToPPM(IDirect3DDevice9 *pDevice,
|
|
||||||
IDirect3DSurface9 *pSurface,
|
|
||||||
const char *zFileName);
|
|
||||||
|
|
||||||
static bool PPMvsPPM(const char *src_file, const char *ref_file,
|
|
||||||
const char *exec_path, const float epsilon,
|
|
||||||
const float threshold = 0.0f);
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
69
Makefile
69
Makefile
@ -1,69 +0,0 @@
|
|||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
#
|
|
||||||
# CUDA Samples
|
|
||||||
#
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
# Top-level driver: discovers every sample Makefile under Samples/ and forwards
# the requested goal (build, test, clean, clobber) to each of them.

TARGET_ARCH ?= $(shell uname -m)

# Project folders that contain CUDA samples
PROJECTS ?= $(shell find Samples -name Makefile)

# Makefiles listed here are skipped.
FILTER_OUT :=

PROJECTS := $(filter-out $(FILTER_OUT),$(PROJECTS))

# None of these goals produce a file of the same name. Declaring them phony
# keeps a stray file or directory called e.g. "build", "test" or "tidy" from
# making the goal look up to date.
.PHONY: all build test tidy clean clobber

# Placeholder pattern targets: "<path>/Makefile.ph_<action>" runs <action> in
# the directory that holds <path>/Makefile. The files themselves never exist,
# so the recipes run on every invocation.
%.ph_build :
	+@$(MAKE) -C $(dir $*) $(MAKECMDGOALS)

%.ph_test :
	+@$(MAKE) -C $(dir $*) testrun

%.ph_clean :
	+@$(MAKE) -C $(dir $*) clean $(USE_DEVICE)

%.ph_clobber :
	+@$(MAKE) -C $(dir $*) clobber $(USE_DEVICE)

all: $(addsuffix .ph_build,$(PROJECTS))
	@echo "Finished building CUDA samples"

build: $(addsuffix .ph_build,$(PROJECTS))

test : $(addsuffix .ph_test,$(PROJECTS))

# Remove editor leftovers (paths containing '#' or '~').
# grep -E replaces the deprecated egrep alias; the patterns are unchanged.
tidy:
	@find * | grep -E "#" | xargs rm -f
	@find * | grep -E "\~" | xargs rm -f

clean: tidy $(addsuffix .ph_clean,$(PROJECTS))

clobber: clean $(addsuffix .ph_clobber,$(PROJECTS))
|
|
143
README.md
143
README.md
@ -1,20 +1,20 @@
|
|||||||
# CUDA Samples
|
# CUDA Samples
|
||||||
|
|
||||||
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads).
|
Samples for CUDA Developers which demonstrate features in CUDA Toolkit. This version supports [CUDA Toolkit 12.8](https://developer.nvidia.com/cuda-downloads).
|
||||||
|
|
||||||
## Release Notes
|
## Release Notes
|
||||||
|
|
||||||
This section describes the release notes for the CUDA Samples on GitHub only.
|
This section describes the release notes for the CUDA Samples on GitHub only.
|
||||||
|
|
||||||
### CUDA 12.5
|
### Change Log
|
||||||
|
|
||||||
### [older versions...](./CHANGELOG.md)
|
### [Revision History](./CHANGELOG.md)
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 12.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
|
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
|
||||||
|
|
||||||
### Getting the CUDA Samples
|
### Getting the CUDA Samples
|
||||||
@ -28,43 +28,105 @@ Without using git the easiest way to use these samples is to download the zip fi
|
|||||||
|
|
||||||
## Building CUDA Samples
|
## Building CUDA Samples
|
||||||
|
|
||||||
### Windows
|
### Building CUDA Samples
|
||||||
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
The CUDA Samples are built using CMake. Follow the instructions below for building on Linux, Windows, and for cross-compilation to Tegra devices.
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Complete samples solution files exist at parent directory of the repo:
|
|
||||||
|
|
||||||
Each individual sample has its own set of solution files at:
|
|
||||||
`<CUDA_SAMPLES_REPO>\Samples\<sample_dir>\`
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
|
|
||||||
### Linux
|
### Linux
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l, aarch64.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/> `$ make TARGET_ARCH=aarch64` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details on cross platform compilation of cuda samples.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
Ensure that CMake (version 3.20 or later) is installed. Install it using your package manager if necessary:
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
e.g.
|
||||||
```
|
```sudo apt install cmake```
|
||||||
|
|
||||||
|
Navigate to the root of the cloned repository and create a build directory:
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
```
|
||||||
|
Configure the project with CMake:
|
||||||
|
```
|
||||||
|
cmake ..
|
||||||
|
```
|
||||||
|
Build the samples:
|
||||||
|
```
|
||||||
|
make -j$(nproc)
|
||||||
|
```
|
||||||
|
Run the samples from their respective directories in the build folder. You can also follow this process from any subdirectory of the samples repo, or from within any individual sample.
|
||||||
|
|
||||||
|
### Windows
|
||||||
|
|
||||||
|
Language services for CMake are available in Visual Studio 2019 version 16.5 or later, and you can directly import the CUDA samples repository from either the root level or from any
|
||||||
|
subdirectory or individual sample.
|
||||||
|
|
||||||
|
To build from the command line, open the `x64 Native Tools Command Prompt for VS` provided with your Visual Studio installation.
|
||||||
|
|
||||||
|
Navigate to the root of the cloned repository and create a build directory:
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
```
|
||||||
|
Configure the project with CMake - for example:
|
||||||
|
```
|
||||||
|
cmake .. -G "Visual Studio 16 2019" -A x64
|
||||||
|
```
|
||||||
|
Open the generated solution file CUDA_Samples.sln in Visual Studio. Build the samples by selecting the desired configuration (e.g., Debug or Release) and pressing F7 (Build Solution).
|
||||||
|
|
||||||
|
Run the samples from the output directories specified in Visual Studio.
|
||||||
|
|
||||||
|
### Platform-Specific Samples
|
||||||
|
|
||||||
|
Some CUDA samples are specific to certain platforms, and require passing flags into CMake to enable. In particular, we define the following platform-specific flags:
|
||||||
|
|
||||||
|
* `BUILD_TEGRA` - for Tegra-specific samples
|
||||||
|
|
||||||
|
To build these samples, set the variables either on the command line or through your CMake GUI. For example:
|
||||||
|
|
||||||
|
```
|
||||||
|
cmake -DBUILD_TEGRA=True ..
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cross-Compilation for Tegra Platforms
|
||||||
|
|
||||||
|
Install the NVIDIA toolchain and cross-compilation environment for Tegra devices as described in the Tegra Development Guide.
|
||||||
|
|
||||||
|
Ensure that CMake (version 3.20 or later) is installed.
|
||||||
|
|
||||||
|
Navigate to the root of the cloned repository and create a build directory:
|
||||||
|
```
|
||||||
|
mkdir build && cd build
|
||||||
|
```
|
||||||
|
Configure the project with CMake, specifying the Tegra toolchain file:
|
||||||
|
```
|
||||||
|
cmake .. -DCMAKE_TOOLCHAIN_FILE=/path/to/tegra/toolchain.cmake
|
||||||
|
```
|
||||||
|
Build the samples:
|
||||||
|
```
|
||||||
|
make -j$(nproc)
|
||||||
|
```
|
||||||
|
Transfer the built binaries to the Tegra device and execute them there.
|
||||||
|
|
||||||
|
### Building for Automotive Linux Platforms
|
||||||
|
|
||||||
|
These platforms require additional information to be passed to CMake on the command line to ensure proper resolution of all necessary include and library files.
|
||||||
|
Instead of being in the default location, `/usr/local/cuda/include` or `/usr/local/cuda/lib64`, you must point to architecture-specific paths:
|
||||||
|
|
||||||
|
`/usr/local/cuda/<ARCH>/targets/aarch64-linux/lib`
|
||||||
|
and
|
||||||
|
`/usr/local/cuda-12.8/<ARCH>/include`
|
||||||
|
|
||||||
|
An example build might look like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ mkdir build
|
||||||
|
$ cd build
|
||||||
|
|
||||||
|
$ cmake -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_LIBRARY_PATH=/usr/local/cuda/orin/lib64/ -DCMAKE_INCLUDE_PATH=/usr/local/cuda/orin/include -DBUILD_TEGRA=True ..
|
||||||
|
```
|
||||||
|
|
||||||
|
### QNX
|
||||||
|
|
||||||
|
Note that in the current branch sample cross-compilation for QNX is not fully validated. This placeholder will be updated in the
|
||||||
|
near future with QNX cross-compilation instructions. In the meantime, if you want to cross-compile for QNX please check out one
|
||||||
|
of the previous tags prior to the CMake build system transition in 12.8.
|
||||||
|
|
||||||
## Samples list
|
## Samples list
|
||||||
|
|
||||||
@ -108,7 +170,7 @@ These third-party dependencies are required by some CUDA samples. If available,
|
|||||||
|
|
||||||
FreeImage is an open source imaging library. FreeImage can usually be installed on Linux using your distribution's package manager system. FreeImage can also be downloaded from the FreeImage website.
|
FreeImage is an open source imaging library. FreeImage can usually be installed on Linux using your distribution's package manager system. FreeImage can also be downloaded from the FreeImage website.
|
||||||
|
|
||||||
To set up FreeImage on a Windows system, extract the FreeImage DLL distribution into the folder `../../../Common/FreeImage/Dist/x64` such that it contains the .h and .lib files. Copy the .dll file to root level `bin/win64/Debug` and `bin/win64/Release` folder.
|
To set up FreeImage on a Windows system, extract the FreeImage DLL distribution into the folder `../../../Common/FreeImage/Dist/x64` such that it contains the .h and .lib files. Then either copy the .dll file into the `Release/` or `Debug/` folder that the sample executes from, or pass the FreeImage location to CMake at configure time with the `-DFREEIMAGE_INCLUDE_DIR` and `-DFREEIMAGE_LIBRARY` options.
|
||||||
|
|
||||||
#### Message Passing Interface
|
#### Message Passing Interface
|
||||||
|
|
||||||
@ -138,6 +200,11 @@ OpenGL ES is an embedded systems graphics library used for 2D and 3D rendering.
|
|||||||
|
|
||||||
Vulkan is a low-overhead, cross-platform 3D graphics and compute API. Vulkan targets high-performance realtime 3D graphics applications such as video games and interactive media across all platforms. On systems which support Vulkan, NVIDIA's Vulkan implementation is provided with the CUDA Driver. For building and running Vulkan applications one needs to install the [Vulkan SDK](https://www.lunarg.com/vulkan-sdk/).
|
Vulkan is a low-overhead, cross-platform 3D graphics and compute API. Vulkan targets high-performance realtime 3D graphics applications such as video games and interactive media across all platforms. On systems which support Vulkan, NVIDIA's Vulkan implementation is provided with the CUDA Driver. For building and running Vulkan applications one needs to install the [Vulkan SDK](https://www.lunarg.com/vulkan-sdk/).
|
||||||
|
|
||||||
|
#### GLFW
|
||||||
|
GLFW is a lightweight, open-source library designed for managing OpenGL, OpenGL ES, and Vulkan contexts. It simplifies the process of creating and managing windows, handling user input (keyboard, mouse, and joystick), and working with multiple monitors in a cross-platform manner.
|
||||||
|
|
||||||
|
To set up GLFW on a Windows system, download the pre-built binaries from the [GLFW website](https://www.glfw.org/download.html) and extract the zip file into a folder. Pass the GLFW include directory to CMake at configure time as `-DGLFW_INCLUDE_DIR`, and follow the Build_instructions.txt in the sample folder to complete the setup.
|
||||||
|
|
||||||
#### OpenMP
|
#### OpenMP
|
||||||
|
|
||||||
OpenMP is an API for multiprocessing programming. OpenMP can be installed using your Linux distribution's package manager system. It usually comes preinstalled with GCC. It can also be found at the [OpenMP website](http://openmp.org/).
|
OpenMP is an API for multiprocessing programming. OpenMP can be installed using your Linux distribution's package manager system. It usually comes preinstalled with GCC. It can also be found at the [OpenMP website](http://openmp.org/).
|
||||||
|
62
Samples/0_Introduction/CMakeLists.txt
Normal file
62
Samples/0_Introduction/CMakeLists.txt
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
# Aggregates every sample in Samples/0_Introduction into one build.
cmake_minimum_required(VERSION 3.20)

# This file sits in Samples/0_Introduction, so the repository root is two
# levels up (the per-sample CMakeLists files, one directory deeper, use three).
# NOTE(review): assumes the shared Find modules live in <repo>/cmake/Modules --
# confirm.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/Modules")

# NOTE(review): the project name looks copy-pasted from the simpleCallback
# sample; it is kept as-is so generated solution/project names do not change.
project(simpleCallback LANGUAGES C CXX CUDA)

find_package(CUDAToolkit REQUIRED)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)

# Kept in CMAKE_CUDA_FLAGS (rather than a compile option) so the flag stays
# part of the global CUDA flags exactly as before.
string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets")

add_subdirectory(UnifiedMemoryStreams)
add_subdirectory(asyncAPI)
add_subdirectory(clock)
add_subdirectory(clock_nvrtc)
add_subdirectory(cudaOpenMP)
add_subdirectory(fp16ScalarProduct)
add_subdirectory(matrixMul)
add_subdirectory(matrixMulDrv)
add_subdirectory(matrixMulDynlinkJIT)
add_subdirectory(matrixMul_nvrtc)
add_subdirectory(mergeSort)
add_subdirectory(simpleAWBarrier)
add_subdirectory(simpleAssert)
add_subdirectory(simpleAssert_nvrtc)
add_subdirectory(simpleAtomicIntrinsics)
add_subdirectory(simpleAtomicIntrinsics_nvrtc)
add_subdirectory(simpleAttributes)
add_subdirectory(simpleCUDA2GL)
add_subdirectory(simpleCallback)
add_subdirectory(simpleCooperativeGroups)
add_subdirectory(simpleCubemapTexture)
add_subdirectory(simpleDrvRuntime)
add_subdirectory(simpleHyperQ)
add_subdirectory(simpleIPC)
add_subdirectory(simpleLayeredTexture)
add_subdirectory(simpleMPI)
add_subdirectory(simpleMultiCopy)
add_subdirectory(simpleMultiGPU)
add_subdirectory(simpleOccupancy)
add_subdirectory(simpleP2P)
add_subdirectory(simplePitchLinearTexture)
add_subdirectory(simplePrintf)
add_subdirectory(simpleStreams)
add_subdirectory(simpleSurfaceWrite)
add_subdirectory(simpleTemplates)
add_subdirectory(simpleTexture)
add_subdirectory(simpleTexture3D)
add_subdirectory(simpleTextureDrv)
add_subdirectory(simpleVoteIntrinsics)
add_subdirectory(simpleZeroCopy)
add_subdirectory(systemWideAtomics)
add_subdirectory(vectorAdd)
add_subdirectory(vectorAddDrv)
add_subdirectory(vectorAddMMAP)
add_subdirectory(vectorAdd_nvrtc)
|
@ -4,24 +4,12 @@
|
|||||||
### [asyncAPI](./asyncAPI)
|
### [asyncAPI](./asyncAPI)
|
||||||
This sample illustrates the usage of CUDA events for both GPU timing and overlapping CPU and GPU execution. Events are inserted into a stream of CUDA calls. Since CUDA stream calls are asynchronous, the CPU can perform computations while GPU is executing (including DMA memcopies between the host and device). CPU can query CUDA events to determine whether GPU has completed tasks.
|
This sample illustrates the usage of CUDA events for both GPU timing and overlapping CPU and GPU execution. Events are inserted into a stream of CUDA calls. Since CUDA stream calls are asynchronous, the CPU can perform computations while GPU is executing (including DMA memcopies between the host and device). CPU can query CUDA events to determine whether GPU has completed tasks.
|
||||||
|
|
||||||
### [c++11_cuda](./c++11_cuda)
|
|
||||||
This sample demonstrates C++11 feature support in CUDA. It scans a input text file and prints no. of occurrences of x, y, z, w characters.
|
|
||||||
|
|
||||||
### [clock](./clock)
|
### [clock](./clock)
|
||||||
This example shows how to use the clock function to measure the performance of block of threads of a kernel accurately.
|
This example shows how to use the clock function to measure the performance of block of threads of a kernel accurately.
|
||||||
|
|
||||||
### [clock_nvrtc](./clock_nvrtc)
|
### [clock_nvrtc](./clock_nvrtc)
|
||||||
This example shows how to use the clock function using libNVRTC to measure the performance of block of threads of a kernel accurately.
|
This example shows how to use the clock function using libNVRTC to measure the performance of block of threads of a kernel accurately.
|
||||||
|
|
||||||
### [concurrentKernels](./concurrentKernels)
|
|
||||||
This sample demonstrates the use of CUDA streams for concurrent execution of several kernels on GPU device. It also illustrates how to introduce dependencies between CUDA streams with the new cudaStreamWaitEvent function.
|
|
||||||
|
|
||||||
### [cppIntegration](./cppIntegration)
|
|
||||||
This example demonstrates how to integrate CUDA into an existing C++ application, i.e. the CUDA entry point on host side is only a function which is called from C++ code and only the file containing this function is compiled with nvcc. It also demonstrates that vector types can be used from cpp.
|
|
||||||
|
|
||||||
### [cppOverload](./cppOverload)
|
|
||||||
This sample demonstrates how to use C++ function overloading on the GPU.
|
|
||||||
|
|
||||||
### [cudaOpenMP](./cudaOpenMP)
|
### [cudaOpenMP](./cudaOpenMP)
|
||||||
This sample demonstrates how to use OpenMP API to write an application for multiple GPUs.
|
This sample demonstrates how to use OpenMP API to write an application for multiple GPUs.
|
||||||
|
|
||||||
@ -106,9 +94,6 @@ Use of Pitch Linear Textures
|
|||||||
### [simplePrintf](./simplePrintf)
|
### [simplePrintf](./simplePrintf)
|
||||||
This basic CUDA Runtime API sample demonstrates how to use the printf function in the device code.
|
This basic CUDA Runtime API sample demonstrates how to use the printf function in the device code.
|
||||||
|
|
||||||
### [simpleSeparateCompilation](./simpleSeparateCompilation)
|
|
||||||
This sample demonstrates a CUDA 5.0 feature, the ability to create a GPU device static library and use it within another CUDA kernel. This example demonstrates how to pass in a GPU device function (from the GPU device static library) as a function pointer to be called. This sample requires devices with compute capability 2.0 or higher.
|
|
||||||
|
|
||||||
### [simpleStreams](./simpleStreams)
|
### [simpleStreams](./simpleStreams)
|
||||||
This sample uses CUDA streams to overlap kernel executions with memory copies between the host and a GPU device. This sample uses a new CUDA 4.0 feature that supports pinning of generic host memory. Requires Compute Capability 2.0 or higher.
|
This sample uses CUDA streams to overlap kernel executions with memory copies between the host and a GPU device. This sample uses a new CUDA 4.0 feature that supports pinning of generic host memory. Requires Compute Capability 2.0 or higher.
|
||||||
|
|
||||||
@ -118,9 +103,6 @@ Simple example that demonstrates the use of 2D surface references (Write-to-Text
|
|||||||
### [simpleTemplates](./simpleTemplates)
|
### [simpleTemplates](./simpleTemplates)
|
||||||
This sample is a templatized version of the template project. It also shows how to correctly templatize dynamically allocated shared memory arrays.
|
This sample is a templatized version of the template project. It also shows how to correctly templatize dynamically allocated shared memory arrays.
|
||||||
|
|
||||||
### [simpleTemplates_nvrtc](./simpleTemplates_nvrtc)
|
|
||||||
This sample is a templatized version of the template project. It also shows how to correctly templatize dynamically allocated shared memory arrays.
|
|
||||||
|
|
||||||
### [simpleTexture](./simpleTexture)
|
### [simpleTexture](./simpleTexture)
|
||||||
Simple example that demonstrates use of Textures in CUDA.
|
Simple example that demonstrates use of Textures in CUDA.
|
||||||
|
|
||||||
@ -133,9 +115,6 @@ Simple example that demonstrates use of Textures in CUDA. This sample uses the
|
|||||||
### [simpleVoteIntrinsics](./simpleVoteIntrinsics)
|
### [simpleVoteIntrinsics](./simpleVoteIntrinsics)
|
||||||
Simple program which demonstrates how to use the Vote (__any_sync, __all_sync) intrinsic instruction in a CUDA kernel.
|
Simple program which demonstrates how to use the Vote (__any_sync, __all_sync) intrinsic instruction in a CUDA kernel.
|
||||||
|
|
||||||
### [simpleVoteIntrinsics_nvrtc](./simpleVoteIntrinsics_nvrtc)
|
|
||||||
Simple program which demonstrates how to use the Vote (any, all) intrinsic instruction in a CUDA kernel with runtime compilation using NVRTC APIs. Requires Compute Capability 2.0 or higher.
|
|
||||||
|
|
||||||
### [simpleZeroCopy](./simpleZeroCopy)
|
### [simpleZeroCopy](./simpleZeroCopy)
|
||||||
This sample illustrates how to use Zero MemCopy: kernels can read and write directly to pinned system memory.
|
This sample illustrates how to use Zero MemCopy: kernels can read and write directly to pinned system memory.
|
||||||
|
|
||||||
@ -159,4 +138,3 @@ This Vector Addition sample is a basic sample that is implemented element by ele
|
|||||||
|
|
||||||
### [vectorAddMMAP](./vectorAddMMAP)
|
### [vectorAddMMAP](./vectorAddMMAP)
|
||||||
This sample replaces the device allocation in the vectorAddDrv sample with cuMemMap-ed allocations. This sample demonstrates that the cuMemMap api allows the user to specify the physical properties of their memory while retaining the contiguous nature of their access, thus not requiring a change in their program structure.
|
This sample replaces the device allocation in the vectorAddDrv sample with cuMemMap-ed allocations. This sample demonstrates that the cuMemMap api allows the user to specify the physical properties of their memory while retaining the contiguous nature of their access, thus not requiring a change in their program structure.
|
||||||
|
|
||||||
|
39
Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt
Normal file
39
Samples/0_Introduction/UnifiedMemoryStreams/CMakeLists.txt
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# Build script for the UnifiedMemoryStreams sample.
#
# Produces one executable, `UnifiedMemoryStreams`, from UnifiedMemoryStreams.cu.
# The executable links against cuBLAS and OpenMP. When OpenMP (C++ component)
# cannot be found, the sample is skipped with a status message instead of
# failing the whole configure step.
cmake_minimum_required(VERSION 3.20)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Modules")

project(UnifiedMemoryStreams LANGUAGES C CXX CUDA)

find_package(CUDAToolkit REQUIRED)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)

# Kept in CMAKE_CUDA_FLAGS (rather than target_compile_options) so the flag is
# appended to the same variable, with the same value, as before.
string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets")

# Opt-in device debugging. Uses the per-configuration flags variable so it
# works with both single-config and multi-config generators; testing
# CMAKE_BUILD_TYPE would never match under Visual Studio / Xcode /
# Ninja Multi-Config. Defaults to OFF, which matches the previous behaviour
# (the -G line was commented out).
option(ENABLE_CUDA_DEBUG "Add -G to Debug builds of CUDA code to enable cuda-gdb (expensive)" OFF)
if(ENABLE_CUDA_DEBUG)
    string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -G")
endif()

# OpenMP is deliberately NOT marked REQUIRED: with REQUIRED, a missing OpenMP
# aborts configuration and the "will not build sample" branch below is dead.
# Only the CXX component is requested because that is the imported target the
# executable links against.
find_package(OpenMP COMPONENTS CXX)

if(OpenMP_CXX_FOUND)
    # Add target for UnifiedMemoryStreams
    add_executable(UnifiedMemoryStreams UnifiedMemoryStreams.cu)

    # Shared sample helper headers live in <repo>/Common.
    target_include_directories(UnifiedMemoryStreams PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/../../../Common"
    )

    target_compile_options(UnifiedMemoryStreams PRIVATE
        $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>
    )

    target_compile_features(UnifiedMemoryStreams PRIVATE cxx_std_17 cuda_std_17)

    set_target_properties(UnifiedMemoryStreams PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

    # NOTE(review): OpenMP::OpenMP_CXX may attach its compile flag to CXX
    # sources only; confirm the .cu translation unit is actually compiled with
    # OpenMP enabled on every supported host compiler.
    target_link_libraries(UnifiedMemoryStreams PRIVATE
        CUDA::cublas
        OpenMP::OpenMP_CXX
    )
else()
    message(STATUS "OpenMP not found - will not build sample 'UnifiedMemoryStreams'")
endif()
|
@ -1,398 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
#
|
|
||||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Location of the CUDA Toolkit
|
|
||||||
CUDA_PATH ?= /usr/local/cuda
|
|
||||||
|
|
||||||
##############################
|
|
||||||
# start deprecated interface #
|
|
||||||
##############################
|
|
||||||
ifeq ($(x86_64),1)
|
|
||||||
$(info WARNING - x86_64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
|
||||||
TARGET_ARCH ?= x86_64
|
|
||||||
endif
|
|
||||||
ifeq ($(ARMv7),1)
|
|
||||||
$(info WARNING - ARMv7 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
|
||||||
TARGET_ARCH ?= armv7l
|
|
||||||
endif
|
|
||||||
ifeq ($(aarch64),1)
|
|
||||||
$(info WARNING - aarch64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
|
||||||
TARGET_ARCH ?= aarch64
|
|
||||||
endif
|
|
||||||
ifeq ($(ppc64le),1)
|
|
||||||
$(info WARNING - ppc64le variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
|
||||||
TARGET_ARCH ?= ppc64le
|
|
||||||
endif
|
|
||||||
ifneq ($(GCC),)
|
|
||||||
$(info WARNING - GCC variable has been deprecated)
|
|
||||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
|
||||||
HOST_COMPILER ?= $(GCC)
|
|
||||||
endif
|
|
||||||
ifneq ($(abi),)
|
|
||||||
$(error ERROR - abi variable has been removed)
|
|
||||||
endif
|
|
||||||
############################
|
|
||||||
# end deprecated interface #
|
|
||||||
############################
|
|
||||||
|
|
||||||
# architecture
|
|
||||||
HOST_ARCH := $(shell uname -m)
|
|
||||||
TARGET_ARCH ?= $(HOST_ARCH)
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
|
||||||
TARGET_SIZE := 64
|
|
||||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
|
||||||
TARGET_SIZE := 32
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
|
||||||
ifeq ($(HOST_ARCH),aarch64)
|
|
||||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
|
||||||
HOST_ARCH := sbsa
|
|
||||||
TARGET_ARCH := sbsa
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
|
||||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
|
||||||
TARGET_ARCH = armv7l
|
|
||||||
endif
|
|
||||||
|
|
||||||
# operating system
|
|
||||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
|
||||||
TARGET_OS ?= $(HOST_OS)
|
|
||||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|
||||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# host compiler
|
|
||||||
ifdef HOST_COMPILER
|
|
||||||
CUSTOM_HOST_COMPILER = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
|
||||||
HOST_COMPILER ?= clang++
|
|
||||||
endif
|
|
||||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
|
||||||
ifeq ($(TARGET_OS),linux)
|
|
||||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),aarch64)
|
|
||||||
ifeq ($(TARGET_OS), linux)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
|
||||||
else ifeq ($(TARGET_OS), android)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),sbsa)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
HOST_COMPILER ?= g++
|
|
||||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
|
||||||
|
|
||||||
# internal flags
|
|
||||||
NVCCFLAGS := -m${TARGET_SIZE}
|
|
||||||
CCFLAGS :=
|
|
||||||
LDFLAGS :=
|
|
||||||
|
|
||||||
# build flags
|
|
||||||
|
|
||||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
|
||||||
GCC_PATH := $(shell which gcc)
|
|
||||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
|
||||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
|
||||||
LDFLAGS += -lstdc++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
|
||||||
CCFLAGS += -arch $(HOST_ARCH)
|
|
||||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
|
||||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
|
||||||
CCFLAGS += -mfloat-abi=hard
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
LDFLAGS += -pie
|
|
||||||
CCFLAGS += -fpie -fpic -fexceptions
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
NVCCFLAGS += -D_QNX_SOURCE
|
|
||||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
|
||||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
|
||||||
LDFLAGS += -lsocket
|
|
||||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
|
||||||
ifdef TARGET_OVERRIDE
|
|
||||||
LDFLAGS += -lslog2
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
|
||||||
CCFLAGS += -I$(TARGET_FS)/../include
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
|
||||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Install directory of different arch
|
|
||||||
CUDA_INSTALL_TARGET_DIR :=
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Debug build flags
|
|
||||||
ifeq ($(dbg),1)
|
|
||||||
NVCCFLAGS += -g -G
|
|
||||||
BUILD_TYPE := debug
|
|
||||||
else
|
|
||||||
BUILD_TYPE := release
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS :=
|
|
||||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
|
||||||
|
|
||||||
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
|
|
||||||
|
|
||||||
SAMPLE_ENABLED := 1
|
|
||||||
|
|
||||||
# This sample is not supported on QNX
|
|
||||||
ifeq ($(TARGET_OS),qnx)
|
|
||||||
$(info >>> WARNING - UnifiedMemoryStreams is not supported on QNX - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_LDFLAGS :=
|
|
||||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
|
||||||
|
|
||||||
# Common includes and paths for CUDA
|
|
||||||
INCLUDES := -I../../../Common
|
|
||||||
LIBRARIES :=
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Attempt to compile a minimal OpenMP application. If a.out exists, OpenMP is properly set up.
|
|
||||||
ifneq (,$(filter $(TARGET_OS),linux android))
|
|
||||||
|
|
||||||
ifneq (,$(filter $(TARGET_OS), android))
|
|
||||||
LIBRARIES += -lomp
|
|
||||||
else
|
|
||||||
LIBRARIES += -lgomp
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS += -Xcompiler -fopenmp
|
|
||||||
$(shell echo "#include <omp.h>" > test.c ; echo "int main() { omp_get_num_threads(); return 0; }" >> test.c ; $(HOST_COMPILER) -fopenmp test.c)
|
|
||||||
OPENMP ?= $(shell find a.out 2>/dev/null)
|
|
||||||
|
|
||||||
ifeq ($(OPENMP),)
|
|
||||||
$(info -----------------------------------------------------------------------------------------------)
|
|
||||||
$(info WARNING - OpenMP is unable to compile)
|
|
||||||
$(info -----------------------------------------------------------------------------------------------)
|
|
||||||
$(info This CUDA Sample cannot be built if the OpenMP compiler is not set up correctly.)
|
|
||||||
$(info This will be a dry-run of the Makefile.)
|
|
||||||
$(info For more information on how to set up your environment to build and run this )
|
|
||||||
$(info sample, please refer the CUDA Samples documentation and release notes)
|
|
||||||
$(info -----------------------------------------------------------------------------------------------)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
$(shell rm a.out test.c 2>/dev/null)
|
|
||||||
else
|
|
||||||
LIBRARIES += -lpthread
|
|
||||||
ALL_CCFLAGS += -DUSE_PTHREADS
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Gencode arguments
|
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
|
||||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
|
||||||
else
|
|
||||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
|
||||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(GENCODE_FLAGS),)
|
|
||||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
|
||||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
|
||||||
|
|
||||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
|
||||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
|
||||||
ifneq ($(HIGHEST_SM),)
|
|
||||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
|
||||||
|
|
||||||
LIBRARIES += -lcublas
|
|
||||||
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
EXEC ?= @echo "[@]"
|
|
||||||
endif
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Target rules
|
|
||||||
all: build
|
|
||||||
|
|
||||||
build: UnifiedMemoryStreams
|
|
||||||
|
|
||||||
check.deps:
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
@echo "Sample will be waived due to the above missing dependencies"
|
|
||||||
else
|
|
||||||
@echo "Sample is ready - all dependencies have been met"
|
|
||||||
endif
|
|
||||||
|
|
||||||
UnifiedMemoryStreams.o:UnifiedMemoryStreams.cu
|
|
||||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
UnifiedMemoryStreams: UnifiedMemoryStreams.o
|
|
||||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
|
||||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
|
|
||||||
run: build
|
|
||||||
$(EXEC) ./UnifiedMemoryStreams
|
|
||||||
|
|
||||||
testrun: build
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f UnifiedMemoryStreams UnifiedMemoryStreams.o
|
|
||||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/UnifiedMemoryStreams
|
|
||||||
|
|
||||||
clobber: clean
|
|
@ -1,88 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
|
||||||
<entry>
|
|
||||||
<name>UnifiedMemoryStreams</name>
|
|
||||||
<cuda_api_list>
|
|
||||||
<toolkit>cudaStreamDestroy</toolkit>
|
|
||||||
<toolkit>cudaFree</toolkit>
|
|
||||||
<toolkit>cudaMallocManaged</toolkit>
|
|
||||||
<toolkit>cudaStreamAttachMemAsync</toolkit>
|
|
||||||
<toolkit>cudaSetDevice</toolkit>
|
|
||||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
|
||||||
<toolkit>cudaStreamSynchronize</toolkit>
|
|
||||||
<toolkit>cudaStreamCreate</toolkit>
|
|
||||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
|
||||||
</cuda_api_list>
|
|
||||||
<description><![CDATA[This sample demonstrates the use of OpenMP and streams with Unified Memory on a single GPU.]]></description>
|
|
||||||
<devicecompilation>whole</devicecompilation>
|
|
||||||
<includepaths>
|
|
||||||
<path>./</path>
|
|
||||||
<path>../</path>
|
|
||||||
<path>../../../Common</path>
|
|
||||||
</includepaths>
|
|
||||||
<keyconcepts>
|
|
||||||
<concept level="basic">CUDA Systems Integration</concept>
|
|
||||||
<concept level="basic">OpenMP</concept>
|
|
||||||
<concept level="basic">CUBLAS</concept>
|
|
||||||
<concept level="basic">Multithreading</concept>
|
|
||||||
<concept level="basic">Unified Memory</concept>
|
|
||||||
<concept level="basic">CUDA Streams and Events</concept>
|
|
||||||
</keyconcepts>
|
|
||||||
<keywords>
|
|
||||||
<keyword>CUDA</keyword>
|
|
||||||
<keyword>CUBLAS</keyword>
|
|
||||||
<keyword>OpenMP</keyword>
|
|
||||||
<keyword>cluster</keyword>
|
|
||||||
<keyword>multi-GPU Support</keyword>
|
|
||||||
<keyword>Unified Memory</keyword>
|
|
||||||
<keyword>UVM</keyword>
|
|
||||||
<keyword>openMP</keyword>
|
|
||||||
<keyword>Streams</keyword>
|
|
||||||
<keyword>pthreads</keyword>
|
|
||||||
</keywords>
|
|
||||||
<libraries>
|
|
||||||
<library>cublas</library>
|
|
||||||
</libraries>
|
|
||||||
<librarypaths>
|
|
||||||
</librarypaths>
|
|
||||||
<nsight_eclipse>true</nsight_eclipse>
|
|
||||||
<primary_file>UnifiedMemoryStreams.cu</primary_file>
|
|
||||||
<required_dependencies>
|
|
||||||
<dependency>OpenMP</dependency>
|
|
||||||
<dependency>UVM</dependency>
|
|
||||||
<dependency>CUBLAS</dependency>
|
|
||||||
</required_dependencies>
|
|
||||||
<scopes>
|
|
||||||
<scope>1:CUDA Basic Topics</scope>
|
|
||||||
<scope>1:CUDA Systems Integration</scope>
|
|
||||||
<scope>1:Unified Memory</scope>
|
|
||||||
</scopes>
|
|
||||||
<supported_envs>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>macosx</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<platform>windows7</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>arm</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>sbsa</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>ppc64le</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
</supported_envs>
|
|
||||||
<supported_sm_architectures>
|
|
||||||
<from>3.5</from>
|
|
||||||
</supported_sm_architectures>
|
|
||||||
<title>Unified Memory Streams</title>
|
|
||||||
<type>exe</type>
|
|
||||||
</entry>
|
|
@ -16,7 +16,7 @@ Linux, Windows
|
|||||||
|
|
||||||
## Supported CPU Architecture
|
## Supported CPU Architecture
|
||||||
|
|
||||||
x86_64, ppc64le, armv7l
|
x86_64, armv7l
|
||||||
|
|
||||||
## CUDA APIs involved
|
## CUDA APIs involved
|
||||||
|
|
||||||
@ -31,42 +31,4 @@ cudaStreamDestroy, cudaFree, cudaMallocManaged, cudaStreamAttachMemAsync, cudaSe
|
|||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
## Build and Run
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Each individual sample has its own set of solution files in its directory:
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
|
||||||
|
|
||||||
### Linux
|
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
|
||||||
```
|
|
||||||
|
|
||||||
## References (for more details)
|
## References (for more details)
|
||||||
|
|
||||||
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2017
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnifiedMemoryStreams", "UnifiedMemoryStreams_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,113 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>UnifiedMemoryStreams_vs2017</RootNamespace>
|
|
||||||
<ProjectName>UnifiedMemoryStreams</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
|
||||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
|
||||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
|
||||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v141</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cublas.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
<AdditionalCompilerOptions>/openmp</AdditionalCompilerOptions>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="UnifiedMemoryStreams.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2019
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnifiedMemoryStreams", "UnifiedMemoryStreams_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,109 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>UnifiedMemoryStreams_vs2019</RootNamespace>
|
|
||||||
<ProjectName>UnifiedMemoryStreams</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v142</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cublas.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
<AdditionalCompilerOptions>/openmp</AdditionalCompilerOptions>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="UnifiedMemoryStreams.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2022
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnifiedMemoryStreams", "UnifiedMemoryStreams_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,109 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>UnifiedMemoryStreams_vs2022</RootNamespace>
|
|
||||||
<ProjectName>UnifiedMemoryStreams</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v143</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cublas.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/UnifiedMemoryStreams.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
<AdditionalCompilerOptions>/openmp</AdditionalCompilerOptions>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="UnifiedMemoryStreams.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
28
Samples/0_Introduction/asyncAPI/CMakeLists.txt
Normal file
28
Samples/0_Introduction/asyncAPI/CMakeLists.txt
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# Build script for the asyncAPI CUDA sample.
#
# Produces a single executable, `asyncAPI`, from asyncAPI.cu. Shared sample
# helper headers are taken from the repository-level Common/ directory.
cmake_minimum_required(VERSION 3.20)

# Make the repository's helper CMake modules visible to include()/find_package().
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Modules")

project(asyncAPI LANGUAGES C CXX CUDA)

find_package(CUDAToolkit REQUIRED)

# GPU architectures to generate code for.
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)

# Kept in CMAKE_CUDA_FLAGS (rather than target_compile_options) so the flag is
# also passed to the nvcc device-link step used by separable compilation.
string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets")

# Device-side debugging (cuda-gdb) is intentionally left off because it is
# expensive. To enable it for Debug builds, add to the target below:
#   target_compile_options(asyncAPI PRIVATE
#     "$<$<AND:$<CONFIG:Debug>,$<COMPILE_LANGUAGE:CUDA>>:-G>")

# Sample executable.
add_executable(asyncAPI asyncAPI.cu)

# Shared sample helper headers; scoped to this target instead of the directory.
target_include_directories(asyncAPI
    PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/../../../Common"
)

# Only nvcc understands --extended-lambda, so gate it on the CUDA language.
target_compile_options(asyncAPI
    PRIVATE
        $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>
)

target_compile_features(asyncAPI PRIVATE cxx_std_17 cuda_std_17)

set_target_properties(asyncAPI
    PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON
        POSITION_INDEPENDENT_CODE ON
)
|
@ -1,358 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
#
|
|
||||||
# Makefile project only supported on Mac OS X and Linux Platforms
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Location of the CUDA Toolkit
|
|
||||||
CUDA_PATH ?= /usr/local/cuda
|
|
||||||
|
|
||||||
##############################
|
|
||||||
# start deprecated interface #
|
|
||||||
##############################
|
|
||||||
ifeq ($(x86_64),1)
|
|
||||||
$(info WARNING - x86_64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
|
||||||
TARGET_ARCH ?= x86_64
|
|
||||||
endif
|
|
||||||
ifeq ($(ARMv7),1)
|
|
||||||
$(info WARNING - ARMv7 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
|
||||||
TARGET_ARCH ?= armv7l
|
|
||||||
endif
|
|
||||||
ifeq ($(aarch64),1)
|
|
||||||
$(info WARNING - aarch64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
|
||||||
TARGET_ARCH ?= aarch64
|
|
||||||
endif
|
|
||||||
ifeq ($(ppc64le),1)
|
|
||||||
$(info WARNING - ppc64le variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
|
||||||
TARGET_ARCH ?= ppc64le
|
|
||||||
endif
|
|
||||||
ifneq ($(GCC),)
|
|
||||||
$(info WARNING - GCC variable has been deprecated)
|
|
||||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
|
||||||
HOST_COMPILER ?= $(GCC)
|
|
||||||
endif
|
|
||||||
ifneq ($(abi),)
|
|
||||||
$(error ERROR - abi variable has been removed)
|
|
||||||
endif
|
|
||||||
############################
|
|
||||||
# end deprecated interface #
|
|
||||||
############################
|
|
||||||
|
|
||||||
# architecture
|
|
||||||
HOST_ARCH := $(shell uname -m)
|
|
||||||
TARGET_ARCH ?= $(HOST_ARCH)
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
|
||||||
TARGET_SIZE := 64
|
|
||||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
|
||||||
TARGET_SIZE := 32
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
|
||||||
ifeq ($(HOST_ARCH),aarch64)
|
|
||||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
|
||||||
HOST_ARCH := sbsa
|
|
||||||
TARGET_ARCH := sbsa
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
|
||||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
|
||||||
TARGET_ARCH = armv7l
|
|
||||||
endif
|
|
||||||
|
|
||||||
# operating system
|
|
||||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
|
||||||
TARGET_OS ?= $(HOST_OS)
|
|
||||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|
||||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Host compiler selection.
# CUSTOM_HOST_COMPILER records that the caller supplied HOST_COMPILER before
# any of the defaults below are applied.
ifdef HOST_COMPILER
    CUSTOM_HOST_COMPILER = 1
endif

ifeq ($(TARGET_OS),darwin)
    # Default to clang++ when the major version printed by xcodebuild is >= 5.
    ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
        HOST_COMPILER ?= clang++
    endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
    # Cross build: choose a default cross toolchain per target arch / OS.
    ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
        ifeq ($(TARGET_OS),linux)
            HOST_COMPILER ?= arm-linux-gnueabihf-g++
        else ifeq ($(TARGET_OS),qnx)
            # Both QNX toolchain locations are mandatory and are exported to
            # the environment of the commands run by this makefile.
            ifeq ($(QNX_HOST),)
                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
            endif
            ifeq ($(QNX_TARGET),)
                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
            endif
            export QNX_HOST
            export QNX_TARGET
            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
        else ifeq ($(TARGET_OS),android)
            HOST_COMPILER ?= arm-linux-androideabi-g++
        endif
    else ifeq ($(TARGET_ARCH),aarch64)
        ifeq ($(TARGET_OS), linux)
            HOST_COMPILER ?= aarch64-linux-gnu-g++
        else ifeq ($(TARGET_OS),qnx)
            # Same QNX requirements as the armv7l branch above.
            ifeq ($(QNX_HOST),)
                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
            endif
            ifeq ($(QNX_TARGET),)
                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
            endif
            export QNX_HOST
            export QNX_TARGET
            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
        else ifeq ($(TARGET_OS), android)
            HOST_COMPILER ?= aarch64-linux-android-clang++
        endif
    else ifeq ($(TARGET_ARCH),sbsa)
        HOST_COMPILER ?= aarch64-linux-gnu-g++
    else ifeq ($(TARGET_ARCH),ppc64le)
        HOST_COMPILER ?= powerpc64le-linux-gnu-g++
    endif
endif

# Fallback when no branch above picked a compiler.
HOST_COMPILER ?= g++

# nvcc is always invoked with the selected host compiler passed via -ccbin.
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
|
||||||
|
|
||||||
# Internal flags. NVCCFLAGS starts with the target word size; CCFLAGS and
# LDFLAGS start empty and are extended by the sections that follow.
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS   :=
LDFLAGS   :=

# build flags

# Link flag for customized HOST_COMPILER with gcc realpath.
# GCC_PATH is the gcc found on PATH. `command -v` is used instead of `which`
# because `which` is not guaranteed to be installed and may print diagnostics
# to stderr; the redirection also forces make to run the lookup through the
# shell, where `command` is a builtin.
GCC_PATH := $(shell command -v gcc 2>/dev/null)

# Add -lstdc++ only when the caller supplied HOST_COMPILER as an absolute path
# whose text contains "gcc" and that path differs from the gcc found on PATH.
ifeq ($(CUSTOM_HOST_COMPILER),1)
    ifneq ($(filter /%,$(HOST_COMPILER)),)
        ifneq ($(findstring gcc,$(HOST_COMPILER)),)
            ifneq ($(GCC_PATH),$(HOST_COMPILER))
                LDFLAGS += -lstdc++
            endif
        endif
    endif
endif
|
|
||||||
|
|
||||||
# Per-platform compiler / linker flags. Exactly one branch (or none) applies.
ifeq ($(TARGET_OS),darwin)
    # macOS: embed the CUDA library directory as an rpath and build for the
    # host architecture.
    CCFLAGS += -arch $(HOST_ARCH)
    LDFLAGS += -rpath $(CUDA_PATH)/lib
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
    # x86_64 -> armv7l Linux cross build: hard-float ABI and its loader.
    CCFLAGS += -mfloat-abi=hard
    LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
else ifeq ($(TARGET_OS),android)
    # Android: position-independent executable.
    CCFLAGS += -fpie -fpic -fexceptions
    LDFLAGS += -pie
endif
|
|
||||||
|
|
||||||
# Extra flags that only apply when cross-compiling (TARGET_ARCH != HOST_ARCH).
# TARGET_FS, when set, is used as the root of the target filesystem.
ifneq ($(TARGET_ARCH),$(HOST_ARCH))

    # ---- armv7l Linux ----
    ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
        ifneq ($(TARGET_FS),)
            # NOTE(review): `expr` falls back to string comparison when an
            # operand is not an integer, so versions such as "13" may compare
            # as <= "4.6" - confirm this is the intended behavior.
            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
            ifeq ($(GCCVERSIONLTEQ46),1)
                CCFLAGS += --sysroot=$(TARGET_FS)
            endif
            LDFLAGS += --sysroot=$(TARGET_FS)
            LDFLAGS += -rpath-link=$(TARGET_FS)/lib
            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
        endif
    endif

    # ---- aarch64 Linux ----
    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
        ifneq ($(TARGET_FS),)
            # NOTE(review): same string-vs-integer `expr` caveat as above.
            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
            ifeq ($(GCCVERSIONLTEQ46),1)
                CCFLAGS += --sysroot=$(TARGET_FS)
            endif
            # Library search and rpath-link directories inside the target
            # filesystem.
            LDFLAGS += --sysroot=$(TARGET_FS)
            LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
            LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
            LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
            # Header search directories inside the target filesystem.
            CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
            CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
        endif
    endif

    # ---- aarch64 QNX ----
    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
        NVCCFLAGS += -D_QNX_SOURCE
        NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
        CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
        LDFLAGS += -lsocket
        LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
        CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
        # slog2 is linked only when a toolkit target override is in effect.
        ifdef TARGET_OVERRIDE
            LDFLAGS += -lslog2
        endif

        ifneq ($(TARGET_FS),)
            LDFLAGS += -L$(TARGET_FS)/usr/lib
            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
            LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
            CCFLAGS += -I$(TARGET_FS)/../include
        endif
    endif

endif
|
|
||||||
|
|
||||||
# CUDA toolkit targets override: when TARGET_OVERRIDE is set, hand its value to
# nvcc through -target-dir.
ifdef TARGET_OVERRIDE
    NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
endif
|
|
||||||
|
|
||||||
# Per-architecture install sub-directory.
# CUDA_INSTALL_TARGET_DIR stays empty unless the TARGET_ARCH / TARGET_OS pair
# matches one of the entries below; the ppc64le entry matches on the
# architecture alone.
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
    CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
    CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
    CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
    CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
|
|
||||||
|
|
||||||
# Build type. Release is the default; `dbg=1` switches to a debug build and
# adds the -g -G flags for nvcc.
BUILD_TYPE := release
ifeq ($(dbg),1)
    NVCCFLAGS  += -g -G
    BUILD_TYPE := debug
endif
|
|
||||||
|
|
||||||
# Final compile flags: nvcc flags first, then every host-compiler flag
# forwarded individually through -Xcompiler.
ALL_CCFLAGS := \
    $(NVCCFLAGS) \
    $(EXTRA_NVCCFLAGS) \
    $(addprefix -Xcompiler ,$(CCFLAGS)) \
    $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))

# The sample is enabled until a later check waives it.
SAMPLE_ENABLED := 1

# Final link flags: a snapshot of the compile flags as they stand here, plus
# every linker flag forwarded individually through -Xlinker.
ALL_LDFLAGS := \
    $(ALL_CCFLAGS) \
    $(addprefix -Xlinker ,$(LDFLAGS)) \
    $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))

# Common includes and paths for CUDA
INCLUDES  := -I../../../Common
LIBRARIES :=
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Gencode arguments.
# SMS is the space-separated list of SM architectures to build for. The
# default list depends on whether the target is one of armv7l / aarch64 / sbsa.
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
    SMS ?= 53 61 70 72 75 80 86 87 90
else
    SMS ?= 50 52 60 61 70 75 80 86 89 90
endif

# An explicitly empty SMS waives the sample instead of failing the build.
ifeq ($(SMS),)
    $(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
    SAMPLE_ENABLED := 0
endif

# GENCODE_FLAGS is derived from SMS only when the caller did not supply it.
ifeq ($(GENCODE_FLAGS),)
    # Generate SASS code for each SM architecture listed in $(SMS)
    $(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))

    # Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility.
    # The highest entry is picked with a numeric sort: make's $(sort) orders
    # words lexically, which would rank e.g. 100 below 90.
    HIGHEST_SM := $(shell printf '%s\n' $(SMS) | sort -n | tail -n 1)
    ifneq ($(HIGHEST_SM),)
        GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
    endif
endif
|
|
||||||
|
|
||||||
# Flags appended to the compile flags for this sample.
ALL_CCFLAGS += --threads 0 --std=c++11

# When the sample is waived, EXEC (unless already set) turns every recipe line
# it prefixes into an echo of that line instead of running it.
ifeq ($(SAMPLE_ENABLED),0)
    EXEC ?= @echo "[@]"
endif
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Target rules.
# These targets name actions, not files; declaring them phony keeps a stray
# file or directory called e.g. "build" or "clean" from suppressing the rule.
.PHONY: all build check.deps run testrun clean clobber

all: build

build: asyncAPI

# Report whether the sample was waived by the checks above.
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Compile the CUDA source into an object file.
asyncAPI.o: asyncAPI.cu
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the executable, then copy it into the shared bin/ tree.
asyncAPI: asyncAPI.o
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

run: build
	$(EXEC) ./asyncAPI

testrun: build
	$(EXEC) ./asyncAPI --dummy-test-param

# Remove the local build products and the copy placed in the bin/ tree.
clean:
	rm -f asyncAPI asyncAPI.o
	rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/asyncAPI

clobber: clean
|
|
@ -1,90 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
|
||||||
<entry>
|
|
||||||
<name>asyncAPI</name>
|
|
||||||
<cuda_api_list>
|
|
||||||
<toolkit>cudaProfilerStop</toolkit>
|
|
||||||
<toolkit>cudaMalloc</toolkit>
|
|
||||||
<toolkit>cudaMemcpyAsync</toolkit>
|
|
||||||
<toolkit>cudaFree</toolkit>
|
|
||||||
<toolkit>cudaMallocHost</toolkit>
|
|
||||||
<toolkit>cudaProfilerStart</toolkit>
|
|
||||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
|
||||||
<toolkit>cudaEventRecord</toolkit>
|
|
||||||
<toolkit>cudaFreeHost</toolkit>
|
|
||||||
<toolkit>cudaMemset</toolkit>
|
|
||||||
<toolkit>cudaEventDestroy</toolkit>
|
|
||||||
<toolkit>cudaEventQuery</toolkit>
|
|
||||||
<toolkit>cudaEventElapsedTime</toolkit>
|
|
||||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
|
||||||
<toolkit>cudaEventCreate</toolkit>
|
|
||||||
</cuda_api_list>
|
|
||||||
<description><![CDATA[This sample illustrates the usage of CUDA events for both GPU timing and overlapping CPU and GPU execution. Events are inserted into a stream of CUDA calls. Since CUDA stream calls are asynchronous, the CPU can perform computations while GPU is executing (including DMA memcopies between the host and device). CPU can query CUDA events to determine whether GPU has completed tasks.]]></description>
|
|
||||||
<devicecompilation>whole</devicecompilation>
|
|
||||||
<includepaths>
|
|
||||||
<path>./</path>
|
|
||||||
<path>../</path>
|
|
||||||
<path>../../../Common</path>
|
|
||||||
</includepaths>
|
|
||||||
<keyconcepts>
|
|
||||||
<concept level="basic">Asynchronous Data Transfers</concept>
|
|
||||||
<concept level="basic">CUDA Streams and Events</concept>
|
|
||||||
</keyconcepts>
|
|
||||||
<keywords>
|
|
||||||
<keyword>GPGPU</keyword>
|
|
||||||
</keywords>
|
|
||||||
<libraries>
|
|
||||||
</libraries>
|
|
||||||
<librarypaths>
|
|
||||||
</librarypaths>
|
|
||||||
<nsight_eclipse>true</nsight_eclipse>
|
|
||||||
<primary_file>asyncAPI.cu</primary_file>
|
|
||||||
<qatests>
|
|
||||||
<qatest>--dummy-test-param</qatest>
|
|
||||||
</qatests>
|
|
||||||
<scopes>
|
|
||||||
<scope>1:CUDA Basic Topics</scope>
|
|
||||||
<scope>1:Performance Strategies</scope>
|
|
||||||
</scopes>
|
|
||||||
<sm-arch>sm50</sm-arch>
|
|
||||||
<sm-arch>sm52</sm-arch>
|
|
||||||
<sm-arch>sm53</sm-arch>
|
|
||||||
<sm-arch>sm60</sm-arch>
|
|
||||||
<sm-arch>sm61</sm-arch>
|
|
||||||
<sm-arch>sm70</sm-arch>
|
|
||||||
<sm-arch>sm72</sm-arch>
|
|
||||||
<sm-arch>sm75</sm-arch>
|
|
||||||
<sm-arch>sm80</sm-arch>
|
|
||||||
<sm-arch>sm86</sm-arch>
|
|
||||||
<sm-arch>sm87</sm-arch>
|
|
||||||
<sm-arch>sm89</sm-arch>
|
|
||||||
<sm-arch>sm90</sm-arch>
|
|
||||||
<supported_envs>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<platform>windows7</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>macosx</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>arm</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>sbsa</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>ppc64le</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
</supported_envs>
|
|
||||||
<supported_sm_architectures>
|
|
||||||
<include>all</include>
|
|
||||||
</supported_sm_architectures>
|
|
||||||
<title>asyncAPI</title>
|
|
||||||
<type>exe</type>
|
|
||||||
</entry>
|
|
@ -18,7 +18,7 @@ Linux, Windows
|
|||||||
|
|
||||||
## Supported CPU Architecture
|
## Supported CPU Architecture
|
||||||
|
|
||||||
x86_64, ppc64le, armv7l
|
x86_64, armv7l
|
||||||
|
|
||||||
## CUDA APIs involved
|
## CUDA APIs involved
|
||||||
|
|
||||||
@ -29,42 +29,4 @@ cudaProfilerStop, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaPro
|
|||||||
|
|
||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
|
||||||
## Build and Run
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Each individual sample has its own set of solution files in its directory:
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
|
||||||
|
|
||||||
### Linux
|
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, leaving TARGET_ARCH unset is equivalent to setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
|
||||||
```
|
|
||||||
|
|
||||||
## References (for more details)
|
## References (for more details)
|
||||||
|
|
||||||
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2017
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asyncAPI", "asyncAPI_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,112 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>asyncAPI_vs2017</RootNamespace>
|
|
||||||
<ProjectName>asyncAPI</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
|
||||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
|
||||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
|
||||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v141</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="asyncAPI.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2019
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asyncAPI", "asyncAPI_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>asyncAPI_vs2019</RootNamespace>
|
|
||||||
<ProjectName>asyncAPI</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v142</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="asyncAPI.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2022
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asyncAPI", "asyncAPI_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>asyncAPI_vs2022</RootNamespace>
|
|
||||||
<ProjectName>asyncAPI</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v143</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/asyncAPI.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="asyncAPI.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,391 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
#
|
|
||||||
# Makefile project (only supported on Mac OS X and Linux Platforms)
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Location of the CUDA Toolkit
|
|
||||||
CUDA_PATH ?= /usr/local/cuda
|
|
||||||
|
|
||||||
##############################
|
|
||||||
# start deprecated interface #
|
|
||||||
##############################
|
|
||||||
ifeq ($(x86_64),1)
|
|
||||||
$(info WARNING - x86_64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
|
||||||
TARGET_ARCH ?= x86_64
|
|
||||||
endif
|
|
||||||
ifeq ($(ARMv7),1)
|
|
||||||
$(info WARNING - ARMv7 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
|
||||||
TARGET_ARCH ?= armv7l
|
|
||||||
endif
|
|
||||||
ifeq ($(aarch64),1)
|
|
||||||
$(info WARNING - aarch64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
|
||||||
TARGET_ARCH ?= aarch64
|
|
||||||
endif
|
|
||||||
ifeq ($(ppc64le),1)
|
|
||||||
$(info WARNING - ppc64le variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
|
||||||
TARGET_ARCH ?= ppc64le
|
|
||||||
endif
|
|
||||||
ifneq ($(GCC),)
|
|
||||||
$(info WARNING - GCC variable has been deprecated)
|
|
||||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
|
||||||
HOST_COMPILER ?= $(GCC)
|
|
||||||
endif
|
|
||||||
ifneq ($(abi),)
|
|
||||||
$(error ERROR - abi variable has been removed)
|
|
||||||
endif
|
|
||||||
############################
|
|
||||||
# end deprecated interface #
|
|
||||||
############################
|
|
||||||
|
|
||||||
# architecture
|
|
||||||
HOST_ARCH := $(shell uname -m)
|
|
||||||
TARGET_ARCH ?= $(HOST_ARCH)
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
|
||||||
TARGET_SIZE := 64
|
|
||||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
|
||||||
TARGET_SIZE := 32
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
|
||||||
ifeq ($(HOST_ARCH),aarch64)
|
|
||||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
|
||||||
HOST_ARCH := sbsa
|
|
||||||
TARGET_ARCH := sbsa
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
|
||||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
|
||||||
TARGET_ARCH = armv7l
|
|
||||||
endif
|
|
||||||
|
|
||||||
# operating system
|
|
||||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
|
||||||
TARGET_OS ?= $(HOST_OS)
|
|
||||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|
||||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# host compiler
|
|
||||||
ifdef HOST_COMPILER
|
|
||||||
CUSTOM_HOST_COMPILER = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
|
||||||
HOST_COMPILER ?= clang++
|
|
||||||
endif
|
|
||||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
|
||||||
ifeq ($(TARGET_OS),linux)
|
|
||||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),aarch64)
|
|
||||||
ifeq ($(TARGET_OS), linux)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
|
||||||
else ifeq ($(TARGET_OS), android)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),sbsa)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
HOST_COMPILER ?= g++
|
|
||||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
|
||||||
|
|
||||||
# internal flags
|
|
||||||
NVCCFLAGS := -m${TARGET_SIZE}
|
|
||||||
CCFLAGS :=
|
|
||||||
LDFLAGS :=
|
|
||||||
|
|
||||||
# build flags
|
|
||||||
|
|
||||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
|
||||||
GCC_PATH := $(shell which gcc)
|
|
||||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
|
||||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
|
||||||
LDFLAGS += -lstdc++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
|
||||||
CCFLAGS += -arch $(HOST_ARCH)
|
|
||||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
|
||||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
|
||||||
CCFLAGS += -mfloat-abi=hard
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
LDFLAGS += -pie
|
|
||||||
CCFLAGS += -fpie -fpic -fexceptions
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
NVCCFLAGS += -D_QNX_SOURCE
|
|
||||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
|
||||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
|
||||||
LDFLAGS += -lsocket
|
|
||||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
|
||||||
ifdef TARGET_OVERRIDE
|
|
||||||
LDFLAGS += -lslog2
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
|
||||||
CCFLAGS += -I$(TARGET_FS)/../include
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
|
||||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Install directory of different arch
|
|
||||||
CUDA_INSTALL_TARGET_DIR :=
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Debug build flags
|
|
||||||
ifeq ($(dbg),1)
|
|
||||||
NVCCFLAGS += -g -G
|
|
||||||
BUILD_TYPE := debug
|
|
||||||
else
|
|
||||||
BUILD_TYPE := release
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS :=
|
|
||||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
|
||||||
|
|
||||||
SAMPLE_ENABLED := 1
|
|
||||||
|
|
||||||
# This sample is not supported on QNX
|
|
||||||
ifeq ($(TARGET_OS),qnx)
|
|
||||||
$(info >>> WARNING - c++11_cuda is not supported on QNX - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_LDFLAGS :=
|
|
||||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
|
||||||
|
|
||||||
# Common includes and paths for CUDA
|
|
||||||
INCLUDES := -I../../../Common
|
|
||||||
LIBRARIES :=
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
#Detect if installed version of GCC supports required C++11
|
|
||||||
ifeq ($(TARGET_OS),linux)
|
|
||||||
empty :=
|
|
||||||
space := $(empty) $(empty)
|
|
||||||
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
|
|
||||||
#Create version number without "."
|
|
||||||
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
|
|
||||||
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
|
|
||||||
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
|
|
||||||
# Make sure the version number has at least 3 decimals
|
|
||||||
GCCVERSION += 00
|
|
||||||
# Remove spaces from the version number
|
|
||||||
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
|
|
||||||
#$(warning $(GCCVERSION))
|
|
||||||
|
|
||||||
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 47000)
|
|
||||||
ifneq ($(CUSTOM_HOST_COMPILER), 1)
|
|
||||||
ifeq ($(IS_MIN_VERSION), 1)
|
|
||||||
$(info >>> GCC Version is greater or equal to 4.7.0 <<<)
|
|
||||||
else
|
|
||||||
$(info >>> Waiving build. Minimum GCC version required is 4.7.0<<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(warning >>> Custom HOST_COMPILER set; skipping GCC version check. This may lead to unintended behavior. Please note the minimum equivalent GCC version is 4.7.0 <<<)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Gencode arguments
|
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
|
||||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
|
||||||
else
|
|
||||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
|
||||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(GENCODE_FLAGS),)
|
|
||||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
|
||||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
|
||||||
|
|
||||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
|
||||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
|
||||||
ifneq ($(HIGHEST_SM),)
|
|
||||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS += --std=c++11 --threads 0
|
|
||||||
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
EXEC ?= @echo "[@]"
|
|
||||||
endif
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Target rules
|
|
||||||
all: build
|
|
||||||
|
|
||||||
build: c++11_cuda
|
|
||||||
|
|
||||||
check.deps:
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
@echo "Sample will be waived due to the above missing dependencies"
|
|
||||||
else
|
|
||||||
@echo "Sample is ready - all dependencies have been met"
|
|
||||||
endif
|
|
||||||
|
|
||||||
c++11_cuda.o:c++11_cuda.cu
|
|
||||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
c++11_cuda: c++11_cuda.o
|
|
||||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
|
||||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
|
|
||||||
run: build
|
|
||||||
$(EXEC) ./c++11_cuda
|
|
||||||
|
|
||||||
testrun: build
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f c++11_cuda c++11_cuda.o
|
|
||||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/c++11_cuda
|
|
||||||
|
|
||||||
clobber: clean
|
|
@ -1,82 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
|
||||||
<entry>
|
|
||||||
<name>c++11_cuda</name>
|
|
||||||
<cflags>
|
|
||||||
<flag>--std=c++11</flag>
|
|
||||||
</cflags>
|
|
||||||
<cuda_api_list>
|
|
||||||
<toolkit>cudaMalloc</toolkit>
|
|
||||||
<toolkit>cudaMemcpy</toolkit>
|
|
||||||
<toolkit>cudaMemset</toolkit>
|
|
||||||
<toolkit>cudaFree</toolkit>
|
|
||||||
</cuda_api_list>
|
|
||||||
<description><![CDATA[This sample demonstrates C++11 feature support in CUDA. It scans an input text file and prints the number of occurrences of the characters x, y, z, and w. ]]></description>
|
|
||||||
<devicecompilation>whole</devicecompilation>
|
|
||||||
<includepaths>
|
|
||||||
<path>./</path>
|
|
||||||
<path>../</path>
|
|
||||||
<path>../../../Common</path>
|
|
||||||
</includepaths>
|
|
||||||
<keyconcepts>
|
|
||||||
<concept level="advanced">CPP11 CUDA</concept>
|
|
||||||
</keyconcepts>
|
|
||||||
<keywords>
|
|
||||||
<keyword>GPGPU</keyword>
|
|
||||||
<keyword>CPP11</keyword>
|
|
||||||
</keywords>
|
|
||||||
<libraries>
|
|
||||||
</libraries>
|
|
||||||
<librarypaths>
|
|
||||||
</librarypaths>
|
|
||||||
<nsight_eclipse>true</nsight_eclipse>
|
|
||||||
<primary_file>c++11_cuda.cu</primary_file>
|
|
||||||
<required_dependencies>
|
|
||||||
<dependency>CPP11</dependency>
|
|
||||||
</required_dependencies>
|
|
||||||
<scopes>
|
|
||||||
<scope>1:CUDA Advanced Topics</scope>
|
|
||||||
<scope>1:C++11 CUDA</scope>
|
|
||||||
</scopes>
|
|
||||||
<sm-arch>sm50</sm-arch>
|
|
||||||
<sm-arch>sm52</sm-arch>
|
|
||||||
<sm-arch>sm53</sm-arch>
|
|
||||||
<sm-arch>sm60</sm-arch>
|
|
||||||
<sm-arch>sm61</sm-arch>
|
|
||||||
<sm-arch>sm70</sm-arch>
|
|
||||||
<sm-arch>sm72</sm-arch>
|
|
||||||
<sm-arch>sm75</sm-arch>
|
|
||||||
<sm-arch>sm80</sm-arch>
|
|
||||||
<sm-arch>sm86</sm-arch>
|
|
||||||
<sm-arch>sm87</sm-arch>
|
|
||||||
<sm-arch>sm89</sm-arch>
|
|
||||||
<sm-arch>sm90</sm-arch>
|
|
||||||
<supported_envs>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>macosx</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>arm</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>sbsa</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>ppc64le</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<platform>windows7</platform>
|
|
||||||
</env>
|
|
||||||
</supported_envs>
|
|
||||||
<supported_sm_architectures>
|
|
||||||
<include>all</include>
|
|
||||||
</supported_sm_architectures>
|
|
||||||
<title>C++11 CUDA</title>
|
|
||||||
<type>exe</type>
|
|
||||||
</entry>
|
|
@ -1,74 +0,0 @@
|
|||||||
# c++11_cuda - C++11 CUDA
|
|
||||||
|
|
||||||
## Description
|
|
||||||
|
|
||||||
This sample demonstrates C++11 feature support in CUDA. It scans an input text file and prints the number of occurrences of the characters x, y, z, and w.
|
|
||||||
|
|
||||||
## Key Concepts
|
|
||||||
|
|
||||||
CPP11 CUDA
|
|
||||||
|
|
||||||
## Supported SM Architectures
|
|
||||||
|
|
||||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
|
||||||
|
|
||||||
## Supported OSes
|
|
||||||
|
|
||||||
Linux, Windows
|
|
||||||
|
|
||||||
## Supported CPU Architecture
|
|
||||||
|
|
||||||
x86_64, ppc64le, armv7l
|
|
||||||
|
|
||||||
## CUDA APIs involved
|
|
||||||
|
|
||||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
|
||||||
cudaMalloc, cudaMemcpy, cudaMemset, cudaFree
|
|
||||||
|
|
||||||
## Dependencies needed to build/run
|
|
||||||
[CPP11](../../../README.md#cpp11)
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
|
||||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
|
||||||
|
|
||||||
## Build and Run
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Each individual sample has its own set of solution files in its directory.
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
|
||||||
|
|
||||||
### Linux
|
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
|
||||||
```
|
|
||||||
|
|
||||||
## References (for more details)
|
|
||||||
|
|
@ -1,140 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <thrust/device_ptr.h>
|
|
||||||
#include <thrust/count.h>
|
|
||||||
#include <thrust/execution_policy.h>
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <helper_cuda.h>
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////
|
|
||||||
// Some utility code to define grid_stride_range
|
|
||||||
// Normally this would be in a header but it's here
|
|
||||||
// for didactic purposes. Uses the range utilities from range.hpp.
|
|
||||||
#include "range.hpp"
|
|
||||||
using namespace util::lang;
|
|
||||||
|
|
||||||
// type alias to simplify typing...
|
|
||||||
template <typename T>
|
|
||||||
using step_range = typename range_proxy<T>::step_range_proxy;
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
__device__ step_range<T> grid_stride_range(T begin, T end) {
|
|
||||||
begin += blockDim.x * blockIdx.x + threadIdx.x;
|
|
||||||
return range(begin, end).step(gridDim.x * blockDim.x);
|
|
||||||
}
|
|
||||||
/////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
template <typename T, typename Predicate>
|
|
||||||
__device__ void count_if(int *count, T *data, int n, Predicate p) {
|
|
||||||
for (auto i : grid_stride_range(0, n)) {
|
|
||||||
if (p(data[i])) atomicAdd(count, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use count_if with a lambda function that searches for x, y, z or w
|
|
||||||
// Note the use of range-based for loop and initializer_list inside the functor
|
|
||||||
// We use auto so we don't have to know the type of the functor or array
|
|
||||||
__global__ void xyzw_frequency(int *count, char *text, int n) {
|
|
||||||
const char letters[]{'x', 'y', 'z', 'w'};
|
|
||||||
|
|
||||||
count_if(count, text, n, [&](char c) {
|
|
||||||
for (const auto x : letters)
|
|
||||||
if (c == x) return true;
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Kernel: same tally as xyzw_frequency, but delegated to thrust::count_if
// with the thrust::device execution policy. The result overwrites *count.
// The lambda captures by value, so it carries its own copy of the letters.
__global__ void xyzw_frequency_thrust_device(int *count, char *text, int n) {
  const char targets[]{'x', 'y', 'z', 'w'};
  char *const text_end = text + n;

  auto is_target = [=](char c) {
    bool found = false;
    for (const auto t : targets) {
      if (t == c) {
        found = true;
        break;
      }
    }
    return found;
  };

  *count = thrust::count_if(thrust::device, text, text_end, is_target);
}
|
|
||||||
|
|
||||||
// Host-side Thrust variant. Compiled out by default: a bug in Thrust 1.8
// causes warnings when it is enabled (fixed in the Thrust master branch).
#if 0
void xyzw_frequency_thrust_host(int *count, char *text, int n)
{
  const char targets[] { 'x','y','z','w' };

  auto is_target = [&](char c) {
    bool found = false;
    for (const auto t : targets) {
      if (t == c) {
        found = true;
        break;
      }
    }
    return found;
  };

  *count = thrust::count_if(thrust::host, text, text+n, is_target);
}
#endif
|
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
const char *filename = sdkFindFilePath("warandpeace.txt", argv[0]);
|
|
||||||
|
|
||||||
int numBytes = 16 * 1048576;
|
|
||||||
char *h_text = (char *)malloc(numBytes);
|
|
||||||
|
|
||||||
// find first CUDA device
|
|
||||||
int devID = findCudaDevice(argc, (const char **)argv);
|
|
||||||
|
|
||||||
char *d_text;
|
|
||||||
checkCudaErrors(cudaMalloc((void **)&d_text, numBytes));
|
|
||||||
|
|
||||||
FILE *fp = fopen(filename, "r");
|
|
||||||
if (fp == NULL) {
|
|
||||||
printf("Cannot find the input text file\n. Exiting..\n");
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
|
||||||
int len = (int)fread(h_text, sizeof(char), numBytes, fp);
|
|
||||||
fclose(fp);
|
|
||||||
std::cout << "Read " << len << " byte corpus from " << filename << std::endl;
|
|
||||||
|
|
||||||
checkCudaErrors(cudaMemcpy(d_text, h_text, len, cudaMemcpyHostToDevice));
|
|
||||||
|
|
||||||
int count = 0;
|
|
||||||
int *d_count;
|
|
||||||
checkCudaErrors(cudaMalloc(&d_count, sizeof(int)));
|
|
||||||
checkCudaErrors(cudaMemset(d_count, 0, sizeof(int)));
|
|
||||||
|
|
||||||
// Try uncommenting one kernel call at a time
|
|
||||||
xyzw_frequency<<<8, 256>>>(d_count, d_text, len);
|
|
||||||
xyzw_frequency_thrust_device<<<1, 1>>>(d_count, d_text, len);
|
|
||||||
checkCudaErrors(
|
|
||||||
cudaMemcpy(&count, d_count, sizeof(int), cudaMemcpyDeviceToHost));
|
|
||||||
|
|
||||||
// xyzw_frequency_thrust_host(&count, h_text, len);
|
|
||||||
|
|
||||||
std::cout << "counted " << count
|
|
||||||
<< " instances of 'x', 'y', 'z', or 'w' in \"" << filename << "\""
|
|
||||||
<< std::endl;
|
|
||||||
|
|
||||||
checkCudaErrors(cudaFree(d_count));
|
|
||||||
checkCudaErrors(cudaFree(d_text));
|
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
|
||||||
}
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2017
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "c++11_cuda", "c++11_cuda_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,112 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>c++11_cuda_vs2017</RootNamespace>
|
|
||||||
<ProjectName>c++11_cuda</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
|
||||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
|
||||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
|
||||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v141</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="c++11_cuda.cu" />
|
|
||||||
<ClInclude Include="range.hpp" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2019
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "c++11_cuda", "c++11_cuda_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>c++11_cuda_vs2019</RootNamespace>
|
|
||||||
<ProjectName>c++11_cuda</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v142</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="c++11_cuda.cu" />
|
|
||||||
<ClInclude Include="range.hpp" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2022
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "c++11_cuda", "c++11_cuda_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>c++11_cuda_vs2022</RootNamespace>
|
|
||||||
<ProjectName>c++11_cuda</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v143</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/c++11_cuda.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="c++11_cuda.cu" />
|
|
||||||
<ClInclude Include="range.hpp" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,279 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef UTIL_LANG_RANGE_HPP
|
|
||||||
#define UTIL_LANG_RANGE_HPP
|
|
||||||
|
|
||||||
#include <iterator>
|
|
||||||
#include <type_traits>
|
|
||||||
|
|
||||||
// Make these ranges usable inside CUDA C++ device code
|
|
||||||
#ifdef __CUDACC__
|
|
||||||
#define DEVICE_CALLABLE __host__ __device__
|
|
||||||
#else
|
|
||||||
#define DEVICE_CALLABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace util {
|
|
||||||
namespace lang {
|
|
||||||
|
|
||||||
namespace detail {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct range_iter_base : std::iterator<std::input_iterator_tag, T> {
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
range_iter_base(T current) : current(current) {}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
T operator*() const { return current; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
T const* operator->() const { return ¤t; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
range_iter_base& operator++() {
|
|
||||||
++current;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
range_iter_base operator++(int) {
|
|
||||||
auto copy = *this;
|
|
||||||
++*this;
|
|
||||||
return copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
bool operator==(range_iter_base const& other) const {
|
|
||||||
return current == other.current;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
bool operator!=(range_iter_base const& other) const {
|
|
||||||
return not(*this == other);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
T current;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace detail
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct range_proxy {
|
|
||||||
struct iter : detail::range_iter_base<T> {
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter(T current) : detail::range_iter_base<T>(current) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct step_range_proxy {
|
|
||||||
struct iter : detail::range_iter_base<T> {
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter(T current, T step)
|
|
||||||
: detail::range_iter_base<T>(current), step(step) {}
|
|
||||||
|
|
||||||
using detail::range_iter_base<T>::current;
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter& operator++() {
|
|
||||||
current += step;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter operator++(int) {
|
|
||||||
auto copy = *this;
|
|
||||||
++*this;
|
|
||||||
return copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Loses commutativity. Iterator-based ranges are simply broken. :-(
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
bool operator==(iter const& other) const {
|
|
||||||
return step > 0 ? current >= other.current : current < other.current;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
bool operator!=(iter const& other) const { return !(*this == other); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
T step;
|
|
||||||
};
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
step_range_proxy(T begin, T end, T step)
|
|
||||||
: begin_(begin, step), end_(end, step) {}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter begin() const { return begin_; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter end() const { return end_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
iter begin_;
|
|
||||||
iter end_;
|
|
||||||
};
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
range_proxy(T begin, T end) : begin_(begin), end_(end) {}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
step_range_proxy step(T step) { return {*begin_, *end_, step}; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter begin() const { return begin_; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter end() const { return end_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
iter begin_;
|
|
||||||
iter end_;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
struct infinite_range_proxy {
|
|
||||||
struct iter : detail::range_iter_base<T> {
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter(T current = T()) : detail::range_iter_base<T>(current) {}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
bool operator==(iter const&) const { return false; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
bool operator!=(iter const&) const { return true; }
|
|
||||||
};
|
|
||||||
|
|
||||||
struct step_range_proxy {
|
|
||||||
struct iter : detail::range_iter_base<T> {
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter(T current = T(), T step = T())
|
|
||||||
: detail::range_iter_base<T>(current), step(step) {}
|
|
||||||
|
|
||||||
using detail::range_iter_base<T>::current;
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter& operator++() {
|
|
||||||
current += step;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter operator++(int) {
|
|
||||||
auto copy = *this;
|
|
||||||
++*this;
|
|
||||||
return copy;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
bool operator==(iter const&) const { return false; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
bool operator!=(iter const&) const { return true; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
T step;
|
|
||||||
};
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
step_range_proxy(T begin, T step) : begin_(begin, step) {}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter begin() const { return begin_; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter end() const { return iter(); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
iter begin_;
|
|
||||||
};
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
infinite_range_proxy(T begin) : begin_(begin) {}
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
step_range_proxy step(T step) { return step_range_proxy(*begin_, step); }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter begin() const { return begin_; }
|
|
||||||
|
|
||||||
DEVICE_CALLABLE
|
|
||||||
iter end() const { return iter(); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
iter begin_;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
DEVICE_CALLABLE range_proxy<T> range(T begin, T end) {
|
|
||||||
return {begin, end};
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
DEVICE_CALLABLE infinite_range_proxy<T> range(T begin) {
|
|
||||||
return {begin};
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace traits {
|
|
||||||
|
|
||||||
template <typename C>
|
|
||||||
struct has_size {
|
|
||||||
template <typename T>
|
|
||||||
static constexpr auto check(T*) ->
|
|
||||||
typename std::is_integral<decltype(std::declval<T const>().size())>::type;
|
|
||||||
|
|
||||||
template <typename>
|
|
||||||
static constexpr auto check(...) -> std::false_type;
|
|
||||||
|
|
||||||
using type = decltype(check<C>(0));
|
|
||||||
static constexpr bool value = type::value;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace traits
|
|
||||||
|
|
||||||
template <typename C,
|
|
||||||
typename = typename std::enable_if<traits::has_size<C>::value>>
|
|
||||||
DEVICE_CALLABLE auto indices(C const& cont)
|
|
||||||
-> range_proxy<decltype(cont.size())> {
|
|
||||||
return {0, cont.size()};
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, std::size_t N>
|
|
||||||
DEVICE_CALLABLE range_proxy<std::size_t> indices(T(&)[N]) {
|
|
||||||
return {0, N};
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
range_proxy<typename std::initializer_list<T>::size_type> DEVICE_CALLABLE
|
|
||||||
indices(std::initializer_list<T>&& cont) {
|
|
||||||
return {0, cont.size()};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // namespace util::lang
|
|
||||||
|
|
||||||
#endif // ndef UTIL_LANG_RANGE_HPP
|
|
File diff suppressed because it is too large
Load Diff
28
Samples/0_Introduction/clock/CMakeLists.txt
Normal file
28
Samples/0_Introduction/clock/CMakeLists.txt
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.20)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Modules")

project(clock LANGUAGES C CXX CUDA)

find_package(CUDAToolkit REQUIRED)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# GPU architectures to generate code for.
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
# Some of the architectures above are deprecated; silence nvcc's warning about them.
string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets")

# Device-side debugging (cuda-gdb) is intentionally left off because it is
# expensive. To enable it, add -G to the CUDA flags of Debug builds.

# Build the clock sample from its single CUDA source.
add_executable(clock clock.cu)

# Put the repository-level Common directory on this target's include path.
# The path is anchored to this file's directory so it does not depend on where
# CMake is invoked from.
target_include_directories(clock PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../../Common")

target_compile_options(clock PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)

target_compile_features(clock PRIVATE cxx_std_17 cuda_std_17)

set_target_properties(clock PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
@ -1,357 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
#
|
|
||||||
# Makefile project (only supported on Mac OS X and Linux platforms)
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Location of the CUDA Toolkit
|
|
||||||
CUDA_PATH ?= /usr/local/cuda
|
|
||||||
|
|
||||||
##############################
|
|
||||||
# start deprecated interface #
|
|
||||||
##############################
|
|
||||||
ifeq ($(x86_64),1)
|
|
||||||
$(info WARNING - x86_64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
|
||||||
TARGET_ARCH ?= x86_64
|
|
||||||
endif
|
|
||||||
ifeq ($(ARMv7),1)
|
|
||||||
$(info WARNING - ARMv7 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
|
||||||
TARGET_ARCH ?= armv7l
|
|
||||||
endif
|
|
||||||
ifeq ($(aarch64),1)
|
|
||||||
$(info WARNING - aarch64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
|
||||||
TARGET_ARCH ?= aarch64
|
|
||||||
endif
|
|
||||||
ifeq ($(ppc64le),1)
|
|
||||||
$(info WARNING - ppc64le variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
|
||||||
TARGET_ARCH ?= ppc64le
|
|
||||||
endif
|
|
||||||
ifneq ($(GCC),)
|
|
||||||
$(info WARNING - GCC variable has been deprecated)
|
|
||||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
|
||||||
HOST_COMPILER ?= $(GCC)
|
|
||||||
endif
|
|
||||||
ifneq ($(abi),)
|
|
||||||
$(error ERROR - abi variable has been removed)
|
|
||||||
endif
|
|
||||||
############################
|
|
||||||
# end deprecated interface #
|
|
||||||
############################
|
|
||||||
|
|
||||||
# architecture
|
|
||||||
HOST_ARCH := $(shell uname -m)
|
|
||||||
TARGET_ARCH ?= $(HOST_ARCH)
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
|
||||||
TARGET_SIZE := 64
|
|
||||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
|
||||||
TARGET_SIZE := 32
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
|
||||||
ifeq ($(HOST_ARCH),aarch64)
|
|
||||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
|
||||||
HOST_ARCH := sbsa
|
|
||||||
TARGET_ARCH := sbsa
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
|
||||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
|
||||||
TARGET_ARCH = armv7l
|
|
||||||
endif
|
|
||||||
|
|
||||||
# operating system
|
|
||||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
|
||||||
TARGET_OS ?= $(HOST_OS)
|
|
||||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|
||||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# host compiler
|
|
||||||
ifdef HOST_COMPILER
|
|
||||||
CUSTOM_HOST_COMPILER = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
|
||||||
HOST_COMPILER ?= clang++
|
|
||||||
endif
|
|
||||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
|
||||||
ifeq ($(TARGET_OS),linux)
|
|
||||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),aarch64)
|
|
||||||
ifeq ($(TARGET_OS), linux)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
|
||||||
else ifeq ($(TARGET_OS), android)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),sbsa)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
HOST_COMPILER ?= g++
|
|
||||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
|
||||||
|
|
||||||
# internal flags
|
|
||||||
NVCCFLAGS := -m${TARGET_SIZE}
|
|
||||||
CCFLAGS :=
|
|
||||||
LDFLAGS :=
|
|
||||||
|
|
||||||
# build flags
|
|
||||||
|
|
||||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
|
||||||
GCC_PATH := $(shell which gcc)
|
|
||||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
|
||||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
|
||||||
LDFLAGS += -lstdc++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
|
||||||
CCFLAGS += -arch $(HOST_ARCH)
|
|
||||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
|
||||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
|
||||||
CCFLAGS += -mfloat-abi=hard
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
LDFLAGS += -pie
|
|
||||||
CCFLAGS += -fpie -fpic -fexceptions
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
NVCCFLAGS += -D_QNX_SOURCE
|
|
||||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
|
||||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
|
||||||
LDFLAGS += -lsocket
|
|
||||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
|
||||||
ifdef TARGET_OVERRIDE
|
|
||||||
LDFLAGS += -lslog2
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
|
||||||
CCFLAGS += -I$(TARGET_FS)/../include
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
|
||||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Install directory of different arch
|
|
||||||
CUDA_INSTALL_TARGET_DIR :=
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Debug build flags
|
|
||||||
ifeq ($(dbg),1)
|
|
||||||
NVCCFLAGS += -g -G
|
|
||||||
BUILD_TYPE := debug
|
|
||||||
else
|
|
||||||
BUILD_TYPE := release
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS :=
|
|
||||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
|
||||||
|
|
||||||
SAMPLE_ENABLED := 1
|
|
||||||
|
|
||||||
ALL_LDFLAGS :=
|
|
||||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
|
||||||
|
|
||||||
# Common includes and paths for CUDA
|
|
||||||
INCLUDES := -I../../../Common
|
|
||||||
LIBRARIES :=
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Gencode arguments
|
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
|
||||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
|
||||||
else
|
|
||||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
|
||||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(GENCODE_FLAGS),)
|
|
||||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
|
||||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
|
||||||
|
|
||||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
|
||||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
|
||||||
ifneq ($(HIGHEST_SM),)
|
|
||||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
|
||||||
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
EXEC ?= @echo "[@]"
|
|
||||||
endif
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Target rules
|
|
||||||
all: build
|
|
||||||
|
|
||||||
build: clock
|
|
||||||
|
|
||||||
check.deps:
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
@echo "Sample will be waived due to the above missing dependencies"
|
|
||||||
else
|
|
||||||
@echo "Sample is ready - all dependencies have been met"
|
|
||||||
endif
|
|
||||||
|
|
||||||
clock.o:clock.cu
|
|
||||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
clock: clock.o
|
|
||||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
|
||||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
|
|
||||||
run: build
|
|
||||||
$(EXEC) ./clock
|
|
||||||
|
|
||||||
testrun: build
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f clock clock.o
|
|
||||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/clock
|
|
||||||
|
|
||||||
clobber: clean
|
|
@ -1,78 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
|
||||||
<entry>
|
|
||||||
<name>clock</name>
|
|
||||||
<cuda_api_list>
|
|
||||||
<toolkit>cudaMalloc</toolkit>
|
|
||||||
<toolkit>cudaMemcpy</toolkit>
|
|
||||||
<toolkit>cudaFree</toolkit>
|
|
||||||
</cuda_api_list>
|
|
||||||
<description><![CDATA[This example shows how to use the clock function to measure the performance of block of threads of a kernel accurately.]]></description>
|
|
||||||
<devicecompilation>whole</devicecompilation>
|
|
||||||
<includepaths>
|
|
||||||
<path>./</path>
|
|
||||||
<path>../</path>
|
|
||||||
<path>../../../Common</path>
|
|
||||||
</includepaths>
|
|
||||||
<keyconcepts>
|
|
||||||
<concept level="basic">Performance Strategies</concept>
|
|
||||||
</keyconcepts>
|
|
||||||
<keywords>
|
|
||||||
<keyword>performance</keyword>
|
|
||||||
<keyword>timing</keyword>
|
|
||||||
<keyword>CUDA</keyword>
|
|
||||||
<keyword>clock</keyword>
|
|
||||||
<keyword>timer</keyword>
|
|
||||||
</keywords>
|
|
||||||
<libraries>
|
|
||||||
</libraries>
|
|
||||||
<librarypaths>
|
|
||||||
</librarypaths>
|
|
||||||
<nsight_eclipse>true</nsight_eclipse>
|
|
||||||
<primary_file>clock.cu</primary_file>
|
|
||||||
<scopes>
|
|
||||||
<scope>1:CUDA Basic Topics</scope>
|
|
||||||
<scope>1:Performance Strategies</scope>
|
|
||||||
</scopes>
|
|
||||||
<sm-arch>sm50</sm-arch>
|
|
||||||
<sm-arch>sm52</sm-arch>
|
|
||||||
<sm-arch>sm53</sm-arch>
|
|
||||||
<sm-arch>sm60</sm-arch>
|
|
||||||
<sm-arch>sm61</sm-arch>
|
|
||||||
<sm-arch>sm70</sm-arch>
|
|
||||||
<sm-arch>sm72</sm-arch>
|
|
||||||
<sm-arch>sm75</sm-arch>
|
|
||||||
<sm-arch>sm80</sm-arch>
|
|
||||||
<sm-arch>sm86</sm-arch>
|
|
||||||
<sm-arch>sm87</sm-arch>
|
|
||||||
<sm-arch>sm89</sm-arch>
|
|
||||||
<sm-arch>sm90</sm-arch>
|
|
||||||
<supported_envs>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<platform>windows7</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>macosx</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>arm</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>sbsa</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>ppc64le</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
</supported_envs>
|
|
||||||
<supported_sm_architectures>
|
|
||||||
<include>all</include>
|
|
||||||
</supported_sm_architectures>
|
|
||||||
<title>Clock</title>
|
|
||||||
<type>exe</type>
|
|
||||||
</entry>
|
|
@ -18,7 +18,7 @@ Linux, Windows
|
|||||||
|
|
||||||
## Supported CPU Architecture
|
## Supported CPU Architecture
|
||||||
|
|
||||||
x86_64, ppc64le, armv7l
|
x86_64, armv7l
|
||||||
|
|
||||||
## CUDA APIs involved
|
## CUDA APIs involved
|
||||||
|
|
||||||
@ -29,42 +29,4 @@ cudaMalloc, cudaMemcpy, cudaFree
|
|||||||
|
|
||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
|
||||||
## Build and Run
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Each individual sample has its own set of solution files in its directory:
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
|
||||||
|
|
||||||
### Linux
|
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
|
||||||
```
|
|
||||||
|
|
||||||
## References (for more details)
|
## References (for more details)
|
||||||
|
|
||||||
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2017
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clock", "clock_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,112 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>clock_vs2017</RootNamespace>
|
|
||||||
<ProjectName>clock</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
|
||||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
|
||||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
|
||||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v141</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="clock.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2019
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clock", "clock_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>clock_vs2019</RootNamespace>
|
|
||||||
<ProjectName>clock</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v142</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="clock.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2022
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clock", "clock_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>clock_vs2022</RootNamespace>
|
|
||||||
<ProjectName>clock</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v143</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/clock.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="clock.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
37
Samples/0_Introduction/clock_nvrtc/CMakeLists.txt
Normal file
37
Samples/0_Introduction/clock_nvrtc/CMakeLists.txt
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# Build script for the clock_nvrtc sample.
#
# Only the host program (clock.cpp) is compiled here. clock_kernel.cu is not
# part of the target's sources; it is copied into the build directory after
# each build (presumably so the executable can load it at run time and hand it
# to NVRTC -- confirm against clock.cpp).
cmake_minimum_required(VERSION 3.20)

# Make the repository-level CMake helper modules visible to this sample.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/Modules")

project(clock_nvrtc LANGUAGES C CXX CUDA)

find_package(CUDAToolkit REQUIRED)

# GPU architectures targeted for any CUDA code generated by this project.
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 72 75 80 86 87 89 90 100 101 120)
# Keep nvcc quiet about the deprecated architectures contained in the list above.
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")

# Sample executable: host code only.
add_executable(clock_nvrtc clock.cpp)

# Target-scoped replacement for the former directory-wide
# CMAKE_POSITION_INDEPENDENT_CODE setting.
set_target_properties(clock_nvrtc PROPERTIES
    POSITION_INDEPENDENT_CODE ON
)

# Shared sample helper headers live in <repo>/Common. An absolute path anchored
# at this directory keeps the include valid wherever the build tree is placed.
target_include_directories(clock_nvrtc PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/../../../Common"
)

# Applies only to sources compiled as CUDA.
target_compile_options(clock_nvrtc PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)

target_compile_features(clock_nvrtc PRIVATE cxx_std_17 cuda_std_17)

# NVRTC for run-time compilation and the driver API library.
target_link_libraries(clock_nvrtc PRIVATE
    CUDA::nvrtc
    CUDA::cuda_driver
)

# Copy clock_kernel.cu to the output directory after every build.
# VERBATIM gives platform-independent argument escaping; the quoted paths keep
# directories containing spaces or semicolons intact.
add_custom_command(TARGET clock_nvrtc POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${CMAKE_CURRENT_SOURCE_DIR}/clock_kernel.cu"
            "${CMAKE_CURRENT_BINARY_DIR}"
    COMMENT "Copying clock_kernel.cu to ${CMAKE_CURRENT_BINARY_DIR}"
    VERBATIM
)
|
@ -1,409 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
#
|
|
||||||
# Makefile project only supported on Mac OS X and Linux Platforms
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Location of the CUDA Toolkit
|
|
||||||
CUDA_PATH ?= /usr/local/cuda
|
|
||||||
|
|
||||||
##############################
|
|
||||||
# start deprecated interface #
|
|
||||||
##############################
|
|
||||||
ifeq ($(x86_64),1)
|
|
||||||
$(info WARNING - x86_64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
|
||||||
TARGET_ARCH ?= x86_64
|
|
||||||
endif
|
|
||||||
ifeq ($(ARMv7),1)
|
|
||||||
$(info WARNING - ARMv7 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
|
||||||
TARGET_ARCH ?= armv7l
|
|
||||||
endif
|
|
||||||
ifeq ($(aarch64),1)
|
|
||||||
$(info WARNING - aarch64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
|
||||||
TARGET_ARCH ?= aarch64
|
|
||||||
endif
|
|
||||||
ifeq ($(ppc64le),1)
|
|
||||||
$(info WARNING - ppc64le variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
|
||||||
TARGET_ARCH ?= ppc64le
|
|
||||||
endif
|
|
||||||
ifneq ($(GCC),)
|
|
||||||
$(info WARNING - GCC variable has been deprecated)
|
|
||||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
|
||||||
HOST_COMPILER ?= $(GCC)
|
|
||||||
endif
|
|
||||||
ifneq ($(abi),)
|
|
||||||
$(error ERROR - abi variable has been removed)
|
|
||||||
endif
|
|
||||||
############################
|
|
||||||
# end deprecated interface #
|
|
||||||
############################
|
|
||||||
|
|
||||||
# architecture
|
|
||||||
HOST_ARCH := $(shell uname -m)
|
|
||||||
TARGET_ARCH ?= $(HOST_ARCH)
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
|
||||||
TARGET_SIZE := 64
|
|
||||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
|
||||||
TARGET_SIZE := 32
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
|
||||||
ifeq ($(HOST_ARCH),aarch64)
|
|
||||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
|
||||||
HOST_ARCH := sbsa
|
|
||||||
TARGET_ARCH := sbsa
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
|
||||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
|
||||||
TARGET_ARCH = armv7l
|
|
||||||
endif
|
|
||||||
|
|
||||||
# operating system
|
|
||||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
|
||||||
TARGET_OS ?= $(HOST_OS)
|
|
||||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|
||||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# host compiler
|
|
||||||
ifdef HOST_COMPILER
|
|
||||||
CUSTOM_HOST_COMPILER = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
|
||||||
HOST_COMPILER ?= clang++
|
|
||||||
endif
|
|
||||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
|
||||||
ifeq ($(TARGET_OS),linux)
|
|
||||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),aarch64)
|
|
||||||
ifeq ($(TARGET_OS), linux)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
|
||||||
else ifeq ($(TARGET_OS), android)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),sbsa)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
HOST_COMPILER ?= g++
|
|
||||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
|
||||||
|
|
||||||
# internal flags
|
|
||||||
NVCCFLAGS := -m${TARGET_SIZE}
|
|
||||||
CCFLAGS :=
|
|
||||||
LDFLAGS :=
|
|
||||||
|
|
||||||
# build flags
|
|
||||||
|
|
||||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
|
||||||
GCC_PATH := $(shell which gcc)
|
|
||||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
|
||||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
|
||||||
LDFLAGS += -lstdc++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
|
||||||
CCFLAGS += -arch $(HOST_ARCH)
|
|
||||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
|
||||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
|
||||||
CCFLAGS += -mfloat-abi=hard
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
LDFLAGS += -pie
|
|
||||||
CCFLAGS += -fpie -fpic -fexceptions
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
NVCCFLAGS += -D_QNX_SOURCE
|
|
||||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
|
||||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
|
||||||
LDFLAGS += -lsocket
|
|
||||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
|
||||||
ifdef TARGET_OVERRIDE
|
|
||||||
LDFLAGS += -lslog2
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
|
||||||
CCFLAGS += -I$(TARGET_FS)/../include
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
|
||||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Install directory of different arch
|
|
||||||
CUDA_INSTALL_TARGET_DIR :=
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Debug build flags
|
|
||||||
ifeq ($(dbg),1)
|
|
||||||
NVCCFLAGS += -g -G
|
|
||||||
BUILD_TYPE := debug
|
|
||||||
else
|
|
||||||
BUILD_TYPE := release
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS :=
|
|
||||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
|
||||||
|
|
||||||
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
|
|
||||||
|
|
||||||
SAMPLE_ENABLED := 1
|
|
||||||
|
|
||||||
# This sample is not supported on ARMv7
|
|
||||||
ifeq ($(TARGET_ARCH),armv7l)
|
|
||||||
$(info >>> WARNING - clock_nvrtc is not supported on ARMv7 - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_LDFLAGS :=
|
|
||||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
|
||||||
|
|
||||||
# Common includes and paths for CUDA
|
|
||||||
INCLUDES := -I../../../Common
|
|
||||||
LIBRARIES :=
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# libNVRTC specific libraries
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
LDFLAGS += -L$(CUDA_PATH)/lib -F/Library/Frameworks -framework CUDA
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
|
|
||||||
else
|
|
||||||
ifeq ($(TARGET_ARCH),x86_64)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
|
|
||||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
|
|
||||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
|
|
||||||
ifdef TARGET_OVERRIDE
|
|
||||||
CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(HOST_ARCH),ppc64le)
|
|
||||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
|
|
||||||
endif
|
|
||||||
|
|
||||||
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
|
|
||||||
ifeq ("$(CUDALIB)","")
|
|
||||||
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
else
|
|
||||||
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
|
|
||||||
LIBRARIES += -L$(CUDALIB) -lcuda
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
|
||||||
|
|
||||||
INCLUDES += -I$(CUDA_PATH)/include
|
|
||||||
|
|
||||||
LIBRARIES += -lnvrtc
|
|
||||||
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
EXEC ?= @echo "[@]"
|
|
||||||
endif
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Target rules
|
|
||||||
all: build
|
|
||||||
|
|
||||||
build: clock_nvrtc
|
|
||||||
|
|
||||||
check.deps:
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
@echo "Sample will be waived due to the above missing dependencies"
|
|
||||||
else
|
|
||||||
@echo "Sample is ready - all dependencies have been met"
|
|
||||||
endif
|
|
||||||
|
|
||||||
clock.o:clock.cpp
|
|
||||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
clock_nvrtc: clock.o
|
|
||||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
|
||||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
|
|
||||||
run: build
|
|
||||||
$(EXEC) ./clock_nvrtc
|
|
||||||
|
|
||||||
testrun: build
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f clock_nvrtc clock.o
|
|
||||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/clock_nvrtc
|
|
||||||
|
|
||||||
clobber: clean
|
|
@ -18,7 +18,7 @@ Linux, Windows, QNX
|
|||||||
|
|
||||||
## Supported CPU Architecture
|
## Supported CPU Architecture
|
||||||
|
|
||||||
x86_64, ppc64le, aarch64
|
x86_64, aarch64
|
||||||
|
|
||||||
## CUDA APIs involved
|
## CUDA APIs involved
|
||||||
|
|
||||||
@ -36,42 +36,4 @@ cudaBlockSize, cudaGridSize
|
|||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
## Build and Run
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Each individual sample has its own set of solution files in its directory:
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
|
|
||||||
|
|
||||||
### Linux
|
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, aarch64.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=aarch64` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
|
||||||
```
|
|
||||||
|
|
||||||
## References (for more details)
|
## References (for more details)
|
||||||
|
|
||||||
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2017
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clock_nvrtc", "clock_nvrtc_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,112 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>clock_nvrtc_vs2017</RootNamespace>
|
|
||||||
<ProjectName>clock_nvrtc</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
|
||||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
|
||||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
|
||||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v141</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;$(CudaToolkitIncludeDir);$(CUDA_PATH)/include;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cuda.lib;nvrtc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/clock_nvrtc.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration></CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<ClCompile Include="clock.cpp" />
|
|
||||||
<None Include="clock_kernel.cu" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2019
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clock_nvrtc", "clock_nvrtc_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>clock_nvrtc_vs2019</RootNamespace>
|
|
||||||
<ProjectName>clock_nvrtc</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v142</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;$(CudaToolkitIncludeDir);$(CUDA_PATH)/include;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cuda.lib;nvrtc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/clock_nvrtc.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration></CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<ClCompile Include="clock.cpp" />
|
|
||||||
<None Include="clock_kernel.cu" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2022
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "clock_nvrtc", "clock_nvrtc_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>clock_nvrtc_vs2022</RootNamespace>
|
|
||||||
<ProjectName>clock_nvrtc</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v143</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;$(CudaToolkitIncludeDir);$(CUDA_PATH)/include;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cuda.lib;nvrtc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/clock_nvrtc.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration></CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<ClCompile Include="clock.cpp" />
|
|
||||||
<None Include="clock_kernel.cu" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,10 +0,0 @@
|
|||||||
{
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "CUDA C++: Launch",
|
|
||||||
"type": "cuda-gdb",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${workspaceFolder}/concurrentKernels"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
@ -1,357 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
#
|
|
||||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Location of the CUDA Toolkit
|
|
||||||
CUDA_PATH ?= /usr/local/cuda
|
|
||||||
|
|
||||||
##############################
|
|
||||||
# start deprecated interface #
|
|
||||||
##############################
|
|
||||||
ifeq ($(x86_64),1)
|
|
||||||
$(info WARNING - x86_64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
|
||||||
TARGET_ARCH ?= x86_64
|
|
||||||
endif
|
|
||||||
ifeq ($(ARMv7),1)
|
|
||||||
$(info WARNING - ARMv7 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
|
||||||
TARGET_ARCH ?= armv7l
|
|
||||||
endif
|
|
||||||
ifeq ($(aarch64),1)
|
|
||||||
$(info WARNING - aarch64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
|
||||||
TARGET_ARCH ?= aarch64
|
|
||||||
endif
|
|
||||||
ifeq ($(ppc64le),1)
|
|
||||||
$(info WARNING - ppc64le variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
|
||||||
TARGET_ARCH ?= ppc64le
|
|
||||||
endif
|
|
||||||
ifneq ($(GCC),)
|
|
||||||
$(info WARNING - GCC variable has been deprecated)
|
|
||||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
|
||||||
HOST_COMPILER ?= $(GCC)
|
|
||||||
endif
|
|
||||||
ifneq ($(abi),)
|
|
||||||
$(error ERROR - abi variable has been removed)
|
|
||||||
endif
|
|
||||||
############################
|
|
||||||
# end deprecated interface #
|
|
||||||
############################
|
|
||||||
|
|
||||||
# architecture
|
|
||||||
HOST_ARCH := $(shell uname -m)
|
|
||||||
TARGET_ARCH ?= $(HOST_ARCH)
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
|
||||||
TARGET_SIZE := 64
|
|
||||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
|
||||||
TARGET_SIZE := 32
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
|
||||||
ifeq ($(HOST_ARCH),aarch64)
|
|
||||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
|
||||||
HOST_ARCH := sbsa
|
|
||||||
TARGET_ARCH := sbsa
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
|
||||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
|
||||||
TARGET_ARCH = armv7l
|
|
||||||
endif
|
|
||||||
|
|
||||||
# operating system
|
|
||||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
|
||||||
TARGET_OS ?= $(HOST_OS)
|
|
||||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|
||||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# host compiler
|
|
||||||
ifdef HOST_COMPILER
|
|
||||||
CUSTOM_HOST_COMPILER = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
|
||||||
HOST_COMPILER ?= clang++
|
|
||||||
endif
|
|
||||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
|
||||||
ifeq ($(TARGET_OS),linux)
|
|
||||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),aarch64)
|
|
||||||
ifeq ($(TARGET_OS), linux)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
|
||||||
else ifeq ($(TARGET_OS), android)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),sbsa)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
HOST_COMPILER ?= g++
|
|
||||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
|
||||||
|
|
||||||
# internal flags
|
|
||||||
NVCCFLAGS := -m${TARGET_SIZE}
|
|
||||||
CCFLAGS :=
|
|
||||||
LDFLAGS :=
|
|
||||||
|
|
||||||
# build flags
|
|
||||||
|
|
||||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
|
||||||
GCC_PATH := $(shell which gcc)
|
|
||||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
|
||||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
|
||||||
LDFLAGS += -lstdc++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
|
||||||
CCFLAGS += -arch $(HOST_ARCH)
|
|
||||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
|
||||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
|
||||||
CCFLAGS += -mfloat-abi=hard
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
LDFLAGS += -pie
|
|
||||||
CCFLAGS += -fpie -fpic -fexceptions
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
NVCCFLAGS += -D_QNX_SOURCE
|
|
||||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
|
||||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
|
||||||
LDFLAGS += -lsocket
|
|
||||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
|
||||||
ifdef TARGET_OVERRIDE
|
|
||||||
LDFLAGS += -lslog2
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
|
||||||
CCFLAGS += -I$(TARGET_FS)/../include
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
|
||||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Install directory of different arch
|
|
||||||
CUDA_INSTALL_TARGET_DIR :=
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Debug build flags
|
|
||||||
ifeq ($(dbg),1)
|
|
||||||
NVCCFLAGS += -g -G
|
|
||||||
BUILD_TYPE := debug
|
|
||||||
else
|
|
||||||
BUILD_TYPE := release
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS :=
|
|
||||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
|
||||||
|
|
||||||
SAMPLE_ENABLED := 1
|
|
||||||
|
|
||||||
ALL_LDFLAGS :=
|
|
||||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
|
||||||
|
|
||||||
# Common includes and paths for CUDA
|
|
||||||
INCLUDES := -I../../../Common
|
|
||||||
LIBRARIES :=
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Gencode arguments
|
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
|
||||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
|
||||||
else
|
|
||||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
|
||||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(GENCODE_FLAGS),)
|
|
||||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
|
||||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
|
||||||
|
|
||||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
|
||||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
|
||||||
ifneq ($(HIGHEST_SM),)
|
|
||||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
|
||||||
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
EXEC ?= @echo "[@]"
|
|
||||||
endif
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Target rules
|
|
||||||
all: build
|
|
||||||
|
|
||||||
build: concurrentKernels
|
|
||||||
|
|
||||||
check.deps:
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
@echo "Sample will be waived due to the above missing dependencies"
|
|
||||||
else
|
|
||||||
@echo "Sample is ready - all dependencies have been met"
|
|
||||||
endif
|
|
||||||
|
|
||||||
concurrentKernels.o:concurrentKernels.cu
|
|
||||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
concurrentKernels: concurrentKernels.o
|
|
||||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
|
||||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
|
|
||||||
run: build
|
|
||||||
$(EXEC) ./concurrentKernels
|
|
||||||
|
|
||||||
testrun: build
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f concurrentKernels concurrentKernels.o
|
|
||||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/concurrentKernels
|
|
||||||
|
|
||||||
clobber: clean
|
|
@ -1,87 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
|
||||||
<entry>
|
|
||||||
<name>concurrentKernels</name>
|
|
||||||
<cuda_api_list>
|
|
||||||
<toolkit>cudaStreamDestroy</toolkit>
|
|
||||||
<toolkit>cudaMalloc</toolkit>
|
|
||||||
<toolkit>cudaMemcpyAsync</toolkit>
|
|
||||||
<toolkit>cudaFree</toolkit>
|
|
||||||
<toolkit>cudaMallocHost</toolkit>
|
|
||||||
<toolkit>cudaEventCreateWithFlags</toolkit>
|
|
||||||
<toolkit>cudaEventSynchronize</toolkit>
|
|
||||||
<toolkit>cudaEventRecord</toolkit>
|
|
||||||
<toolkit>cudaFreeHost</toolkit>
|
|
||||||
<toolkit>cudaGetDevice</toolkit>
|
|
||||||
<toolkit>cudaStreamWaitEvent</toolkit>
|
|
||||||
<toolkit>cudaEventDestroy</toolkit>
|
|
||||||
<toolkit>cudaEventElapsedTime</toolkit>
|
|
||||||
<toolkit>cudaStreamCreate</toolkit>
|
|
||||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
|
||||||
<toolkit>cudaEventCreate</toolkit>
|
|
||||||
</cuda_api_list>
|
|
||||||
<description><![CDATA[This sample demonstrates the use of CUDA streams for concurrent execution of several kernels on GPU device. It also illustrates how to introduce dependencies between CUDA streams with the new cudaStreamWaitEvent function.]]></description>
|
|
||||||
<devicecompilation>whole</devicecompilation>
|
|
||||||
<includepaths>
|
|
||||||
<path>./</path>
|
|
||||||
<path>../</path>
|
|
||||||
<path>../../../Common</path>
|
|
||||||
</includepaths>
|
|
||||||
<keyconcepts>
|
|
||||||
<concept level="advanced">Performance Strategies</concept>
|
|
||||||
</keyconcepts>
|
|
||||||
<keywords>
|
|
||||||
<keyword>CUDA</keyword>
|
|
||||||
<keyword>Concurrent Kernels</keyword>
|
|
||||||
</keywords>
|
|
||||||
<libraries>
|
|
||||||
</libraries>
|
|
||||||
<librarypaths>
|
|
||||||
</librarypaths>
|
|
||||||
<nsight_eclipse>true</nsight_eclipse>
|
|
||||||
<primary_file>concurrentKernels.cu</primary_file>
|
|
||||||
<scopes>
|
|
||||||
<scope>1:CUDA Advanced Topics</scope>
|
|
||||||
<scope>1:Performance Strategies</scope>
|
|
||||||
</scopes>
|
|
||||||
<sm-arch>sm50</sm-arch>
|
|
||||||
<sm-arch>sm52</sm-arch>
|
|
||||||
<sm-arch>sm53</sm-arch>
|
|
||||||
<sm-arch>sm60</sm-arch>
|
|
||||||
<sm-arch>sm61</sm-arch>
|
|
||||||
<sm-arch>sm70</sm-arch>
|
|
||||||
<sm-arch>sm72</sm-arch>
|
|
||||||
<sm-arch>sm75</sm-arch>
|
|
||||||
<sm-arch>sm80</sm-arch>
|
|
||||||
<sm-arch>sm86</sm-arch>
|
|
||||||
<sm-arch>sm87</sm-arch>
|
|
||||||
<sm-arch>sm89</sm-arch>
|
|
||||||
<sm-arch>sm90</sm-arch>
|
|
||||||
<supported_envs>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<platform>windows7</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>macosx</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>arm</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>sbsa</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>ppc64le</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
</supported_envs>
|
|
||||||
<supported_sm_architectures>
|
|
||||||
<include>all</include>
|
|
||||||
</supported_sm_architectures>
|
|
||||||
<title>Concurrent Kernels</title>
|
|
||||||
</entry>
|
|
@ -1,70 +0,0 @@
|
|||||||
# concurrentKernels - Concurrent Kernels
|
|
||||||
|
|
||||||
## Description
|
|
||||||
|
|
||||||
This sample demonstrates the use of CUDA streams for concurrent execution of several kernels on a GPU device. It also illustrates how to introduce dependencies between CUDA streams with the cudaStreamWaitEvent function.
|
|
||||||
|
|
||||||
## Key Concepts
|
|
||||||
|
|
||||||
Performance Strategies
|
|
||||||
|
|
||||||
## Supported SM Architectures
|
|
||||||
|
|
||||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
|
||||||
|
|
||||||
## Supported OSes
|
|
||||||
|
|
||||||
Linux, Windows
|
|
||||||
|
|
||||||
## Supported CPU Architecture
|
|
||||||
|
|
||||||
x86_64, ppc64le, armv7l
|
|
||||||
|
|
||||||
## CUDA APIs involved
|
|
||||||
|
|
||||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
|
||||||
cudaStreamDestroy, cudaMalloc, cudaMemcpyAsync, cudaFree, cudaMallocHost, cudaEventCreateWithFlags, cudaEventSynchronize, cudaEventRecord, cudaFreeHost, cudaGetDevice, cudaStreamWaitEvent, cudaEventDestroy, cudaEventElapsedTime, cudaStreamCreate, cudaGetDeviceProperties, cudaEventCreate
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
|
||||||
|
|
||||||
## Build and Run
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Each individual sample has its own set of solution files in its directory.
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
|
||||||
|
|
||||||
### Linux
|
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
|
||||||
```
|
|
||||||
|
|
||||||
## References (for more details)
|
|
||||||
|
|
@ -1,228 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
//
|
|
||||||
// This sample demonstrates the use of streams for concurrent execution. It also
|
|
||||||
// illustrates how to introduce dependencies between CUDA streams with the
|
|
||||||
// cudaStreamWaitEvent function.
|
|
||||||
//
|
|
||||||
|
|
||||||
// Devices of compute capability 2.0 or higher can overlap the kernels
|
|
||||||
//
|
|
||||||
#include <cooperative_groups.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
namespace cg = cooperative_groups;
|
|
||||||
#include <helper_cuda.h>
|
|
||||||
#include <helper_functions.h>
|
|
||||||
|
|
||||||
// This is a kernel that does no real work but runs at least for a specified
|
|
||||||
// number of clocks
|
|
||||||
// Busy-wait kernel: spins until at least `clock_count` clock ticks have
// elapsed since entry, then stores the measured tick count in d_o[0].
//
// The tick counter is sampled as a 32-bit unsigned value, so the subtraction
// below is performed modulo 2^32 and stays correct across a single counter
// wrap-around: with m = 2^32, (end - start) == (end + m - start) (mod m).
// Consequently `clock_count` is expected to be smaller than 2^32.
__global__ void clock_block(clock_t *d_o, clock_t clock_count) {
  const unsigned int t_begin = (unsigned int)clock();

  clock_t elapsed = 0;

  while (elapsed < clock_count) {
    const unsigned int t_now = (unsigned int)clock();

    // Unsigned difference handles wrap-around without an explicit branch.
    elapsed = (clock_t)(t_now - t_begin);
  }

  *d_o = elapsed;
}
|
|
||||||
|
|
||||||
// Single warp reduction kernel
|
|
||||||
// Single-block reduction: adds the N values in d_clocks and writes the total
// back to d_clocks[0].
//
// The shared scratch buffer has 32 slots and the tree reduction starts at a
// stride of 16, so the kernel is written for a launch with one block of 32
// threads (which is how main() launches it).
__global__ void sum(clock_t *d_clocks, int N) {
  // Handle to the thread block group, used for block-wide barriers.
  cg::thread_block block = cg::this_thread_block();
  __shared__ clock_t s_clocks[32];

  const unsigned int tid = threadIdx.x;

  // Each thread accumulates a strided slice of the input.
  clock_t partial = 0;
  for (int idx = tid; idx < N; idx += blockDim.x) {
    partial += d_clocks[idx];
  }

  s_clocks[tid] = partial;
  cg::sync(block);

  // Pairwise tree reduction in shared memory: 16, 8, 4, 2, 1.
  for (int stride = 16; stride >= 1; stride >>= 1) {
    if (tid < stride) {
      s_clocks[tid] += s_clocks[tid + stride];
    }

    cg::sync(block);
  }

  d_clocks[0] = s_clocks[0];
}
|
|
||||||
|
|
||||||
// Launches `nkernels` independent busy-wait kernels, each in its own stream,
// then a reduction kernel in one extra stream that waits (via
// cudaStreamWaitEvent) for all of them. Reports expected vs. measured time and
// exits with EXIT_SUCCESS when the summed clock counts exceed the requested
// total, EXIT_FAILURE otherwise.
//
// Command line: --nkernels=<n> overrides the number of concurrent kernels
// (must be >= 1); device selection is delegated to findCudaDevice().
int main(int argc, char **argv) {
  int nkernels = 8;        // number of concurrent kernels
  float kernel_time = 10;  // time the kernel should run in ms
  float elapsed_time;      // timing variables
  int cuda_device = 0;

  printf("[%s] - Starting...\n", argv[0]);

  // get number of kernels if overridden on the command line
  if (checkCmdLineFlag(argc, (const char **)argv, "nkernels")) {
    nkernels = getCmdLineArgumentInt(argc, (const char **)argv, "nkernels");
  }

  if (nkernels < 1) {
    printf("Invalid number of kernels (%d); nkernels must be >= 1\n",
           nkernels);
    exit(EXIT_FAILURE);
  }

  // Derived sizes are computed only after nkernels is final, so the buffers
  // always hold one clock_t per launched kernel.
  const int nstreams = nkernels + 1;  // one more stream than concurrent kernels
  const size_t nbytes = (size_t)nkernels * sizeof(clock_t);  // data bytes

  // use command-line specified CUDA device, otherwise use device with highest
  // Gflops/s
  cuda_device = findCudaDevice(argc, (const char **)argv);

  cudaDeviceProp deviceProp;
  checkCudaErrors(cudaGetDevice(&cuda_device));

  checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device));

  if (deviceProp.concurrentKernels == 0) {
    printf("> GPU does not support concurrent kernel execution\n");
    printf("  CUDA kernel runs will be serialized\n");
  }

  printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n",
         deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount);

  // allocate host memory
  clock_t *a = 0;  // pointer to the array data in host memory
  checkCudaErrors(cudaMallocHost((void **)&a, nbytes));

  // allocate device memory
  clock_t *d_a = 0;  // pointers to data and init value in the device memory
  checkCudaErrors(cudaMalloc((void **)&d_a, nbytes));

  // allocate and initialize an array of stream handles
  cudaStream_t *streams =
      (cudaStream_t *)malloc(nstreams * sizeof(cudaStream_t));

  if (streams == NULL) {
    printf("Failed to allocate host memory for stream handles\n");
    exit(EXIT_FAILURE);
  }

  for (int i = 0; i < nstreams; i++) {
    checkCudaErrors(cudaStreamCreate(&(streams[i])));
  }

  // create CUDA event handles
  cudaEvent_t start_event, stop_event;
  checkCudaErrors(cudaEventCreate(&start_event));
  checkCudaErrors(cudaEventCreate(&stop_event));

  // the events are used for synchronization only and hence do not need to
  // record timings; this also makes events not introduce global sync points
  // when recorded, which is critical to get overlap
  cudaEvent_t *kernelEvent =
      (cudaEvent_t *)malloc(nkernels * sizeof(cudaEvent_t));

  if (kernelEvent == NULL) {
    printf("Failed to allocate host memory for event handles\n");
    exit(EXIT_FAILURE);
  }

  for (int i = 0; i < nkernels; i++) {
    checkCudaErrors(
        cudaEventCreateWithFlags(&(kernelEvent[i]), cudaEventDisableTiming));
  }

  //////////////////////////////////////////////////////////////////////
  // time execution with nkernels streams
  clock_t total_clocks = 0;
#if defined(__arm__) || defined(__aarch64__)
  // the kernel takes more time than the channel reset time on arm archs, so to
  // prevent hangs reduce time_clocks.
  clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 100));
#else
  clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate);
#endif

  checkCudaErrors(cudaEventRecord(start_event, 0));

  // queue nkernels in separate streams and record when they are done
  for (int i = 0; i < nkernels; ++i) {
    clock_block<<<1, 1, 0, streams[i]>>>(&d_a[i], time_clocks);
    total_clocks += time_clocks;
    checkCudaErrors(cudaEventRecord(kernelEvent[i], streams[i]));

    // make the last stream wait for the kernel event to be recorded
    checkCudaErrors(
        cudaStreamWaitEvent(streams[nstreams - 1], kernelEvent[i], 0));
  }

  // queue a sum kernel and a copy back to host in the last stream.
  // the commands in this stream get dispatched as soon as all the kernel events
  // have been recorded
  sum<<<1, 32, 0, streams[nstreams - 1]>>>(d_a, nkernels);

  // surface any launch-configuration error from the kernels queued above
  checkCudaErrors(cudaGetLastError());

  checkCudaErrors(cudaMemcpyAsync(
      a, d_a, sizeof(clock_t), cudaMemcpyDeviceToHost, streams[nstreams - 1]));

  // at this point the CPU has dispatched all work for the GPU and can continue
  // processing other tasks in parallel

  // in this sample we just wait until the GPU is done
  checkCudaErrors(cudaEventRecord(stop_event, 0));
  checkCudaErrors(cudaEventSynchronize(stop_event));
  checkCudaErrors(cudaEventElapsedTime(&elapsed_time, start_event, stop_event));

  printf("Expected time for serial execution of %d kernels = %.3fs\n", nkernels,
         nkernels * kernel_time / 1000.0f);
  printf("Expected time for concurrent execution of %d kernels = %.3fs\n",
         nkernels, kernel_time / 1000.0f);
  printf("Measured time for sample = %.3fs\n", elapsed_time / 1000.0f);

  bool bTestResult = (a[0] > total_clocks);

  // release resources: nstreams streams were created, but only nkernels events
  for (int i = 0; i < nstreams; i++) {
    checkCudaErrors(cudaStreamDestroy(streams[i]));
  }

  for (int i = 0; i < nkernels; i++) {
    checkCudaErrors(cudaEventDestroy(kernelEvent[i]));
  }

  free(streams);
  free(kernelEvent);

  checkCudaErrors(cudaEventDestroy(start_event));
  checkCudaErrors(cudaEventDestroy(stop_event));
  checkCudaErrors(cudaFreeHost(a));
  checkCudaErrors(cudaFree(d_a));

  if (!bTestResult) {
    printf("Test failed!\n");
    exit(EXIT_FAILURE);
  }

  printf("Test passed\n");
  exit(EXIT_SUCCESS);
}
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2017
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "concurrentKernels", "concurrentKernels_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,112 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>concurrentKernels_vs2017</RootNamespace>
|
|
||||||
<ProjectName>concurrentKernels</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
|
||||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
|
||||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
|
||||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v141</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="concurrentKernels.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2019
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "concurrentKernels", "concurrentKernels_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>concurrentKernels_vs2019</RootNamespace>
|
|
||||||
<ProjectName>concurrentKernels</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v142</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="concurrentKernels.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2022
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "concurrentKernels", "concurrentKernels_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>concurrentKernels_vs2022</RootNamespace>
|
|
||||||
<ProjectName>concurrentKernels</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v143</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="concurrentKernels.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,10 +0,0 @@
|
|||||||
{
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "CUDA C++: Launch",
|
|
||||||
"type": "cuda-gdb",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${workspaceFolder}/cppIntegration"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
@ -1,363 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
#
|
|
||||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Location of the CUDA Toolkit
|
|
||||||
CUDA_PATH ?= /usr/local/cuda
|
|
||||||
|
|
||||||
##############################
|
|
||||||
# start deprecated interface #
|
|
||||||
##############################
|
|
||||||
ifeq ($(x86_64),1)
|
|
||||||
$(info WARNING - x86_64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
|
||||||
TARGET_ARCH ?= x86_64
|
|
||||||
endif
|
|
||||||
ifeq ($(ARMv7),1)
|
|
||||||
$(info WARNING - ARMv7 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
|
||||||
TARGET_ARCH ?= armv7l
|
|
||||||
endif
|
|
||||||
ifeq ($(aarch64),1)
|
|
||||||
$(info WARNING - aarch64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
|
||||||
TARGET_ARCH ?= aarch64
|
|
||||||
endif
|
|
||||||
ifeq ($(ppc64le),1)
|
|
||||||
$(info WARNING - ppc64le variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
|
||||||
TARGET_ARCH ?= ppc64le
|
|
||||||
endif
|
|
||||||
ifneq ($(GCC),)
|
|
||||||
$(info WARNING - GCC variable has been deprecated)
|
|
||||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
|
||||||
HOST_COMPILER ?= $(GCC)
|
|
||||||
endif
|
|
||||||
ifneq ($(abi),)
|
|
||||||
$(error ERROR - abi variable has been removed)
|
|
||||||
endif
|
|
||||||
############################
|
|
||||||
# end deprecated interface #
|
|
||||||
############################
|
|
||||||
|
|
||||||
# architecture
|
|
||||||
HOST_ARCH := $(shell uname -m)
|
|
||||||
TARGET_ARCH ?= $(HOST_ARCH)
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
|
||||||
TARGET_SIZE := 64
|
|
||||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
|
||||||
TARGET_SIZE := 32
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
|
||||||
ifeq ($(HOST_ARCH),aarch64)
|
|
||||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
|
||||||
HOST_ARCH := sbsa
|
|
||||||
TARGET_ARCH := sbsa
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
|
||||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
|
||||||
TARGET_ARCH = armv7l
|
|
||||||
endif
|
|
||||||
|
|
||||||
# operating system
|
|
||||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
|
||||||
TARGET_OS ?= $(HOST_OS)
|
|
||||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|
||||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# host compiler
|
|
||||||
ifdef HOST_COMPILER
|
|
||||||
CUSTOM_HOST_COMPILER = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
|
||||||
HOST_COMPILER ?= clang++
|
|
||||||
endif
|
|
||||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
|
||||||
ifeq ($(TARGET_OS),linux)
|
|
||||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),aarch64)
|
|
||||||
ifeq ($(TARGET_OS), linux)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
|
||||||
else ifeq ($(TARGET_OS), android)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),sbsa)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
HOST_COMPILER ?= g++
|
|
||||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
|
||||||
|
|
||||||
# internal flags
|
|
||||||
NVCCFLAGS := -m${TARGET_SIZE}
|
|
||||||
CCFLAGS :=
|
|
||||||
LDFLAGS :=
|
|
||||||
|
|
||||||
# build flags
|
|
||||||
|
|
||||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
|
||||||
GCC_PATH := $(shell which gcc)
|
|
||||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
|
||||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
|
||||||
LDFLAGS += -lstdc++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
|
||||||
CCFLAGS += -arch $(HOST_ARCH)
|
|
||||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
|
||||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
|
||||||
CCFLAGS += -mfloat-abi=hard
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
LDFLAGS += -pie
|
|
||||||
CCFLAGS += -fpie -fpic -fexceptions
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
NVCCFLAGS += -D_QNX_SOURCE
|
|
||||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
|
||||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
|
||||||
LDFLAGS += -lsocket
|
|
||||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
|
||||||
ifdef TARGET_OVERRIDE
|
|
||||||
LDFLAGS += -lslog2
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
|
||||||
CCFLAGS += -I$(TARGET_FS)/../include
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
|
||||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Install directory of different arch
|
|
||||||
CUDA_INSTALL_TARGET_DIR :=
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Debug build flags
|
|
||||||
ifeq ($(dbg),1)
|
|
||||||
NVCCFLAGS += -g -G
|
|
||||||
BUILD_TYPE := debug
|
|
||||||
else
|
|
||||||
BUILD_TYPE := release
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS :=
|
|
||||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
|
||||||
|
|
||||||
SAMPLE_ENABLED := 1
|
|
||||||
|
|
||||||
ALL_LDFLAGS :=
|
|
||||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
|
||||||
|
|
||||||
# Common includes and paths for CUDA
|
|
||||||
INCLUDES := -I../../../Common
|
|
||||||
LIBRARIES :=
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Gencode arguments
|
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
|
||||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
|
||||||
else
|
|
||||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
|
||||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(GENCODE_FLAGS),)
|
|
||||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
|
||||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
|
||||||
|
|
||||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
|
||||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
|
||||||
ifneq ($(HIGHEST_SM),)
|
|
||||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
|
||||||
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
EXEC ?= @echo "[@]"
|
|
||||||
endif
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Target rules
|
|
||||||
all: build
|
|
||||||
|
|
||||||
build: cppIntegration
|
|
||||||
|
|
||||||
check.deps:
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
@echo "Sample will be waived due to the above missing dependencies"
|
|
||||||
else
|
|
||||||
@echo "Sample is ready - all dependencies have been met"
|
|
||||||
endif
|
|
||||||
|
|
||||||
cppIntegration.o:cppIntegration.cu
|
|
||||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
cppIntegration_gold.o:cppIntegration_gold.cpp
|
|
||||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
main.o:main.cpp
|
|
||||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
cppIntegration: cppIntegration.o cppIntegration_gold.o main.o
|
|
||||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
|
||||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
|
||||||
|
|
||||||
run: build
|
|
||||||
$(EXEC) ./cppIntegration
|
|
||||||
|
|
||||||
testrun: build
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -f cppIntegration cppIntegration.o cppIntegration_gold.o main.o
|
|
||||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/cppIntegration
|
|
||||||
|
|
||||||
clobber: clean
|
|
@ -1,72 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
|
||||||
<entry>
|
|
||||||
<name>cppIntegration</name>
|
|
||||||
<cuda_api_list>
|
|
||||||
<toolkit>cudaMalloc</toolkit>
|
|
||||||
<toolkit>cudaMemcpy</toolkit>
|
|
||||||
<toolkit>cudaFree</toolkit>
|
|
||||||
</cuda_api_list>
|
|
||||||
<description><![CDATA[This example demonstrates how to integrate CUDA into an existing C++ application, i.e. the CUDA entry point on host side is only a function which is called from C++ code and only the file containing this function is compiled with nvcc. It also demonstrates that vector types can be used from cpp.]]></description>
|
|
||||||
<devicecompilation>whole</devicecompilation>
|
|
||||||
<includepaths>
|
|
||||||
<path>./</path>
|
|
||||||
<path>../</path>
|
|
||||||
<path>../../../Common</path>
|
|
||||||
</includepaths>
|
|
||||||
<keyconcepts>
|
|
||||||
<concept level="basic">CPP-CUDA Integration</concept>
|
|
||||||
</keyconcepts>
|
|
||||||
<keywords>
|
|
||||||
</keywords>
|
|
||||||
<libraries>
|
|
||||||
</libraries>
|
|
||||||
<librarypaths>
|
|
||||||
</librarypaths>
|
|
||||||
<nsight_eclipse>true</nsight_eclipse>
|
|
||||||
<primary_file>cppIntegration.cu</primary_file>
|
|
||||||
<scopes>
|
|
||||||
<scope>1:CUDA Basic Topics</scope>
|
|
||||||
</scopes>
|
|
||||||
<sm-arch>sm50</sm-arch>
|
|
||||||
<sm-arch>sm52</sm-arch>
|
|
||||||
<sm-arch>sm53</sm-arch>
|
|
||||||
<sm-arch>sm60</sm-arch>
|
|
||||||
<sm-arch>sm61</sm-arch>
|
|
||||||
<sm-arch>sm70</sm-arch>
|
|
||||||
<sm-arch>sm72</sm-arch>
|
|
||||||
<sm-arch>sm75</sm-arch>
|
|
||||||
<sm-arch>sm80</sm-arch>
|
|
||||||
<sm-arch>sm86</sm-arch>
|
|
||||||
<sm-arch>sm87</sm-arch>
|
|
||||||
<sm-arch>sm89</sm-arch>
|
|
||||||
<sm-arch>sm90</sm-arch>
|
|
||||||
<supported_envs>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<platform>windows7</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>macosx</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>arm</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>sbsa</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>ppc64le</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
</supported_envs>
|
|
||||||
<supported_sm_architectures>
|
|
||||||
<include>all</include>
|
|
||||||
</supported_sm_architectures>
|
|
||||||
<title>C++ Integration</title>
|
|
||||||
<type>exe</type>
|
|
||||||
</entry>
|
|
@ -1,70 +0,0 @@
|
|||||||
# cppIntegration - C++ Integration
|
|
||||||
|
|
||||||
## Description
|
|
||||||
|
|
||||||
This example demonstrates how to integrate CUDA into an existing C++ application, i.e. the CUDA entry point on host side is only a function which is called from C++ code and only the file containing this function is compiled with nvcc. It also demonstrates that vector types can be used from cpp.
|
|
||||||
|
|
||||||
## Key Concepts
|
|
||||||
|
|
||||||
CPP-CUDA Integration
|
|
||||||
|
|
||||||
## Supported SM Architectures
|
|
||||||
|
|
||||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
|
||||||
|
|
||||||
## Supported OSes
|
|
||||||
|
|
||||||
Linux, Windows
|
|
||||||
|
|
||||||
## Supported CPU Architecture
|
|
||||||
|
|
||||||
x86_64, ppc64le, armv7l
|
|
||||||
|
|
||||||
## CUDA APIs involved
|
|
||||||
|
|
||||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
|
||||||
cudaMalloc, cudaMemcpy, cudaFree
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
|
||||||
|
|
||||||
## Build and Run
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Each individual sample has its own set of solution files in its directory.
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
|
||||||
|
|
||||||
### Linux
|
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
|
||||||
```
|
|
||||||
|
|
||||||
## References (for more details)
|
|
||||||
|
|
@ -1,172 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Example of integrating CUDA functions into an existing
|
|
||||||
* application / framework.
|
|
||||||
* Host part of the device code.
|
|
||||||
* Compiled with Cuda compiler.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// System includes
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
// CUDA runtime
|
|
||||||
#include <cuda_runtime.h>
|
|
||||||
|
|
||||||
// helper functions and utilities to work with CUDA
|
|
||||||
#include <helper_cuda.h>
|
|
||||||
#include <helper_functions.h>
|
|
||||||
|
|
||||||
// Larger of two values. Both arguments and the comparison itself are fully
// parenthesized so that compound expressions (e.g. `x & mask`, `c ? a : b`)
// expand with the intended precedence.
// NOTE: as with any function-like macro, the selected argument is evaluated
// twice; do not pass expressions with side effects.
#ifndef MAX
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// forward declarations of the host-side reference functions
|
|
||||||
|
|
||||||
extern "C" void computeGold(char *reference, char *idata,
|
|
||||||
const unsigned int len);
|
|
||||||
extern "C" void computeGold2(int2 *reference, int2 *idata,
|
|
||||||
const unsigned int len);
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
//! Simple test kernel for device functionality.
//! Each thread loads one 32-bit word from global memory, treats it as four
//! packed 8-bit values, subtracts 10 from every byte (modulo 256) and stores
//! the word back in place.
//! Only threadIdx.x is used as the index, so the kernel addresses at most one
//! block's worth of elements.
//! @param g_data  memory to process (in and out), one int per thread
///////////////////////////////////////////////////////////////////////////////
__global__ void kernel(int *g_data) {
  const unsigned int tid = threadIdx.x;

  // Work on an unsigned copy: right shifts are then logical (no sign
  // extension) and left shifts cannot overflow a signed type.
  const unsigned int packed = static_cast<unsigned int>(g_data[tid]);
  unsigned int result = 0;

  // Process the four bytes independently. Masking with 0xFF keeps a borrow
  // (byte value < 10) from leaking into the neighbouring, more significant
  // bytes when the partial results are OR-ed together.
  for (unsigned int shift = 0; shift < 32; shift += 8) {
    const unsigned int byte = ((packed >> shift) - 10u) & 0xFFu;
    result |= byte << shift;
  }

  // write data back to global memory
  g_data[tid] = static_cast<int>(result);
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
//! Demonstration that int2 data can be used in the cpp code.
//! Each thread replaces the .x component of its element with (.x - .y);
//! the .y component is left unchanged.
//! Only threadIdx.x is used as the index.
//! @param g_data  memory to process (in and out), one int2 per thread
///////////////////////////////////////////////////////////////////////////////
__global__ void kernel2(int2 *g_data) {
  // each thread owns exactly one element
  int2 *const element = g_data + threadIdx.x;

  // read both components first, then overwrite only .x
  const int lhs = element->x;
  const int rhs = element->y;
  element->x = lhs - rhs;
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
//! Entry point for CUDA functionality on the host side.
//! Copies both input arrays to the device, runs kernel() on the char data
//! (four chars per thread) and kernel2() on the int2 data (one element per
//! thread), copies the results back into \a data / \a data_int2 and compares
//! them with the host reference produced by computeGold() / computeGold2().
//! @param argc       command line argument count
//! @param argv       command line arguments
//! @param data       char data to process on the device (overwritten with
//!                   the device result)
//! @param data_int2  int2 data to process on the device (overwritten with
//!                   the device result)
//! @param len        number of elements in \a data and in \a data_int2;
//!                   must be a multiple of 4
//! @return true if every device result matches the reference, false on a
//!         mismatch or if the host reference buffers cannot be allocated
////////////////////////////////////////////////////////////////////////////////
extern "C" bool runTest(const int argc, const char **argv, char *data,
                        int2 *data_int2, unsigned int len) {
  // use command-line specified CUDA device, otherwise use device with highest
  // Gflops/s
  findCudaDevice(argc, (const char **)argv);

  // kernel() handles four chars per thread, so len must be divisible by 4
  assert(0 == (len % 4));
  const unsigned int num_threads = len / 4;

  // byte counts are kept in size_t so that sizeof(int2) * len cannot wrap
  // around the range of unsigned int
  const size_t mem_size = sizeof(char) * len;
  const size_t mem_size_int2 = sizeof(int2) * len;

  // allocate the host reference buffers up front so that an allocation
  // failure is reported before any device work is started
  char *reference = (char *)malloc(mem_size);
  int2 *reference2 = (int2 *)malloc(mem_size_int2);

  if (reference == NULL || reference2 == NULL) {
    fprintf(stderr, "Failed to allocate host memory for reference results\n");
    free(reference);  // free(NULL) is a no-op
    free(reference2);
    return false;
  }

  // allocate device memory
  char *d_data = NULL;
  checkCudaErrors(cudaMalloc((void **)&d_data, mem_size));
  // copy host memory to device
  checkCudaErrors(cudaMemcpy(d_data, data, mem_size, cudaMemcpyHostToDevice));

  // allocate device memory for int2 version
  int2 *d_data_int2 = NULL;
  checkCudaErrors(cudaMalloc((void **)&d_data_int2, mem_size_int2));
  // copy host memory to device
  checkCudaErrors(cudaMemcpy(d_data_int2, data_int2, mem_size_int2,
                             cudaMemcpyHostToDevice));

  // setup execution parameters: a single block for both kernels
  dim3 grid(1, 1, 1);
  dim3 threads(num_threads, 1, 1);
  dim3 threads2(len, 1, 1);  // more threads needed for separate int2 version

  // execute the kernels
  kernel<<<grid, threads>>>((int *)d_data);
  kernel2<<<grid, threads2>>>(d_data_int2);

  // check if kernel execution generated an error
  getLastCudaError("Kernel execution failed");

  // compute reference solutions from the still unmodified host input
  computeGold(reference, data, len);
  computeGold2(reference2, data_int2, len);

  // copy results from device to host (overwrites the host input arrays)
  checkCudaErrors(cudaMemcpy(data, d_data, mem_size, cudaMemcpyDeviceToHost));
  checkCudaErrors(cudaMemcpy(data_int2, d_data_int2, mem_size_int2,
                             cudaMemcpyDeviceToHost));

  // check result; stop at the first mismatch, the verdict cannot change
  bool success = true;

  for (unsigned int i = 0; success && i < len; i++) {
    if (reference[i] != data[i] || reference2[i].x != data_int2[i].x ||
        reference2[i].y != data_int2[i].y) {
      success = false;
    }
  }

  // cleanup memory
  checkCudaErrors(cudaFree(d_data));
  checkCudaErrors(cudaFree(d_data_int2));
  free(reference);
  free(reference2);

  return success;
}
|
|
@ -1,67 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Example of integrating CUDA functions into an existing
|
|
||||||
* application / framework.
|
|
||||||
* Reference solution computation.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Required header to support CUDA vector types
|
|
||||||
#include <vector_types.h>
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// export C interface
|
|
||||||
extern "C" void computeGold(char *reference, char *idata,
|
|
||||||
const unsigned int len);
|
|
||||||
extern "C" void computeGold2(int2 *reference, int2 *idata,
|
|
||||||
const unsigned int len);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
//! Compute reference data set.
//! Every output element is the matching input element minus 10.
//! @param reference  reference data, computed but preallocated
//! @param idata      input data as provided to device
//! @param len        number of elements in reference / idata
////////////////////////////////////////////////////////////////////////////////
void computeGold(char *reference, char *idata, const unsigned int len) {
  const char *src = idata;
  const char *const src_end = idata + len;
  char *dst = reference;

  // walk both arrays in lock step
  while (src != src_end) {
    *dst = *src - 10;
    ++src;
    ++dst;
  }
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//! Compute reference data set for int2 version
|
|
||||||
//! Each element is multiplied with the number of threads / array length
|
|
||||||
//! @param reference reference data, computed but preallocated
|
|
||||||
//! @param idata input data as provided to device
|
|
||||||
//! @param len number of elements in reference / idata
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
void computeGold2(int2 *reference, int2 *idata, const unsigned int len) {
|
|
||||||
for (unsigned int i = 0; i < len; ++i) {
|
|
||||||
reference[i].x = idata[i].x - idata[i].y;
|
|
||||||
reference[i].y = idata[i].y;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2017
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cppIntegration", "cppIntegration_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,114 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>cppIntegration_vs2017</RootNamespace>
|
|
||||||
<ProjectName>cppIntegration</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
|
||||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
|
||||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
|
||||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v141</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="cppIntegration.cu" />
|
|
||||||
<ClCompile Include="cppIntegration_gold.cpp" />
|
|
||||||
<ClCompile Include="main.cpp" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2019
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cppIntegration", "cppIntegration_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,110 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>cppIntegration_vs2019</RootNamespace>
|
|
||||||
<ProjectName>cppIntegration</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v142</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="cppIntegration.cu" />
|
|
||||||
<ClCompile Include="cppIntegration_gold.cpp" />
|
|
||||||
<ClCompile Include="main.cpp" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2022
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cppIntegration", "cppIntegration_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,110 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>cppIntegration_vs2022</RootNamespace>
|
|
||||||
<ProjectName>cppIntegration</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v143</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/cppIntegration.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="cppIntegration.cu" />
|
|
||||||
<ClCompile Include="cppIntegration_gold.cpp" />
|
|
||||||
<ClCompile Include="main.cpp" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,86 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Example of integrating CUDA functions into an existing
|
|
||||||
* application / framework.
|
|
||||||
* CPP code representing the existing application / framework.
|
|
||||||
* Compiled with default CPP compiler.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// includes, system
|
|
||||||
#include <iostream>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
// Required to include CUDA vector types
|
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include <vector_types.h>
|
|
||||||
#include <helper_cuda.h>
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// declaration, forward
|
|
||||||
extern "C" bool runTest(const int argc, const char **argv, char *data,
|
|
||||||
int2 *data_int2, unsigned int len);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Program main
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
int main(int argc, char **argv) {
|
|
||||||
// input data
|
|
||||||
int len = 16;
|
|
||||||
// the data has some zero padding at the end so that the size is a multiple of
|
|
||||||
// four, this simplifies the processing as each thread can process four
|
|
||||||
// elements (which is necessary to avoid bank conflicts) but no branching is
|
|
||||||
// necessary to avoid out of bounds reads
|
|
||||||
char str[] = {82, 111, 118, 118, 121, 42, 97, 121,
|
|
||||||
124, 118, 110, 56, 10, 10, 10, 10};
|
|
||||||
|
|
||||||
// Use int2 showing that CUDA vector types can be used in cpp code
|
|
||||||
int2 i2[16];
|
|
||||||
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
i2[i].x = str[i];
|
|
||||||
i2[i].y = 10;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool bTestResult;
|
|
||||||
|
|
||||||
// run the device part of the program
|
|
||||||
bTestResult = runTest(argc, (const char **)argv, str, i2, len);
|
|
||||||
|
|
||||||
std::cout << str << std::endl;
|
|
||||||
|
|
||||||
char str_device[16];
|
|
||||||
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
str_device[i] = (char)(i2[i].x);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << str_device << std::endl;
|
|
||||||
|
|
||||||
exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);
|
|
||||||
}
|
|
@ -1,10 +0,0 @@
|
|||||||
{
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "CUDA C++: Launch",
|
|
||||||
"type": "cuda-gdb",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${workspaceFolder}/cppOverload"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
@ -1,357 +0,0 @@
|
|||||||
################################################################################
|
|
||||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions
|
|
||||||
# are met:
|
|
||||||
# * Redistributions of source code must retain the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer.
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived
|
|
||||||
# from this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
#
|
|
||||||
# Makefile project only supported on Mac OS X and Linux platforms
|
|
||||||
#
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Location of the CUDA Toolkit
|
|
||||||
CUDA_PATH ?= /usr/local/cuda
|
|
||||||
|
|
||||||
##############################
|
|
||||||
# start deprecated interface #
|
|
||||||
##############################
|
|
||||||
ifeq ($(x86_64),1)
|
|
||||||
$(info WARNING - x86_64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
|
||||||
TARGET_ARCH ?= x86_64
|
|
||||||
endif
|
|
||||||
ifeq ($(ARMv7),1)
|
|
||||||
$(info WARNING - ARMv7 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
|
||||||
TARGET_ARCH ?= armv7l
|
|
||||||
endif
|
|
||||||
ifeq ($(aarch64),1)
|
|
||||||
$(info WARNING - aarch64 variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
|
||||||
TARGET_ARCH ?= aarch64
|
|
||||||
endif
|
|
||||||
ifeq ($(ppc64le),1)
|
|
||||||
$(info WARNING - ppc64le variable has been deprecated)
|
|
||||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
|
||||||
TARGET_ARCH ?= ppc64le
|
|
||||||
endif
|
|
||||||
ifneq ($(GCC),)
|
|
||||||
$(info WARNING - GCC variable has been deprecated)
|
|
||||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
|
||||||
HOST_COMPILER ?= $(GCC)
|
|
||||||
endif
|
|
||||||
ifneq ($(abi),)
|
|
||||||
$(error ERROR - abi variable has been removed)
|
|
||||||
endif
|
|
||||||
############################
|
|
||||||
# end deprecated interface #
|
|
||||||
############################
|
|
||||||
|
|
||||||
# architecture
|
|
||||||
HOST_ARCH := $(shell uname -m)
|
|
||||||
TARGET_ARCH ?= $(HOST_ARCH)
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
|
||||||
TARGET_SIZE := 64
|
|
||||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
|
||||||
TARGET_SIZE := 32
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
|
||||||
ifeq ($(HOST_ARCH),aarch64)
|
|
||||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
|
||||||
HOST_ARCH := sbsa
|
|
||||||
TARGET_ARCH := sbsa
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
|
||||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
|
||||||
TARGET_ARCH = armv7l
|
|
||||||
endif
|
|
||||||
|
|
||||||
# operating system
|
|
||||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
|
||||||
TARGET_OS ?= $(HOST_OS)
|
|
||||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
|
||||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# host compiler
|
|
||||||
ifdef HOST_COMPILER
|
|
||||||
CUSTOM_HOST_COMPILER = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
|
||||||
HOST_COMPILER ?= clang++
|
|
||||||
endif
|
|
||||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
|
||||||
ifeq ($(TARGET_OS),linux)
|
|
||||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),aarch64)
|
|
||||||
ifeq ($(TARGET_OS), linux)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_OS),qnx)
|
|
||||||
ifeq ($(QNX_HOST),)
|
|
||||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
|
||||||
endif
|
|
||||||
ifeq ($(QNX_TARGET),)
|
|
||||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
|
||||||
endif
|
|
||||||
export QNX_HOST
|
|
||||||
export QNX_TARGET
|
|
||||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
|
||||||
else ifeq ($(TARGET_OS), android)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
|
||||||
endif
|
|
||||||
else ifeq ($(TARGET_ARCH),sbsa)
|
|
||||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
HOST_COMPILER ?= g++
|
|
||||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
|
||||||
|
|
||||||
# internal flags
|
|
||||||
NVCCFLAGS := -m${TARGET_SIZE}
|
|
||||||
CCFLAGS :=
|
|
||||||
LDFLAGS :=
|
|
||||||
|
|
||||||
# build flags
|
|
||||||
|
|
||||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
|
||||||
GCC_PATH := $(shell which gcc)
|
|
||||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
|
||||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
|
||||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
|
||||||
LDFLAGS += -lstdc++
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TARGET_OS),darwin)
|
|
||||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
|
||||||
CCFLAGS += -arch $(HOST_ARCH)
|
|
||||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
|
||||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
|
||||||
CCFLAGS += -mfloat-abi=hard
|
|
||||||
else ifeq ($(TARGET_OS),android)
|
|
||||||
LDFLAGS += -pie
|
|
||||||
CCFLAGS += -fpie -fpic -fexceptions
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
|
||||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
|
||||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
endif
|
|
||||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
|
||||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
|
||||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
|
||||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
NVCCFLAGS += -D_QNX_SOURCE
|
|
||||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
|
||||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
|
||||||
LDFLAGS += -lsocket
|
|
||||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
|
||||||
ifdef TARGET_OVERRIDE
|
|
||||||
LDFLAGS += -lslog2
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(TARGET_FS),)
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
|
||||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
|
||||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
|
||||||
CCFLAGS += -I$(TARGET_FS)/../include
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
|
||||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Install directory of different arch
|
|
||||||
CUDA_INSTALL_TARGET_DIR :=
|
|
||||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
|
||||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
|
||||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
|
||||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Debug build flags
|
|
||||||
ifeq ($(dbg),1)
|
|
||||||
NVCCFLAGS += -g -G
|
|
||||||
BUILD_TYPE := debug
|
|
||||||
else
|
|
||||||
BUILD_TYPE := release
|
|
||||||
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS :=
|
|
||||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
|
||||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
|
||||||
|
|
||||||
SAMPLE_ENABLED := 1
|
|
||||||
|
|
||||||
ALL_LDFLAGS :=
|
|
||||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
|
||||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
|
||||||
|
|
||||||
# Common includes and paths for CUDA
|
|
||||||
INCLUDES := -I../../../Common
|
|
||||||
LIBRARIES :=
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Gencode arguments
|
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
|
||||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
|
||||||
else
|
|
||||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
|
||||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
|
||||||
SAMPLE_ENABLED := 0
|
|
||||||
endif
|
|
||||||
|
|
||||||
# Build GENCODE_FLAGS from $(SMS) unless the caller already supplied them.
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))

# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility.
# GNU make's $(sort) orders words lexically, which would place a three-digit
# architecture such as 100 before 90; sort numerically instead so the last
# word really is the highest architecture.
HIGHEST_SM := $(lastword $(shell printf '%s\n' $(SMS) | sort -n))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
|
|
||||||
|
|
||||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
|
||||||
|
|
||||||
ifeq ($(SAMPLE_ENABLED),0)
|
|
||||||
EXEC ?= @echo "[@]"
|
|
||||||
endif
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
# Target rules
|
|
||||||
# These targets name actions rather than files; declaring them phony ensures
# a stray file called e.g. "clean" or "build" can never make them look up to date.
.PHONY: all build check.deps run testrun clean clobber

# Default goal: build the sample.
all: build

build: cppOverload

# Report whether the sample is enabled (SAMPLE_ENABLED is set to 0 above when
# no SM architectures have been specified).
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Compile the CUDA source into an object file.
cppOverload.o:cppOverload.cu
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the executable, then copy it into the shared bin/ tree for this
# architecture, OS and build type.
cppOverload: cppOverload.o
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

# Build (if needed) and execute the sample from the current directory.
run: build
	$(EXEC) ./cppOverload

# Builds the sample only; no test command is attached.
testrun: build

# Remove the local build products and the copy placed in the bin/ tree.
clean:
	rm -f cppOverload cppOverload.o
	rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/cppOverload

clobber: clean
|
|
@ -1,83 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
|
||||||
<entry>
|
|
||||||
<name>cppOverload</name>
|
|
||||||
<cuda_api_list>
|
|
||||||
<toolkit>cudaMemcpy</toolkit>
|
|
||||||
<toolkit>cudaFuncSetCacheConfig</toolkit>
|
|
||||||
<toolkit>cudaFree</toolkit>
|
|
||||||
<toolkit>cudaMallocHost</toolkit>
|
|
||||||
<toolkit>cudaSetDevice</toolkit>
|
|
||||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
|
||||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
|
||||||
<toolkit>cudaFreeHost</toolkit>
|
|
||||||
<toolkit>cudaMalloc</toolkit>
|
|
||||||
<toolkit>cudaFuncGetAttributes</toolkit>
|
|
||||||
<toolkit>cudaGetDeviceCount</toolkit>
|
|
||||||
</cuda_api_list>
|
|
||||||
<description><![CDATA[This sample demonstrates how to use C++ function overloading on the GPU.]]></description>
|
|
||||||
<devicecompilation>whole</devicecompilation>
|
|
||||||
<includepaths>
|
|
||||||
<path>./</path>
|
|
||||||
<path>../</path>
|
|
||||||
<path>../../../Common</path>
|
|
||||||
</includepaths>
|
|
||||||
<keyconcepts>
|
|
||||||
<concept level="basic">C++ Function Overloading</concept>
|
|
||||||
<concept level="basic">CUDA Streams and Events</concept>
|
|
||||||
</keyconcepts>
|
|
||||||
<keywords>
|
|
||||||
<keyword>GPGPU</keyword>
|
|
||||||
</keywords>
|
|
||||||
<libraries>
|
|
||||||
</libraries>
|
|
||||||
<librarypaths>
|
|
||||||
</librarypaths>
|
|
||||||
<nsight_eclipse>true</nsight_eclipse>
|
|
||||||
<primary_file>cppOverload_kernel.cu</primary_file>
|
|
||||||
<scopes>
|
|
||||||
<scope>1:CUDA Basic Topics</scope>
|
|
||||||
<scope>1:Performance Strategies</scope>
|
|
||||||
</scopes>
|
|
||||||
<sm-arch>sm50</sm-arch>
|
|
||||||
<sm-arch>sm52</sm-arch>
|
|
||||||
<sm-arch>sm53</sm-arch>
|
|
||||||
<sm-arch>sm60</sm-arch>
|
|
||||||
<sm-arch>sm61</sm-arch>
|
|
||||||
<sm-arch>sm70</sm-arch>
|
|
||||||
<sm-arch>sm72</sm-arch>
|
|
||||||
<sm-arch>sm75</sm-arch>
|
|
||||||
<sm-arch>sm80</sm-arch>
|
|
||||||
<sm-arch>sm86</sm-arch>
|
|
||||||
<sm-arch>sm87</sm-arch>
|
|
||||||
<sm-arch>sm89</sm-arch>
|
|
||||||
<sm-arch>sm90</sm-arch>
|
|
||||||
<supported_envs>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<platform>windows7</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>x86_64</arch>
|
|
||||||
<platform>macosx</platform>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>arm</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>sbsa</arch>
|
|
||||||
</env>
|
|
||||||
<env>
|
|
||||||
<arch>ppc64le</arch>
|
|
||||||
<platform>linux</platform>
|
|
||||||
</env>
|
|
||||||
</supported_envs>
|
|
||||||
<supported_sm_architectures>
|
|
||||||
<include>all</include>
|
|
||||||
</supported_sm_architectures>
|
|
||||||
<title>cppOverload</title>
|
|
||||||
<type>exe</type>
|
|
||||||
</entry>
|
|
@ -1,70 +0,0 @@
|
|||||||
# cppOverload - cppOverload
|
|
||||||
|
|
||||||
## Description
|
|
||||||
|
|
||||||
This sample demonstrates how to use C++ function overloading on the GPU.
|
|
||||||
|
|
||||||
## Key Concepts
|
|
||||||
|
|
||||||
C++ Function Overloading, CUDA Streams and Events
|
|
||||||
|
|
||||||
## Supported SM Architectures
|
|
||||||
|
|
||||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
|
||||||
|
|
||||||
## Supported OSes
|
|
||||||
|
|
||||||
Linux, Windows
|
|
||||||
|
|
||||||
## Supported CPU Architecture
|
|
||||||
|
|
||||||
x86_64, ppc64le, armv7l
|
|
||||||
|
|
||||||
## CUDA APIs involved
|
|
||||||
|
|
||||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
|
||||||
cudaMemcpy, cudaFuncSetCacheConfig, cudaFree, cudaMallocHost, cudaSetDevice, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaFreeHost, cudaMalloc, cudaFuncGetAttributes, cudaGetDeviceCount
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
|
||||||
|
|
||||||
## Build and Run
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
|
||||||
```
|
|
||||||
*_vs<version>.sln - for Visual Studio <version>
|
|
||||||
```
|
|
||||||
Each individual sample has its own set of solution files in its directory.
|
|
||||||
|
|
||||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
|
||||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
|
||||||
|
|
||||||
### Linux
|
|
||||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
|
||||||
```
|
|
||||||
$ cd <sample_dir>
|
|
||||||
$ make
|
|
||||||
```
|
|
||||||
The samples makefiles can take advantage of certain options:
|
|
||||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
|
||||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
|
||||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
|
||||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
|
||||||
* **dbg=1** - build with debug symbols
|
|
||||||
```
|
|
||||||
$ make dbg=1
|
|
||||||
```
|
|
||||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
|
||||||
```
|
|
||||||
$ make SMS="50 60"
|
|
||||||
```
|
|
||||||
|
|
||||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
|
||||||
```
|
|
||||||
$ make HOST_COMPILER=g++
|
|
||||||
```
|
|
||||||
|
|
||||||
## References (for more details)
|
|
||||||
|
|
@ -1,188 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define THREAD_N 256
|
|
||||||
#define N 1024
|
|
||||||
#define DIV_UP(a, b) (((a) + (b) - 1) / (b))
|
|
||||||
|
|
||||||
// Includes, system
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <helper_cuda.h>
|
|
||||||
#include <helper_string.h>
|
|
||||||
#include <helper_math.h>
|
|
||||||
#include "cppOverload_kernel.cuh"
|
|
||||||
|
|
||||||
const char *sampleName = "C++ Function Overloading";
|
|
||||||
|
|
||||||
// Print the sharedSizeBytes, constSizeBytes, localSizeBytes,
// maxThreadsPerBlock, numRegs, ptxVersion and binaryVersion members of `attr`,
// one per line. The three size members are narrowed to int for the %d format.
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly
// one statement: `OUTPUT_ATTR(x);` stays correct after an unbraced if/else,
// and the caller's trailing semicolon no longer produces an empty statement.
// The argument is parenthesized so any expression can be passed safely.
#define OUTPUT_ATTR(attr)                                                \
  do {                                                                   \
    printf("Shared Size: %d\n", (int)(attr).sharedSizeBytes);            \
    printf("Constant Size: %d\n", (int)(attr).constSizeBytes);           \
    printf("Local Size: %d\n", (int)(attr).localSizeBytes);              \
    printf("Max Threads Per Block: %d\n", (attr).maxThreadsPerBlock);    \
    printf("Number of Registers: %d\n", (attr).numRegs);                 \
    printf("PTX Version: %d\n", (attr).ptxVersion);                      \
    printf("Binary Version: %d\n", (attr).binaryVersion);                \
  } while (0)
|
|
||||||
|
|
||||||
// Host-side reference check for the single-input kernel.
// Returns true when hOutput[i] == hInput[i] * a + i for every i in [0, N),
// and false at the first mismatch.
bool check_func1(int *hInput, int *hOutput, int a) {
  int idx = 0;

  // Advance while the device result agrees with the value recomputed on the
  // host; the loop stops at the first disagreement or after N elements.
  while (idx < N && hOutput[idx] == hInput[idx] * a + idx) {
    ++idx;
  }

  return idx == N;
}
|
|
||||||
|
|
||||||
// Host-side reference check for the int2 kernel.
// Returns true when hOutput[i] == (hInput[i].x + hInput[i].y) * a + i for
// every i in [0, N), and false at the first mismatch.
bool check_func2(int2 *hInput, int *hOutput, int a) {
  for (int idx = 0; idx < N; ++idx) {
    const int2 &pair = hInput[idx];
    const int expected = (pair.x + pair.y) * a + idx;

    if (expected != hOutput[idx]) {
      return false;
    }
  }

  return true;
}
|
|
||||||
|
|
||||||
// Host-side reference check for the two-input kernel.
// Returns true when hOutput[i] == (hInput1[i] + hInput2[i]) * a + i for every
// i in [0, N), and false at the first mismatch.
bool check_func3(int *hInput1, int *hInput2, int *hOutput, int a) {
  int idx = 0;

  while (idx < N) {
    const int expected = (hInput1[idx] + hInput2[idx]) * a + idx;

    if (hOutput[idx] != expected) {
      return false;
    }

    ++idx;
  }

  return true;
}
|
|
||||||
|
|
||||||
int main(int argc, const char *argv[]) {
|
|
||||||
int *hInput = NULL;
|
|
||||||
int *hOutput = NULL;
|
|
||||||
int *dInput = NULL;
|
|
||||||
int *dOutput = NULL;
|
|
||||||
|
|
||||||
printf("%s starting...\n", sampleName);
|
|
||||||
|
|
||||||
int deviceCount;
|
|
||||||
checkCudaErrors(cudaGetDeviceCount(&deviceCount));
|
|
||||||
printf("Device Count: %d\n", deviceCount);
|
|
||||||
|
|
||||||
int deviceID = findCudaDevice(argc, argv);
|
|
||||||
cudaDeviceProp prop;
|
|
||||||
checkCudaErrors(cudaGetDeviceProperties(&prop, deviceID));
|
|
||||||
if (prop.major < 2) {
|
|
||||||
printf(
|
|
||||||
"ERROR: cppOverload requires GPU devices with compute SM 2.0 or "
|
|
||||||
"higher.\n");
|
|
||||||
printf("Current GPU device has compute SM%d.%d, Exiting...", prop.major,
|
|
||||||
prop.minor);
|
|
||||||
exit(EXIT_WAIVED);
|
|
||||||
}
|
|
||||||
|
|
||||||
checkCudaErrors(cudaSetDevice(deviceID));
|
|
||||||
|
|
||||||
// Allocate device memory
|
|
||||||
checkCudaErrors(cudaMalloc(&dInput, sizeof(int) * N * 2));
|
|
||||||
checkCudaErrors(cudaMalloc(&dOutput, sizeof(int) * N));
|
|
||||||
|
|
||||||
// Allocate host memory
|
|
||||||
checkCudaErrors(cudaMallocHost(&hInput, sizeof(int) * N * 2));
|
|
||||||
checkCudaErrors(cudaMallocHost(&hOutput, sizeof(int) * N));
|
|
||||||
|
|
||||||
for (int i = 0; i < N * 2; i++) {
|
|
||||||
hInput[i] = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy data from host to device
|
|
||||||
checkCudaErrors(
|
|
||||||
cudaMemcpy(dInput, hInput, sizeof(int) * N * 2, cudaMemcpyHostToDevice));
|
|
||||||
|
|
||||||
// Test C++ overloading
|
|
||||||
bool testResult = true;
|
|
||||||
bool funcResult = true;
|
|
||||||
int a = 1;
|
|
||||||
|
|
||||||
void (*func1)(const int *, int *, int);
|
|
||||||
void (*func2)(const int2 *, int *, int);
|
|
||||||
void (*func3)(const int *, const int *, int *, int);
|
|
||||||
struct cudaFuncAttributes attr;
|
|
||||||
|
|
||||||
// overload function 1
|
|
||||||
func1 = simple_kernel;
|
|
||||||
memset(&attr, 0, sizeof(attr));
|
|
||||||
checkCudaErrors(cudaFuncSetCacheConfig(*func1, cudaFuncCachePreferShared));
|
|
||||||
checkCudaErrors(cudaFuncGetAttributes(&attr, *func1));
|
|
||||||
OUTPUT_ATTR(attr);
|
|
||||||
(*func1)<<<DIV_UP(N, THREAD_N), THREAD_N>>>(dInput, dOutput, a);
|
|
||||||
checkCudaErrors(
|
|
||||||
cudaMemcpy(hOutput, dOutput, sizeof(int) * N, cudaMemcpyDeviceToHost));
|
|
||||||
funcResult = check_func1(hInput, hOutput, a);
|
|
||||||
printf("simple_kernel(const int *pIn, int *pOut, int a) %s\n\n",
|
|
||||||
funcResult ? "PASSED" : "FAILED");
|
|
||||||
testResult &= funcResult;
|
|
||||||
|
|
||||||
// overload function 2
|
|
||||||
func2 = simple_kernel;
|
|
||||||
memset(&attr, 0, sizeof(attr));
|
|
||||||
checkCudaErrors(cudaFuncSetCacheConfig(*func2, cudaFuncCachePreferShared));
|
|
||||||
checkCudaErrors(cudaFuncGetAttributes(&attr, *func2));
|
|
||||||
OUTPUT_ATTR(attr);
|
|
||||||
(*func2)<<<DIV_UP(N, THREAD_N), THREAD_N>>>((int2 *)dInput, dOutput, a);
|
|
||||||
checkCudaErrors(
|
|
||||||
cudaMemcpy(hOutput, dOutput, sizeof(int) * N, cudaMemcpyDeviceToHost));
|
|
||||||
funcResult = check_func2(reinterpret_cast<int2 *>(hInput), hOutput, a);
|
|
||||||
printf("simple_kernel(const int2 *pIn, int *pOut, int a) %s\n\n",
|
|
||||||
funcResult ? "PASSED" : "FAILED");
|
|
||||||
testResult &= funcResult;
|
|
||||||
|
|
||||||
// overload function 3
|
|
||||||
func3 = simple_kernel;
|
|
||||||
memset(&attr, 0, sizeof(attr));
|
|
||||||
checkCudaErrors(cudaFuncSetCacheConfig(*func3, cudaFuncCachePreferShared));
|
|
||||||
checkCudaErrors(cudaFuncGetAttributes(&attr, *func3));
|
|
||||||
OUTPUT_ATTR(attr);
|
|
||||||
(*func3)<<<DIV_UP(N, THREAD_N), THREAD_N>>>(dInput, dInput + N, dOutput, a);
|
|
||||||
checkCudaErrors(
|
|
||||||
cudaMemcpy(hOutput, dOutput, sizeof(int) * N, cudaMemcpyDeviceToHost));
|
|
||||||
funcResult = check_func3(&hInput[0], &hInput[N], hOutput, a);
|
|
||||||
printf(
|
|
||||||
"simple_kernel(const int *pIn1, const int *pIn2, int *pOut, int a) "
|
|
||||||
"%s\n\n",
|
|
||||||
funcResult ? "PASSED" : "FAILED");
|
|
||||||
testResult &= funcResult;
|
|
||||||
|
|
||||||
checkCudaErrors(cudaFree(dInput));
|
|
||||||
checkCudaErrors(cudaFree(dOutput));
|
|
||||||
checkCudaErrors(cudaFreeHost(hOutput));
|
|
||||||
checkCudaErrors(cudaFreeHost(hInput));
|
|
||||||
|
|
||||||
checkCudaErrors(cudaDeviceSynchronize());
|
|
||||||
|
|
||||||
exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE);
|
|
||||||
}
|
|
@ -1,61 +0,0 @@
|
|||||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without
|
|
||||||
* modification, are permitted provided that the following conditions
|
|
||||||
* are met:
|
|
||||||
* * Redistributions of source code must retain the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer.
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
* notice, this list of conditions and the following disclaimer in the
|
|
||||||
* documentation and/or other materials provided with the distribution.
|
|
||||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived
|
|
||||||
* from this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
||||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
||||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
||||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
||||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
||||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
||||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
||||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Overload 1: pOut[g] = pIn[g] * a + g, where g is the global thread index.
// Each thread stages its element in a THREAD_N-sized shared array before
// using it. There is no bounds check on g, so the launch must not create
// threads beyond the end of pIn / pOut, and blockDim.x must not exceed
// THREAD_N.
__global__ void simple_kernel(const int *pIn, int *pOut, int a) {
  __shared__ int staged[THREAD_N];

  const int lane = threadIdx.x;
  const int gid = lane + blockDim.x * blockIdx.x;

  staged[lane] = pIn[gid];
  __syncthreads();

  const int value = staged[lane];
  pOut[gid] = value * a + gid;
}
|
|
||||||
|
|
||||||
// Overload 2: pOut[g] = (pIn[g].x + pIn[g].y) * a + g, where g is the global
// thread index. Each thread stages its int2 element in a THREAD_N-sized
// shared array before using it. There is no bounds check on g, so the launch
// must not create threads beyond the end of pIn / pOut, and blockDim.x must
// not exceed THREAD_N.
__global__ void simple_kernel(const int2 *pIn, int *pOut, int a) {
  __shared__ int2 staged[THREAD_N];

  const int lane = threadIdx.x;
  const int gid = lane + blockDim.x * blockIdx.x;

  staged[lane] = pIn[gid];
  __syncthreads();

  const int2 pair = staged[lane];
  pOut[gid] = (pair.x + pair.y) * a + gid;
}
|
|
||||||
|
|
||||||
// Overload 3: pOut[g] = (pIn1[g] + pIn2[g]) * a + g, where g is the global
// thread index. Each thread stages one element from each input in its own
// THREAD_N-sized shared array before using them. There is no bounds check on
// g, so the launch must not create threads beyond the end of the arrays, and
// blockDim.x must not exceed THREAD_N.
__global__ void simple_kernel(const int *pIn1, const int *pIn2, int *pOut,
                              int a) {
  __shared__ int stagedFirst[THREAD_N];
  __shared__ int stagedSecond[THREAD_N];

  const int lane = threadIdx.x;
  const int gid = lane + blockDim.x * blockIdx.x;

  stagedFirst[lane] = pIn1[gid];
  stagedSecond[lane] = pIn2[gid];
  __syncthreads();

  const int sum = stagedFirst[lane] + stagedSecond[lane];
  pOut[gid] = sum * a + gid;
}
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2017
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cppOverload", "cppOverload_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,112 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>cppOverload_vs2017</RootNamespace>
|
|
||||||
<ProjectName>cppOverload</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
|
||||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
|
||||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
|
||||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v141</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="cppOverload.cu" />
|
|
||||||
<None Include="cppOverload_kernel.cuh" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2019
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cppOverload", "cppOverload_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
@ -1,108 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>cppOverload_vs2019</RootNamespace>
|
|
||||||
<ProjectName>cppOverload</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v142</PlatformToolset>
|
|
||||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/cppOverload.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;compute_89,sm_89;compute_90,sm_90;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 </AdditionalOptions>
|
|
||||||
<Include>./;../../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="cppOverload.cu" />
|
|
||||||
<None Include="cppOverload_kernel.cuh" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 12.5.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2022
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cppOverload", "cppOverload_vs2022.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user