add and update samples for CUDA 11.5

This commit is contained in:
Rutwik Choughule 2021-10-21 16:34:49 +05:30
parent 3342d604fe
commit 1f76a2d110
2796 changed files with 1511725 additions and 827 deletions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

294
Common/dynlink_d3d10.h Normal file
View File

@ -0,0 +1,294 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//--------------------------------------------------------------------------------------
// File: dynlink_d3d10.h
//
// Shortcut macros and functions for using DX objects
//
// Copyright (c) Microsoft Corporation. All rights reserved
//--------------------------------------------------------------------------------------
#ifndef _DYNLINK_D3D10_H_
#define _DYNLINK_D3D10_H_
// Standard Windows includes
#include <windows.h>
#include <initguid.h>
#include <assert.h>
#include <wchar.h>
#include <mmsystem.h>
#include <commctrl.h> // for InitCommonControls()
#include <shellapi.h> // for ExtractIcon()
#include <new.h> // for placement new
#include <shlobj.h>
#include <math.h>
#include <limits.h>
#include <stdio.h>
// CRT's memory leak detection
#if defined(DEBUG) || defined(_DEBUG)
#include <crtdbg.h>
#endif
// Direct3D9 includes
#include <d3d9.h>
// Direct3D10 includes
#include <dxgi.h>
#include <d3d10_1.h>
#include <d3d10.h>
// XInput includes
#include <xinput.h>
// strsafe.h deprecates the old, insecure string functions. If you
// really do not want it to (not recommended), then uncomment the next line
//#define STRSAFE_NO_DEPRECATE
#ifndef STRSAFE_NO_DEPRECATE
#pragma deprecated("strncpy")
#pragma deprecated("wcsncpy")
#pragma deprecated("_tcsncpy")
#pragma deprecated("wcsncat")
#pragma deprecated("strncat")
#pragma deprecated("_tcsncat")
#endif
#pragma warning( disable : 4996 ) // disable deprecated warning
#include <strsafe.h>
#pragma warning( default : 4996 )
#include <DirectXMath.h>
using namespace DirectX;
//--------------------------------------------------------------------------------------
// Structs
//--------------------------------------------------------------------------------------
// Settings for the Direct3D 9 device path. An instance lives in the `d3d9`
// member of DXUTDeviceSettings and is only meaningful when that struct's
// `ver` field is DXUT_D3D9_DEVICE.
// NOTE(review): nothing in this header reads these fields; their exact use
// is up to the code that includes it.
struct DXUTD3D9DeviceSettings
{
UINT AdapterOrdinal;
D3DDEVTYPE DeviceType;
D3DFORMAT AdapterFormat;
DWORD BehaviorFlags;
D3DPRESENT_PARAMETERS pp; // presumably the presentation parameters for the device -- confirm against caller
};
// Settings for the Direct3D 10 device path. An instance lives in the `d3d10`
// member of DXUTDeviceSettings and is only meaningful when that struct's
// `ver` field is DXUT_D3D10_DEVICE.
// NOTE(review): nothing in this header reads these fields; their exact use
// is up to the code that includes it.
struct DXUTD3D10DeviceSettings
{
UINT AdapterOrdinal;
D3D10_DRIVER_TYPE DriverType;
UINT Output;
DXGI_SWAP_CHAIN_DESC sd; // swap-chain description
UINT32 CreateFlags;
UINT32 SyncInterval;
DWORD PresentFlags;
bool AutoCreateDepthStencil; // DXUT will create a depth stencil resource and view if true
DXGI_FORMAT AutoDepthStencilFormat; // presumably the format used when AutoCreateDepthStencil is true -- confirm
};
// Discriminator telling which Direct3D generation a DXUTDeviceSettings
// instance describes.
enum DXUTDeviceVersion { DXUT_D3D9_DEVICE, DXUT_D3D10_DEVICE };
// Tagged union of the D3D9 and D3D10 settings blocks. `ver` selects the
// active member of the anonymous union; the two members share storage, so
// only the one matching `ver` may be read.
struct DXUTDeviceSettings
{
DXUTDeviceVersion ver;
union
{
DXUTD3D9DeviceSettings d3d9; // only valid if ver == DXUT_D3D9_DEVICE
DXUTD3D10DeviceSettings d3d10; // only valid if ver == DXUT_D3D10_DEVICE
};
};
//--------------------------------------------------------------------------------------
// Error codes
//
// Every code is a failure HRESULT built with
// MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, code). The codes run from 0x0901
// to 0x0908 plus 0x090A; 0x0909 is not defined in this header.
//--------------------------------------------------------------------------------------
#define DXUTERR_NODIRECT3D MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0901)
#define DXUTERR_NOCOMPATIBLEDEVICES MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0902)
#define DXUTERR_MEDIANOTFOUND MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0903)
#define DXUTERR_NONZEROREFCOUNT MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0904)
#define DXUTERR_CREATINGDEVICE MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0905)
#define DXUTERR_RESETTINGDEVICE MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0906)
#define DXUTERR_CREATINGDEVICEOBJECTS MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0907)
#define DXUTERR_RESETTINGDEVICEOBJECTS MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0908)
#define DXUTERR_DEVICEREMOVED MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x090A)
// Function-pointer types for the DXGI / D3D10 / D3D10.1 entry points that
// dynlinkLoadD3D10API() resolves at run time with GetProcAddress(). Each
// typedef is the type that the matching sFnPtr_* variable is declared with
// and that the GetProcAddress() result is cast to.
// All of them use the WINAPI calling convention and return HRESULT, except
// LPD3D10STATEBLOCKMASKGETSETTING, which returns BOOL.

// dxgi.dll
typedef HRESULT(WINAPI *LPCREATEDXGIFACTORY)(REFIID, void **);
// Device creation (d3d10.dll / d3d10_1.dll)
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICE)(IDXGIAdapter *, D3D10_DRIVER_TYPE, HMODULE, UINT, UINT32,
ID3D10Device **);
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICE1)(IDXGIAdapter *, D3D10_DRIVER_TYPE, HMODULE, UINT,
D3D10_FEATURE_LEVEL1, UINT, ID3D10Device1 **);
// State blocks and state-block masks (d3d10.dll)
typedef HRESULT(WINAPI *LPD3D10CREATESTATEBLOCK)(ID3D10Device *pDevice, D3D10_STATE_BLOCK_MASK *pStateBlockMask,
ID3D10StateBlock **ppStateBlock);
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKUNION)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
D3D10_STATE_BLOCK_MASK *pResult);
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKINTERSECT)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
D3D10_STATE_BLOCK_MASK *pResult);
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDIFFERENCE)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
D3D10_STATE_BLOCK_MASK *pResult);
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKENABLECAPTURE)(D3D10_STATE_BLOCK_MASK *pMask,
D3D10_DEVICE_STATE_TYPES StateType, UINT RangeStart,
UINT RangeLength);
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDISABLECAPTURE)(D3D10_STATE_BLOCK_MASK *pMask,
D3D10_DEVICE_STATE_TYPES StateType, UINT RangeStart,
UINT RangeLength);
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKENABLEALL)(D3D10_STATE_BLOCK_MASK *pMask);
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDISABLEALL)(D3D10_STATE_BLOCK_MASK *pMask);
// Note: returns BOOL, unlike the other state-block-mask entry points.
typedef BOOL (WINAPI *LPD3D10STATEBLOCKMASKGETSETTING)(D3D10_STATE_BLOCK_MASK *pMask,
D3D10_DEVICE_STATE_TYPES StateType, UINT Entry);
// Effect compilation / creation from an in-memory buffer (d3d10.dll)
typedef HRESULT(WINAPI *LPD3D10COMPILEEFFECTFROMMEMORY)(void *pData, SIZE_T DataLength, LPCSTR pSrcFileName,
CONST D3D10_SHADER_MACRO *pDefines,
ID3D10Include *pInclude, UINT HLSLFlags, UINT FXFlags,
ID3D10Blob **ppCompiledEffect, ID3D10Blob **ppErrors);
typedef HRESULT(WINAPI *LPD3D10CREATEEFFECTFROMMEMORY)(void *pData, SIZE_T DataLength, UINT FXFlags,
ID3D10Device *pDevice,
ID3D10EffectPool *pEffectPool,
ID3D10Effect **ppEffect);
typedef HRESULT(WINAPI *LPD3D10CREATEEFFECTPOOLFROMMEMORY)(void *pData, SIZE_T DataLength, UINT FXFlags,
ID3D10Device *pDevice, ID3D10EffectPool **ppEffectPool);
// Combined device + swap-chain creation (d3d10.dll / d3d10_1.dll)
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICEANDSWAPCHAIN)(IDXGIAdapter *pAdapter,
D3D10_DRIVER_TYPE DriverType,
HMODULE Software,
UINT Flags,
UINT SDKVersion,
DXGI_SWAP_CHAIN_DESC *pSwapChainDesc,
IDXGISwapChain **ppSwapChain,
ID3D10Device **ppDevice);
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICEANDSWAPCHAIN1)(IDXGIAdapter *pAdapter,
D3D10_DRIVER_TYPE DriverType,
HMODULE Software,
UINT Flags,
D3D10_FEATURE_LEVEL1 HardwareLevel,
UINT SDKVersion,
DXGI_SWAP_CHAIN_DESC *pSwapChainDesc,
IDXGISwapChain **ppSwapChain,
ID3D10Device1 **ppDevice);
// Module handles and function pointers.
//
// The g_hMod* handles are assigned by dynlinkLoadD3D10API() and released by
// dynlinkUnloadD3D10API(); the sFnPtr_* pointers are filled in by
// dynlinkLoadD3D10API() from GetProcAddress(). Everything starts out NULL.
// Because these are `static` variables in a header, each translation unit
// that includes this file has its own private copy of all of them.
static HMODULE g_hModDXGI = NULL;
static HMODULE g_hModD3D10 = NULL;
static HMODULE g_hModD3D101 = NULL;
static LPCREATEDXGIFACTORY sFnPtr_CreateDXGIFactory = NULL;
static LPD3D10CREATESTATEBLOCK sFnPtr_D3D10CreateStateBlock = NULL;
static LPD3D10CREATEDEVICE sFnPtr_D3D10CreateDevice = NULL;
static LPD3D10CREATEDEVICE1 sFnPtr_D3D10CreateDevice1 = NULL;
static LPD3D10STATEBLOCKMASKUNION sFnPtr_D3D10StateBlockMaskUnion = NULL;
static LPD3D10STATEBLOCKMASKINTERSECT sFnPtr_D3D10StateBlockMaskIntersect = NULL;
static LPD3D10STATEBLOCKMASKDIFFERENCE sFnPtr_D3D10StateBlockMaskDifference = NULL;
static LPD3D10STATEBLOCKMASKENABLECAPTURE sFnPtr_D3D10StateBlockMaskEnableCapture = NULL;
static LPD3D10STATEBLOCKMASKDISABLECAPTURE sFnPtr_D3D10StateBlockMaskDisableCapture = NULL;
static LPD3D10STATEBLOCKMASKENABLEALL sFnPtr_D3D10StateBlockMaskEnableAll = NULL;
static LPD3D10STATEBLOCKMASKDISABLEALL sFnPtr_D3D10StateBlockMaskDisableAll = NULL;
static LPD3D10STATEBLOCKMASKGETSETTING sFnPtr_D3D10StateBlockMaskGetSetting = NULL;
static LPD3D10COMPILEEFFECTFROMMEMORY sFnPtr_D3D10CompileEffectFromMemory = NULL;
static LPD3D10CREATEEFFECTFROMMEMORY sFnPtr_D3D10CreateEffectFromMemory = NULL;
static LPD3D10CREATEEFFECTPOOLFROMMEMORY sFnPtr_D3D10CreateEffectPoolFromMemory = NULL;
static LPD3D10CREATEDEVICEANDSWAPCHAIN sFnPtr_D3D10CreateDeviceAndSwapChain = NULL;
static LPD3D10CREATEDEVICEANDSWAPCHAIN1 sFnPtr_D3D10CreateDeviceAndSwapChain1 = NULL;
// unload the D3D10 DLLs
static bool dynlinkUnloadD3D10API(void)
{
if (g_hModD3D10)
{
FreeLibrary(g_hModD3D10);
g_hModD3D10 = NULL;
}
if (g_hModDXGI)
{
FreeLibrary(g_hModDXGI);
g_hModDXGI = NULL;
}
if (g_hModD3D101)
{
FreeLibrary(g_hModD3D101);
g_hModD3D101 = NULL;
}
return true;
}
// Dynamically load the D3D10 DLLs loaded and map the function pointers
static bool dynlinkLoadD3D10API(void)
{
// First check to see if the D3D10 Library is present.
// if it succeeds, then we can call GetProcAddress to grab all of the DX10 functions
g_hModD3D10 = LoadLibrary("d3d10.dll");
if (g_hModD3D10 != NULL)
{
sFnPtr_D3D10CreateStateBlock = (LPD3D10CREATESTATEBLOCK) GetProcAddress(g_hModD3D10, "D3D10CreateStateBlock");
sFnPtr_D3D10CreateDevice = (LPD3D10CREATEDEVICE) GetProcAddress(g_hModD3D10, "D3D10CreateDevice");
sFnPtr_D3D10StateBlockMaskUnion = (LPD3D10STATEBLOCKMASKUNION) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskUnion");
sFnPtr_D3D10StateBlockMaskIntersect = (LPD3D10STATEBLOCKMASKINTERSECT) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskIntersect");
sFnPtr_D3D10StateBlockMaskDifference = (LPD3D10STATEBLOCKMASKDIFFERENCE) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDifference");
sFnPtr_D3D10StateBlockMaskEnableCapture = (LPD3D10STATEBLOCKMASKENABLECAPTURE) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskEnableCapture");
sFnPtr_D3D10StateBlockMaskDisableCapture = (LPD3D10STATEBLOCKMASKDISABLECAPTURE)GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDisableCapture");
sFnPtr_D3D10StateBlockMaskEnableAll = (LPD3D10STATEBLOCKMASKENABLEALL) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskEnableAll");
sFnPtr_D3D10StateBlockMaskDisableAll = (LPD3D10STATEBLOCKMASKDISABLEALL) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDisableAll");
sFnPtr_D3D10StateBlockMaskGetSetting = (LPD3D10STATEBLOCKMASKGETSETTING) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskGetSetting");
sFnPtr_D3D10CompileEffectFromMemory = (LPD3D10COMPILEEFFECTFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CompileEffectFromMemory");
sFnPtr_D3D10CreateEffectFromMemory = (LPD3D10CREATEEFFECTFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CreateEffectFromMemory");
sFnPtr_D3D10CreateEffectPoolFromMemory = (LPD3D10CREATEEFFECTPOOLFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CreateEffectPoolFromMemory");
sFnPtr_D3D10CreateDeviceAndSwapChain = (LPD3D10CREATEDEVICEANDSWAPCHAIN) GetProcAddress(g_hModD3D10, "D3D10CreateDeviceAndSwapChain");
}
g_hModDXGI = LoadLibrary("dxgi.dll");
if (g_hModDXGI)
{
sFnPtr_CreateDXGIFactory = (LPCREATEDXGIFACTORY) GetProcAddress(g_hModDXGI , "CreateDXGIFactory");
}
// This may fail if this machine isn't Windows Vista SP1 or later
g_hModD3D101 = LoadLibrary("d3d10_1.dll");
if (g_hModD3D101 != NULL)
{
sFnPtr_D3D10CreateDevice1 = (LPD3D10CREATEDEVICE1) GetProcAddress(g_hModD3D101, "D3D10CreateDevice1");
sFnPtr_D3D10CreateDeviceAndSwapChain1 = (LPD3D10CREATEDEVICEANDSWAPCHAIN1) GetProcAddress(g_hModD3D101, "D3D10CreateDeviceAndSwapChain1");
}
if (g_hModD3D10 == NULL || g_hModDXGI == NULL || g_hModD3D101 == NULL)
{
dynlinkUnloadD3D10API();
return false;
}
return true;
}
#endif

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

78
Common/multithreading.cpp Normal file
View File

@ -0,0 +1,78 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <multithreading.h>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// Start a new Windows thread that runs `func` with `data` as its argument.
// CreateThread() is called with default security attributes, default stack
// size, no creation flags, and without requesting the thread id; whatever
// handle it yields is returned to the caller unchanged.
CUTThread cutStartThread(CUT_THREADROUTINE func, void *data) {
  LPTHREAD_START_ROUTINE entryPoint = (LPTHREAD_START_ROUTINE)func;
  CUTThread handle = CreateThread(NULL, 0, entryPoint, data, 0, NULL);
  return handle;
}
// Block until `thread` has finished, then release its handle.
// The handle must not be used again after this call.
void cutEndThread(CUTThread thread) {
  // No timeout: wait for as long as the thread takes.
  (void)WaitForSingleObject(thread, INFINITE);

  (void)CloseHandle(thread);
}
// Forcibly stop `thread` (exit code 0) and release its handle.
// The handle must not be used again after this call.
void cutDestroyThread(CUTThread thread) {
  const DWORD exitCode = 0;
  (void)TerminateThread(thread, exitCode);

  (void)CloseHandle(thread);
}
// Wait for multiple threads
void cutWaitForThreads(const CUTThread *threads, int num) {
WaitForMultipleObjects(num, threads, true, INFINITE);
for (int i = 0; i < num; i++) {
CloseHandle(threads[i]);
}
}
#else
// Create thread.
//
// Starts a POSIX thread with default attributes that runs `func(data)` and
// returns its handle.
//
// If pthread_create() fails, the handle it was given is left undefined, so a
// value-initialised pthread_t is returned instead of indeterminate memory.
// This mirrors the Windows branch, where the result of CreateThread() is
// returned as-is.
// NOTE(review): a value-initialised pthread_t is not a portable "invalid
// thread" sentinel; callers that need to detect failure reliably would need
// a richer interface.
CUTThread cutStartThread(CUT_THREADROUTINE func, void *data) {
  pthread_t handle;

  if (pthread_create(&handle, NULL, func, data) != 0) {
    return pthread_t();
  }

  return handle;
}
// Block the caller until `thread` has finished. The thread's return value
// is not collected.
void cutEndThread(CUTThread thread) {
  void **ignoredResult = NULL;
  pthread_join(thread, ignoredResult);
}
// Request cancellation of `thread`.
// NOTE(review): the thread is neither joined nor detached here, so this
// function only sends the cancellation request; confirm whether callers are
// expected to reclaim the thread themselves.
void cutDestroyThread(CUTThread thread) {
  (void)pthread_cancel(thread);
}
// Join each of the `num` threads in `threads`, in array order, by handing
// them one at a time to cutEndThread(). Does nothing when `num` is <= 0.
void cutWaitForThreads(const CUTThread *threads, int num) {
  int idx = 0;
  while (idx < num) {
    cutEndThread(threads[idx]);
    ++idx;
  }
}
#endif

76
Common/multithreading.h Normal file
View File

@ -0,0 +1,76 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef MULTITHREADING_H
#define MULTITHREADING_H
// Simple portable thread library.
//
// Each platform branch provides:
//   CUTThread          - the native thread handle type
//   CUT_THREADROUTINE  - pointer type of a thread entry function
//   CUT_THREADPROC     - return-type prefix for declaring an entry function
//   CUT_THREADEND      - statement that ends an entry function

// Windows threads.
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#include <windows.h>
typedef HANDLE CUTThread;
typedef unsigned(WINAPI *CUT_THREADROUTINE)(void *);
#define CUT_THREADPROC unsigned WINAPI
#define CUT_THREADEND return 0
#else
// POSIX threads.
#include <pthread.h>
typedef pthread_t CUTThread;
typedef void *(*CUT_THREADROUTINE)(void *);
// NOTE(review): CUT_THREADPROC expands to plain `void` here, while
// CUT_THREADROUTINE is a pointer to a function returning `void *`. A function
// declared with CUT_THREADPROC therefore does not match CUT_THREADROUTINE
// without a cast -- confirm how callers pass their routines.
#define CUT_THREADPROC void
#define CUT_THREADEND
#endif
#ifdef __cplusplus
extern "C" {
#endif
// Create a thread that runs the given routine with `data` as its argument
// and return its handle.
CUTThread cutStartThread(CUT_THREADROUTINE, void *data);
// Wait for `thread` to finish.
void cutEndThread(CUTThread thread);
// Destroy `thread` without waiting for it to finish.
void cutDestroyThread(CUTThread thread);
// Wait for all `num` threads in the `threads` array to finish.
void cutWaitForThreads(const CUTThread *threads, int num);
#ifdef __cplusplus
} //extern "C"
#endif
#endif //MULTITHREADING_H

111
Common/nvMath.h Normal file
View File

@ -0,0 +1,111 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// Template math library for common 3D functionality
//
// This code is in part derived from glh, a cross platform glut helper library.
// The copyright for glh follows this notice.
//
// Copyright (c) NVIDIA Corporation. All rights reserved.
////////////////////////////////////////////////////////////////////////////////
/*
Copyright (c) 2000 Cass Everitt
Copyright (c) 2000 NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or
without modification, are permitted provided that the following
conditions are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
* The names of contributors to this software may not be used
to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
Cass Everitt - cass@r3.nu
*/
#ifndef NV_MATH_H
#define NV_MATH_H
#include <math.h>
#include <nvVector.h>
#include <nvMatrix.h>
#include <nvQuaternion.h>
#define NV_PI float(3.1415926535897932384626433832795)
namespace nv
{
typedef vec2<float> vec2f;
typedef vec3<float> vec3f;
typedef vec3<int> vec3i;
typedef vec3<unsigned int> vec3ui;
typedef vec4<float> vec4f;
typedef matrix4<float> matrix4f;
typedef quaternion<float> quaternionf;
inline void applyRotation(const quaternionf &r)
{
float angle;
vec3f axis;
r.get_value(axis, angle);
glRotatef(angle/3.1415926f * 180.0f, axis[0], axis[1], axis[2]);
}
};
#endif

540
Common/nvMatrix.h Normal file
View File

@ -0,0 +1,540 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// Template math library for common 3D functionality
//
// nvMatrix.h - template matrix code
//
// This code is in part derived from glh, a cross platform glut helper library.
// The copyright for glh follows this notice.
//
// Copyright (c) NVIDIA Corporation. All rights reserved.
////////////////////////////////////////////////////////////////////////////////
/*
Copyright (c) 2000 Cass Everitt
Copyright (c) 2000 NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or
without modification, are permitted provided that the following
conditions are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
* The names of contributors to this software may not be used
to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
Cass Everitt - cass@r3.nu
*/
#ifndef NV_MATRIX_H
#define NV_MATRIX_H
namespace nv
{
    template <class T> class vec2;
    template <class T> class vec3;
    template <class T> class vec4;

    ////////////////////////////////////////////////////////////////////////////////
    //
    //  Matrix
    //
    //  4x4 matrix of T. The 16 values live in one flat array; element (row, col)
    //  is stored at _array[row + 4 * col] (see element()), so the four entries
    //  of one column are adjacent in memory.
    //
    ////////////////////////////////////////////////////////////////////////////////
    template<class T>
    class matrix4
    {
        public:

            // Identity matrix.
            matrix4()
            {
                make_identity();
            }

            // Every element set to t.
            matrix4(T t)
            {
                set_value(t);
            }

            // Copies 16 values from m in storage order (see set_value(const T *)).
            matrix4(const T *m)
            {
                set_value(m);
            }

            // Initialises the named members _11 .. _44 in declaration order.
            // NOTE(review): the named members alias _array[0..15] (assuming the
            // anonymous struct has no padding), so the arguments fill the storage
            // linearly: a01 lands in _array[1], which element() addresses as
            // (row 1, col 0). Confirm the intended argument order with callers.
            matrix4(T a00, T a01, T a02, T a03,
                    T a10, T a11, T a12, T a13,
                    T a20, T a21, T a22, T a23,
                    T a30, T a31, T a32, T a33) :
                _11(a00), _12(a01), _13(a02), _14(a03),
                _21(a10), _22(a11), _23(a12), _24(a13),
                _31(a20), _32(a21), _33(a22), _34(a23),
                _41(a30), _42(a31), _43(a32), _44(a33)
            {}

            // Copies the 16 elements into mp in storage order
            // (mp[row + 4 * col] = element(row, col)). mp must hold 16 values.
            void get_value(T *mp) const
            {
                for (int k = 0; k < 16; k++)
                {
                    mp[k] = _array[k];
                }
            }

            // Read-only pointer to the 16 stored elements.
            const T *get_value() const
            {
                return _array;
            }

            // Loads the 16 elements from mp in storage order. Takes a pointer to
            // const so that read-only data (and the matrix4(const T *)
            // constructor) can use it.
            void set_value(const T *mp)
            {
                for (int k = 0; k < 16; k++)
                {
                    _array[k] = mp[k];
                }
            }

            // Sets every element to r.
            void set_value(T r)
            {
                for (int k = 0; k < 16; k++)
                {
                    _array[k] = r;
                }
            }

            // Ones on the diagonal, zeros elsewhere.
            void make_identity()
            {
                for (int row = 0; row < 4; row++)
                    for (int col = 0; col < 4; col++)
                    {
                        element(row,col) = (row == col) ? T(1) : T(0);
                    }
            }

            // set a uniform scale
            // Only the first three diagonal entries are written.
            void set_scale(T s)
            {
                element(0,0) = s;
                element(1,1) = s;
                element(2,2) = s;
            }

            // Per-axis scale: writes s[0..2] to the first three diagonal entries.
            void set_scale(const vec3<T> &s)
            {
                for (int i = 0; i < 3; i++)
                {
                    element(i,i) = s[i];
                }
            }

            // Writes t[0..2] into rows 0..2 of column 3.
            void set_translate(const vec3<T> &t)
            {
                for (int i = 0; i < 3; i++)
                {
                    element(i,3) = t[i];
                }
            }

            // Replaces row r with t.
            void set_row(int r, const vec4<T> &t)
            {
                for (int i = 0; i < 4; i++)
                {
                    element(r,i) = t[i];
                }
            }

            // Replaces column c with t.
            void set_column(int c, const vec4<T> &t)
            {
                for (int i = 0; i < 4; i++)
                {
                    element(i,c) = t[i];
                }
            }

            // Returns a copy of row r.
            vec4<T> get_row(int r) const
            {
                vec4<T> v;

                for (int i = 0; i < 4; i++)
                {
                    v[i] = element(r,i);
                }

                return v;
            }

            // Returns a copy of column c.
            vec4<T> get_column(int c) const
            {
                vec4<T> v;

                for (int i = 0; i < 4; i++)
                {
                    v[i] = element(i,c);
                }

                return v;
            }

            // Inverse of m by Gauss-Jordan elimination with scaled partial
            // pivoting. Returns the identity matrix when m is singular.
            friend matrix4 inverse(const matrix4 &m)
            {
                matrix4 minv;   // identity; returned untouched for singular input

                // Augmented system [ m | I ], one 8-wide row per matrix row.
                // Rows are exchanged by swapping the pointers in s[].
                T r1[8], r2[8], r3[8], r4[8];
                T *s[4], *tmprow;

                s[0] = &r1[0];
                s[1] = &r2[0];
                s[2] = &r3[0];
                s[3] = &r4[0];

                // 'register' was dropped here: it is ill-formed since C++17.
                int i,j,p,jj;

                for (i=0; i<4; i++)
                {
                    for (j=0; j<4; j++)
                    {
                        s[i][j] = m.element(i,j);
                        s[i][j+4] = (i==j) ? T(1) : T(0);
                    }
                }

                // scp[i] is the largest magnitude in row i; pivot candidates are
                // compared after division by it.
                T scp[4];

                for (i=0; i<4; i++)
                {
                    scp[i] = T(fabs(s[i][0]));

                    for (j=1; j<4; j++)
                        if (T(fabs(s[i][j])) > scp[i])
                        {
                            scp[i] = T(fabs(s[i][j]));
                        }

                    if (scp[i] == T(0))
                    {
                        return minv;    // singular matrix! (all-zero row)
                    }
                }

                int pivot_to;
                T scp_max;

                for (i=0; i<4; i++)
                {
                    // select pivot row
                    pivot_to = i;
                    scp_max = T(fabs(s[i][i]/scp[i]));

                    // find out which row should be on top
                    for (p=i+1; p<4; p++)
                        if (T(fabs(s[p][i]/scp[p])) > scp_max)
                        {
                            scp_max = T(fabs(s[p][i]/scp[p]));
                            pivot_to = p;
                        }

                    // Pivot if necessary
                    if (pivot_to != i)
                    {
                        tmprow = s[i];
                        s[i] = s[pivot_to];
                        s[pivot_to] = tmprow;
                        T tmpscp;
                        tmpscp = scp[i];
                        scp[i] = scp[pivot_to];
                        scp[pivot_to] = tmpscp;
                    }

                    // The chosen pivot is the largest candidate in this column;
                    // if it is zero the matrix is singular. Checking on every
                    // column (not only the last one) avoids dividing by zero
                    // below.
                    if (s[i][i] == T(0))
                    {
                        return minv;    // singular matrix!
                    }

                    T mji;

                    // perform gaussian elimination
                    for (j=i+1; j<4; j++)
                    {
                        mji = s[j][i]/s[i][i];
                        s[j][i] = T(0);

                        for (jj=i+1; jj<8; jj++)
                        {
                            s[j][jj] -= mji*s[i][jj];
                        }
                    }
                }

                //
                // Now we have an upper triangular matrix.
                //
                //  x x x x | y y y y
                //  0 x x x | y y y y
                //  0 0 x x | y y y y
                //  0 0 0 x | y y y y
                //
                //  we'll back substitute to get the inverse
                //
                //  1 0 0 0 | z z z z
                //  0 1 0 0 | z z z z
                //  0 0 1 0 | z z z z
                //  0 0 0 1 | z z z z
                //

                T mij;

                for (i=3; i>0; i--)
                {
                    for (j=i-1; j > -1; j--)
                    {
                        mij = s[j][i]/s[i][i];

                        for (jj=j+1; jj<8; jj++)
                        {
                            s[j][jj] -= mij*s[i][jj];
                        }
                    }
                }

                // The left half is now diagonal; dividing by the diagonal entry
                // normalises each row of the right half.
                for (i=0; i<4; i++)
                    for (j=0; j<4; j++)
                    {
                        minv(i,j) = s[i][j+4] / s[i][i];
                    }

                return minv;
            }

            // Returns m with rows and columns exchanged.
            friend matrix4 transpose(const matrix4 &m)
            {
                matrix4 mtrans;

                for (int i=0; i<4; i++)
                    for (int j=0; j<4; j++)
                    {
                        mtrans(i,j) = m.element(j,i);
                    }

                return mtrans;
            }

            // *this = *this * rhs. The product is accumulated in a separate
            // matrix so that the call stays correct when rhs is *this.
            matrix4 &operator *= (const matrix4 &rhs)
            {
                matrix4 product(T(0));

                for (int i=0; i < 4; i++)
                    for (int j=0; j < 4; j++)
                        for (int c=0; c < 4; c++)
                        {
                            product.element(i,j) += element(i,c) * rhs(c,j);
                        }

                *this = product;
                return *this;
            }

            // Matrix product lhs * rhs.
            friend matrix4 operator * (const matrix4 &lhs, const matrix4 &rhs)
            {
                matrix4 r(T(0));

                for (int i=0; i < 4; i++)
                    for (int j=0; j < 4; j++)
                        for (int c=0; c < 4; c++)
                        {
                            r.element(i,j) += lhs(i,c) * rhs(c,j);
                        }

                return r;
            }

            // dst = M * src
            vec4<T> operator *(const vec4<T> &src) const
            {
                vec4<T> r;

                for (int i = 0; i < 4; i++)
                    r[i]  = (src[0] * element(i,0) + src[1] * element(i,1) +
                             src[2] * element(i,2) + src[3] * element(i,3));

                return r;
            }

            // dst = src * M
            friend vec4<T> operator *(const vec4<T> &lhs, const matrix4 &rhs)
            {
                vec4<T> r;

                for (int i = 0; i < 4; i++)
                    r[i]  = (lhs[0] * rhs.element(0,i) + lhs[1] * rhs.element(1,i) +
                             lhs[2] * rhs.element(2,i) + lhs[3] * rhs.element(3,i));

                return r;
            }

            // Element access by (row, col); no bounds checking.
            T &operator()(int row, int col)
            {
                return element(row,col);
            }

            const T &operator()(int row, int col) const
            {
                return element(row,col);
            }

            // row | (col << 2) equals row + 4 * col for row, col in [0, 3].
            T &element(int row, int col)
            {
                return _array[row | (col<<2)];
            }

            const T &element(int row, int col) const
            {
                return _array[row | (col<<2)];
            }

            // Multiplies every element by r.
            matrix4 &operator *= (const T &r)
            {
                for (int i = 0; i < 4; ++i)
                {
                    element(0,i) *= r;
                    element(1,i) *= r;
                    element(2,i) *= r;
                    element(3,i) *= r;
                }

                return *this;
            }

            // Element-wise addition.
            matrix4 &operator += (const matrix4 &mat)
            {
                for (int i = 0; i < 4; ++i)
                {
                    element(0,i) += mat.element(0,i);
                    element(1,i) += mat.element(1,i);
                    element(2,i) += mat.element(2,i);
                    element(3,i) += mat.element(3,i);
                }

                return *this;
            }

            // True when all 16 elements compare equal.
            friend bool operator == (const matrix4 &lhs, const matrix4 &rhs)
            {
                for (int i = 0; i < 16; i++)
                {
                    if (!(lhs._array[i] == rhs._array[i]))
                    {
                        return false;
                    }
                }

                return true;
            }

            // True when at least one element differs, i.e. the exact negation of
            // operator ==. (The previous version returned true only when all 16
            // elements differed.)
            friend bool operator != (const matrix4 &lhs, const matrix4 &rhs)
            {
                return !(lhs == rhs);
            }

            union
            {
                struct
                {
                    T _11, _12, _13, _14;   // standard names for components
                    T _21, _22, _23, _24;   // standard names for components
                    T _31, _32, _33, _34;   // standard names for components
                    T _41, _42, _43, _44;   // standard names for components
                };
                T _array[16];     // array access
            };
    };

};
#endif

530
Common/nvQuaternion.h Normal file
View File

@ -0,0 +1,530 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// Template math library for common 3D functionality
//
// nvQuaternion.h - quaternion template and utility functions
//
// This code is in part derived from glh, a cross platform glut helper library.
// The copyright for glh follows this notice.
//
// Copyright (c) NVIDIA Corporation. All rights reserved.
////////////////////////////////////////////////////////////////////////////////
/*
Copyright (c) 2000 Cass Everitt
Copyright (c) 2000 NVIDIA Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or
without modification, are permitted provided that the following
conditions are met:
* Redistributions of source code must retain the above
copyright notice, this list of conditions and the following
disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
* The names of contributors to this software may not be used
to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
Cass Everitt - cass@r3.nu
*/
#ifndef NV_QUATERNION_H
#define NV_QUATERNION_H
namespace nv
{
    template <class T> class vec2;
    template <class T> class vec3;
    template <class T> class vec4;
    // Forward-declared like the vector templates so this header does not rely
    // on nvMatrix.h having been included first.
    template <class T> class matrix4;

    ////////////////////////////////////////////////////////////////////////////////
    //
    //  Quaternion
    //
    //  Components are stored as (x, y, z, w); _array[0..2] alias x, y, z and
    //  _array[3] aliases w.
    //
    ////////////////////////////////////////////////////////////////////////////////

    template< class T>
    class quaternion
    {
        public:

            // All four components zero. Note that this is NOT the identity
            // rotation (which is 0, 0, 0, 1).
            quaternion() : x(0.0), y(0.0), z(0.0), w(0.0)
            {
            }

            // From four values in (x, y, z, w) order.
            quaternion(const T v[4])
            {
                set_value(v);
            }

            // q0..q3 are stored as x, y, z, w.
            quaternion(T q0, T q1, T q2, T q3)
            {
                set_value(q0, q1, q2, q3);
            }

            // See set_value(const matrix4<T> &).
            quaternion(const matrix4<T> &m)
            {
                set_value(m);
            }

            // Rotation of 'radians' about 'axis'.
            quaternion(const vec3<T> &axis, T radians)
            {
                set_value(axis, radians);
            }

            // Rotation taking the direction rotateFrom to the direction rotateTo.
            quaternion(const vec3<T> &rotateFrom, const vec3<T> &rotateTo)
            {
                set_value(rotateFrom, rotateTo);
            }

            // See the four-vector set_value() overload.
            quaternion(const vec3<T> &from_look, const vec3<T> &from_up,
                       const vec3<T> &to_look, const vec3<T> &to_up)
            {
                set_value(from_look, from_up, to_look, to_up);
            }

            // Read-only pointer to the four components (x, y, z, w).
            const T *get_value() const
            {
                return  &_array[0];
            }

            // Copies the components out as q0 = x, q1 = y, q2 = z, q3 = w.
            void get_value(T &q0, T &q1, T &q2, T &q3) const
            {
                q0 = _array[0];
                q1 = _array[1];
                q2 = _array[2];
                q3 = _array[3];
            }

            quaternion &set_value(T q0, T q1, T q2, T q3)
            {
                _array[0] = q0;
                _array[1] = q1;
                _array[2] = q2;
                _array[3] = q3;
                return *this;
            }

            // Axis/angle form: radians = 2 * acos(w). When the angle is exactly
            // zero the axis is reported as (0, 0, 1); otherwise it is the
            // normalised (x, y, z) part.
            void get_value(vec3<T> &axis, T &radians) const
            {
                radians = T(acos(_array[3]) * T(2.0));

                if (radians == T(0.0))
                {
                    axis = vec3<T>(0.0, 0.0, 1.0);
                }
                else
                {
                    axis[0] = _array[0];
                    axis[1] = _array[1];
                    axis[2] = _array[2];
                    axis = normalize(axis);
                }
            }

            // Writes the rotation as a 4x4 matrix into m. The quaternion does
            // not have to be normalised: the terms are scaled by 2 / |q|^2, and
            // an all-zero quaternion produces the identity matrix.
            void get_value(matrix4<T> &m) const
            {
                T s, xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz;

                T norm = _array[0] * _array[0] + _array[1] * _array[1] + _array[2] * _array[2] + _array[3] * _array[3];

                s = (norm == T(0.0)) ? T(0.0) : (T(2.0) / norm);

                xs = _array[0] * s;
                ys = _array[1] * s;
                zs = _array[2] * s;

                wx = _array[3] * xs;
                wy = _array[3] * ys;
                wz = _array[3] * zs;

                xx = _array[0] * xs;
                xy = _array[0] * ys;
                xz = _array[0] * zs;

                yy = _array[1] * ys;
                yz = _array[1] * zs;
                zz = _array[2] * zs;

                m(0,0) = T(T(1.0) - (yy + zz));
                m(1,0) = T(xy + wz);
                m(2,0) = T(xz - wy);

                m(0,1) = T(xy - wz);
                m(1,1) = T(T(1.0) - (xx + zz));
                m(2,1) = T(yz + wx);

                m(0,2) = T(xz + wy);
                m(1,2) = T(yz - wx);
                m(2,2) = T(T(1.0) - (xx + yy));

                m(3,0) = m(3,1) = m(3,2) = m(0,3) = m(1,3) = m(2,3) = T(0.0);
                m(3,3) = T(1.0);
            }

            // Loads (x, y, z, w) from qp[0..3].
            quaternion &set_value(const T *qp)
            {
                for (int i = 0; i < 4; i++)
                {
                    _array[i] = qp[i];
                }

                return *this;
            }

            // Extracts a quaternion from the upper-left 3x3 block of m.
            // NOTE(review): the sign convention of the off-diagonal differences
            // is the opposite of the one get_value(matrix4<T> &) writes; that
            // is left untouched here - confirm against callers before changing.
            quaternion &set_value(const matrix4<T> &m)
            {
                T tr, s;
                int i, j, k;
                const int nxt[3] = { 1, 2, 0 };

                tr = m(0,0) + m(1,1) + m(2,2);

                if (tr > T(0))
                {
                    s = T(sqrt(tr + m(3,3)));
                    _array[3] = T(s * 0.5);
                    s = T(0.5) / s;

                    _array[0] = T((m(1,2) - m(2,1)) * s);
                    _array[1] = T((m(2,0) - m(0,2)) * s);
                    _array[2] = T((m(0,1) - m(1,0)) * s);
                }
                else
                {
                    // i is the index of the largest diagonal entry, (i, j, k) a
                    // cyclic permutation of (0, 1, 2).
                    i = 0;

                    if (m(1,1) > m(0,0))
                    {
                        i = 1;
                    }

                    if (m(2,2) > m(i,i))
                    {
                        i = 2;
                    }

                    j = nxt[i];
                    k = nxt[j];

                    // Uses the diagonal term m(i,i); the previous m(i,j) was an
                    // off-diagonal element and gave a wrong magnitude.
                    s = T(sqrt((m(i,i) - (m(j,j) + m(k,k))) + T(1.0)));

                    _array[i] = T(s * 0.5);
                    s = T(0.5 / s);

                    _array[3] = T((m(j,k) - m(k,j)) * s);
                    _array[j] = T((m(i,j) + m(j,i)) * s);
                    _array[k] = T((m(i,k) + m(k,i)) * s);
                }

                return *this;
            }

            // Rotation of theta radians about axis. The axis does not have to be
            // normalised; a zero-length axis yields the identity (0, 0, 0, 1).
            quaternion &set_value(const vec3<T> &axis, T theta)
            {
                T sqnorm = square_norm(axis);

                if (sqnorm == T(0.0))
                {
                    // axis too small.
                    x = y = z = T(0.0);
                    w = T(1.0);
                }
                else
                {
                    theta *= T(0.5);
                    T sin_theta = T(sin(theta));

                    if (sqnorm != T(1))
                    {
                        sin_theta /= T(sqrt(sqnorm));
                    }

                    x = sin_theta * axis[0];
                    y = sin_theta * axis[1];
                    z = sin_theta * axis[2];
                    w = T(cos(theta));
                }

                return *this;
            }

            // Rotation that takes the direction rotateFrom onto rotateTo.
            quaternion &set_value(const vec3<T> &rotateFrom, const vec3<T> &rotateTo)
            {
                vec3<T> p1, p2;
                T alpha;

                p1 = normalize(rotateFrom);
                p2 = normalize(rotateTo);

                alpha = dot(p1, p2);

                if (alpha == T(1.0))
                {
                    // Same direction: no rotation. Use the identity quaternion,
                    // as the zero-axis case of set_value(axis, theta) does; the
                    // default constructor would give the all-zero quaternion.
                    set_value(T(0.0), T(0.0), T(0.0), T(1.0));
                    return *this;
                }

                // ensures that the anti-parallel case leads to a positive dot
                if (alpha == T(-1.0))
                {
                    // Build any vector that is not parallel to p1, make it
                    // perpendicular to p1 and rotate by pi around it.
                    vec3<T> v;

                    if (p1[0] != p1[1] || p1[0] != p1[2])
                    {
                        v = vec3<T>(p1[1], p1[2], p1[0]);
                    }
                    else
                    {
                        v = vec3<T>(-p1[0], p1[1], p1[2]);
                    }

                    v -= p1 * dot(p1, v);
                    v = normalize(v);

                    set_value(v, T(3.1415926));
                    return *this;
                }

                p1 = normalize(cross(p1, p2));

                set_value(p1,T(acos(alpha)));

                return *this;
            }

            // Rotation that takes from_look onto to_look and then twists so
            // that the rotated from_up lines up with to_up.
            quaternion &set_value(const vec3<T> &from_look, const vec3<T> &from_up,
                                  const vec3<T> &to_look, const vec3<T> &to_up)
            {
                quaternion r_look = quaternion(from_look, to_look);

                vec3<T> rotated_from_up(from_up);
                r_look.mult_vec(rotated_from_up);

                quaternion r_twist = quaternion(rotated_from_up, to_up);

                *this = r_twist;
                *this *= r_look;
                return *this;
            }

            // *this = *this * qr (Hamilton product). Both operands are copied
            // first so the result is correct even when qr is *this.
            quaternion &operator *= (const quaternion<T> &qr)
            {
                const quaternion ql(*this);
                const quaternion rhs(qr);

                w = ql.w * rhs.w - ql.x * rhs.x - ql.y * rhs.y - ql.z * rhs.z;
                x = ql.w * rhs.x + ql.x * rhs.w + ql.y * rhs.z - ql.z * rhs.y;
                y = ql.w * rhs.y + ql.y * rhs.w + ql.z * rhs.x - ql.x * rhs.z;
                z = ql.w * rhs.z + ql.z * rhs.w + ql.x * rhs.y - ql.y * rhs.x;

                return *this;
            }

            // Returns q scaled to unit length. q must not be all zero (the
            // reciprocal of the norm is taken without a check).
            friend quaternion normalize(const quaternion<T> &q)
            {
                quaternion r(q);
                T rnorm = T(1.0) / T(sqrt(q.w * q.w + q.x * q.x + q.y * q.y + q.z * q.z));

                r.x *= rnorm;
                r.y *= rnorm;
                r.z *= rnorm;
                r.w *= rnorm;

                // The result was previously never returned (undefined behaviour).
                return r;
            }

            // Negates the vector part (x, y, z); w is kept.
            friend quaternion<T> conjugate(const quaternion<T> &q)
            {
                quaternion<T> r(q);
                r._array[0] *= T(-1.0);
                r._array[1] *= T(-1.0);
                r._array[2] *= T(-1.0);
                return r;
            }

            // Implemented as the conjugate, which is the inverse only for unit
            // quaternions.
            friend quaternion<T> inverse(const quaternion<T> &q)
            {
                return conjugate(q);
            }

            //
            // Quaternion multiplication with cartesian vector
            // v' = q*v*q(star)
            //
            // src is read completely before dst is written, so both may refer to
            // the same vector. Components are accessed with operator[] like
            // everywhere else in this file.
            void mult_vec(const vec3<T> &src, vec3<T> &dst) const
            {
                const T sx = src[0];
                const T sy = src[1];
                const T sz = src[2];

                T v_coef = w * w - x * x - y * y - z * z;
                T u_coef = T(2.0) * (sx * x + sy * y + sz * z);
                T c_coef = T(2.0) * w;

                dst[0] = v_coef * sx + u_coef * x + c_coef * (y * sz - z * sy);
                dst[1] = v_coef * sy + u_coef * y + c_coef * (z * sx - x * sz);
                dst[2] = v_coef * sz + u_coef * z + c_coef * (x * sy - y * sx);
            }

            // In-place variant of mult_vec().
            void mult_vec(vec3<T> &src_and_dst) const
            {
                mult_vec(vec3<T>(src_and_dst), src_and_dst);
            }

            // Multiplies the rotation angle by scaleFactor, keeping the axis.
            void scale_angle(T scaleFactor)
            {
                vec3<T> axis;
                T radians;

                get_value(axis, radians);
                radians *= scaleFactor;
                set_value(axis, radians);
            }

            // Spherical linear interpolation from p (alpha = 0) towards q
            // (alpha = 1). Returns p unchanged when the two inputs are
            // (anti-)parallel, i.e. |p . q| >= 1.
            friend quaternion<T> slerp(const quaternion<T> &p, const quaternion<T> &q, T alpha)
            {
                quaternion r;

                T cos_omega = p.x * q.x + p.y * q.y + p.z * q.z + p.w * q.w;
                // if B is on opposite hemisphere from A, use -B instead

                const bool bflip = (cos_omega < T(0));

                if (bflip)
                {
                    cos_omega = -cos_omega;
                }

                // complementary interpolation parameter
                T beta = T(1) - alpha;

                if (cos_omega >= T(1))
                {
                    return p;
                }

                T omega = T(acos(cos_omega));
                T one_over_sin_omega = T(1.0) / T(sin(omega));

                beta    = T(sin(omega*beta)  * one_over_sin_omega);
                alpha   = T(sin(omega*alpha) * one_over_sin_omega);

                if (bflip)
                {
                    alpha = -alpha;
                }

                r.x = beta * p._array[0]+ alpha * q._array[0];
                r.y = beta * p._array[1]+ alpha * q._array[1];
                r.z = beta * p._array[2]+ alpha * q._array[2];
                r.w = beta * p._array[3]+ alpha * q._array[3];
                return r;
            }

            // Component access, index 0..3 = x, y, z, w; no bounds checking.
            T &operator [](int i)
            {
                return _array[i];
            }

            const T &operator [](int i) const
            {
                return _array[i];
            }

            // True when all four components compare equal.
            friend bool operator == (const quaternion<T> &lhs, const quaternion<T> &rhs)
            {
                for (int i = 0; i < 4; i++)
                {
                    if (!(lhs._array[i] == rhs._array[i]))
                    {
                        return false;
                    }
                }

                return true;
            }

            // Exact negation of operator ==. (The previous body was a copy of
            // operator == and therefore reported equal quaternions as unequal.)
            friend bool operator != (const quaternion<T> &lhs, const quaternion<T> &rhs)
            {
                return !(lhs == rhs);
            }

            // Hamilton product lhs * rhs.
            friend quaternion<T> operator * (const quaternion<T> &lhs, const quaternion<T> &rhs)
            {
                quaternion r(lhs);
                r *= rhs;
                return r;
            }


            union
            {
                struct
                {
                    T x;
                    T y;
                    T z;
                    T w;
                };
                T _array[4];
            };

    };

};
#endif

260
Common/nvShaderUtils.h Normal file
View File

@ -0,0 +1,260 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
*
* Utility functions for compiling shaders and programs
*
* Author: Evan Hart
* Copyright (c) NVIDIA Corporation. All rights reserved.
*
*/
#ifndef NV_SHADER_UTILS_H
#define NV_SHADER_UTILS_H
#include <stdio.h>
#include <string.h>
namespace nv {
//
//
////////////////////////////////////////////////////////////
// Creates a shader object of type 'target', uploads the single source string
// 'shader' and compiles it. Returns the shader object, or 0 when the object
// could not be created or compilation failed (the object is deleted in the
// latter case). With NV_REPORT_COMPILE_ERRORS defined, up to 256 bytes of the
// info log are written to stderr on failure.
inline GLuint CompileGLSLShader(GLenum target, const char *shader) {
  const GLuint handle = glCreateShader(target);

  if (handle == 0) {
    return handle;
  }

  glShaderSource(handle, 1, &shader, NULL);
  glCompileShader(handle);

  // check if shader compiled
  GLint status = 0;
  glGetShaderiv(handle, GL_COMPILE_STATUS, &status);

  if (status) {
    return handle;
  }

#ifdef NV_REPORT_COMPILE_ERRORS
  char log[256] = "";
  glGetShaderInfoLog(handle, 256, NULL, log);
  fprintf(stderr, "Compile failed:\n%s\n", log);
#endif
  glDeleteShader(handle);
  return 0;
}
//
//
////////////////////////////////////////////////////////////
// Reads the whole file 'filename' and compiles its contents as a GLSL shader
// of type 'target' via CompileGLSLShader(). Returns the shader object, or 0
// when the file cannot be opened, sized or read completely, or when
// compilation fails.
inline GLuint CompileGLSLShaderFromFile(GLenum target, const char *filename) {
  FILE *shaderFile;

  // read files as binary to prevent problems from newline translation
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

  if (fopen_s(&shaderFile, filename, "rb") != 0)
#else
  if ((shaderFile = fopen(filename, "rb")) == 0)
#endif
  {
    return 0;
  }

  // Get the length of the file. ftell() reports failure as -1, which must not
  // be used as a buffer size or index.
  long size = -1;

  if (fseek(shaderFile, 0, SEEK_END) == 0) {
    size = ftell(shaderFile);
  }

  if (size < 0 || fseek(shaderFile, 0, SEEK_SET) != 0) {
    printf("CompileGLSLShaderFromFile(), error... cannot determine file size\n");
    fclose(shaderFile);
    return 0;
  }

  // Read the file contents from the start, then close file and add a null
  // terminator. Reading 'size' items of one byte makes the return value the
  // number of bytes actually read.
  char *text = new char[size + 1];
  const size_t bytesRead = fread(text, 1, (size_t)size, shaderFile);
  fclose(shaderFile);

  if (bytesRead != (size_t)size) {
    // Do not hand a partially filled buffer to the compiler.
    printf("CompileGLSLShaderFromFile(), error... incomplete read\n");
    delete[] text;
    return 0;
  }

  text[size] = '\0';

  GLuint object = CompileGLSLShader(target, text);

  delete[] text;

  return object;
}
// Create a program composed of vertex and fragment shaders.
// Create a program composed of vertex and fragment shaders.
// Returns the linked program object, or 0 (after deleting the program) when
// linking fails. With NV_REPORT_COMPILE_ERRORS defined the program info log is
// printed to stdout.
inline GLuint LinkGLSLProgram(GLuint vertexShader, GLuint fragmentShader) {
  GLuint program = glCreateProgram();
  glAttachShader(program, vertexShader);
  glAttachShader(program, fragmentShader);
  glLinkProgram(program);

#ifdef NV_REPORT_COMPILE_ERRORS
  // Get error log.
  GLint charsWritten = 0, infoLogLength = 0;
  glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infoLogLength);

  if (infoLogLength > 0) {
    char *infoLog = new char[infoLogLength];
    infoLog[0] = '\0';  // stays a valid string even if nothing is written
    glGetProgramInfoLog(program, infoLogLength, &charsWritten, infoLog);
    // The log is data, not a format string: a '%' in it must not be
    // interpreted by printf.
    printf("%s", infoLog);
    delete[] infoLog;
  }
#endif

  // Test linker result.
  GLint linkSucceed = GL_FALSE;
  glGetProgramiv(program, GL_LINK_STATUS, &linkSucceed);

  if (linkSucceed == GL_FALSE) {
    glDeleteProgram(program);
    return 0;
  }

  return program;
}
// Create a program composed of vertex, geometry and fragment shaders.
// Create a program composed of vertex, geometry and fragment shaders.
// inputType, vertexOut and outputType are passed to glProgramParameteriEXT()
// as GL_GEOMETRY_INPUT_TYPE_EXT, GL_GEOMETRY_VERTICES_OUT_EXT and
// GL_GEOMETRY_OUTPUT_TYPE_EXT before linking. Returns the linked program
// object, or 0 (after deleting the program) when linking fails. With
// NV_REPORT_COMPILE_ERRORS defined the program info log is printed to stdout.
inline GLuint LinkGLSLProgram(GLuint vertexShader, GLuint geometryShader,
                              GLint inputType, GLint vertexOut,
                              GLint outputType, GLuint fragmentShader) {
  GLuint program = glCreateProgram();
  glAttachShader(program, vertexShader);
  glAttachShader(program, geometryShader);
  glProgramParameteriEXT(program, GL_GEOMETRY_INPUT_TYPE_EXT, inputType);
  glProgramParameteriEXT(program, GL_GEOMETRY_VERTICES_OUT_EXT, vertexOut);
  glProgramParameteriEXT(program, GL_GEOMETRY_OUTPUT_TYPE_EXT, outputType);
  glAttachShader(program, fragmentShader);
  glLinkProgram(program);

#ifdef NV_REPORT_COMPILE_ERRORS
  // Get error log.
  GLint charsWritten = 0, infoLogLength = 0;
  glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infoLogLength);

  if (infoLogLength > 0) {
    char *infoLog = new char[infoLogLength];
    infoLog[0] = '\0';  // stays a valid string even if nothing is written
    glGetProgramInfoLog(program, infoLogLength, &charsWritten, infoLog);
    // The log is data, not a format string: a '%' in it must not be
    // interpreted by printf.
    printf("%s", infoLog);
    delete[] infoLog;
  }
#endif

  // Test linker result.
  GLint linkSucceed = GL_FALSE;
  glGetProgramiv(program, GL_LINK_STATUS, &linkSucceed);

  if (linkSucceed == GL_FALSE) {
    glDeleteProgram(program);
    return 0;
  }

  return program;
}
//
//
////////////////////////////////////////////////////////////
// Generates a program object, binds it to 'program_type' and loads the
// null-terminated ASCII program text 'code' into it. Returns the program id,
// or 0 when GL_PROGRAM_ERROR_POSITION_ARB is not -1 after loading. With
// NV_REPORT_COMPILE_ERRORS defined the error position and string go to stderr.
// NOTE(review): on failure the generated program id is neither returned nor
// deleted here - confirm whether that is intentional.
inline GLuint CompileASMShader(GLenum program_type, const char *code) {
  GLuint id;
  glGenProgramsARB(1, &id);
  glBindProgramARB(program_type, id);

  const GLsizei codeLength = (GLsizei)strlen(code);
  glProgramStringARB(program_type, GL_PROGRAM_FORMAT_ASCII_ARB, codeLength,
                     (GLubyte *)code);

  GLint errorPosition;
  glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errorPosition);

  if (errorPosition == -1) {
    return id;
  }

#ifdef NV_REPORT_COMPILE_ERRORS
  const GLubyte *message = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
  fprintf(stderr, "Program error at position: %d\n%s\n", (int)errorPosition,
          message);
#endif
  return 0;
}
//
//
////////////////////////////////////////////////////////////
// Reads the whole file 'filename' and loads its contents as an assembly
// program for 'target' via CompileASMShader(). Returns the program id, or 0
// when the file cannot be opened, sized or read completely, or when the
// program is rejected.
inline GLuint CompileASMShaderFromFile(GLenum target, const char *filename) {
  FILE *shaderFile;

  // read files as binary to prevent problems from newline translation
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

  if (fopen_s(&shaderFile, filename, "rb") != 0)
#else
  if ((shaderFile = fopen(filename, "rb")) == 0)
#endif
  {
    return 0;
  }

  // Get the length of the file. ftell() reports failure as -1, which must not
  // be used as a buffer size or index.
  long size = -1;

  if (fseek(shaderFile, 0, SEEK_END) == 0) {
    size = ftell(shaderFile);
  }

  if (size < 0 || fseek(shaderFile, 0, SEEK_SET) != 0) {
    printf("CompileASMShaderFromFile(), error... cannot determine file size\n");
    fclose(shaderFile);
    return 0;
  }

  // Read the file contents from the start, then close file and add a null
  // terminator. Reading 'size' items of one byte makes the return value the
  // number of bytes actually read.
  char *text = new char[size + 1];
  const size_t bytesRead = fread(text, 1, (size_t)size, shaderFile);
  fclose(shaderFile);

  if (bytesRead != (size_t)size) {
    // Do not hand a partially filled buffer to the program loader.
    printf("CompileASMShaderFromFile(), error... incomplete read\n");
    delete[] text;
    return 0;
  }

  text[size] = '\0';

  GLuint program_id = CompileASMShader(target, text);

  delete[] text;

  return program_id;
}
} // namespace nv
#endif

1074
Common/nvVector.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

236
Common/param.h Normal file
View File

@ -0,0 +1,236 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
Simple parameter system
sgreen@nvidia.com 4/2001
*/
#ifndef PARAM_H
#define PARAM_H
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
// Abstract interface shared by every named parameter.
//
// A parameter owns a name and exposes its value as a float, an int or a
// string. It can be stepped up and down, reset, mapped to and from a
// percentage, and written to or read from a stream. Concrete behaviour is
// supplied by the derived classes.
class ParamBase {
 public:
  // Stores a copy of `name` as the parameter's name.
  ParamBase(const char *name) : m_name(name) {}
  virtual ~ParamBase() {}

  // Mutable access to the parameter name.
  std::string &GetName() { return m_name; }

  // Value queries.
  virtual float GetFloatValue() = 0;
  virtual int GetIntValue() = 0;
  virtual std::string GetValueString() = 0;

  // Value manipulation.
  virtual void Reset() = 0;
  virtual void Increment() = 0;
  virtual void Decrement() = 0;

  // Mapping between the value and a percentage.
  virtual float GetPercentage() = 0;
  virtual void SetPercentage(float p) = 0;

  // Stream serialisation.
  virtual void Write(std::ostream &stream) = 0;
  virtual void Read(std::istream &stream) = 0;

  // True when the object is a list of parameters rather than a single value.
  virtual bool IsList() = 0;

 protected:
  std::string m_name;
};
// Single-valued parameter of type T.
//
// The value lives either inside the object (m_value) or in caller-owned
// storage passed through `ptr`. Every accessor goes through m_ptr, so both
// cases behave the same.
//
// NOTE(review): the implicitly generated copy operations copy m_ptr verbatim,
// so a copy of a Param that uses its internal storage still points at the
// source object's m_value. Avoid copying Param objects.
template <class T>
class Param : public ParamBase {
 public:
  // name  - parameter name
  // value - initial value; also the value restored by Reset()
  // min   - lower bound applied by Decrement() and used by the percentage map
  // max   - upper bound applied by Increment() and used by the percentage map
  // step  - amount added/subtracted by Increment()/Decrement()
  // ptr   - optional external storage for the value; when null the value is
  //         kept inside this object
  Param(const char *name, T value = 0, T min = 0, T max = 10000, T step = 1,
        T *ptr = 0)
      : ParamBase(name),
        m_value(value),
        m_ptr(ptr ? ptr : &m_value),
        m_default(value),
        m_min(min),
        m_max(max),
        m_step(step),
        m_precision(3) {
    // Also seeds caller-owned storage with the initial value.
    *m_ptr = value;
  }

  ~Param() {}

  T GetValue() const { return *m_ptr; }

  // Stores `value` (no range clamping is applied) and returns the value now
  // held. The original declared a T return type but returned nothing.
  T SetValue(const T value) {
    *m_ptr = value;
    return *m_ptr;
  }

  float GetFloatValue() { return (float)*m_ptr; }
  int GetIntValue() { return (int)*m_ptr; }

  // Formats the value in fixed notation with m_precision decimals.
  std::string GetValueString() {
    std::ostringstream ost;
    ost << std::setprecision(m_precision) << std::fixed;
    ost << *m_ptr;
    return ost.str();
  }

  void SetPrecision(int x) { m_precision = x; }

  // Position of the value inside [min, max]; not clamped, and undefined when
  // min == max.
  float GetPercentage() { return (*m_ptr - m_min) / (float)(m_max - m_min); }

  // Sets the value to min + p * (max - min), converted to T.
  void SetPercentage(float p) { *m_ptr = (T)(m_min + p * (m_max - m_min)); }

  void Reset() { *m_ptr = m_default; }

  // Adds one step, saturating at max.
  void Increment() {
    *m_ptr += m_step;
    if (*m_ptr > m_max) {
      *m_ptr = m_max;
    }
  }

  // Subtracts one step, saturating at min.
  void Decrement() {
    *m_ptr -= m_step;
    if (*m_ptr < m_min) {
      *m_ptr = m_min;
    }
  }

  // Writes "<name> <value>\n".
  void Write(std::ostream &stream) {
    stream << m_name << " " << *m_ptr << '\n';
  }

  // Reads a name followed by a value, overwriting both.
  void Read(std::istream &stream) { stream >> m_name >> *m_ptr; }

  bool IsList() { return false; }

 private:
  T m_value;  // internal storage, used when no external pointer is given
  T *m_ptr;   // pointer to the live value (m_value or caller-owned storage)
  T m_default, m_min, m_max, m_step;
  int m_precision;  // number of digits after decimal point in string output
};
const Param<int> dummy("error");
// list of parameters
class ParamList : public ParamBase {
public:
ParamList(const char *name = "") : ParamBase(name) { active = true; }
~ParamList() {}
float GetFloatValue() { return 0.0f; }
int GetIntValue() { return 0; }
void AddParam(ParamBase *param) {
m_params.push_back(param);
m_map[param->GetName()] = param;
m_current = m_params.begin();
}
// look-up parameter based on name
ParamBase *GetParam(char *name) {
ParamBase *p = m_map[name];
if (p) {
return p;
} else {
return (ParamBase *)&dummy;
}
}
ParamBase *GetParam(int i) { return m_params[i]; }
ParamBase *GetCurrent() { return *m_current; }
int GetSize() { return (int)m_params.size(); }
std::string GetValueString() { return m_name; }
// functions to traverse list
void Reset() { m_current = m_params.begin(); }
void Increment() {
m_current++;
if (m_current == m_params.end()) {
m_current = m_params.begin();
}
}
void Decrement() {
if (m_current == m_params.begin()) {
m_current = m_params.end() - 1;
} else {
m_current--;
}
}
float GetPercentage() { return 0.0f; }
void SetPercentage(float /*p*/) {}
void Write(std::ostream &stream) {
stream << m_name << '\n';
for (std::vector<ParamBase *>::const_iterator p = m_params.begin();
p != m_params.end(); ++p) {
(*p)->Write(stream);
}
}
void Read(std::istream &stream) {
stream >> m_name;
for (std::vector<ParamBase *>::const_iterator p = m_params.begin();
p != m_params.end(); ++p) {
(*p)->Read(stream);
}
}
bool IsList() { return true; }
void ResetAll() {
for (std::vector<ParamBase *>::const_iterator p = m_params.begin();
p != m_params.end(); ++p) {
(*p)->Reset();
}
}
protected:
bool active;
std::vector<ParamBase *> m_params;
std::map<std::string, ParamBase *> m_map;
std::vector<ParamBase *>::const_iterator m_current;
};
#endif

307
Common/paramgl.h Normal file
View File

@ -0,0 +1,307 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
ParamListGL
- class derived from ParamList to do simple OpenGL rendering of a
parameter list. sgg 8/2001
*/
#ifndef PARAMGL_H
#define PARAMGL_H
#if defined(__APPLE__) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/freeglut.h>
#endif
#include <param.h>
#include <string.h>
// Saves the current modelview and projection matrices and installs a
// coordinate system based on the GLUT window size in which y is flipped by
// the modelview matrix. Must be paired with endWinCoords().
inline void beginWinCoords(void) {
  const int windowWidth = glutGet(GLUT_WINDOW_WIDTH);
  const int windowHeight = glutGet(GLUT_WINDOW_HEIGHT);

  // Modelview: translate by (height - 1) in y, then negate y.
  glMatrixMode(GL_MODELVIEW);
  glPushMatrix();
  glLoadIdentity();
  glTranslatef(0.0, (GLfloat)(windowHeight - 1.0), 0.0);
  glScalef(1.0, -1.0, 1.0);

  // Projection: orthographic volume spanning the window.
  glMatrixMode(GL_PROJECTION);
  glPushMatrix();
  glLoadIdentity();
  glOrtho(0, windowWidth, 0, windowHeight, -1, 1);

  // Leave modelview selected for the caller's drawing code.
  glMatrixMode(GL_MODELVIEW);
}
// Restores the projection and modelview matrices saved by beginWinCoords().
// Modelview is the selected matrix mode on return.
inline void endWinCoords(void) {
  // Projection first ...
  glMatrixMode(GL_PROJECTION);
  glPopMatrix();

  // ... then modelview.
  glMatrixMode(GL_MODELVIEW);
  glPopMatrix();
}
// Draws the NUL-terminated string `s` with the GLUT bitmap font `font`,
// starting at raster position (x, y).
inline void glPrint(int x, int y, const char *s, void *font) {
  glRasterPos2f((GLfloat)x, (GLfloat)y);

  // Emit one bitmap character per byte until the terminator is reached.
  for (const char *ch = s; *ch != '\0'; ++ch) {
    glutBitmapCharacter(font, *ch);
  }
}
// Draws `s` twice: first in black, offset by one unit in x and y, then in
// `color` (three floats: r, g, b) at (x, y), giving a drop-shadow effect.
inline void glPrintShadowed(int x, int y, const char *s, void *font,
                            float *color) {
  // Shadow pass.
  glColor3f(0.0f, 0.0f, 0.0f);
  glPrint(x - 1, y - 1, s, font);

  // Foreground pass on top of the shadow.
  glColor3fv((GLfloat *)color);
  glPrint(x, y, s, font);
}
// ParamList that renders itself with OpenGL/GLUT as rows of
// "name  value  [slider bar]" and reacts to GLUT mouse and special-key
// events.
//
// Render() records the origin it was drawn at; Mouse() and Motion() use that
// origin for hit testing, so Render() should be called before feeding input
// events.
class ParamListGL : public ParamList {
 public:
  ParamListGL(const char *name = "")
      : ParamList(name),
        m_active(true),
        m_text_color_selected(1.0, 1.0, 1.0),
        m_text_color_unselected(0.75, 0.75, 0.75),
        m_text_color_shadow(0.0, 0.0, 0.0),
        m_bar_color_outer(0.25, 0.25, 0.25),
        m_bar_color_inner(1.0, 1.0, 1.0) {
    m_font = (void *)GLUT_BITMAP_9_BY_15;  // GLUT_BITMAP_8_BY_13;
    m_font_h = 15;
    m_bar_x = 260;
    m_bar_w = 250;
    m_bar_h = 10;
    m_bar_offset = 5;
    m_text_x = 5;
    m_separation = 15;
    m_value_x = 200;
    m_start_x = 0;
    m_start_y = 0;
  }

  // Draws the list with its top-left corner at window position (x, y).
  // When `shadow` is true the text is drawn with a black drop shadow.
  // Nested lists are drawn indented by 10 units and are assumed to be
  // ParamListGL instances.
  void Render(int x, int y, bool shadow = false) {
    beginWinCoords();
    m_start_x = x;
    m_start_y = y;

    for (std::vector<ParamBase *>::const_iterator p = m_params.begin();
         p != m_params.end(); ++p) {
      if ((*p)->IsList()) {
        ParamListGL *list = (ParamListGL *)(*p);
        list->Render(x + 10, y);
        y += m_separation * list->GetSize();
      } else {
        // The row under the cursor uses the "selected" text colour.
        float *text_color = (p == m_current) ? &m_text_color_selected.r
                                             : &m_text_color_unselected.r;
        glColor3fv(text_color);

        if (shadow) {
          glPrintShadowed(x + m_text_x, y + m_font_h, (*p)->GetName().c_str(),
                          m_font, text_color);
          glPrintShadowed(x + m_value_x, y + m_font_h,
                          (*p)->GetValueString().c_str(), m_font, text_color);
        } else {
          glPrint(x + m_text_x, y + m_font_h, (*p)->GetName().c_str(), m_font);
          glPrint(x + m_value_x, y + m_font_h, (*p)->GetValueString().c_str(),
                  m_font);
        }

        // Bar outline.
        glColor3fv((GLfloat *)&m_bar_color_outer.r);
        glBegin(GL_LINE_LOOP);
        glVertex2f((GLfloat)(x + m_bar_x), (GLfloat)(y + m_bar_offset));
        glVertex2f((GLfloat)(x + m_bar_x + m_bar_w),
                   (GLfloat)(y + m_bar_offset));
        glVertex2f((GLfloat)(x + m_bar_x + m_bar_w),
                   (GLfloat)(y + m_bar_offset + m_bar_h));
        glVertex2f((GLfloat)(x + m_bar_x),
                   (GLfloat)(y + m_bar_offset + m_bar_h));
        glEnd();

        // Filled part of the bar, proportional to the parameter percentage.
        glColor3fv((GLfloat *)&m_bar_color_inner.r);
        glRectf(
            (GLfloat)(x + m_bar_x), (GLfloat)(y + m_bar_offset + m_bar_h),
            (GLfloat)(x + m_bar_x + ((m_bar_w - 1) * (*p)->GetPercentage())),
            (GLfloat)(y + m_bar_offset + 1));

        y += m_separation;
      }
    }

    endWinCoords();
  }

  // Handles a GLUT mouse event at window position (x, y).
  // Returns false (and deactivates the list) when y is outside the rows of
  // the list, true otherwise. A left-button press selects the row under the
  // pointer and, when the pointer is over the slider bar, also sets its
  // value.
  bool Mouse(int x, int y, int button = GLUT_LEFT_BUTTON,
             int state = GLUT_DOWN) {
    if (!ContainsRow(y)) {
      m_active = false;
      return false;
    }

    m_active = true;

    if ((button == GLUT_LEFT_BUTTON) && (state == GLUT_DOWN)) {
      // ContainsRow() guarantees 0 <= row < m_params.size().
      const int row = (y - m_start_y) / m_separation;
      m_current = m_params.begin() + row;

      // The bar is drawn at m_start_x + m_bar_x, so test against that.
      const int bar_left = m_start_x + m_bar_x;
      if ((x > bar_left) && (x < bar_left + m_bar_w)) {
        Motion(x, y);
      }
    }

    return true;
  }

  // Handles a GLUT mouse-motion event: maps x onto the slider bar of the
  // currently selected parameter, clamping to 0% left of the bar and 100%
  // right of it. Returns false when y is outside the rows of the list.
  bool Motion(int x, int y) {
    if (!ContainsRow(y)) {
      return false;
    }

    // Same origin as used by Render() for drawing the bar.
    const int bar_left = m_start_x + m_bar_x;

    if (x < bar_left) {
      (*m_current)->SetPercentage(0.0);
      return true;
    }

    if (x > bar_left + m_bar_w) {
      (*m_current)->SetPercentage(1.0);
      return true;
    }

    (*m_current)->SetPercentage((x - bar_left) / (float)m_bar_w);
    return true;
  }

  // Handles a GLUT special-key event: up/down move the selection,
  // left/right step the selected value, HOME resets it and END sets it to
  // 100%. Ignored while the list is inactive or empty.
  void Special(int key, int /*x*/, int /*y*/) {
    // An empty list has no current parameter to act on.
    if (!m_active || m_params.empty()) return;

    switch (key) {
      case GLUT_KEY_DOWN:
        Increment();
        break;

      case GLUT_KEY_UP:
        Decrement();
        break;

      case GLUT_KEY_RIGHT:
        GetCurrent()->Increment();
        break;

      case GLUT_KEY_LEFT:
        GetCurrent()->Decrement();
        break;

      case GLUT_KEY_HOME:
        GetCurrent()->Reset();
        break;

      case GLUT_KEY_END:
        GetCurrent()->SetPercentage(1.0);
        break;
    }

    glutPostRedisplay();
  }

  void SetFont(void *font, int height) {
    m_font = font;
    m_font_h = height;
  }

  void SetSelectedColor(float r, float g, float b) {
    m_text_color_selected = Color(r, g, b);
  }
  void SetUnSelectedColor(float r, float g, float b) {
    m_text_color_unselected = Color(r, g, b);
  }
  void SetBarColorInner(float r, float g, float b) {
    m_bar_color_inner = Color(r, g, b);
  }
  void SetBarColorOuter(float r, float g, float b) {
    m_bar_color_outer = Color(r, g, b);
  }

  void SetActive(bool b) { m_active = b; }

 private:
  // True when window coordinate y falls on one of the rows drawn by the last
  // Render() call (always false for an empty list).
  bool ContainsRow(int y) const {
    const int last_y = m_start_y + (m_separation * (int)m_params.size()) - 1;
    return (y >= m_start_y) && (y <= last_y);
  }

  void *m_font;
  int m_font_h;      // font height
  int m_bar_x;       // bar start x position
  int m_bar_w;       // bar width
  int m_bar_h;       // bar height
  int m_text_x;      // text start x position
  int m_separation;  // bar separation in y
  int m_value_x;     // value text x position
  int m_bar_offset;  // bar offset in y
  int m_start_x, m_start_y;  // origin passed to the last Render() call
  bool m_active;

  // RGB triple; r, g, b are passed to OpenGL as a float[3] via &r.
  struct Color {
    Color(float _r, float _g, float _b) {
      r = _r;
      g = _g;
      b = _b;
    }
    float r, g, b;
  };

  Color m_text_color_selected;
  Color m_text_color_unselected;
  Color m_text_color_shadow;
  Color m_bar_color_outer;
  Color m_bar_color_inner;
};
#endif

View File

@ -0,0 +1,128 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
////////////////////////////////////////////////////////////////////////////////
//
// Utility funcs to wrap up saving a surface or the back buffer as a PPM file
// In addition, wraps up a threshold comparison of two PPMs.
//
// These functions are designed to be used to implement an automated QA testing
// for SDK samples.
//
// Author: Bryan Dudash
// Email: sdkfeedback@nvidia.com
//
// Copyright (c) NVIDIA Corporation. All rights reserved.
////////////////////////////////////////////////////////////////////////////////
#include <helper_functions.h>
#include <rendercheck_d3d10.h>
// Saves the resource behind the first bound render target of `pDevice` to
// the PPM file `zFileName`.
//
// Returns the result of ResourceToPPM(), or E_FAIL when no render target is
// bound or its resource cannot be obtained.
HRESULT CheckRenderD3D10::ActiveRenderTargetToPPM(ID3D10Device *pDevice,
                                                  const char *zFileName) {
  ID3D10RenderTargetView *pRTV = NULL;
  pDevice->OMGetRenderTargets(1, &pRTV, NULL);

  if (pRTV == NULL) {
    printf("ActiveRenderTargetToPPM: no render target bound! Aborting...\n");
    return E_FAIL;
  }

  ID3D10Resource *pSourceResource = NULL;
  pRTV->GetResource(&pSourceResource);

  HRESULT hr = E_FAIL;

  if (pSourceResource != NULL) {
    hr = ResourceToPPM(pDevice, pSourceResource, zFileName);
    // GetResource() handed us a reference; give it back.
    pSourceResource->Release();
  } else {
    printf(
        "ActiveRenderTargetToPPM: render target has no resource! "
        "Aborting...\n");
  }

  // OMGetRenderTargets() handed us a reference as well.
  pRTV->Release();
  return hr;
}
// Copies the 2D texture `pResource` into a CPU-readable staging texture and
// saves its contents to the PPM file `zFileName`.
//
// Each row is copied as Width * 4 bytes, i.e. the code assumes a texel size
// of 4 bytes. NOTE(review): the texture format is not checked here.
//
// Returns S_OK on success and E_FAIL when `pResource` is null or not a 2D
// texture, or when the staging texture cannot be created or mapped.
HRESULT CheckRenderD3D10::ResourceToPPM(ID3D10Device *pDevice,
                                        ID3D10Resource *pResource,
                                        const char *zFileName) {
  if (pResource == NULL) {
    printf("SurfaceToPPM: pResource is NULL! Aborting...\n");
    return E_FAIL;
  }

  D3D10_RESOURCE_DIMENSION rType;
  pResource->GetType(&rType);

  if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE2D) {
    printf("SurfaceToPPM: pResource is not a 2D texture! Aborting...\n");
    return E_FAIL;
  }

  ID3D10Texture2D *pSourceTexture = (ID3D10Texture2D *)pResource;
  ID3D10Texture2D *pTargetTexture = NULL;

  // Same description as the source, but as a CPU-readable staging texture.
  D3D10_TEXTURE2D_DESC desc;
  pSourceTexture->GetDesc(&desc);
  desc.BindFlags = 0;
  desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ;
  desc.Usage = D3D10_USAGE_STAGING;

  if (FAILED(pDevice->CreateTexture2D(&desc, NULL, &pTargetTexture))) {
    printf(
        "SurfaceToPPM: Unable to create target Texture resoruce! Aborting... "
        "\n");
    return E_FAIL;
  }

  pDevice->CopyResource(pTargetTexture, pSourceTexture);

  // Without this check a failed Map() would leave mappedTex2D unset and the
  // copy loop below would read through an indeterminate pointer.
  D3D10_MAPPED_TEXTURE2D mappedTex2D;
  if (FAILED(pTargetTexture->Map(0, D3D10_MAP_READ, 0, &mappedTex2D))) {
    printf("SurfaceToPPM: Unable to map target Texture! Aborting...\n");
    pTargetTexture->Release();
    return E_FAIL;
  }

  // Need to convert from dx pitch to pitch=width
  const size_t rowBytes = (size_t)desc.Width * 4;
  unsigned char *pPPMData = new unsigned char[rowBytes * desc.Height];
  const unsigned char *pMapped = (const unsigned char *)mappedTex2D.pData;

  for (unsigned int iHeight = 0; iHeight < desc.Height; iHeight++) {
    memcpy(pPPMData + iHeight * rowBytes,
           pMapped + (size_t)iHeight * mappedTex2D.RowPitch, rowBytes);
  }

  pTargetTexture->Unmap(0);

  // Prepends the PPM header info and bumps byte data afterwards
  sdkSavePPM4ub(zFileName, pPPMData, desc.Width, desc.Height);

  delete[] pPPMData;
  pTargetTexture->Release();

  return S_OK;
}
// Compares the PPM image `src_file` against the reference image `ref_file`,
// which is located through sdkFindFilePath() using `exec_path`.
//
// Returns the result of sdkComparePPM() for the given `epsilon` and
// `threshold`, or false (after printing a diagnostic) when the reference
// file cannot be found.
//
// NOTE(review): ownership of the string returned by sdkFindFilePath() is not
// visible here; confirm whether it has to be freed.
bool CheckRenderD3D10::PPMvsPPM(const char *src_file, const char *ref_file,
                                const char *exec_path, const float epsilon,
                                const float threshold) {
  char *const located_ref = sdkFindFilePath(ref_file, exec_path);

  if (located_ref != NULL) {
    const bool images_match =
        (sdkComparePPM(src_file, located_ref, epsilon, threshold, true) ==
         true);
    return images_match;
  }

  // Reference image missing: report it and fail the comparison.
  printf(
      "CheckRenderD3D10::PPMvsPPM unable to find <%s> in <%s> Aborting "
      "comparison!\n",
      ref_file, exec_path);
  printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
         ref_file);
  printf("Aborting comparison!\n");
  printf("  FAILURE!\n");
  return false;
}

View File

@ -0,0 +1,53 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#ifndef _RENDERCHECK_D3D10_H_
#define _RENDERCHECK_D3D10_H_
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <d3d10.h>
// Static helpers that dump Direct3D 10 render output to PPM files and
// compare PPM files against reference images.
class CheckRenderD3D10 {
 public:
  CheckRenderD3D10() {}

  // Saves the resource of the currently bound render target of `pDevice`
  // to the file `zFileName`.
  static HRESULT ActiveRenderTargetToPPM(ID3D10Device *pDevice,
                                         const char *zFileName);

  // Saves the 2D texture `pResource` to the file `zFileName`.
  static HRESULT ResourceToPPM(ID3D10Device *pDevice,
                               ID3D10Resource *pResource,
                               const char *zFileName);

  // Compares `src_file` with the reference image `ref_file`, which is
  // searched for relative to `exec_path`.
  static bool PPMvsPPM(const char *src_file, const char *ref_file,
                       const char *exec_path, const float epsilon,
                       const float threshold = 0.0f);
};
#endif

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

View File

@ -1,4 +1,4 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. /* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions

167
Common/rendercheck_d3d9.cpp Normal file
View File

@ -0,0 +1,167 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
////////////////////////////////////////////////////////////////////////////////
//
// Utility funcs to wrap up saving a surface or the back buffer as a PPM file
// In addition, wraps up a threshold comparison of two PPMs.
//
// These functions are designed to be used to implement an automated QA testing
// for SDK samples.
//
// Author: Bryan Dudash
// Email: sdkfeedback@nvidia.com
//
// Copyright (c) NVIDIA Corporation. All rights reserved.
////////////////////////////////////////////////////////////////////////////////
#include <helper_functions.h>
#include <rendercheck_d3d9.h>
// originally copied from checkrender_gl.cpp and slightly modified
//
// Compares the PPM image `src_file` against the reference image `ref_file`,
// which is located through sdkFindFilePath() using `exec_path`.
//
// Returns the result of sdkComparePPM() for the given `epsilon` and
// `threshold`, or false (after printing a diagnostic) when the reference
// file cannot be found.
//
// NOTE(review): ownership of the string returned by sdkFindFilePath() is not
// visible here; confirm whether it has to be freed.
bool CheckRenderD3D9::PPMvsPPM(const char *src_file, const char *ref_file,
                               const char *exec_path, const float epsilon,
                               const float threshold) {
  char *const located_ref = sdkFindFilePath(ref_file, exec_path);

  if (located_ref != NULL) {
    const bool images_match =
        (sdkComparePPM(src_file, located_ref, epsilon, threshold, true) ==
         true);
    return images_match;
  }

  // Reference image missing: report it and fail the comparison.
  printf(
      "CheckRenderD3D9::PPMvsPPM unable to find <%s> in <%s> Aborting "
      "comparison!\n",
      ref_file, exec_path);
  printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
         ref_file);
  printf("Aborting comparison!\n");
  printf("  FAILURE!\n");
  return false;
}
// Saves back buffer 0 of swap chain 0 of `pDevice` to the PPM file
// `zFileName`.
//
// Returns E_FAIL when the back buffer cannot be obtained, otherwise the
// result of SurfaceToPPM().
HRESULT CheckRenderD3D9::BackbufferToPPM(IDirect3DDevice9 *pDevice,
                                         const char *zFileName) {
  IDirect3DSurface9 *pBackBuffer = NULL;
  const HRESULT fetchResult =
      pDevice->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &pBackBuffer);

  if (FAILED(fetchResult)) {
    printf("Unable to get the back buffer. Aborting...\n");
    return E_FAIL;
  }

  const HRESULT saveResult = SurfaceToPPM(pDevice, pBackBuffer, zFileName);

  // Drop the reference obtained from GetBackBuffer().
  pBackBuffer->Release();
  return saveResult;
}
// Copies `pSurface` into a system-memory surface, swaps the red and blue
// channels of every pixel and saves the result to the PPM file `zFileName`.
//
// Only D3DFMT_A8R8G8B8 and D3DFMT_X8R8G8B8 surfaces are supported; any other
// format yields E_INVALIDARG. Returns E_FAIL when one of the Direct3D calls
// fails and S_OK otherwise. Every interface acquired here is released on all
// exit paths.
HRESULT CheckRenderD3D9::SurfaceToPPM(IDirect3DDevice9 *pDevice,
                                      IDirect3DSurface9 *pSurface,
                                      const char *zFileName) {
  D3DSURFACE_DESC pDesc;
  pSurface->GetDesc(&pDesc);

  // $$ For now only support common 8bit formats.  TODO: support for more
  // complex formats via conversion?
  if (!(pDesc.Format == D3DFMT_A8R8G8B8 || pDesc.Format == D3DFMT_X8R8G8B8)) {
    return E_INVALIDARG;
  }

  IDirect3DTexture9 *pTargetTex = NULL;

  if (FAILED(pDevice->CreateTexture(pDesc.Width, pDesc.Height, 1,
                                    D3DUSAGE_DYNAMIC, pDesc.Format,
                                    D3DPOOL_SYSTEMMEM, &pTargetTex, NULL))) {
    printf("Unable to create texture for surface transfer! Aborting...\n");
    return E_FAIL;
  }

  IDirect3DSurface9 *pTargetSurface = NULL;

  if (FAILED(pTargetTex->GetSurfaceLevel(0, &pTargetSurface))) {
    printf("Unable to get surface for surface transfer! Aborting...\n");
    pTargetTex->Release();
    return E_FAIL;
  }

  // This is required because we cannot lock a D3DPOOL_DEFAULT surface
  // directly. So, we copy to our sysmem surface.
  if (FAILED(pDevice->GetRenderTargetData(pSurface, pTargetSurface))) {
    printf(
        "Unable to GetRenderTargetData() for surface transfer! Aborting...\n");
    pTargetSurface->Release();
    pTargetTex->Release();
    return E_FAIL;
  }

  // The lock result was previously stored but never inspected; a failed lock
  // would have left lockedRect unusable for the copy below.
  D3DLOCKED_RECT lockedRect;

  if (FAILED(pTargetSurface->LockRect(&lockedRect, NULL, 0))) {
    printf("Unable to lock surface for surface transfer! Aborting...\n");
    pTargetSurface->Release();
    pTargetTex->Release();
    return E_FAIL;
  }

  // Need to convert from dx pitch to pitch=width.
  //
  // While copying, each 32-bit pixel gets its red and blue channels swapped
  // (bits [7:0] <-> [23:16]); green and alpha stay where they are.
  const size_t rowBytes = (size_t)pDesc.Width * 4;
  unsigned char *pPPMData = new unsigned char[rowBytes * pDesc.Height];
  const unsigned char *pLocked = (const unsigned char *)lockedRect.pBits;

  for (unsigned int iHeight = 0; iHeight < pDesc.Height; iHeight++) {
    const unsigned char *pSrcRow = pLocked + iHeight * lockedRect.Pitch;
    unsigned char *pDstRow = pPPMData + iHeight * rowBytes;

    for (unsigned int iWidth = 0; iWidth < pDesc.Width; iWidth++) {
      DWORD color = *(const DWORD *)(pSrcRow + iWidth * 4);

      // R<->B, [7:0] <-> [23:16], swizzle
      color = ((color & 0xFF) << 16) | (color & 0xFF00) |
              ((color & 0xFF0000) >> 16) | (color & 0xFF000000);

      memcpy(pDstRow + iWidth * 4, (unsigned char *)&color, 4);
    }
  }

  pTargetSurface->UnlockRect();

  // Prepends the PPM header info and bumps byte data afterwards
  sdkSavePPM4ub(zFileName, pPPMData, pDesc.Width, pDesc.Height);

  delete[] pPPMData;
  pTargetSurface->Release();
  pTargetTex->Release();

  return S_OK;
}

54
Common/rendercheck_d3d9.h Normal file
View File

@ -0,0 +1,54 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#ifndef _RENDERCHECK_D3D9_H_
#define _RENDERCHECK_D3D9_H_
#include <assert.h>
#include <d3d9.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Static helpers that dump Direct3D 9 surfaces to PPM files and compare PPM
// files against reference images.
class CheckRenderD3D9 {
 public:
  CheckRenderD3D9() {}

  // Saves the back buffer of `pDevice` to the file `zFileName`.
  static HRESULT BackbufferToPPM(IDirect3DDevice9 *pDevice,
                                 const char *zFileName);

  // Saves `pSurface` to the file `zFileName`.
  static HRESULT SurfaceToPPM(IDirect3DDevice9 *pDevice,
                              IDirect3DSurface9 *pSurface,
                              const char *zFileName);

  // Compares `src_file` with the reference image `ref_file`, which is
  // searched for relative to `exec_path`.
  static bool PPMvsPPM(const char *src_file, const char *ref_file,
                       const char *exec_path, const float epsilon,
                       const float threshold = 0.0f);
};
#endif

1347
Common/rendercheck_gl.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -50,6 +50,9 @@ PROJECTS := $(filter-out $(FILTER_OUT),$(PROJECTS))
%.ph_clobber : %.ph_clobber :
+@$(MAKE) -C $(dir $*) clobber $(USE_DEVICE) +@$(MAKE) -C $(dir $*) clobber $(USE_DEVICE)
%.ph_run :
+@$(MAKE) -C $(dir $*) run
all: $(addsuffix .ph_build,$(PROJECTS)) all: $(addsuffix .ph_build,$(PROJECTS))
@echo "Finished building CUDA samples" @echo "Finished building CUDA samples"
@ -62,3 +65,5 @@ tidy:
clean: tidy $(addsuffix .ph_clean,$(PROJECTS)) clean: tidy $(addsuffix .ph_clean,$(PROJECTS))
clobber: clean $(addsuffix .ph_clobber,$(PROJECTS)) clobber: clean $(addsuffix .ph_clobber,$(PROJECTS))
run: $(addsuffix .ph_run,$(PROJECTS))

137
README.md
View File

@ -1,11 +1,19 @@
# CUDA Samples # CUDA Samples
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.4 update 1](https://developer.nvidia.com/cuda-downloads). Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads).
## Release Notes ## Release Notes
This section describes the release notes for the CUDA Samples on GitHub only. This section describes the release notes for the CUDA Samples on GitHub only.
### CUDA 11.5
* Added `cuDLAHybridMode`. Demonstrate usage of cuDLA in hybrid mode.
* Added `cuDLAStandaloneMode`. Demonstrate usage of cuDLA in standalone mode.
* Added `cuDLAErrorReporting`. Demonstrate DLA error detection via CUDA.
* Added `graphMemoryNodes`. Demonstrates memory allocations and frees within CUDA graphs using Graph APIs and Stream Capture APIs.
* Added `graphMemoryFootprint`. Demonstrates how graph memory nodes re-use virtual addresses and physical memory.
* All samples from CUDA toolkit are now available on [GitHub](https://github.com/nvidia/cuda-samples).
### CUDA 11.4 update 1 ### CUDA 11.4 update 1
* Added support for VS Code on linux platform. * Added support for VS Code on linux platform.
@ -116,7 +124,7 @@ This is the first release of CUDA Samples on GitHub:
### Prerequisites ### Prerequisites
Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. Download and install the [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html). For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
### Getting the CUDA Samples ### Getting the CUDA Samples
@ -173,39 +181,104 @@ The samples makefiles can take advantage of certain options:
### Samples by OS ### Samples by OS
#### Linux #### Linux
**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[shfl_scan](./Samples/shfl_scan)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[concurrentKernels](./Samples/concurrentKernels)** | **[simpleAssert_nvrtc](./Samples/simpleAssert_nvrtc)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[graphMemoryFootprint](./Samples/graphMemoryFootprint)** | **[MC_EstimatePiQ](./Samples/MC_EstimatePiQ)** |
---|---|---|---| ---|---|---|---|
**[streamOrderedAllocationIPC](./Samples/streamOrderedAllocationIPC)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[reductionMultiBlockCG](./Samples/reductionMultiBlockCG)** | **[cuDLAStandaloneMode](./Samples/cuDLAStandaloneMode)** | **[conjugateGradientPrecond](./Samples/conjugateGradientPrecond)** | **[ptxjit](./Samples/ptxjit)** |
**[nvJPEG](./Samples/nvJPEG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[threadMigration](./Samples/threadMigration)** | **[EGLStream_CUDA_CrossGPU](./Samples/EGLStream_CUDA_CrossGPU)** | **[threadFenceReduction](./Samples/threadFenceReduction)** | **[simpleAtomicIntrinsics_nvrtc](./Samples/simpleAtomicIntrinsics_nvrtc)** |
**[matrixMul](./Samples/matrixMul)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[shfl_scan](./Samples/shfl_scan)** | **[clock](./Samples/clock)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[MC_EstimatePiP](./Samples/MC_EstimatePiP)** |
**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaNvSci](./Samples/cudaNvSci)** | **[transpose](./Samples/transpose)** | **[simpleMultiCopy](./Samples/simpleMultiCopy)** | **[cuDLAErrorReporting](./Samples/cuDLAErrorReporting)** | **[concurrentKernels](./Samples/concurrentKernels)** |
**[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[streamOrderedAllocationIPC](./Samples/streamOrderedAllocationIPC)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cppIntegration](./Samples/cppIntegration)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** |
**[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[nvJPEG](./Samples/nvJPEG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[MC_EstimatePiInlineP](./Samples/MC_EstimatePiInlineP)** |
**[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[simpleAssert](./Samples/simpleAssert)** | **[simpleTemplates](./Samples/simpleTemplates)** |
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[simpleGL](./Samples/simpleGL)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[cuHook](./Samples/cuHook)** | **[simpleCUDA2GL](./Samples/simpleCUDA2GL)** | **[matrixMul](./Samples/matrixMul)** | **[quasirandomGenerator_nvrtc](./Samples/quasirandomGenerator_nvrtc)** |
**[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[deviceQuery](./Samples/deviceQuery)** | **[systemWideAtomics](./Samples/systemWideAtomics)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleTextureDrv](./Samples/simpleTextureDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[simpleCallback](./Samples/simpleCallback)** | **[batchCUBLAS](./Samples/batchCUBLAS)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleAtomicIntrinsics](./Samples/simpleAtomicIntrinsics)** |
**[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[cudaNvSciNvMedia](./Samples/cudaNvSciNvMedia)** | **[newdelete](./Samples/newdelete)** | **[bicubicTexture](./Samples/bicubicTexture)** | **[dxtc](./Samples/dxtc)** | **[cudaOpenMP](./Samples/cudaOpenMP)** |
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[cdpQuadtree](./Samples/cdpQuadtree)** | **[cdpBezierTessellation](./Samples/cdpBezierTessellation)** | **[randomFog](./Samples/randomFog)** | **[bilateralFilter](./Samples/bilateralFilter)** | **[conjugateGradient](./Samples/conjugateGradient)** |
**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[reduction](./Samples/reduction)** | **[particles](./Samples/particles)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[Mandelbrot](./Samples/Mandelbrot)** | **[binomialOptions_nvrtc](./Samples/binomialOptions_nvrtc)** |
**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[cudaNvSci](./Samples/cudaNvSci)** | **[mergeSort](./Samples/mergeSort)** | **[HSOpticalFlow](./Samples/HSOpticalFlow)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** |
**[convolutionTexture](./Samples/convolutionTexture)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
**[eigenvalues](./Samples/eigenvalues)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[cuSolverSp_LowlevelCholesky](./Samples/cuSolverSp_LowlevelCholesky)** | **[topologyQuery](./Samples/topologyQuery)** |
**[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[volumeRender](./Samples/volumeRender)** | **[stereoDisparity](./Samples/stereoDisparity)** | **[simpleTexture](./Samples/simpleTexture)** |
**[simpleStreams](./Samples/simpleStreams)** | **[smokeParticles](./Samples/smokeParticles)** | **[simpleMultiGPU](./Samples/simpleMultiGPU)** | **[deviceQueryDrv](./Samples/deviceQueryDrv)** |
**[fastWalshTransform](./Samples/fastWalshTransform)** | **[quasirandomGenerator](./Samples/quasirandomGenerator)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
**[conjugateGradientUM](./Samples/conjugateGradientUM)** | **[simpleVoteIntrinsics_nvrtc](./Samples/simpleVoteIntrinsics_nvrtc)** | **[simpleLayeredTexture](./Samples/simpleLayeredTexture)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** |
**[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[matrixMulCUBLAS](./Samples/matrixMulCUBLAS)** | **[histEqualizationNPP](./Samples/histEqualizationNPP)** |
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[recursiveGaussian](./Samples/recursiveGaussian)** | **[imageDenoising](./Samples/imageDenoising)** | **[FunctionPointers](./Samples/FunctionPointers)** |
**[simpleGL](./Samples/simpleGL)** | **[segmentationTreeThrust](./Samples/segmentationTreeThrust)** | **[scalarProd](./Samples/scalarProd)** | **[SobolQRNG](./Samples/SobolQRNG)** |
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simplePitchLinearTexture](./Samples/simplePitchLinearTexture)** | **[freeImageInteropNPP](./Samples/freeImageInteropNPP)** |
**[template](./Samples/template)** | **[dwtHaar1D](./Samples/dwtHaar1D)** | **[postProcessGL](./Samples/postProcessGL)** | **[BlackScholes](./Samples/BlackScholes)** |
**[volumeFiltering](./Samples/volumeFiltering)** | **[simpleCUFFT_callback](./Samples/simpleCUFFT_callback)** | **[UnifiedMemoryStreams](./Samples/UnifiedMemoryStreams)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** |
**[deviceQuery](./Samples/deviceQuery)** | **[simpleHyperQ](./Samples/simpleHyperQ)** | **[systemWideAtomics](./Samples/systemWideAtomics)** | **[cuSolverSp_LowlevelQR](./Samples/cuSolverSp_LowlevelQR)** |
**[inlinePTX](./Samples/inlinePTX)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[cuDLAHybridMode](./Samples/cuDLAHybridMode)** |
**[asyncAPI](./Samples/asyncAPI)** | **[MC_EstimatePiInlineQ](./Samples/MC_EstimatePiInlineQ)** | **[scan](./Samples/scan)** | **[simpleCooperativeGroups](./Samples/simpleCooperativeGroups)** |
**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleTemplates_nvrtc](./Samples/simpleTemplates_nvrtc)** | **[simpleTexture3D](./Samples/simpleTexture3D)** | **[lineOfSight](./Samples/lineOfSight)** |
**[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[binomialOptions](./Samples/binomialOptions)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[bindlessTexture](./Samples/bindlessTexture)** |
**[simpleCUFFT_2d_MGPU](./Samples/simpleCUFFT_2d_MGPU)** | **[simplePrintf](./Samples/simplePrintf)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[histogram](./Samples/histogram)** |
**[matrixMulDynlinkJIT](./Samples/matrixMulDynlinkJIT)** | **[simpleP2P](./Samples/simpleP2P)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[vectorAddDrv](./Samples/vectorAddDrv)** |
**[sortingNetworks](./Samples/sortingNetworks)** | **[alignedTypes](./Samples/alignedTypes)** | **[inlinePTX_nvrtc](./Samples/inlinePTX_nvrtc)** | **[simpleCubemapTexture](./Samples/simpleCubemapTexture)** |
**[simpleIPC](./Samples/simpleIPC)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[radixSortThrust](./Samples/radixSortThrust)** | **[MonteCarloMultiGPU](./Samples/MonteCarloMultiGPU)** |
**[cudaNvSciNvMedia](./Samples/cudaNvSciNvMedia)** | **[vectorAdd](./Samples/vectorAdd)** | **[cdpSimplePrint](./Samples/cdpSimplePrint)** | **[FilterBorderControlNPP](./Samples/FilterBorderControlNPP)** |
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[convolutionSeparable](./Samples/convolutionSeparable)** | **[nbody](./Samples/nbody)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** |
**[simpleSeparateCompilation](./Samples/simpleSeparateCompilation)** | **[c++11_cuda](./Samples/c++11_cuda)** | **[fluidsGL](./Samples/fluidsGL)** | **[bandwidthTest](./Samples/bandwidthTest)** |
**[clock_nvrtc](./Samples/clock_nvrtc)** | **[graphMemoryNodes](./Samples/graphMemoryNodes)** | **[cdpQuadtree](./Samples/cdpQuadtree)** | **[interval](./Samples/interval)** |
**[boxFilter](./Samples/boxFilter)** | **[matrixMul_nvrtc](./Samples/matrixMul_nvrtc)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[cppOverload](./Samples/cppOverload)** |
**[marchingCubes](./Samples/marchingCubes)** | **[cuSolverRf](./Samples/cuSolverRf)** | **[BlackScholes_nvrtc](./Samples/BlackScholes_nvrtc)** | **[cdpAdvancedQuicksort](./Samples/cdpAdvancedQuicksort)** |
**[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[cdpSimpleQuicksort](./Samples/cdpSimpleQuicksort)** | **[simpleOccupancy](./Samples/simpleOccupancy)** | **[simpleSurfaceWrite](./Samples/simpleSurfaceWrite)** |
**[simpleCUFFT_MGPU](./Samples/simpleCUFFT_MGPU)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[convolutionFFT2D](./Samples/convolutionFFT2D)** | **[reduction](./Samples/reduction)** |
**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[SobelFilter](./Samples/SobelFilter)** | **[dct8x8](./Samples/dct8x8)** | **[fp16ScalarProduct](./Samples/fp16ScalarProduct)** |
**[FDTD3d](./Samples/FDTD3d)** | **[oceanFFT](./Samples/oceanFFT)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[StreamPriorities](./Samples/StreamPriorities)** |
**[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[MC_SingleAsianOptionP](./Samples/MC_SingleAsianOptionP)** | **[simpleMPI](./Samples/simpleMPI)** |
#### Windows #### Windows
**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[shfl_scan](./Samples/shfl_scan)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[concurrentKernels](./Samples/concurrentKernels)** | **[simpleAssert_nvrtc](./Samples/simpleAssert_nvrtc)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[graphMemoryFootprint](./Samples/graphMemoryFootprint)** | **[MC_EstimatePiQ](./Samples/MC_EstimatePiQ)** |
---|---|---|---| ---|---|---|---|
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[nvJPEG](./Samples/nvJPEG)** | **[reductionMultiBlockCG](./Samples/reductionMultiBlockCG)** | **[conjugateGradientPrecond](./Samples/conjugateGradientPrecond)** | **[ptxjit](./Samples/ptxjit)** | **[threadMigration](./Samples/threadMigration)** |
**[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[matrixMul](./Samples/matrixMul)** | **[threadFenceReduction](./Samples/threadFenceReduction)** | **[simpleAtomicIntrinsics_nvrtc](./Samples/simpleAtomicIntrinsics_nvrtc)** | **[shfl_scan](./Samples/shfl_scan)** | **[clock](./Samples/clock)** |
**[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[MC_EstimatePiP](./Samples/MC_EstimatePiP)** | **[transpose](./Samples/transpose)** | **[simpleMultiCopy](./Samples/simpleMultiCopy)** |
**[cudaOpenMP](./Samples/cudaOpenMP)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[concurrentKernels](./Samples/concurrentKernels)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cppIntegration](./Samples/cppIntegration)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** |
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[nvJPEG](./Samples/nvJPEG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[MC_EstimatePiInlineP](./Samples/MC_EstimatePiInlineP)** |
**[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[simpleD3D10](./Samples/simpleD3D10)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[fluidsD3D9](./Samples/fluidsD3D9)** | **[boxFilterNPP](./Samples/boxFilterNPP)** |
**[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[simpleGL](./Samples/simpleGL)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[simpleAssert](./Samples/simpleAssert)** | **[simpleTemplates](./Samples/simpleTemplates)** | **[simpleCUDA2GL](./Samples/simpleCUDA2GL)** | **[matrixMul](./Samples/matrixMul)** |
**[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simpleD3D11](./Samples/simpleD3D11)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[deviceQuery](./Samples/deviceQuery)** | **[quasirandomGenerator_nvrtc](./Samples/quasirandomGenerator_nvrtc)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleTextureDrv](./Samples/simpleTextureDrv)** |
**[matrixMulDrv](./Samples/matrixMulDrv)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleCallback](./Samples/simpleCallback)** | **[SLID3D10Texture](./Samples/SLID3D10Texture)** | **[batchCUBLAS](./Samples/batchCUBLAS)** |
**[simpleD3D12](./Samples/simpleD3D12)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleAtomicIntrinsics](./Samples/simpleAtomicIntrinsics)** | **[newdelete](./Samples/newdelete)** | **[bicubicTexture](./Samples/bicubicTexture)** |
**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[dxtc](./Samples/dxtc)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[cdpBezierTessellation](./Samples/cdpBezierTessellation)** | **[randomFog](./Samples/randomFog)** |
**[cdpQuadtree](./Samples/cdpQuadtree)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[bilateralFilter](./Samples/bilateralFilter)** | **[conjugateGradient](./Samples/conjugateGradient)** | **[particles](./Samples/particles)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
**[reduction](./Samples/reduction)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[Mandelbrot](./Samples/Mandelbrot)** | **[binomialOptions_nvrtc](./Samples/binomialOptions_nvrtc)** | **[simpleD3D10RenderTarget](./Samples/simpleD3D10RenderTarget)** | **[mergeSort](./Samples/mergeSort)** |
**[HSOpticalFlow](./Samples/HSOpticalFlow)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[convolutionTexture](./Samples/convolutionTexture)** | **[simpleVulkan](./Samples/simpleVulkan)** |
**[simpleD3D9Texture](./Samples/simpleD3D9Texture)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[eigenvalues](./Samples/eigenvalues)** |
**[simpleD3D10Texture](./Samples/simpleD3D10Texture)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[cuSolverSp_LowlevelCholesky](./Samples/cuSolverSp_LowlevelCholesky)** | **[topologyQuery](./Samples/topologyQuery)** |
**[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[volumeRender](./Samples/volumeRender)** | **[stereoDisparity](./Samples/stereoDisparity)** | **[simpleTexture](./Samples/simpleTexture)** |
**[simpleStreams](./Samples/simpleStreams)** | **[smokeParticles](./Samples/smokeParticles)** | **[simpleMultiGPU](./Samples/simpleMultiGPU)** | **[deviceQueryDrv](./Samples/deviceQueryDrv)** |
**[fastWalshTransform](./Samples/fastWalshTransform)** | **[quasirandomGenerator](./Samples/quasirandomGenerator)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
**[conjugateGradientUM](./Samples/conjugateGradientUM)** | **[simpleVoteIntrinsics_nvrtc](./Samples/simpleVoteIntrinsics_nvrtc)** | **[simpleLayeredTexture](./Samples/simpleLayeredTexture)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** |
**[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[matrixMulCUBLAS](./Samples/matrixMulCUBLAS)** | **[histEqualizationNPP](./Samples/histEqualizationNPP)** |
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[recursiveGaussian](./Samples/recursiveGaussian)** | **[imageDenoising](./Samples/imageDenoising)** | **[FunctionPointers](./Samples/FunctionPointers)** |
**[simpleGL](./Samples/simpleGL)** | **[segmentationTreeThrust](./Samples/segmentationTreeThrust)** | **[scalarProd](./Samples/scalarProd)** | **[SobolQRNG](./Samples/SobolQRNG)** |
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simplePitchLinearTexture](./Samples/simplePitchLinearTexture)** | **[freeImageInteropNPP](./Samples/freeImageInteropNPP)** |
**[template](./Samples/template)** | **[dwtHaar1D](./Samples/dwtHaar1D)** | **[simpleD3D11Texture](./Samples/simpleD3D11Texture)** | **[postProcessGL](./Samples/postProcessGL)** |
**[BlackScholes](./Samples/BlackScholes)** | **[simpleD3D11](./Samples/simpleD3D11)** | **[volumeFiltering](./Samples/volumeFiltering)** | **[UnifiedMemoryStreams](./Samples/UnifiedMemoryStreams)** |
**[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleHyperQ](./Samples/simpleHyperQ)** | **[cuSolverSp_LowlevelQR](./Samples/cuSolverSp_LowlevelQR)** |
**[inlinePTX](./Samples/inlinePTX)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[asyncAPI](./Samples/asyncAPI)** | **[MC_EstimatePiInlineQ](./Samples/MC_EstimatePiInlineQ)** |
**[scan](./Samples/scan)** | **[simpleCooperativeGroups](./Samples/simpleCooperativeGroups)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleTemplates_nvrtc](./Samples/simpleTemplates_nvrtc)** |
**[simpleTexture3D](./Samples/simpleTexture3D)** | **[lineOfSight](./Samples/lineOfSight)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[binomialOptions](./Samples/binomialOptions)** |
**[simpleAttributes](./Samples/simpleAttributes)** | **[bindlessTexture](./Samples/bindlessTexture)** | **[simpleD3D12](./Samples/simpleD3D12)** | **[simpleCUFFT_2d_MGPU](./Samples/simpleCUFFT_2d_MGPU)** |
**[simplePrintf](./Samples/simplePrintf)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[histogram](./Samples/histogram)** | **[matrixMulDynlinkJIT](./Samples/matrixMulDynlinkJIT)** |
**[simpleP2P](./Samples/simpleP2P)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[vectorAddDrv](./Samples/vectorAddDrv)** | **[sortingNetworks](./Samples/sortingNetworks)** |
**[alignedTypes](./Samples/alignedTypes)** | **[inlinePTX_nvrtc](./Samples/inlinePTX_nvrtc)** | **[simpleCubemapTexture](./Samples/simpleCubemapTexture)** | **[simpleIPC](./Samples/simpleIPC)** |
**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[radixSortThrust](./Samples/radixSortThrust)** | **[MonteCarloMultiGPU](./Samples/MonteCarloMultiGPU)** | **[vectorAdd](./Samples/vectorAdd)** |
**[VFlockingD3D10](./Samples/VFlockingD3D10)** | **[simpleD3D9](./Samples/simpleD3D9)** | **[cdpSimplePrint](./Samples/cdpSimplePrint)** | **[FilterBorderControlNPP](./Samples/FilterBorderControlNPP)** |
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[convolutionSeparable](./Samples/convolutionSeparable)** | **[nbody](./Samples/nbody)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** |
**[simpleSeparateCompilation](./Samples/simpleSeparateCompilation)** | **[c++11_cuda](./Samples/c++11_cuda)** | **[fluidsGL](./Samples/fluidsGL)** | **[bandwidthTest](./Samples/bandwidthTest)** |
**[clock_nvrtc](./Samples/clock_nvrtc)** | **[graphMemoryNodes](./Samples/graphMemoryNodes)** | **[cdpQuadtree](./Samples/cdpQuadtree)** | **[interval](./Samples/interval)** |
**[boxFilter](./Samples/boxFilter)** | **[matrixMul_nvrtc](./Samples/matrixMul_nvrtc)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[cppOverload](./Samples/cppOverload)** |
**[marchingCubes](./Samples/marchingCubes)** | **[cuSolverRf](./Samples/cuSolverRf)** | **[BlackScholes_nvrtc](./Samples/BlackScholes_nvrtc)** | **[cdpAdvancedQuicksort](./Samples/cdpAdvancedQuicksort)** |
**[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[cdpSimpleQuicksort](./Samples/cdpSimpleQuicksort)** | **[simpleOccupancy](./Samples/simpleOccupancy)** | **[simpleSurfaceWrite](./Samples/simpleSurfaceWrite)** |
**[simpleCUFFT_MGPU](./Samples/simpleCUFFT_MGPU)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[convolutionFFT2D](./Samples/convolutionFFT2D)** | **[reduction](./Samples/reduction)** |
**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[SobelFilter](./Samples/SobelFilter)** | **[dct8x8](./Samples/dct8x8)** | **[fp16ScalarProduct](./Samples/fp16ScalarProduct)** |
**[FDTD3d](./Samples/FDTD3d)** | **[oceanFFT](./Samples/oceanFFT)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
**[MC_SingleAsianOptionP](./Samples/MC_SingleAsianOptionP)** | **[simpleMPI](./Samples/simpleMPI)** |
## Dependencies ## Dependencies
@ -374,5 +447,5 @@ Answers to frequently asked questions about CUDA can be found at http://develope
## References ## References
* [CUDA Programming Guide](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html) * [CUDA Programming Guide](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html)
* [Accelerated Computing Blog](https://devblogs.nvidia.com/category/accelerated-computing/) * [Accelerated Computing Blog](https://developer.nvidia.com/blog/?tags=accelerated-computing)

View File

@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/../../Common"
],
"defines": [],
"compilerPath": "/usr/local/cuda/bin/nvcc",
"cStandard": "gnu17",
"cppStandard": "gnu++14",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.makefile-tools"
}
],
"version": 4
}

View File

@ -0,0 +1,7 @@
{
"recommendations": [
"nvidia.nsight-vscode-edition",
"ms-vscode.cpptools",
"ms-vscode.makefile-tools"
]
}

View File

@ -0,0 +1,10 @@
{
"configurations": [
{
"name": "CUDA C++: Launch",
"type": "cuda-gdb",
"request": "launch",
"program": "${workspaceFolder}/BlackScholes"
}
]
}

15
Samples/BlackScholes/.vscode/tasks.json vendored Normal file
View File

@ -0,0 +1,15 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "sample",
"type": "shell",
"command": "make dbg=1",
"problemMatcher": ["$nvcc"],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}

View File

@ -0,0 +1,243 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This sample evaluates fair call and put prices for a
* given set of European options by Black-Scholes formula.
* See supplied whitepaper for more explanations.
*/
#include <helper_functions.h> // helper functions for string parsing
#include <helper_cuda.h> // helper functions CUDA error checking and initialization
////////////////////////////////////////////////////////////////////////////////
// Process an array of optN options on CPU
////////////////////////////////////////////////////////////////////////////////
// Forward declaration with C linkage (extern "C"); no body is given at this
// point, so the definition has to be supplied at link time.
// NOTE(review): judging only by the parameter names, the first two pointers
// look like output arrays and the next three like input arrays, each holding
// optN floats — confirm against the definition.
extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
float *h_StockPrice, float *h_OptionStrike,
float *h_OptionYears, float Riskfree,
float Volatility, int optN);
////////////////////////////////////////////////////////////////////////////////
// Process an array of OptN options on GPU
////////////////////////////////////////////////////////////////////////////////
#include "BlackScholes_kernel.cuh"
////////////////////////////////////////////////////////////////////////////////
// Helper function, returning uniformly distributed
// random float in [low, high] range
////////////////////////////////////////////////////////////////////////////////
// Takes one draw from rand() and uses it to blend linearly between the two
// bounds: a draw of 0 returns `low`, a draw of RAND_MAX returns `high`.
// Advances the global rand() state by exactly one call.
float RandFloat(float low, float high) {
  // Scale the raw integer draw to a blend weight in [0, 1].
  const float weight =
      static_cast<float>(rand()) / static_cast<float>(RAND_MAX);

  // Weighted blend of the two bounds.
  return (1.0f - weight) * low + weight * high;
}
////////////////////////////////////////////////////////////////////////////////
// Data configuration
////////////////////////////////////////////////////////////////////////////////
// Number of options in each host/device array.
const int OPT_N = 4000000;

// Number of back-to-back kernel launches the GPU time is averaged over.
const int NUM_ITERATIONS = 512;

// Size in bytes of one float array with OPT_N elements.
const int OPT_SZ = static_cast<int>(sizeof(float) * OPT_N);

// Model parameters handed unchanged to the GPU kernel.
const float RISKFREE = 0.02f;
const float VOLATILITY = 0.30f;

// Integer division of a by b rounded up (for non-negative a and positive b).
#define DIV_UP(a, b) (((a) + (b) - 1) / (b))
////////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////////
// Entry point of the BlackScholes sample.
//
// Generates OPT_N random options on the host, prices them NUM_ITERATIONS times
// on the GPU (timing the launches), prices them once on the CPU as a
// reference, and compares the two result sets.
//
// The process exits with EXIT_SUCCESS when the relative L1 error of both the
// call and the put results is at most 1e-6, and with EXIT_FAILURE otherwise
// (including when host memory cannot be allocated).
int main(int argc, char **argv) {
  // Start logs
  printf("[%s] - Starting...\n", argv[0]);

  //'h_' prefix - CPU (host) memory space
  float
      // Results calculated by CPU for reference
      *h_CallResultCPU,
      *h_PutResultCPU,
      // CPU copy of GPU results
      *h_CallResultGPU, *h_PutResultGPU,
      // CPU instance of input data
      *h_StockPrice, *h_OptionStrike, *h_OptionYears;

  //'d_' prefix - GPU (device) memory space
  float
      // Results calculated by GPU
      *d_CallResult,
      *d_PutResult,
      // GPU instance of input data
      *d_StockPrice, *d_OptionStrike, *d_OptionYears;

  // Error metrics for the call results
  double delta, ref, sum_delta, sum_ref, max_delta, L1norm, gpuTime;
  // Same error metrics for the put results
  double sum_delta_put, sum_ref_put, max_delta_put, L1normPut;

  StopWatchInterface *hTimer = NULL;
  int i;

  findCudaDevice(argc, (const char **)argv);

  sdkCreateTimer(&hTimer);

  printf("Initializing data...\n");
  printf("...allocating CPU memory for options.\n");
  h_CallResultCPU = (float *)malloc(OPT_SZ);
  h_PutResultCPU = (float *)malloc(OPT_SZ);
  h_CallResultGPU = (float *)malloc(OPT_SZ);
  h_PutResultGPU = (float *)malloc(OPT_SZ);
  h_StockPrice = (float *)malloc(OPT_SZ);
  h_OptionStrike = (float *)malloc(OPT_SZ);
  h_OptionYears = (float *)malloc(OPT_SZ);

  // The buffers are written unconditionally below, so bail out now rather
  // than dereference a null pointer if any allocation failed.
  if (h_CallResultCPU == NULL || h_PutResultCPU == NULL ||
      h_CallResultGPU == NULL || h_PutResultGPU == NULL ||
      h_StockPrice == NULL || h_OptionStrike == NULL ||
      h_OptionYears == NULL) {
    fprintf(stderr, "Failed to allocate CPU memory for options.\n");
    exit(EXIT_FAILURE);
  }

  printf("...allocating GPU memory for options.\n");
  checkCudaErrors(cudaMalloc((void **)&d_CallResult, OPT_SZ));
  checkCudaErrors(cudaMalloc((void **)&d_PutResult, OPT_SZ));
  checkCudaErrors(cudaMalloc((void **)&d_StockPrice, OPT_SZ));
  checkCudaErrors(cudaMalloc((void **)&d_OptionStrike, OPT_SZ));
  checkCudaErrors(cudaMalloc((void **)&d_OptionYears, OPT_SZ));

  printf("...generating input data in CPU mem.\n");
  // Fixed seed so every run prices the same option set
  srand(5347);

  // Generate options set
  for (i = 0; i < OPT_N; i++) {
    h_CallResultCPU[i] = 0.0f;
    h_PutResultCPU[i] = -1.0f;
    h_StockPrice[i] = RandFloat(5.0f, 30.0f);
    h_OptionStrike[i] = RandFloat(1.0f, 100.0f);
    h_OptionYears[i] = RandFloat(0.25f, 10.0f);
  }

  printf("...copying input data to GPU mem.\n");
  // Copy options data to GPU memory for further processing
  checkCudaErrors(
      cudaMemcpy(d_StockPrice, h_StockPrice, OPT_SZ, cudaMemcpyHostToDevice));
  checkCudaErrors(cudaMemcpy(d_OptionStrike, h_OptionStrike, OPT_SZ,
                             cudaMemcpyHostToDevice));
  checkCudaErrors(
      cudaMemcpy(d_OptionYears, h_OptionYears, OPT_SZ, cudaMemcpyHostToDevice));
  printf("Data init done.\n\n");

  printf("Executing Black-Scholes GPU kernel (%i iterations)...\n",
         NUM_ITERATIONS);
  // Make sure all prior device work is finished before the timer starts
  checkCudaErrors(cudaDeviceSynchronize());
  sdkResetTimer(&hTimer);
  sdkStartTimer(&hTimer);

  for (i = 0; i < NUM_ITERATIONS; i++) {
    // Each thread prices two options (one float2), hence OPT_N / 2 threads
    BlackScholesGPU<<<DIV_UP((OPT_N / 2), 128), 128 /*480, 128*/>>>(
        (float2 *)d_CallResult, (float2 *)d_PutResult, (float2 *)d_StockPrice,
        (float2 *)d_OptionStrike, (float2 *)d_OptionYears, RISKFREE, VOLATILITY,
        OPT_N);
    getLastCudaError("BlackScholesGPU() execution failed\n");
  }

  // Wait for the queued launches to complete before stopping the timer
  checkCudaErrors(cudaDeviceSynchronize());
  sdkStopTimer(&hTimer);
  gpuTime = sdkGetTimerValue(&hTimer) / NUM_ITERATIONS;

  // Both call and put is calculated
  printf("Options count             : %i     \n", 2 * OPT_N);
  printf("BlackScholesGPU() time    : %f msec\n", gpuTime);
  // Three input arrays are read and two result arrays written per launch
  printf("Effective memory bandwidth: %f GB/s\n",
         ((double)(5 * OPT_N * sizeof(float)) * 1E-9) / (gpuTime * 1E-3));
  printf("Gigaoptions per second    : %f     \n\n",
         ((double)(2 * OPT_N) * 1E-9) / (gpuTime * 1E-3));

  // Arguments are cast/suffixed so they match the %u conversions
  printf(
      "BlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u "
      "options, NumDevsUsed = %u, Workgroup = %u\n",
      (((double)(2.0 * OPT_N) * 1.0E-9) / (gpuTime * 1.0E-3)), gpuTime * 1e-3,
      (unsigned int)(2 * OPT_N), 1u, 128u);

  printf("\nReading back GPU results...\n");
  // Read back GPU results to compare them to CPU results
  checkCudaErrors(cudaMemcpy(h_CallResultGPU, d_CallResult, OPT_SZ,
                             cudaMemcpyDeviceToHost));
  checkCudaErrors(
      cudaMemcpy(h_PutResultGPU, d_PutResult, OPT_SZ, cudaMemcpyDeviceToHost));

  printf("Checking the results...\n");
  printf("...running CPU calculations.\n\n");
  // Calculate options values on CPU
  BlackScholesCPU(h_CallResultCPU, h_PutResultCPU, h_StockPrice, h_OptionStrike,
                  h_OptionYears, RISKFREE, VOLATILITY, OPT_N);

  printf("Comparing the results...\n");
  // Calculate max absolute difference and L1 distance
  // between CPU and GPU results, separately for calls and puts
  sum_delta = 0;
  sum_ref = 0;
  max_delta = 0;
  sum_delta_put = 0;
  sum_ref_put = 0;
  max_delta_put = 0;

  for (i = 0; i < OPT_N; i++) {
    // Call results
    ref = h_CallResultCPU[i];
    delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]);

    if (delta > max_delta) {
      max_delta = delta;
    }

    sum_delta += delta;
    sum_ref += fabs(ref);

    // Put results
    ref = h_PutResultCPU[i];
    delta = fabs(h_PutResultCPU[i] - h_PutResultGPU[i]);

    if (delta > max_delta_put) {
      max_delta_put = delta;
    }

    sum_delta_put += delta;
    sum_ref_put += fabs(ref);
  }

  L1norm = sum_delta / sum_ref;
  L1normPut = sum_delta_put / sum_ref_put;
  printf("L1 norm: %E\n", L1norm);
  printf("Max absolute error: %E\n\n", max_delta);
  printf("Put L1 norm: %E\n", L1normPut);
  printf("Put max absolute error: %E\n\n", max_delta_put);

  printf("Shutting down...\n");
  printf("...releasing GPU memory.\n");
  checkCudaErrors(cudaFree(d_OptionYears));
  checkCudaErrors(cudaFree(d_OptionStrike));
  checkCudaErrors(cudaFree(d_StockPrice));
  checkCudaErrors(cudaFree(d_PutResult));
  checkCudaErrors(cudaFree(d_CallResult));

  printf("...releasing CPU memory.\n");
  free(h_OptionYears);
  free(h_OptionStrike);
  free(h_StockPrice);
  free(h_PutResultGPU);
  free(h_CallResultGPU);
  free(h_PutResultCPU);
  free(h_CallResultCPU);
  sdkDeleteTimer(&hTimer);
  printf("Shutdown done.\n");

  printf("\n[BlackScholes] - Test Summary\n");

  // Fail when either result set deviates too far from the CPU reference
  if (L1norm > 1e-6 || L1normPut > 1e-6) {
    printf("Test failed!\n");
    exit(EXIT_FAILURE);
  }

  printf(
      "\nNOTE: The CUDA Samples are not meant for performance measurements. "
      "Results may vary when GPU Boost is enabled.\n\n");
  printf("Test passed\n");
  exit(EXIT_SUCCESS);
}

View File

@ -0,0 +1,86 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <math.h>
////////////////////////////////////////////////////////////////////////////////
// Polynomial approximation of cumulative normal distribution function
////////////////////////////////////////////////////////////////////////////////
// Approximates the cumulative normal distribution function at d.
//
// A five-term polynomial in t = 1 / (1 + 0.2316419 * |d|) is scaled by the
// normal density at d; the product is returned as-is for d <= 0 and
// subtracted from 1 for d > 0.
static double CND(double d) {
  // Polynomial coefficients, lowest order first
  const double c1 = 0.31938153;
  const double c2 = -0.356563782;
  const double c3 = 1.781477937;
  const double c4 = -1.821255978;
  const double c5 = 1.330274429;
  // 1 / sqrt(2 * pi)
  const double invSqrt2Pi = 0.39894228040143267793994605993438;

  const double t = 1.0 / (1.0 + 0.2316419 * fabs(d));

  // Nested (Horner) evaluation of the polynomial in t
  const double poly = t * (c1 + t * (c2 + t * (c3 + t * (c4 + t * c5))));

  const double tail = invSqrt2Pi * exp(-0.5 * d * d) * poly;

  return (d > 0) ? 1.0 - tail : tail;
}
////////////////////////////////////////////////////////////////////////////////
// Black-Scholes formula for both call and put
////////////////////////////////////////////////////////////////////////////////
static void BlackScholesBodyCPU(float &callResult, float &putResult,
float Sf, // Stock price
float Xf, // Option strike
float Tf, // Option years
float Rf, // Riskless rate
float Vf // Volatility rate
) {
double S = Sf, X = Xf, T = Tf, R = Rf, V = Vf;
double sqrtT = sqrt(T);
double d1 = (log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT);
double d2 = d1 - V * sqrtT;
double CNDD1 = CND(d1);
double CNDD2 = CND(d2);
// Calculate Call and Put simultaneously
double expRT = exp(-R * T);
callResult = (float)(S * CNDD1 - X * expRT * CNDD2);
putResult = (float)(X * expRT * (1.0 - CNDD2) - S * (1.0 - CNDD1));
}
////////////////////////////////////////////////////////////////////////////////
// Process an array of optN options
////////////////////////////////////////////////////////////////////////////////
// Prices optN options on the CPU. For each index the stock price, strike and
// years are read from the input arrays and the call/put prices are written to
// the same index of h_CallResult / h_PutResult. Riskfree and Volatility are
// shared by all options.
extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
                                float *h_StockPrice, float *h_OptionStrike,
                                float *h_OptionYears, float Riskfree,
                                float Volatility, int optN) {
  int idx = 0;

  while (idx < optN) {
    float &callOut = h_CallResult[idx];
    float &putOut = h_PutResult[idx];

    BlackScholesBodyCPU(callOut, putOut, h_StockPrice[idx],
                        h_OptionStrike[idx], h_OptionYears[idx], Riskfree,
                        Volatility);
    ++idx;
  }
}

View File

@ -0,0 +1,106 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
////////////////////////////////////////////////////////////////////////////////
// Polynomial approximation of cumulative normal distribution function
////////////////////////////////////////////////////////////////////////////////
// Device-side single-precision approximation of the cumulative normal
// distribution function at d. Uses the __fdividef and __expf intrinsics and
// the same five-term polynomial form as the CPU reference; the product is
// returned as-is for d <= 0 and subtracted from 1 for d > 0.
__device__ inline float cndGPU(float d) {
  // Polynomial coefficients, lowest order first
  const float c1 = 0.31938153f;
  const float c2 = -0.356563782f;
  const float c3 = 1.781477937f;
  const float c4 = -1.821255978f;
  const float c5 = 1.330274429f;
  // 1 / sqrt(2 * pi)
  const float invSqrt2Pi = 0.39894228040143267793994605993438f;

  const float t = __fdividef(1.0f, (1.0f + 0.2316419f * fabsf(d)));

  // Kept as one expression so the arithmetic matches the original exactly
  const float tail = invSqrt2Pi * __expf(-0.5f * d * d) *
                     (t * (c1 + t * (c2 + t * (c3 + t * (c4 + t * c5)))));

  return (d > 0) ? (1.0f - tail) : tail;
}
////////////////////////////////////////////////////////////////////////////////
// Black-Scholes formula for both call and put
////////////////////////////////////////////////////////////////////////////////
// Device-side Black-Scholes pricing of a single option in single precision.
// Writes the call price to CallResult and the put price to PutResult, using
// the __fdividef, __logf and __expf intrinsics together with rsqrtf.
__device__ inline void BlackScholesBodyGPU(float &CallResult, float &PutResult,
                                           float S,  // Stock price
                                           float X,  // Option strike
                                           float T,  // Option years
                                           float R,  // Riskless rate
                                           float V   // Volatility rate
) {
  // sqrt(T), obtained as the reciprocal of rsqrtf(T)
  const float rootT = __fdividef(1.0F, rsqrtf(T));

  const float d1 =
      __fdividef(__logf(S / X) + (R + 0.5f * V * V) * T, V * rootT);
  const float d2 = d1 - V * rootT;

  const float cndD1 = cndGPU(d1);
  const float cndD2 = cndGPU(d2);

  // Discount factor applied to the strike; shared by call and put
  const float discount = __expf(-R * T);

  CallResult = S * cndD1 - X * discount * cndD2;
  PutResult = X * discount * (1.0f - cndD2) - S * (1.0f - cndD1);
}
////////////////////////////////////////////////////////////////////////////////
// Process an array of optN options on GPU
////////////////////////////////////////////////////////////////////////////////
// Kernel: prices optN options, two per thread.
//
// All arrays are accessed as float2, so thread k handles options 2k and 2k+1
// and only the first optN / 2 threads do any work. Because of the integer
// division, an odd optN leaves the final option unprocessed.
// The kernel is declared with a launch bound of 128 threads per block.
__launch_bounds__(128) __global__
    void BlackScholesGPU(float2 *__restrict d_CallResult,
                         float2 *__restrict d_PutResult,
                         float2 *__restrict d_StockPrice,
                         float2 *__restrict d_OptionStrike,
                         float2 *__restrict d_OptionYears, float Riskfree,
                         float Volatility, int optN) {
  // Index of the option pair handled by this thread
  const int pairIdx = blockDim.x * blockIdx.x + threadIdx.x;

  // Threads beyond the last pair have nothing to do
  if (pairIdx >= (optN / 2)) {
    return;
  }

  // Fetch both options of the pair
  const float2 stock = d_StockPrice[pairIdx];
  const float2 strike = d_OptionStrike[pairIdx];
  const float2 years = d_OptionYears[pairIdx];

  // Calculating 2 options per thread to increase ILP (instruction level
  // parallelism)
  float2 call;
  float2 put;
  BlackScholesBodyGPU(call.x, put.x, stock.x, strike.x, years.x, Riskfree,
                      Volatility);
  BlackScholesBodyGPU(call.y, put.y, stock.y, strike.y, years.y, Riskfree,
                      Volatility);

  d_CallResult[pairIdx] = call;
  d_PutResult[pairIdx] = put;
}

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2017
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BlackScholes", "BlackScholes_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,114 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>BlackScholes_vs2017</RootNamespace>
<ProjectName>BlackScholes</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 11.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
</Link>
<CudaCompile>
  <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
  <!-- All extra nvcc options live in ONE AdditionalOptions element: a second
       definition that omits %(AdditionalOptions) replaces the first, which
       dropped the C4819 warning suppression passed to the host compiler. -->
  <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 %(AdditionalOptions)</AdditionalOptions>
  <Include>./;../../Common</Include>
  <Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<CudaCompile Include="BlackScholes.cu" />
<ClCompile Include="BlackScholes_gold.cpp" />
<None Include="BlackScholes_kernel.cuh" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 11.5.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2019
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BlackScholes", "BlackScholes_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,110 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>BlackScholes_vs2019</RootNamespace>
<ProjectName>BlackScholes</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 11.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
</Link>
<CudaCompile>
  <CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
  <!-- All extra nvcc options live in ONE AdditionalOptions element: a second
       definition that omits %(AdditionalOptions) replaces the first, which
       dropped the C4819 warning suppression passed to the host compiler. -->
  <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 %(AdditionalOptions)</AdditionalOptions>
  <Include>./;../../Common</Include>
  <Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<CudaCompile Include="BlackScholes.cu" />
<ClCompile Include="BlackScholes_gold.cpp" />
<None Include="BlackScholes_kernel.cuh" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 11.5.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,365 @@
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project is only supported on Mac OS X and Linux platforms
#
################################################################################
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
# Architecture of the build machine, as reported by `uname -m`.
HOST_ARCH := $(shell uname -m)
# Build for the host architecture unless TARGET_ARCH has already been set.
TARGET_ARCH ?= $(HOST_ARCH)
# Only the architectures listed here are accepted; anything else aborts the
# build in the final `else` branch.
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
# Cross build: the host cannot be queried, so the word size is derived from
# the target name.
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
# Native build: use the word size reported by `getconf LONG_BIT`.
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
# An aarch64 host whose CUDA installation contains targets/sbsa-linux is
# treated as an sbsa system: both HOST_ARCH and TARGET_ARCH are switched.
ifeq ($(HOST_ARCH),aarch64)
# $(wildcard ...) expands to the path only when it exists, so the directory
# test needs no `ls` subprocess at parse time.
ifneq (,$(wildcard $(CUDA_PATH)/targets/sbsa-linux))
HOST_ARCH := sbsa
TARGET_ARCH := sbsa
endif
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
# Operating system of the build machine: `uname -s` output, lower-cased.
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
# Build for the host OS unless TARGET_OS has already been set.
TARGET_OS ?= $(HOST_OS)
# Abort on any target OS outside the supported list.
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),sbsa)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
LDFLAGS += -lsocket
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
ifdef TARGET_OVERRIDE
LDFLAGS += -lslog2
endif
ifneq ($(TARGET_FS),)
LDFLAGS += -L$(TARGET_FS)/usr/lib
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
CCFLAGS += -I$(TARGET_FS)/../include
endif
endif
endif
ifdef TARGET_OVERRIDE # cuda toolkit targets override
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
endif
# Install directory of different arch
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
# `make dbg=1` selects a debug build: -g and -G are added to the nvcc flags
# and BUILD_TYPE becomes "debug". Any other value of dbg yields "release".
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
SAMPLE_ENABLED := 1
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../Common
LIBRARIES :=
################################################################################
#Detect if installed version of GCC supports required C++11
# The check only runs when TARGET_OS is linux. The -dumpversion output of
# $(HOST_COMPILER) is turned into one integer (its first three dot-separated
# fields joined together, then "00" appended) and compared against 51000.
# When the comparison fails the sample is waived via SAMPLE_ENABLED := 0.
# NOTE(review): joining the fields digit-by-digit is not a true version
# comparison (field widths are not normalised) - confirm it behaves as
# intended for every host compiler that has to be supported.
ifeq ($(TARGET_OS),linux)
# $(space) expands to a single space so that $(subst) below can strip spaces.
empty :=
space := $(empty) $(empty)
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
#Create version number without "."
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
# Make sure the version number has at least 3 decimals
GCCVERSION += 00
# Remove spaces from the version number
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
#$(warning $(GCCVERSION))
# expr prints 1 when the comparison holds and 0 otherwise.
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
ifeq ($(IS_MIN_VERSION), 1)
$(info >>> GCC Version is greater or equal to 5.1.0 <<<)
else
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
SAMPLE_ENABLED := 0
endif
endif
# Gencode arguments
# SMS lists the SM architectures to build for. "?=" keeps any value supplied
# by the caller (e.g. make SMS="50 60"); the armv7l/aarch64 default list
# additionally contains 72.
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
else
SMS ?= 35 37 50 52 60 61 70 75 80 86
endif
# An explicitly empty SMS waives the sample.
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# GENCODE_FLAGS is only derived from SMS when it has not been set already.
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
# NOTE(review): $(sort) orders words lexically, so "highest" is only the
# numeric maximum while all entries have the same number of digits - confirm
# before adding three-digit SM values.
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
# Sample-specific nvcc options, appended after ALL_LDFLAGS was assembled from
# ALL_CCFLAGS above.
ALL_CCFLAGS += -maxrregcount=16 --threads 0 --std=c++11
# Every recipe below starts with $(EXEC). For a waived sample EXEC becomes an
# echo, so the commands are printed instead of being executed.
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules

# None of these names is a file produced by the build. Declaring them phony
# keeps a stray file called e.g. "clean" or "build" in the sample directory
# from making the target look up to date and silently skipping its recipe.
.PHONY: all build check.deps run clean clobber

# Default goal: build the sample executable.
all: build

build: BlackScholes

# Reports whether the dependency checks above left the sample enabled.
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Device and host sources are both compiled through nvcc.
BlackScholes.o:BlackScholes.cu
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

BlackScholes_gold.o:BlackScholes_gold.cpp
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the executable, then copy it into the shared
# ../../bin/<arch>/<os>/<build type> directory.
BlackScholes: BlackScholes.o BlackScholes_gold.o
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

# Build if necessary, then run the sample from the current directory.
run: build
	$(EXEC) ./BlackScholes

# Remove the local build products and the copy placed under ../../bin.
clean:
	rm -f BlackScholes BlackScholes.o BlackScholes_gold.o
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/BlackScholes

clobber: clean

View File

@ -0,0 +1,71 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
<name>BlackScholes</name>
<cflags>
<flag>-maxrregcount=16</flag>
</cflags>
<description><![CDATA[This sample evaluates fair call and put prices for a given set of European options by Black-Scholes formula.]]></description>
<devicecompilation>whole</devicecompilation>
<includepaths>
<path>./</path>
<path>../</path>
<path>../../Common</path>
</includepaths>
<keyconcepts>
<concept level="basic">Computational Finance</concept>
</keyconcepts>
<keywords>
<keyword>CUDA</keyword>
<keyword>Computational Finance</keyword>
<keyword>option pricing</keyword>
<keyword>Black-Scholes</keyword>
</keywords>
<libraries>
</libraries>
<librarypaths>
</librarypaths>
<nsight_eclipse>true</nsight_eclipse>
<primary_file>BlackScholes.cu</primary_file>
<scopes>
<scope>1:CUDA Basic Topics</scope>
<scope>3:Computational Finance</scope>
</scopes>
<sm-arch>sm35</sm-arch>
<sm-arch>sm37</sm-arch>
<sm-arch>sm50</sm-arch>
<sm-arch>sm52</sm-arch>
<sm-arch>sm60</sm-arch>
<sm-arch>sm61</sm-arch>
<sm-arch>sm70</sm-arch>
<sm-arch>sm72</sm-arch>
<sm-arch>sm75</sm-arch>
<sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch>
<supported_envs>
<env>
<arch>x86_64</arch>
<platform>linux</platform>
</env>
<env>
<platform>windows7</platform>
</env>
<env>
<arch>x86_64</arch>
<platform>macosx</platform>
</env>
<env>
<arch>arm</arch>
</env>
<env>
<arch>ppc64le</arch>
<platform>linux</platform>
</env>
</supported_envs>
<supported_sm_architectures>
<include>all</include>
</supported_sm_architectures>
<title>Black-Scholes Option Pricing</title>
<type>exe</type>
<whitepaper>doc\BlackScholes.pdf</whitepaper>
</entry>

View File

@ -0,0 +1,67 @@
# BlackScholes - Black-Scholes Option Pricing
## Description
This sample evaluates fair call and put prices for a given set of European options by Black-Scholes formula.
## Key Concepts
Computational Finance
## Supported SM Architectures
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
Linux, Windows
## Supported CPU Architecture
x86_64, ppc64le, armv7l
## CUDA APIs involved
## Prerequisites
Download and install the [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run
### Windows
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
```
*_vs<version>.sln - for Visual Studio <version>
```
Each individual sample has its own set of solution files in its directory.
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
### Linux
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
```
$ cd <sample_dir>
$ make
```
The samples makefiles can take advantage of certain options:
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
* **dbg=1** - build with debug symbols
```
$ make dbg=1
```
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
```
$ make SMS="50 60"
```
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
```
$ make HOST_COMPILER=g++
```
## References (for more details)

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/../../Common"
],
"defines": [],
"compilerPath": "/usr/local/cuda/bin/nvcc",
"cStandard": "gnu17",
"cppStandard": "gnu++14",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.makefile-tools"
}
],
"version": 4
}

View File

@ -0,0 +1,7 @@
{
"recommendations": [
"nvidia.nsight-vscode-edition",
"ms-vscode.cpptools",
"ms-vscode.makefile-tools"
]
}

View File

@ -0,0 +1,10 @@
{
"configurations": [
{
"name": "CUDA C++: Launch",
"type": "cuda-gdb",
"request": "launch",
"program": "${workspaceFolder}/BlackScholes_nvrtc"
}
]
}

View File

@ -0,0 +1,15 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "sample",
"type": "shell",
"command": "make dbg=1",
"problemMatcher": ["$nvcc"],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}

View File

@ -0,0 +1,269 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This sample evaluates fair call and put prices for a
* given set of European options by Black-Scholes formula.
* See supplied whitepaper for more explanations.
*/
#include <cuda_runtime.h>
#include <nvrtc_helper.h>
#include <helper_functions.h> // helper functions for string parsing
////////////////////////////////////////////////////////////////////////////////
// Process an array of optN options on CPU
////////////////////////////////////////////////////////////////////////////////
extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
float *h_StockPrice, float *h_OptionStrike,
float *h_OptionYears, float Riskfree,
float Volatility, int optN);
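////////////////////////////////////////////////////////////////////////////////
// Process an array of OptN options on GPU
// (no declaration here: main() compiles BlackScholes_kernel.cuh at run time
// and looks up the BlackScholesGPU kernel by name)
////////////////////////////////////////////////////////////////////////////////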
////////////////////////////////////////////////////////////////////////////////
// Helper function, returning uniformly distributed
// random float in [low, high] range
////////////////////////////////////////////////////////////////////////////////
// Blends low and high with a weight taken from rand(): the weight is
// rand() / RAND_MAX, so the sequence of results is determined by the seed
// passed to srand().
float RandFloat(float low, float high) {
  const float fraction =
      static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
  const float lowWeight = 1.0f - fraction;
  return lowWeight * low + fraction * high;
}
////////////////////////////////////////////////////////////////////////////////
// Data configuration
////////////////////////////////////////////////////////////////////////////////
// Number of options priced per run (length of every host/device array).
const int OPT_N = 4000000;
// Number of back-to-back kernel launches that are timed together.
const int NUM_ITERATIONS = 512;
// Size in bytes of one array of OPT_N floats.
const int OPT_SZ = OPT_N * sizeof(float);
// Riskless rate and volatility passed to both the GPU kernel and the CPU
// reference for every option.
const float RISKFREE = 0.02f;
const float VOLATILITY = 0.30f;
// Integer division of a by b, rounded up (for the positive values used here).
#define DIV_UP(a, b) (((a) + (b)-1) / (b))
////////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////////
// Program entry point. Compiles BlackScholes_kernel.cuh to a CUBIN at run
// time, launches the BlackScholesGPU kernel NUM_ITERATIONS times over OPT_N
// randomly generated options, recomputes the prices with BlackScholesCPU()
// and compares the call prices of both runs.
//
// argc/argv: forwarded to compileFileToCUBIN() and loadCUBIN(); argv[0] is
//            also used to locate the kernel file and in the log output.
// Does not return normally: the process ends through exit() with
// EXIT_FAILURE when the L1 norm exceeds 1e-6 and with EXIT_SUCCESS otherwise.
int main(int argc, char **argv) {
// Start logs
printf("[%s] - Starting...\n", argv[0]);
//'h_' prefix - CPU (host) memory space
float
// Results calculated by CPU for reference
*h_CallResultCPU,
*h_PutResultCPU,
// CPU copy of GPU results
*h_CallResultGPU, *h_PutResultGPU,
// CPU instance of input data
*h_StockPrice, *h_OptionStrike, *h_OptionYears;
//'d_' prefix - GPU (device) memory space
CUdeviceptr
// Results calculated by GPU
d_CallResult,
d_PutResult,
// GPU instance of input data
d_StockPrice, d_OptionStrike, d_OptionYears;
double delta, ref, sum_delta, sum_ref, max_delta, L1norm, gpuTime;
StopWatchInterface *hTimer = NULL;
int i;
sdkCreateTimer(&hTimer);
printf("Initializing data...\n");
printf("...allocating CPU memory for options.\n");
// NOTE(review): none of the malloc() results below is checked for NULL.
h_CallResultCPU = (float *)malloc(OPT_SZ);
h_PutResultCPU = (float *)malloc(OPT_SZ);
h_CallResultGPU = (float *)malloc(OPT_SZ);
h_PutResultGPU = (float *)malloc(OPT_SZ);
h_StockPrice = (float *)malloc(OPT_SZ);
h_OptionStrike = (float *)malloc(OPT_SZ);
h_OptionYears = (float *)malloc(OPT_SZ);
// NOTE(review): cubin and kernel_file are not released anywhere in this
// function - confirm who owns the buffers returned by the helpers.
char *cubin, *kernel_file;
size_t cubinSize;
kernel_file = sdkFindFilePath("BlackScholes_kernel.cuh", argv[0]);
// Compile the kernel BlackScholes_kernel.
compileFileToCUBIN(kernel_file, argc, argv, &cubin, &cubinSize, 0);
CUmodule module = loadCUBIN(cubin, argc, argv);
// Look the kernel up by name; it is declared extern "C" in the .cuh file.
CUfunction kernel_addr;
checkCudaErrors(cuModuleGetFunction(&kernel_addr, module, "BlackScholesGPU"));
printf("...allocating GPU memory for options.\n");
checkCudaErrors(cuMemAlloc(&d_CallResult, OPT_SZ));
checkCudaErrors(cuMemAlloc(&d_PutResult, OPT_SZ));
checkCudaErrors(cuMemAlloc(&d_StockPrice, OPT_SZ));
checkCudaErrors(cuMemAlloc(&d_OptionStrike, OPT_SZ));
checkCudaErrors(cuMemAlloc(&d_OptionYears, OPT_SZ));
printf("...generating input data in CPU mem.\n");
// Fixed seed, so every run generates the same option set.
srand(5347);
// Generate options set
for (i = 0; i < OPT_N; i++) {
h_CallResultCPU[i] = 0.0f;
h_PutResultCPU[i] = -1.0f;
h_StockPrice[i] = RandFloat(5.0f, 30.0f);
h_OptionStrike[i] = RandFloat(1.0f, 100.0f);
h_OptionYears[i] = RandFloat(0.25f, 10.0f);
}
printf("...copying input data to GPU mem.\n");
// Copy options data to GPU memory for further processing
checkCudaErrors(cuMemcpyHtoD(d_StockPrice, h_StockPrice, OPT_SZ));
checkCudaErrors(cuMemcpyHtoD(d_OptionStrike, h_OptionStrike, OPT_SZ));
checkCudaErrors(cuMemcpyHtoD(d_OptionYears, h_OptionYears, OPT_SZ));
printf("Data init done.\n\n");
printf("Executing Black-Scholes GPU kernel (%i iterations)...\n",
NUM_ITERATIONS);
sdkResetTimer(&hTimer);
sdkStartTimer(&hTimer);
// 128 threads per block, matching the kernel's __launch_bounds__(128). The
// kernel prices two options per thread, hence OPT_N / 2 threads in total.
dim3 cudaBlockSize(128, 1, 1);
dim3 cudaGridSize(DIV_UP(OPT_N / 2, 128), 1, 1);
// Kernel arguments are passed as an array of pointers to the values, in the
// order of the BlackScholesGPU parameter list, so the scalars need
// addressable local copies.
float risk = RISKFREE;
float volatility = VOLATILITY;
int optval = OPT_N;
void *arr[] = {(void *)&d_CallResult, (void *)&d_PutResult,
(void *)&d_StockPrice, (void *)&d_OptionStrike,
(void *)&d_OptionYears, (void *)&risk,
(void *)&volatility, (void *)&optval};
for (i = 0; i < NUM_ITERATIONS; i++) {
checkCudaErrors(cuLaunchKernel(kernel_addr, cudaGridSize.x, cudaGridSize.y,
cudaGridSize.z, /* grid dim */
cudaBlockSize.x, cudaBlockSize.y,
cudaBlockSize.z, /* block dim */
0, 0, /* shared mem, stream */
&arr[0], /* arguments */
0));
}
// Synchronize before stopping the timer so that the measurement covers the
// execution of all launches issued above.
checkCudaErrors(cuCtxSynchronize());
sdkStopTimer(&hTimer);
// Average time per launch (reported as msec below).
gpuTime = sdkGetTimerValue(&hTimer) / NUM_ITERATIONS;
// Both call and put is calculated
printf("Options count : %i \n", 2 * OPT_N);
printf("BlackScholesGPU() time : %f msec\n", gpuTime);
// Five arrays of OPT_N floats are touched per launch: three inputs are read
// and two result arrays are written.
printf("Effective memory bandwidth: %f GB/s\n",
((double)(5 * OPT_N * sizeof(float)) * 1E-9) / (gpuTime * 1E-3));
printf("Gigaoptions per second : %f \n\n",
((double)(2 * OPT_N) * 1E-9) / (gpuTime * 1E-3));
printf(
"BlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u "
"options, NumDevsUsed = %u, Workgroup = %u\n",
(((double)(2.0 * OPT_N) * 1.0E-9) / (gpuTime * 1.0E-3)), gpuTime * 1e-3,
(2 * OPT_N), 1, 128);
printf("\nReading back GPU results...\n");
// Read back GPU results to compare them to CPU results
checkCudaErrors(cuMemcpyDtoH(h_CallResultGPU, d_CallResult, OPT_SZ));
checkCudaErrors(cuMemcpyDtoH(h_PutResultGPU, d_PutResult, OPT_SZ));
printf("Checking the results...\n");
printf("...running CPU calculations.\n\n");
// Calculate options values on CPU
BlackScholesCPU(h_CallResultCPU, h_PutResultCPU, h_StockPrice, h_OptionStrike,
h_OptionYears, RISKFREE, VOLATILITY, OPT_N);
printf("Comparing the results...\n");
// Calculate max absolute difference and L1 distance
// between CPU and GPU results
// NOTE(review): only the call prices are compared; h_PutResultGPU is read
// back above but never checked against h_PutResultCPU - confirm intended.
sum_delta = 0;
sum_ref = 0;
max_delta = 0;
for (i = 0; i < OPT_N; i++) {
ref = h_CallResultCPU[i];
delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]);
if (delta > max_delta) {
max_delta = delta;
}
sum_delta += delta;
sum_ref += fabs(ref);
}
// Relative L1 error: summed absolute differences over summed absolute
// reference values.
L1norm = sum_delta / sum_ref;
printf("L1 norm: %E\n", L1norm);
printf("Max absolute error: %E\n\n", max_delta);
printf("Shutting down...\n");
printf("...releasing GPU memory.\n");
checkCudaErrors(cuMemFree(d_OptionYears));
checkCudaErrors(cuMemFree(d_OptionStrike));
checkCudaErrors(cuMemFree(d_StockPrice));
checkCudaErrors(cuMemFree(d_PutResult));
checkCudaErrors(cuMemFree(d_CallResult));
printf("...releasing CPU memory.\n");
free(h_OptionYears);
free(h_OptionStrike);
free(h_StockPrice);
free(h_PutResultGPU);
free(h_CallResultGPU);
free(h_PutResultCPU);
free(h_CallResultCPU);
sdkDeleteTimer(&hTimer);
printf("Shutdown done.\n");
printf("\n[%s] - Test Summary\n", argv[0]);
// The run passes when the relative L1 error does not exceed 1e-6.
if (L1norm > 1e-6) {
printf("Test failed!\n");
exit(EXIT_FAILURE);
}
printf("Test passed\n");
exit(EXIT_SUCCESS);
}

View File

@ -0,0 +1,88 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <math.h>
///////////////////////////////////////////////////////////////////////////////
// Polynomial approximation of cumulative normal distribution function
///////////////////////////////////////////////////////////////////////////////
// Evaluates the polynomial approximation of the cumulative normal
// distribution at d. A fifth-degree polynomial in
// k = 1 / (1 + 0.2316419 * |d|) is scaled by exp(-d*d/2) / sqrt(2*pi); the
// scaled value is returned as is for d <= 0 and as 1 - value for d > 0.
static double CND(double d) {
  static const double kCoeff[5] = {0.31938153, -0.356563782, 1.781477937,
                                   -1.821255978, 1.330274429};
  const double kInvSqrt2Pi = 0.39894228040143267793994605993438;

  const double k = 1.0 / (1.0 + 0.2316419 * fabs(d));

  // Horner evaluation, starting from the highest-order coefficient.
  double poly = kCoeff[4];
  for (int i = 3; i >= 0; --i) {
    poly = kCoeff[i] + k * poly;
  }

  const double scaled = kInvSqrt2Pi * exp(-0.5 * d * d) * (k * poly);
  return (d > 0) ? 1.0 - scaled : scaled;
}
///////////////////////////////////////////////////////////////////////////////
// Black-Scholes formula for both call and put
///////////////////////////////////////////////////////////////////////////////
static void BlackScholesBodyCPU(float &callResult, float &putResult,
float Sf, // Stock price
float Xf, // Option strike
float Tf, // Option years
float Rf, // Riskless rate
float Vf // Volatility rate
) {
double S = Sf, X = Xf, T = Tf, R = Rf, V = Vf;
double sqrtT = sqrt(T);
double d1 = (log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT);
double d2 = d1 - V * sqrtT;
double CNDD1 = CND(d1);
double CNDD2 = CND(d2);
// Calculate Call and Put simultaneously
double expRT = exp(-R * T);
callResult = (float)(S * CNDD1 - X * expRT * CNDD2);
putResult = (float)(X * expRT * (1.0 - CNDD2) - S * (1.0 - CNDD1));
}
////////////////////////////////////////////////////////////////////////////////
// Process an array of optN options
////////////////////////////////////////////////////////////////////////////////
// Prices optN options one after another. For every index the stock price,
// strike and years are read from the input arrays, and the call and put
// prices are stored at the same index of h_CallResult and h_PutResult.
// Riskfree and Volatility are the same for all options.
extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
                                float *h_StockPrice, float *h_OptionStrike,
                                float *h_OptionYears, float Riskfree,
                                float Volatility, int optN) {
  int idx = 0;
  while (idx < optN) {
    float &callOut = h_CallResult[idx];
    float &putOut = h_PutResult[idx];
    BlackScholesBodyCPU(callOut, putOut, h_StockPrice[idx],
                        h_OptionStrike[idx], h_OptionYears[idx], Riskfree,
                        Volatility);
    ++idx;
  }
}

View File

@ -0,0 +1,103 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
///////////////////////////////////////////////////////////////////////////////
// Polynomial approximation of cumulative normal distribution function
///////////////////////////////////////////////////////////////////////////////
// Single-precision device version of the polynomial approximation of the
// cumulative normal distribution. Uses the __fdividef and __expf intrinsics;
// the scaled polynomial value is returned as is for d <= 0 and as 1 - value
// for d > 0.
__device__ inline float cndGPU(float d) {
  const float kInvSqrt2Pi = 0.39894228040143267793994605993438f;

  const float k = __fdividef(1.0f, (1.0f + 0.2316419f * fabsf(d)));

  // Horner evaluation, starting from the highest-order coefficient.
  float poly = 1.330274429f;
  poly = -1.821255978f + k * poly;
  poly = 1.781477937f + k * poly;
  poly = -0.356563782f + k * poly;
  poly = 0.31938153f + k * poly;

  const float scaled = kInvSqrt2Pi * __expf(-0.5f * d * d) * (k * poly);
  return (d > 0) ? 1.0f - scaled : scaled;
}
///////////////////////////////////////////////////////////////////////////////
// Black-Scholes formula for both call and put
///////////////////////////////////////////////////////////////////////////////
// Prices one option on the device in single precision, writing the call
// price to CallResult and the put price to PutResult. Division, logarithm
// and exponential go through the __fdividef, __logf and __expf intrinsics.
__device__ inline void BlackScholesBodyGPU(float &CallResult, float &PutResult,
                                           float S,  // Stock price
                                           float X,  // Option strike
                                           float T,  // Option years
                                           float R,  // Riskless rate
                                           float V   // Volatility rate
) {
  // sqrt(T) is obtained as 1 / rsqrtf(T).
  const float rootT = __fdividef(1.0F, rsqrtf(T));
  const float volRootT = V * rootT;

  const float d1 =
      __fdividef(__logf(S / X) + (R + 0.5f * V * V) * T, volRootT);
  const float d2 = d1 - volRootT;

  const float cndD1 = cndGPU(d1);
  const float cndD2 = cndGPU(d2);

  // Strike multiplied by exp(-R * T); shared by the call and the put.
  const float discountedStrike = X * __expf(-R * T);

  CallResult = S * cndD1 - discountedStrike * cndD2;
  PutResult = discountedStrike * (1.0f - cndD2) - S * (1.0f - cndD1);
}
////////////////////////////////////////////////////////////////////////////////
// Process an array of optN options on GPU
////////////////////////////////////////////////////////////////////////////////
// Kernel entry point. Every thread handles one float2 element, i.e. two
// options, of the input arrays and writes one float2 element to each result
// array. Threads whose index is not below optN / 2 do nothing. Declared
// extern "C" so the host can look it up under the name "BlackScholesGPU".
extern "C" __launch_bounds__(128) __global__
void BlackScholesGPU(float2 *__restrict d_CallResult,
                     float2 *__restrict d_PutResult,
                     float2 *__restrict d_StockPrice,
                     float2 *__restrict d_OptionStrike,
                     float2 *__restrict d_OptionYears, float Riskfree,
                     float Volatility, int optN) {
  // Index of the option pair handled by this thread.
  const int pairIdx = blockDim.x * blockIdx.x + threadIdx.x;

  if (pairIdx >= (optN / 2)) {
    return;
  }

  // Fetch both options of the pair at once.
  const float2 stock = d_StockPrice[pairIdx];
  const float2 strike = d_OptionStrike[pairIdx];
  const float2 years = d_OptionYears[pairIdx];

  float callX, putX;
  float callY, putY;

  BlackScholesBodyGPU(callX, putX, stock.x, strike.x, years.x, Riskfree,
                      Volatility);
  BlackScholesBodyGPU(callY, putY, stock.y, strike.y, years.y, Riskfree,
                      Volatility);

  d_CallResult[pairIdx] = make_float2(callX, callY);
  d_PutResult[pairIdx] = make_float2(putX, putY);
}

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2017
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BlackScholes_nvrtc", "BlackScholes_nvrtc_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,114 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>BlackScholes_nvrtc_vs2017</RootNamespace>
<ProjectName>BlackScholes_nvrtc</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 11.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);$(CUDA_PATH)/include;</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cuda.lib;nvrtc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/BlackScholes_nvrtc.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration></CodeGeneration>
<!-- All nvcc options live in a single AdditionalOptions element. A second element without %(AdditionalOptions) would replace this value instead of appending to it, dropping the /wd 4819 suppression. -->
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="BlackScholes.cpp" />
<ClCompile Include="BlackScholes_gold.cpp" />
<None Include="BlackScholes_kernel.cuh" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 11.5.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2019
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BlackScholes_nvrtc", "BlackScholes_nvrtc_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,110 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<!-- Directory that holds the CUDA build customization files imported further down ("CUDA 11.5.props" and "CUDA 11.5.targets"). The Condition keeps any CUDAPropsPath that is already set; otherwise the Visual C++ BuildCustomizations folder is used. -->
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>BlackScholes_nvrtc_vs2019</RootNamespace>
<ProjectName>BlackScholes_nvrtc</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<!-- Settings shared by every configuration: console application built with the v142 toolset (Visual Studio 2019) against the Windows 10 SDK, using the multi-byte character set. -->
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<!-- Debug only: link against the debug flavour of the libraries. -->
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<!-- Release only: enable whole program optimization (paired with UseLinkTimeCodeGeneration in the Release link settings). -->
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 11.5.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<!-- Defaults applied to every configuration; the Debug/Release groups that follow only add optimization, runtime library and debug-info settings. -->
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);$(CUDA_PATH)/include;</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<!-- cuda.lib (driver API) and nvrtc.lib (runtime compilation) are the CUDA libraries this sample links against. -->
<AdditionalDependencies>cuda.lib;nvrtc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/BlackScholes_nvrtc.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration></CodeGeneration>
<!-- AdditionalOptions must be defined exactly once: a second definition of the same metadata name replaces the first, which previously dropped the -Xcompiler "/wd 4819" option and any inherited %(AdditionalOptions) in favour of "threads 0" alone. -->
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<!-- Debug: no optimization, static debug CRT (/MTd) for both the host compiler and CUDA compilation, debug information emitted by the linker. -->
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<!-- Keep the CUDA runtime-library choice in step with ClCompile/RuntimeLibrary above. -->
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<!-- Release: optimize for speed, static release CRT (/MT), no debug information, link-time code generation enabled. -->
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<!-- Keep the CUDA runtime-library choice in step with ClCompile/RuntimeLibrary above. -->
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<!-- Project sources. Both .cpp files are built by the host compiler. The kernel header is listed as a "None" item, so it is shown in the project but not compiled at build time (presumably it is the source handed to NVRTC at run time; confirm against BlackScholes.cpp). -->
<ItemGroup>
<ClCompile Include="BlackScholes.cpp" />
<ClCompile Include="BlackScholes_gold.cpp" />
<None Include="BlackScholes_kernel.cuh" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 11.5.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,422 @@
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project only supported on Mac OS X and Linux platforms
#
################################################################################
# Location of the CUDA Toolkit
# `?=` keeps a CUDA_PATH supplied by the environment or the make command line;
# /usr/local/cuda is only the fallback.
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
# Legacy switches (x86_64=1, ARMv7=1, aarch64=1, ppc64le=1, GCC=<compiler>) are
# still accepted: each prints a deprecation notice and is mapped onto the
# current TARGET_ARCH / HOST_COMPILER variable. The mapping uses `?=`, so an
# explicitly supplied TARGET_ARCH or HOST_COMPILER takes precedence.
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
# `abi` is no longer supported at all: setting it to anything aborts the build.
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
# HOST_ARCH is whatever `uname -m` reports; TARGET_ARCH defaults to it.
# TARGET_SIZE is the word size later passed to nvcc as -m<size>:
#   * cross compiling (TARGET_ARCH != HOST_ARCH): fixed by the target, 64 for
#     x86_64/aarch64/sbsa/ppc64le and 32 for armv7l;
#   * native build: queried from the host with `getconf LONG_BIT`.
# Any TARGET_ARCH outside the supported list aborts the build.
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
# An aarch64 host is treated as sbsa when the toolkit contains a
# targets/sbsa-linux directory; both HOST_ARCH and TARGET_ARCH are then
# overwritten (`:=`), regardless of any TARGET_ARCH given by the user.
ifeq ($(HOST_ARCH),aarch64)
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
HOST_ARCH := sbsa
TARGET_ARCH := sbsa
endif
endif
# Only the host-target pairs listed below may be cross compiled; anything else
# aborts the build.
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
# HOST_OS is the lower-cased output of `uname -s`; TARGET_OS defaults to it and
# must be one of linux, darwin, qnx or android.
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
# Every assignment below uses `?=`, so a HOST_COMPILER given on the command
# line, in the environment, or through the deprecated GCC variable always wins.
# Otherwise the default depends on the target:
#   * darwin with Xcode major version >= 5: clang++
#   * cross builds: the matching cross toolchain for the host/target/OS triple
#   * everything else: g++ (final fallback at the end of this block)
# QNX cross builds additionally require QNX_HOST and QNX_TARGET, which are
# exported to the environment of the recipes.
ifeq ($(TARGET_OS),darwin)
# `$$2` is an escaped `$2` for awk: the second field of the "Xcode <version>" line.
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),sbsa)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
# NVCC is defined for completeness; the rules of this Makefile call
# $(HOST_COMPILER) directly because the sample has no .cu file to compile.
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
# NVCCFLAGS: options for nvcc itself; CCFLAGS / LDFLAGS: options for the host
# compiler and linker. All three start from a known value so nothing leaks in
# from the environment (user additions go through the EXTRA_* variables).
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
# Per-platform additions: rpath and -arch on macOS, hard-float ABI and ARM
# dynamic linker when cross compiling x86_64 -> armv7l Linux, position
# independent executables on Android.
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
# Cross-compilation only: point the host compiler and linker at the target
# root filesystem (TARGET_FS), when one is given, so that target headers and
# libraries are found instead of the host's.
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
# NOTE(review): `expr` compares numerically only when both operands are
# integers; version strings such as "10.2.0" and "4.6" are compared as text,
# so this test can also come out true for GCC >= 10 - confirm that passing
# --sysroot to the compiler is acceptable in that case.
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
# NOTE(review): same textual `expr` comparison as in the armv7l branch above.
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
LDFLAGS += -lsocket
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
# The commas are backslash-escaped and the option is quoted so that it reaches
# the host compiler as one -Wl,... argument.
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
ifdef TARGET_OVERRIDE
LDFLAGS += -lslog2
endif
ifneq ($(TARGET_FS),)
LDFLAGS += -L$(TARGET_FS)/usr/lib
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
CCFLAGS += -I$(TARGET_FS)/../include
endif
endif
endif
ifdef TARGET_OVERRIDE # cuda toolkit targets override
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
endif
# Install directory of different arch
# Maps the target arch/OS pair to its sub-directory below $(CUDA_PATH)
# (with trailing slash); stays empty for targets not listed, e.g. native
# x86_64. The variable is not referenced by the rules in this Makefile.
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
# `make dbg=1` adds -g to the host compiler flags and selects the "debug"
# output directory; any other value of dbg produces a "release" build.
ifeq ($(dbg),1)
CCFLAGS += -g
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
# Complete nvcc command-line flags: nvcc's own options first (internal, then
# user-supplied EXTRA_NVCCFLAGS), followed by every host compiler flag
# (internal CCFLAGS, then user-supplied EXTRA_CCFLAGS), each one forwarded
# through its own -Xcompiler prefix.
ALL_CCFLAGS := $(NVCCFLAGS) $(EXTRA_NVCCFLAGS) \
               $(addprefix -Xcompiler ,$(CCFLAGS)) \
               $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
# Non-empty when `lsb_release` identifies the distribution as Ubuntu. Defined
# with `=`, so the shell command only runs if the variable is expanded.
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
# SAMPLE_ENABLED is cleared by the checks below (and by later dependency
# checks); when it is 0 the recipes are only echoed instead of executed.
SAMPLE_ENABLED := 1
# This sample is not supported on ARMv7
ifeq ($(TARGET_ARCH),armv7l)
$(info >>> WARNING - BlackScholes_nvrtc is not supported on ARMv7 - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on QNX
ifeq ($(TARGET_OS),qnx)
$(info >>> WARNING - BlackScholes_nvrtc is not supported on QNX - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# Complete nvcc link flags: everything used for compilation, followed by the
# linker options (internal LDFLAGS, then user-supplied EXTRA_LDFLAGS), each one
# forwarded through its own -Xlinker prefix.
ALL_LDFLAGS := $(ALL_CCFLAGS) \
               $(addprefix -Xlinker ,$(LDFLAGS)) \
               $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
# INCLUDES and LIBRARIES are extended further down, once the CUDA driver
# library has been located.
INCLUDES := -I../../Common
LIBRARIES :=
################################################################################
# libNVRTC specific libraries
# macOS links against the CUDA framework. This is appended to LDFLAGS after
# ALL_LDFLAGS was computed, so it only reaches the link rule, which uses
# $(LDFLAGS) directly.
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -L$(CUDA_PATH)/lib -F/Library/Frameworks -framework CUDA
endif
#Detect if installed version of GCC supports required C++11
# Linux only. The first three dot-separated fields of `-dumpversion` are
# concatenated and "00" is appended (e.g. 7.5.0 -> 75000); the result is
# compared numerically against 51000, i.e. GCC 5.1.0. An older compiler
# waives the sample instead of failing the build.
# NOTE(review): when -dumpversion prints fewer than three fields, `cut` yields
# either nothing or the whole string for the missing ones - the outcome is
# still on the right side of 51000 for the usual cases, but confirm for
# unusual version strings.
ifeq ($(TARGET_OS),linux)
# `space` holds a single blank; needed because a literal space cannot be
# written directly as a $(subst) argument.
empty :=
space := $(empty) $(empty)
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
#Create version number without "."
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
# Make sure the version number has at least 3 decimals
GCCVERSION += 00
# Remove spaces from the version number
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
#$(warning $(GCCVERSION))
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
ifeq ($(IS_MIN_VERSION), 1)
$(info >>> GCC Version is greater or equal to 5.1.0 <<<)
else
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
SAMPLE_ENABLED := 0
endif
endif
# Locate the CUDA driver library.
# macOS uses the CUDA framework. Everywhere else libcuda.so is searched for in
# the toolkit's "stubs" directory matching the target; CUDA_SEARCH_PATH and
# CUDALIB can both be preset by the user (`?=`). If the library is not found
# the sample is waived.
ifeq ($(TARGET_OS),darwin)
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
else
ifeq ($(TARGET_ARCH),x86_64)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
# TARGET_OVERRIDE replaces the search path unconditionally (`:=`).
ifdef TARGET_OVERRIDE
CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
endif
endif
ifeq ($(TARGET_ARCH),ppc64le)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
endif
ifeq ($(HOST_ARCH),ppc64le)
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
endif
# -L follows symlinks; -maxdepth 1 restricts the search to the listed
# directories themselves.
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
ifeq ("$(CUDALIB)","")
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
SAMPLE_ENABLED := 0
else
# Keep only the first match and strip the file name, leaving the directory
# that is handed to the linker with -L.
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
LIBRARIES += -L$(CUDALIB) -lcuda
endif
endif
# ALL_CCFLAGS is extended for completeness; the rules of this Makefile compile
# with $(CCFLAGS) and $(EXTRA_CCFLAGS) only.
ALL_CCFLAGS += --threads 0 --std=c++11
INCLUDES += -I$(CUDA_PATH)/include
LIBRARIES += -lnvrtc
# A waived sample turns every recipe line into an echo of the command it would
# have run, prefixed with "[@]".
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules
# None of the following names is a file produced by the build. Declaring them
# phony guarantees that a stray file called e.g. `clean` or `build` in the
# sample directory can never make the target look up to date.
.PHONY: all build check.deps run clean clobber

# Default goal.
all: build

build: BlackScholes_nvrtc

# Report whether the dependency checks above left the sample enabled.
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Host-only compilation: the sample has no .cu file, so the objects are built
# with $(HOST_COMPILER) directly. When the sample is waived, $(EXEC) turns each
# command into an echo.
BlackScholes.o:BlackScholes.cpp
	$(EXEC) $(HOST_COMPILER) $(INCLUDES) $(CCFLAGS) $(EXTRA_CCFLAGS) -o $@ -c $<

BlackScholes_gold.o:BlackScholes_gold.cpp
	$(EXEC) $(HOST_COMPILER) $(INCLUDES) $(CCFLAGS) $(EXTRA_CCFLAGS) -o $@ -c $<

# Link, then copy the executable into the shared bin/<arch>/<os>/<build type>
# directory of the samples tree.
BlackScholes_nvrtc: BlackScholes.o BlackScholes_gold.o
	$(EXEC) $(HOST_COMPILER) $(LDFLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

run: build
	$(EXEC) ./BlackScholes_nvrtc

# Remove the local build products and the copy placed in the shared bin
# directory for the current arch/OS/build type.
clean:
	rm -f BlackScholes_nvrtc BlackScholes.o BlackScholes_gold.o
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/BlackScholes_nvrtc

clobber: clean

View File

@ -0,0 +1,71 @@
# BlackScholes_nvrtc - Black-Scholes Option Pricing with libNVRTC
## Description
This sample evaluates fair call and put prices for a given set of European options by Black-Scholes formula, compiling the CUDA kernels involved at runtime using NVRTC.
## Key Concepts
Computational Finance, Runtime Compilation
## Supported SM Architectures
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
Linux, Windows
## Supported CPU Architecture
x86_64, ppc64le, aarch64
## CUDA APIs involved
## Dependencies needed to build/run
[NVRTC](../../README.md#nvrtc)
## Prerequisites
Download and install the [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
## Build and Run
### Windows
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
```
*_vs<version>.sln - for Visual Studio <version>
```
Each individual sample has its own set of solution files in its directory.
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
### Linux
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
```
$ cd <sample_dir>
$ make
```
The samples makefiles can take advantage of certain options:
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, aarch64.
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=aarch64` <br/>
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
* **dbg=1** - build with debug symbols
```
$ make dbg=1
```
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
```
$ make SMS="50 60"
```
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
```
$ make HOST_COMPILER=g++
```
## References (for more details)

View File

@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/../../Common"
],
"defines": [],
"compilerPath": "/usr/local/cuda/bin/nvcc",
"cStandard": "gnu17",
"cppStandard": "gnu++14",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.makefile-tools"
}
],
"version": 4
}

View File

@ -0,0 +1,7 @@
{
"recommendations": [
"nvidia.nsight-vscode-edition",
"ms-vscode.cpptools",
"ms-vscode.makefile-tools"
]
}

View File

@ -0,0 +1,10 @@
{
"configurations": [
{
"name": "CUDA C++: Launch",
"type": "cuda-gdb",
"request": "launch",
"program": "${workspaceFolder}/EGLStream_CUDA_CrossGPU"
}
]
}

View File

@ -0,0 +1,15 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "sample",
"type": "shell",
"command": "make dbg=1",
"problemMatcher": ["$nvcc"],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}

View File

@ -0,0 +1,453 @@
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project only supported on Mac OS X and Linux platforms
#
################################################################################
# Location of the CUDA Toolkit
# `?=` keeps a CUDA_PATH supplied by the environment or the make command line;
# /usr/local/cuda is only the fallback.
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
# Legacy switches (x86_64=1, ARMv7=1, aarch64=1, ppc64le=1, GCC=<compiler>) are
# still accepted: each prints a deprecation notice and is mapped onto the
# current TARGET_ARCH / HOST_COMPILER variable. The mapping uses `?=`, so an
# explicitly supplied TARGET_ARCH or HOST_COMPILER takes precedence.
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
# `abi` is no longer supported at all: setting it to anything aborts the build.
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
# HOST_ARCH is whatever `uname -m` reports; TARGET_ARCH defaults to it.
# TARGET_SIZE is the word size later passed to nvcc as -m<size>:
#   * cross compiling (TARGET_ARCH != HOST_ARCH): fixed by the target, 64 for
#     x86_64/aarch64/sbsa/ppc64le and 32 for armv7l;
#   * native build: queried from the host with `getconf LONG_BIT`.
# Any TARGET_ARCH outside the supported list aborts the build.
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
# An aarch64 host is treated as sbsa when the toolkit contains a
# targets/sbsa-linux directory; both HOST_ARCH and TARGET_ARCH are then
# overwritten (`:=`), regardless of any TARGET_ARCH given by the user.
ifeq ($(HOST_ARCH),aarch64)
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
HOST_ARCH := sbsa
TARGET_ARCH := sbsa
endif
endif
# Only the host-target pairs listed below may be cross compiled; anything else
# aborts the build.
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
# HOST_OS is the lower-cased output of `uname -s`; TARGET_OS defaults to it and
# must be one of linux, darwin, qnx or android.
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
# Every assignment below uses `?=`, so a HOST_COMPILER given on the command
# line, in the environment, or through the deprecated GCC variable always wins.
# Otherwise the default depends on the target:
#   * darwin with Xcode major version >= 5: clang++
#   * cross builds: the matching cross toolchain for the host/target/OS triple
#   * everything else: g++ (final fallback at the end of this block)
# QNX cross builds additionally require QNX_HOST and QNX_TARGET, which are
# exported to the environment of the recipes.
ifeq ($(TARGET_OS),darwin)
# `$$2` is an escaped `$2` for awk: the second field of the "Xcode <version>" line.
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),sbsa)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
# nvcc is always run with the selected host compiler (-ccbin).
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
# NVCCFLAGS: options for nvcc itself; CCFLAGS / LDFLAGS: options for the host
# compiler and linker. All three start from a known value so nothing leaks in
# from the environment (user additions go through the EXTRA_* variables).
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
# Per-platform additions: rpath and -arch on macOS, hard-float ABI and ARM
# dynamic linker when cross compiling x86_64 -> armv7l Linux, position
# independent executables on Android.
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
# Cross-compilation only: point the host compiler and linker at the target
# root filesystem (TARGET_FS), when one is given, so that target headers and
# libraries are found instead of the host's.
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
# NOTE(review): `expr` compares numerically only when both operands are
# integers; version strings such as "10.2.0" and "4.6" are compared as text,
# so this test can also come out true for GCC >= 10 - confirm that passing
# --sysroot to the compiler is acceptable in that case.
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
# NOTE(review): same textual `expr` comparison as in the armv7l branch above.
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
LDFLAGS += -lsocket
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
# The commas are backslash-escaped and the option is quoted so that it reaches
# the host compiler as one -Wl,... argument.
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
ifdef TARGET_OVERRIDE
LDFLAGS += -lslog2
endif
ifneq ($(TARGET_FS),)
LDFLAGS += -L$(TARGET_FS)/usr/lib
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
CCFLAGS += -I$(TARGET_FS)/../include
endif
endif
endif
ifdef TARGET_OVERRIDE # cuda toolkit targets override
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
endif
# Install directory of different arch
# Maps the target arch/OS pair to its sub-directory below $(CUDA_PATH)
# (with trailing slash); stays empty for targets not listed, e.g. native
# x86_64.
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
# `make dbg=1` asks nvcc for host (-g) and device (-G) debug information and
# selects the "debug" build type; any other value of dbg produces a "release"
# build.
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
# Complete nvcc command-line flags: nvcc's own options first (internal, then
# user-supplied EXTRA_NVCCFLAGS), followed by every host compiler flag
# (internal CCFLAGS, then user-supplied EXTRA_CCFLAGS), each one forwarded
# through its own -Xcompiler prefix.
ALL_CCFLAGS := $(NVCCFLAGS) $(EXTRA_NVCCFLAGS) \
               $(addprefix -Xcompiler ,$(CCFLAGS)) \
               $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
# Non-empty when `lsb_release` identifies the distribution as Ubuntu. Defined
# with `=`, so the shell command only runs if the variable is expanded.
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
# SAMPLE_ENABLED starts at 1 and is cleared by each check that finds the
# sample unsupported or a dependency missing.
SAMPLE_ENABLED := 1
# This sample is not supported on Mac OSX
ifeq ($(TARGET_OS),darwin)
$(info >>> WARNING - EGLStream_CUDA_CrossGPU is not supported on Mac OSX - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on android
ifeq ($(TARGET_OS),android)
$(info >>> WARNING - EGLStream_CUDA_CrossGPU is not supported on android - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# Complete nvcc link flags: everything used for compilation, followed by the
# linker options (internal LDFLAGS, then user-supplied EXTRA_LDFLAGS), each one
# forwarded through its own -Xlinker prefix.
ALL_LDFLAGS := $(ALL_CCFLAGS) \
               $(addprefix -Xlinker ,$(LDFLAGS)) \
               $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../Common
LIBRARIES :=
################################################################################
# Makefile include to help find EGL Libraries
# Plain `include` (no leading `-`): make stops with an error if findegl.mk is
# missing. Its contents are not shown here; presumably it probes for the EGL
# headers/libraries and may clear SAMPLE_ENABLED - confirm in findegl.mk.
include ./findegl.mk
# EGL specific libraries
# Linked on every target OS except macOS.
ifneq ($(TARGET_OS),darwin)
LIBRARIES += -lEGL
endif
# Detect whether the host compiler is new enough (>= 5.1.0) for the C++11 code
# in this sample. Only performed when targeting Linux.
ifeq ($(TARGET_OS),linux)
# $(space) holds exactly one space character; used by the $(subst) below.
empty :=
space := $(empty) $(empty)
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
# Build a single integer from the dotted version: take fields 1-3 separately
# (e.g. "5.1.0" -> "5", "1", "0") ...
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
# ... append "00" so the result lines up with the 51000 threshold below ...
GCCVERSION += 00
# ... and strip the spaces that += inserted ("5 1 0 00" -> "51000").
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
#$(warning $(GCCVERSION))
# expr prints 1 when the comparison holds and 0 otherwise.
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
ifeq ($(IS_MIN_VERSION), 1)
$(info >>> GCC Version is greater or equal to 5.1.0 <<<)
else
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
SAMPLE_ENABLED := 0
endif
endif
# Gencode arguments
# Default SM list; SM 72 is only included for armv7l and aarch64 targets.
# ?= lets the user override the list, e.g. make SMS="50 60".
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
else
SMS ?= 35 37 50 52 60 61 70 75 80 86
endif
# An explicitly empty SMS waives the sample.
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# Only derive GENCODE_FLAGS from SMS when the caller has not supplied them.
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
# $(sort) orders words lexically, so "highest" means the last word in that order.
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
# Locate the CUDA driver library. On darwin the CUDA framework is linked; on
# every other OS libcuda.so is searched for in the toolkit's stub directories.
ifeq ($(TARGET_OS),darwin)
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
else
# Each branch only provides a default (?=), so a CUDA_SEARCH_PATH supplied by
# the user takes precedence.
ifeq ($(TARGET_ARCH),x86_64)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
# TARGET_OVERRIDE, when defined, replaces the aarch64-qnx search path outright.
ifdef TARGET_OVERRIDE
CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
endif
endif
ifeq ($(TARGET_ARCH),ppc64le)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
endif
ifeq ($(HOST_ARCH),ppc64le)
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
endif
# Look for libcuda.so directly inside the search directories (no recursion).
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
ifeq ("$(CUDALIB)","")
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
SAMPLE_ENABLED := 0
else
# Keep only the first hit and strip the file name, leaving its directory for -L.
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
LIBRARIES += -L$(CUDALIB) -lcuda
endif
endif
# Appended after ALL_LDFLAGS captured ALL_CCFLAGS, so these two options reach
# the compile commands only.
ALL_CCFLAGS += --threads 0 --std=c++11
# When the sample is waived every recipe command is echoed instead of executed.
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################

# Target rules

# None of these names a file the build produces; declaring them phony keeps a
# stray file called e.g. "clean" or "build" from masking the rule.
.PHONY: all build check.deps run clean clobber

all: build

build: EGLStream_CUDA_CrossGPU

# Reports whether the dependency checks above waived the sample.
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Host-side C++ sources all compile with the same command.
cuda_consumer.o cuda_producer.o eglstrm_common.o main.o: %.o: %.cpp
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

kernel.o: kernel.cu
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the sample and copy it into the shared bin/<arch>/<os>/<build type> tree.
EGLStream_CUDA_CrossGPU: cuda_consumer.o cuda_producer.o eglstrm_common.o kernel.o main.o
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

run: build
	$(EXEC) ./EGLStream_CUDA_CrossGPU

clean:
	rm -f EGLStream_CUDA_CrossGPU cuda_consumer.o cuda_producer.o eglstrm_common.o kernel.o main.o
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/EGLStream_CUDA_CrossGPU

clobber: clean

View File

@ -0,0 +1,78 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
<name>EGLStream_CUDA_CrossGPU</name>
<cuda_api_list>
<driver>cuDeviceGet</driver>
<driver>cuDeviceGetAttribute</driver>
<driver>cuDeviceComputeCapability</driver>
<driver>cuDeviceGetCount</driver>
<driver>cuDeviceGetName</driver>
<driver>cuGraphicsResourceGetMappedEglFrame</driver>
<driver>cuEGLStreamConsumerAcquireFrame</driver>
<driver>cuEGLStreamConsumerReleaseFrame</driver>
<driver>cuEGLStreamProducerReturnFrame</driver>
<driver>cuEGLStreamProducerPresentFrame</driver>
<driver>cuCtxCreate</driver>
<driver>cuMemAlloc</driver>
<driver>cuMemFree</driver>
<driver>cuMemcpy3D</driver>
<driver>cuStreamCreate</driver>
<driver>cuCtxPushCurrent</driver>
<driver>cuCtxPopCurrent</driver>
<driver>cuCtxDestroy</driver>
</cuda_api_list>
<description><![CDATA[Demonstrates CUDA and EGL Streams interop, where consumer's EGL Stream is on one GPU and producer's on other and both consumer-producer are different processes.]]></description>
<devicecompilation>whole</devicecompilation>
<includepaths>
<path>./</path>
<path>../</path>
<path>../../Common</path>
</includepaths>
<keyconcepts>
<concept level="basic">EGLStreams Interop</concept>
</keyconcepts>
<keywords>
<keyword>EGL Streams</keyword>
</keywords>
<libraries>
<library os="linux">cuda</library>
<library framework="true" os="macosx">CUDA</library>
</libraries>
<librarypaths>
</librarypaths>
<nsight_eclipse>true</nsight_eclipse>
<primary_file>main.cpp</primary_file>
<required_dependencies>
<dependency>EGL</dependency>
</required_dependencies>
<scopes>
<scope>1:CUDA Advanced Topics</scope>
<scope>2:Graphics Interop</scope>
</scopes>
<sm-arch>sm35</sm-arch>
<sm-arch>sm37</sm-arch>
<sm-arch>sm50</sm-arch>
<sm-arch>sm52</sm-arch>
<sm-arch>sm60</sm-arch>
<sm-arch>sm61</sm-arch>
<sm-arch>sm70</sm-arch>
<sm-arch>sm72</sm-arch>
<sm-arch>sm75</sm-arch>
<sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch>
<supported_envs>
<env>
<arch>x86_64</arch>
<platform>linux</platform>
</env>
<env>
<arch>arm</arch>
</env>
</supported_envs>
<supported_sm_architectures>
<include>all</include>
</supported_sm_architectures>
<title>EGLStream_CUDA_CrossGPU</title>
<type>exe</type>
</entry>

View File

@ -0,0 +1,64 @@
# EGLStream_CUDA_CrossGPU - EGLStream_CUDA_CrossGPU
## Description
Demonstrates CUDA and EGL Streams interop, where the consumer's EGL Stream is on one GPU and the producer's is on another, and the consumer and producer run as separate processes.
## Key Concepts
EGLStreams Interop
## Supported SM Architectures
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
Linux
## Supported CPU Architecture
x86_64, armv7l
## CUDA APIs involved
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
cuDeviceGet, cuDeviceGetAttribute, cuDeviceComputeCapability, cuDeviceGetCount, cuDeviceGetName, cuGraphicsResourceGetMappedEglFrame, cuEGLStreamConsumerAcquireFrame, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerReturnFrame, cuEGLStreamProducerPresentFrame, cuCtxCreate, cuMemAlloc, cuMemFree, cuMemcpy3D, cuStreamCreate, cuCtxPushCurrent, cuCtxPopCurrent, cuCtxDestroy
## Dependencies needed to build/run
[EGL](../../README.md#egl)
## Prerequisites
Download and install the [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
## Build and Run
### Linux
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
```
$ cd <sample_dir>
$ make
```
The samples makefiles can take advantage of certain options:
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, armv7l.
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=armv7l` <br/>
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
* **dbg=1** - build with debug symbols
```
$ make dbg=1
```
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
```
$ make SMS="50 60"
```
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
```
$ make HOST_COMPILER=g++
```
## References (for more details)

View File

@ -0,0 +1,258 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// DESCRIPTION: Simple CUDA consumer rendering sample app
//
#include <cuda_runtime.h>
#include "cuda_consumer.h"
#include "eglstrm_common.h"
#include <math.h>
#include <unistd.h>
#if defined(EXTENSION_LIST)
EXTENSION_LIST(EXTLST_EXTERN)
#endif
// Graphics resource of the most recently acquired frame: filled in by
// cudaConsumerAcquireFrame() and handed back by cudaConsumerReleaseFrame().
CUgraphicsResource cudaResource;
// Timing log for cuEGLStreamConsumerAcquireFrame calls (recorded only when
// profileAPI is set). count_acq is the next free slot in acquire_time[] and
// wraps back to 0 after 25000 entries; total_time_acq sums every recorded
// duration and is not reset on wrap.
static int count_acq = 0;
static double acquire_time[25000] = {0}, total_time_acq = 0;
// Same bookkeeping for cuEGLStreamConsumerReleaseFrame calls.
static int count_rel = 0;
static double rel_time[25000] = {0}, total_time_rel = 0;
void acquireApiStat(void);

// Prints the mean, population standard deviation, minimum and maximum of
// samples[1 .. count-1] as "<label> Avg/SD/min/max" lines.
//
// Slot 0 is left out, as this report has always done for its average. Nothing
// is printed unless at least one sample remains after skipping slot 0, which
// avoids dividing by zero and reading a slot that was never recorded.
static void printConsumerApiStat(const char *label, const double *samples,
                                 int count) {
  const int n = count - 1;  // number of samples actually summarised
  if (n < 1) return;

  double sum = 0, min = samples[1], max = samples[1];
  for (int i = 1; i < count; i++) {
    sum += samples[i];
    if (samples[i] < min) min = samples[i];
    if (samples[i] > max) max = samples[i];
  }
  const double mean = sum / n;

  // Accumulated per report, so one report's deviation never leaks into the
  // next one.
  double squared_error = 0;
  for (int i = 1; i < count; i++) {
    squared_error += (samples[i] - mean) * (samples[i] - mean);
  }

  printf("%s Avg: %lf\n", label, mean);
  printf("%s SD: %lf\n", label, sqrt(squared_error / n));
  printf("%s min: %lf\n", label, min);
  printf("%s max: %lf\n", label, max);
}

// Prints timing statistics for the acquire and release calls recorded while
// profiling. Registered with atexit() by cuda_consumer_init().
//
// Each report is computed from the entries currently held in its log, and is
// skipped on its own when that log has fewer than two entries.
void acquireApiStat(void) {
  printConsumerApiStat("acquire", acquire_time, count_acq);
  printConsumerApiStat("release", rel_time, count_rel);
}
// Waits for a new frame on the consumer's EGL stream, acquires it into the
// file-level cudaResource and runs cudaConsumer_filter() on its first plane.
//
// cudaConsumer: consumer state; its eglDisplay, eglStream, cudaConn,
//               consCudaStream and profileAPI members are used.
// frameNumber:  offset added to PROD_DATA / CONS_DATA for the filter and also
//               passed to it as the frame index.
//
// Returns CUDA_SUCCESS when the frame was acquired and filtered.
// Returns CUDA_ERROR_INVALID_VALUE for a NULL cudaConsumer, CUDA_ERROR_UNKNOWN
// when the stream query fails, the stream is disconnected or the filter
// reports an error, and otherwise the status of the failing driver call.
CUresult cudaConsumerAcquireFrame(test_cuda_consumer_s *cudaConsumer,
                                  int frameNumber) {
  CUresult cuStatus = CUDA_SUCCESS;
  CUeglFrame cudaEgl;
  struct timespec start, end;
  EGLint streamState = 0;
  double curTime;

  if (!cudaConsumer) {
    printf("%s: Bad parameter\n", __func__);
    // A missing consumer must not be reported as a successful acquire.
    cuStatus = CUDA_ERROR_INVALID_VALUE;
    goto done;
  }

  // Poll the stream state until the producer has posted a new frame.
  while (1) {
    if (!eglQueryStreamKHR(cudaConsumer->eglDisplay, cudaConsumer->eglStream,
                           EGL_STREAM_STATE_KHR, &streamState)) {
      printf("Cuda Consumer: eglQueryStreamKHR EGL_STREAM_STATE_KHR failed\n");
      cuStatus = CUDA_ERROR_UNKNOWN;
      goto done;
    }
    if (streamState == EGL_STREAM_STATE_DISCONNECTED_KHR) {
      printf("Cuda Consumer: EGL_STREAM_STATE_DISCONNECTED_KHR received\n");
      cuStatus = CUDA_ERROR_UNKNOWN;
      goto done;
    }
    if (streamState == EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR) {
      break;
    }
  }

  if (cudaConsumer->profileAPI) {
    getTime(&start);
  }
  // 16000 is the acquire timeout (microseconds per the CUDA driver API docs).
  cuStatus =
      cuEGLStreamConsumerAcquireFrame(&(cudaConsumer->cudaConn), &cudaResource,
                                      &cudaConsumer->consCudaStream, 16000);
  if (cudaConsumer->profileAPI) {
    getTime(&end);
    curTime = TIME_DIFF(end, start);
    acquire_time[count_acq++] = curTime;
    // The timing log is a fixed-size ring; start over when it is full.
    if (count_acq == 25000) count_acq = 0;
    total_time_acq += curTime;
  }

  if (cuStatus == CUDA_SUCCESS) {
    CUdeviceptr pDevPtr = 0;
    cudaError_t err;
    cuStatus =
        cuGraphicsResourceGetMappedEglFrame(&cudaEgl, cudaResource, 0, 0);
    if (cuStatus != CUDA_SUCCESS) {
      printf("Cuda get resource failed with %d\n", cuStatus);
      goto done;
    }
    pDevPtr = (CUdeviceptr)cudaEgl.frame.pPitch[0];
    err = cudaConsumer_filter(cudaConsumer->consCudaStream, (char *)pDevPtr,
                              WIDTH * 4, HEIGHT, PROD_DATA + frameNumber,
                              CONS_DATA + frameNumber, frameNumber);
    if (err != cudaSuccess) {
      printf("Cuda Consumer: kernel failed with: %s\n",
             cudaGetErrorString(err));
      // The filter returns a runtime-API error, so surface it to the caller
      // as a generic driver-API failure rather than CUDA_SUCCESS.
      cuStatus = CUDA_ERROR_UNKNOWN;
      goto done;
    }
  }

done:
  return cuStatus;
}
// Releases the frame held in the file-level cudaResource back to the EGL
// stream and, when profileAPI is set, records how long the release call took.
//
// cudaConsumer: consumer state; cudaConn, consCudaStream and profileAPI are
//               used.
// frameNumber:  unused here; kept for symmetry with cudaConsumerAcquireFrame.
//
// Returns the status of cuEGLStreamConsumerReleaseFrame, or
// CUDA_ERROR_INVALID_VALUE when cudaConsumer is NULL.
CUresult cudaConsumerReleaseFrame(test_cuda_consumer_s *cudaConsumer,
                                  int frameNumber) {
  CUresult cuStatus = CUDA_SUCCESS;
  struct timespec start, end;
  double curTime;

  if (!cudaConsumer) {
    printf("%s: Bad parameter\n", __func__);
    // A missing consumer must not be reported as a successful release.
    cuStatus = CUDA_ERROR_INVALID_VALUE;
    goto done;
  }

  if (cudaConsumer->profileAPI) {
    getTime(&start);
  }
  cuStatus = cuEGLStreamConsumerReleaseFrame(
      &cudaConsumer->cudaConn, cudaResource, &cudaConsumer->consCudaStream);
  if (cudaConsumer->profileAPI) {
    getTime(&end);
    curTime = TIME_DIFF(end, start);
    rel_time[count_rel++] = curTime;
    // The timing log is a fixed-size ring; start over when it is full.
    if (count_rel == 25000) count_rel = 0;
    total_time_rel += curTime;
  }

  if (cuStatus != CUDA_SUCCESS) {
    printf("cuEGLStreamConsumerReleaseFrame failed, status:%d\n", cuStatus);
    goto done;
  }

done:
  return cuStatus;
}
// Initialises the driver API, creates a context on the device selected by
// cudaConsumer->cudaDevId, prints the device name and compute capability and
// pops the new context off the calling thread again.
//
// Returns the failing driver status if cuInit, cuDeviceGet or cuCtxCreate
// fails. Terminates the process with exit code 2 (waived) when the device's
// compute capability major version is below 6.
CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer) {
  CUdevice dev;

  CUresult rc = cuInit(0);
  if (rc != CUDA_SUCCESS) {
    printf("Failed to initialize CUDA\n");
    return rc;
  }

  rc = cuDeviceGet(&dev, cudaConsumer->cudaDevId);
  if (rc != CUDA_SUCCESS) {
    printf("failed to get CUDA device\n");
    return rc;
  }

  rc = cuCtxCreate(&cudaConsumer->context, 0, dev);
  if (rc != CUDA_SUCCESS) {
    printf("failed to create CUDA context\n");
    return rc;
  }

  // Describe the selected GPU; the results of these queries are not checked.
  char gpuName[256];
  int ccMajor = 0;
  int ccMinor = 0;
  cuDeviceGetAttribute(&ccMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
                       dev);
  cuDeviceGetAttribute(&ccMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
                       dev);
  cuDeviceGetName(gpuName, 256, dev);
  printf("CUDA Consumer on GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
         dev, gpuName, ccMajor, ccMinor);

  cuCtxPopCurrent(&cudaConsumer->context);

  if (ccMajor < 6) {
    printf("EGLStream_CUDA_CrossGPU requires SM 6.0 or higher arch GPU. Exiting...\n");
    exit(2);  // EXIT_WAIVED
  }
  return rc;
}
// Prepares a consumer for streaming: allocates the host copy buffer
// (args->charCnt bytes), creates the consumer's CUDA stream and registers the
// acquire/release timing report to run at process exit.
//
// Returns CUDA_SUCCESS on success, CUDA_ERROR_OUT_OF_MEMORY when the host
// buffer cannot be allocated, or the status of cuStreamCreate when that fails.
CUresult cuda_consumer_init(test_cuda_consumer_s *cudaConsumer,
                            TestArgs *args) {
  CUresult status = CUDA_SUCCESS;
  int bufferSize;

  cudaConsumer->charCnt = args->charCnt;
  bufferSize = args->charCnt;

  cudaConsumer->pCudaCopyMem = (unsigned char *)malloc(bufferSize);
  if (cudaConsumer->pCudaCopyMem == NULL) {
    printf("Cuda Consumer: malloc failed\n");
    // Report the failure instead of falling through with CUDA_SUCCESS.
    status = CUDA_ERROR_OUT_OF_MEMORY;
    goto done;
  }

  status = cuStreamCreate(&cudaConsumer->consCudaStream, 0);
  if (status != CUDA_SUCCESS) {
    printf("Cuda Consumer: cuStreamCreate failed, status:%d\n", status);
    goto done;
  }

  atexit(acquireApiStat);

done:
  return status;
}
// Frees the consumer's host copy buffer and disconnects the consumer from its
// EGL stream. Returns the status of cuEGLStreamConsumerDisconnect.
CUresult cuda_consumer_Deinit(test_cuda_consumer_s *cudaConsumer) {
  // free(NULL) is a no-op, so no guard is needed; clearing the pointer keeps a
  // repeated Deinit from freeing the same buffer twice.
  free(cudaConsumer->pCudaCopyMem);
  cudaConsumer->pCudaCopyMem = NULL;
  return cuEGLStreamConsumerDisconnect(&cudaConsumer->cudaConn);
}

View File

@ -0,0 +1,66 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// DESCRIPTION: CUDA consumer header file
//
#ifndef _CUDA_CONSUMER_H_
#define _CUDA_CONSUMER_H_
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cudaEGL.h"
#include "eglstrm_common.h"
#include <cuda_runtime.h>
#include <cuda.h>
// State of one CUDA EGL-stream consumer, shared by the cuda_consumer.cpp
// entry points declared below.
typedef struct _test_cuda_consumer_s {
// Context created by cudaDeviceCreateConsumer() with cuCtxCreate.
CUcontext context;
// Stream connection handle passed to the cuEGLStreamConsumer* calls.
CUeglStreamConnection cudaConn;
// Device ordinal handed to cuDeviceGet.
int cudaDevId;
// Display and stream queried with eglQueryStreamKHR before each acquire.
EGLDisplay eglDisplay;
EGLStreamKHR eglStream;
// Copied from TestArgs::charCnt; also the size in bytes of pCudaCopyMem.
unsigned int charCnt;
// NOTE(review): not referenced by the consumer code visible here.
char *cudaBuf;
// When true, acquire/release calls are timed and logged.
bool profileAPI;
// Host buffer allocated in cuda_consumer_init(), freed in cuda_consumer_Deinit().
unsigned char *pCudaCopyMem;
// Stream created in cuda_consumer_init() and used for acquire, release and
// the consumer filter.
CUstream consCudaStream;
} test_cuda_consumer_s;
// Allocates the host copy buffer (args->charCnt bytes), creates
// consCudaStream and registers the timing report with atexit().
CUresult cuda_consumer_init(test_cuda_consumer_s *cudaConsumer, TestArgs *args);
// Frees the host copy buffer and disconnects from the EGL stream.
CUresult cuda_consumer_Deinit(test_cuda_consumer_s *cudaConsumer);
// Waits for a new frame, acquires it and runs cudaConsumer_filter() on it.
CUresult cudaConsumerAcquireFrame(test_cuda_consumer_s *data, int frameNumber);
// Returns the most recently acquired frame to the EGL stream.
CUresult cudaConsumerReleaseFrame(test_cuda_consumer_s *data, int frameNumber);
// Initialises the driver API and creates the consumer's context on cudaDevId;
// exits the process with code 2 on GPUs below compute capability 6.0.
CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer);
// Defined outside cuda_consumer.cpp. NOTE(review): judging by the parameter
// names it checks the frame against expectedVal and rewrites it to newVal -
// confirm against the kernel source.
cudaError_t cudaConsumer_filter(CUstream cStream, char *pSrc, int width,
int height, char expectedVal, char newVal,
int frameNumber);
// Defined outside cuda_consumer.cpp; semantics not visible from this header.
cudaError_t cudaGetValueMismatch(void);
#endif

View File

@ -0,0 +1,288 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// DESCRIPTION: Simple cuda EGL stream producer app
//
#include "cudaEGL.h"
#include "cuda_producer.h"
#include "eglstrm_common.h"
#include <cuda_runtime.h>
#if defined(EXTENSION_LIST)
EXTENSION_LIST(EXTLST_EXTERN)
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string.h>
#include "cuda_runtime.h"
#include "math.h"
// Value the producer filter expects to find in a frame before presenting it;
// starts at INIT_DATA and is incremented on every other call of
// cudaProducerReturnFrame().
int cudaPresentReturnData = INIT_DATA;
// NOTE(review): fakePresent and fakeFrame are not referenced by the producer
// code visible in this file.
int fakePresent = 0;
CUeglFrame fakeFrame;
// 100-byte device allocation made at the end of cudaProducerInit().
CUdeviceptr cudaPtrFake;
// Defined in another translation unit; not referenced by the code visible here.
extern bool isCrossDevice;
// Fills *cudaEgl so that it describes a single-plane, pitch-linear
// WIDTH x HEIGHT ARGB frame (4 channels of unsigned 8-bit data) backed by the
// device memory at cudaPtr. bufferSize is accepted but not used.
void cudaProducerPrepareFrame(CUeglFrame *cudaEgl, CUdeviceptr cudaPtr,
                              int bufferSize) {
  CUeglFrame &frame = *cudaEgl;

  // Memory layout.
  frame.frameType = CU_EGL_FRAME_TYPE_PITCH;
  frame.planeCount = 1;
  frame.frame.pPitch[0] = (void *)cudaPtr;

  // Geometry.
  frame.width = WIDTH;
  frame.height = HEIGHT;
  frame.depth = 0;
  frame.pitch = WIDTH * 4;

  // Pixel format.
  frame.numChannels = 4;
  frame.cuFormat = CU_AD_FORMAT_UNSIGNED_INT8;
  frame.eglColorFormat = CU_EGL_COLOR_FORMAT_ARGB;
}
// Timing logs for the present and return calls (recorded only when profileAPI
// is set). count_* is the next free slot in the matching *_time[] array and
// wraps back to 0 after 25000 entries; total_time_* sums every recorded
// duration and is not reset on wrap.
static int count_present = 0, count_return = 0;
static double present_time[25000] = {0}, total_time_present = 0;
static double return_time[25000] = {0}, total_time_return = 0;
void presentApiStat(void);

// Prints the mean, population standard deviation, minimum and maximum of
// samples[first .. count-1] as "<label> Avg/SD/min/max" lines.
//
// Nothing is printed when that range is empty, which avoids dividing by zero
// and reading slots that were never recorded.
static void printProducerApiStat(const char *label, const double *samples,
                                 int first, int count) {
  const int n = count - first;  // number of samples actually summarised
  if (n < 1) return;

  double sum = 0, min = samples[first], max = samples[first];
  for (int i = first; i < count; i++) {
    sum += samples[i];
    if (samples[i] < min) min = samples[i];
    if (samples[i] > max) max = samples[i];
  }
  const double mean = sum / n;

  // Accumulated per report, so one report's deviation never leaks into the
  // next one.
  double squared_error = 0;
  for (int i = first; i < count; i++) {
    squared_error += (samples[i] - mean) * (samples[i] - mean);
  }

  printf("%s Avg: %lf\n", label, mean);
  printf("%s SD: %lf\n", label, sqrt(squared_error / n));
  printf("%s min: %lf\n", label, min);
  printf("%s max: %lf\n", label, max);
}

// Prints timing statistics for the present and return calls recorded while
// profiling. Registered with atexit() by cudaProducerInit().
void presentApiStat(void) {
  // Present timings are only logged after the first 10 successful presents
  // (see cudaProducerPresentFrame), so every logged entry, including slot 0,
  // is summarised.
  printProducerApiStat("present", present_time, 0, count_present);
  // The return report leaves out slot 0, as its average has always done.
  printProducerApiStat("return", return_time, 1, count_return);
}
// Runs the producer filter on the frame's first plane and presents the frame
// to the EGL stream. With profileAPI set, the duration of the present call is
// logged once more than 10 presents have succeeded.
//
// Returns the status of cuEGLStreamProducerPresentFrame; the filter's own
// result is not checked.
CUresult cudaProducerPresentFrame(test_cuda_producer_s *cudaProducer,
                                  CUeglFrame cudaEgl, int t) {
  static int presentCalls = 0;  // successful presents so far
  struct timespec tBegin, tEnd;
  CUdeviceptr framePtr = (CUdeviceptr)cudaEgl.frame.pPitch[0];

  cudaProducer_filter(cudaProducer->prodCudaStream, (char *)framePtr,
                      WIDTH * 4, HEIGHT, cudaPresentReturnData, PROD_DATA + t,
                      t);

  if (cudaProducer->profileAPI) {
    getTime(&tBegin);
  }

  const CUresult rc = cuEGLStreamProducerPresentFrame(
      &cudaProducer->cudaConn, cudaEgl, &cudaProducer->prodCudaStream);
  if (rc != CUDA_SUCCESS) {
    printf("Cuda Producer: Present frame failed, status:%d\n", rc);
    return rc;
  }

  presentCalls++;
  if (cudaProducer->profileAPI && presentCalls > 10) {
    getTime(&tEnd);
    const double elapsed = TIME_DIFF(tEnd, tBegin);
    present_time[count_present] = elapsed;
    count_present++;
    // The timing log is a fixed-size ring; start over when it is full.
    if (count_present == 25000) {
      count_present = 0;
    }
    total_time_present += elapsed;
  }
  return rc;
}
// Number of frames successfully returned so far; drives the every-other-call
// increment of cudaPresentReturnData below.
int flag = 0;

// Takes a frame back from the EGL stream, retrying while the driver reports
// CUDA_ERROR_LAUNCH_TIMEOUT, then runs the producer filter on the plane that
// the caller passed in. With profileAPI set, the time spent waiting for the
// return is logged.
//
// Returns CUDA_SUCCESS, or the first status other than
// CUDA_ERROR_LAUNCH_TIMEOUT reported by cuEGLStreamProducerReturnFrame.
CUresult cudaProducerReturnFrame(test_cuda_producer_s *cudaProducer,
                                 CUeglFrame cudaEgl, int t) {
  struct timespec tBegin, tEnd;
  CUresult rc;
  // Read before the driver call below, which is handed &cudaEgl.
  CUdeviceptr framePtr = (CUdeviceptr)cudaEgl.frame.pPitch[0];

  if (cudaProducer->profileAPI) {
    getTime(&tBegin);
  }

  do {
    rc = cuEGLStreamProducerReturnFrame(&cudaProducer->cudaConn, &cudaEgl,
                                        &cudaProducer->prodCudaStream);
  } while (rc == CUDA_ERROR_LAUNCH_TIMEOUT);

  if (rc != CUDA_SUCCESS) {
    printf("Cuda Producer: Return frame failed, status:%d\n", rc);
    return rc;
  }

  if (cudaProducer->profileAPI) {
    getTime(&tEnd);
    const double elapsed = TIME_DIFF(tEnd, tBegin);
    return_time[count_return] = elapsed;
    count_return++;
    // The timing log is a fixed-size ring; start over when it is full.
    if (count_return == 25000) {
      count_return = 0;
    }
    total_time_return += elapsed;
  }

  if (flag % 2 == 0) {
    cudaPresentReturnData++;
  }
  cudaProducer_filter(cudaProducer->prodCudaStream, (char *)framePtr,
                      WIDTH * 4, HEIGHT, CONS_DATA + t, cudaPresentReturnData,
                      t);
  flag++;
  return rc;
}
// Initialises the driver API, creates a context on the device selected by
// cudaProducer->cudaDevId, prints the device name and compute capability and
// pops the new context off the calling thread again.
//
// Returns the failing driver status if cuInit, cuDeviceGet or cuCtxCreate
// fails. Terminates the process with exit code 2 (waived) when the device's
// compute capability major version is below 6.
CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer) {
  CUdevice dev;

  CUresult rc = cuInit(0);
  if (rc != CUDA_SUCCESS) {
    printf("Failed to initialize CUDA\n");
    return rc;
  }

  rc = cuDeviceGet(&dev, cudaProducer->cudaDevId);
  if (rc != CUDA_SUCCESS) {
    printf("failed to get CUDA device\n");
    return rc;
  }

  rc = cuCtxCreate(&cudaProducer->context, 0, dev);
  if (rc != CUDA_SUCCESS) {
    printf("failed to create CUDA context\n");
    return rc;
  }

  // Describe the selected GPU; the results of these queries are not checked.
  char gpuName[256];
  int ccMajor = 0;
  int ccMinor = 0;
  cuDeviceGetAttribute(&ccMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
                       dev);
  cuDeviceGetAttribute(&ccMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
                       dev);
  cuDeviceGetName(gpuName, 256, dev);
  printf("CUDA Producer on GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
         dev, gpuName, ccMajor, ccMinor);

  cuCtxPopCurrent(&cudaProducer->context);

  if (ccMajor < 6) {
    printf("EGLStream_CUDA_CrossGPU requires SM 6.0 or higher arch GPU. Exiting...\n");
    exit(2);  // EXIT_WAIVED
  }
  return rc;
}
// Allocates bufferSize bytes of device memory into *devPtr and fills them
// from hostSrc. Prints a message and returns the driver status on failure.
static CUresult producerAllocAndUpload(CUdeviceptr *devPtr, char *hostSrc,
                                       int bufferSize) {
  CUresult rc = cuMemAlloc(devPtr, bufferSize);
  if (rc != CUDA_SUCCESS) {
    printf("Cuda Producer: cuda Malloc failed, status:%d\n", rc);
    return rc;
  }
  rc = cuMemcpyHtoD(*devPtr, (void *)hostSrc, bufferSize);
  if (rc != CUDA_SUCCESS) {
    printf("Cuda Producer: cuMemCpy failed, status:%d\n", rc);
  }
  return rc;
}

// Prepares a producer for streaming: allocates a host staging buffer filled
// with INIT_DATA, uploads it into the two device frame buffers (cudaPtr and
// cudaPtr1), creates the producer's CUDA stream, allocates the 100-byte
// cudaPtrFake buffer and registers the present/return timing report to run at
// process exit.
//
// Returns CUDA_SUCCESS on success, CUDA_ERROR_UNKNOWN when the host buffer
// cannot be allocated, or the status of the first failing driver call.
CUresult cudaProducerInit(test_cuda_producer_s *cudaProducer, TestArgs *args) {
  cudaProducer->charCnt = args->charCnt;
  const int bufferSize = cudaProducer->charCnt;

  cudaProducer->tempBuff = (char *)malloc(bufferSize);
  if (cudaProducer->tempBuff == NULL) {
    printf("Cuda Producer: Failed to allocate image buffer\n");
    return CUDA_ERROR_UNKNOWN;
  }
  memset((void *)cudaProducer->tempBuff, INIT_DATA, cudaProducer->charCnt);

  // Both frame buffers start out holding the same initial data.
  CUresult status = producerAllocAndUpload(&cudaProducer->cudaPtr,
                                           cudaProducer->tempBuff, bufferSize);
  if (status != CUDA_SUCCESS) {
    return status;
  }
  status = producerAllocAndUpload(&cudaProducer->cudaPtr1,
                                  cudaProducer->tempBuff, bufferSize);
  if (status != CUDA_SUCCESS) {
    return status;
  }

  status = cuStreamCreate(&cudaProducer->prodCudaStream, 0);
  if (status != CUDA_SUCCESS) {
    printf("Cuda Producer: cuStreamCreate failed, status:%d\n", status);
    return status;
  }

  status = cuMemAlloc(&cudaPtrFake, 100);
  if (status != CUDA_SUCCESS) {
    printf("Cuda Producer: cuda Malloc failed, status:%d\n", status);
    return status;
  }

  atexit(presentApiStat);
  return status;
}
// Releases everything cudaProducerInit() allocated (host staging buffer, both
// device frame buffers and the cudaPtrFake buffer) and disconnects the
// producer from its EGL stream.
//
// Every pointer is cleared after it is released so that a repeated Deinit does
// not free the same memory twice. Results of cuMemFree are not checked.
// NOTE(review): like the original, this assumes the struct's pointers are
// either valid or zero when Deinit runs - confirm the caller zero-initialises.
//
// Returns the status of cuEGLStreamProducerDisconnect.
CUresult cudaProducerDeinit(test_cuda_producer_s *cudaProducer) {
  // free(NULL) is a no-op, so the host buffer needs no guard.
  free(cudaProducer->tempBuff);
  cudaProducer->tempBuff = NULL;

  if (cudaProducer->cudaPtr) {
    cuMemFree(cudaProducer->cudaPtr);
    cudaProducer->cudaPtr = 0;
  }
  // The second frame buffer is allocated alongside cudaPtr in
  // cudaProducerInit() and must be released with it.
  if (cudaProducer->cudaPtr1) {
    cuMemFree(cudaProducer->cudaPtr1);
    cudaProducer->cudaPtr1 = 0;
  }
  if (cudaPtrFake) {
    cuMemFree(cudaPtrFake);
    cudaPtrFake = 0;
  }

  return cuEGLStreamProducerDisconnect(&cudaProducer->cudaConn);
}

View File

@ -0,0 +1,68 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// DESCRIPTION: Simple cuda producer header file
//
#ifndef _CUDA_PRODUCER_H_
#define _CUDA_PRODUCER_H_
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include "cudaEGL.h"
#include "eglstrm_common.h"
#include <cuda_runtime.h>
#include <cuda.h>
// State of one CUDA EGL-stream producer, shared by the cuda_producer.cpp
// entry points declared below.
typedef struct _test_cuda_producer_s {
// Stream params
// Context created by cudaDeviceCreateProducer() with cuCtxCreate.
CUcontext context;
// Stream connection handle passed to the cuEGLStreamProducer* calls.
CUeglStreamConnection cudaConn;
// Device ordinal handed to cuDeviceGet.
int cudaDevId;
// NOTE(review): eglStream and eglDisplay are not referenced by the producer
// code in cuda_producer.cpp; presumably set and used by the caller.
EGLStreamKHR eglStream;
EGLDisplay eglDisplay;
// Copied from TestArgs::charCnt; size in bytes of tempBuff and of each device
// frame buffer.
unsigned int charCnt;
// When true, present/return calls are timed and logged.
bool profileAPI;
// Host staging buffer, filled with INIT_DATA in cudaProducerInit().
char *tempBuff;
// Two device frame buffers, both initialised from tempBuff.
CUdeviceptr cudaPtr;
CUdeviceptr cudaPtr1;
// Stream created in cudaProducerInit() and used for present, return and the
// producer filter.
CUstream prodCudaStream;
} test_cuda_producer_s;
// Allocates and initialises the host and device buffers, creates
// prodCudaStream and registers the timing report with atexit().
CUresult cudaProducerInit(test_cuda_producer_s *cudaProducer, TestArgs *args);
// Runs the producer filter on the frame and presents it to the EGL stream.
CUresult cudaProducerPresentFrame(test_cuda_producer_s *parserArg,
CUeglFrame cudaEgl, int t);
// Takes a frame back from the EGL stream (retrying on launch timeout) and
// runs the producer filter on it.
CUresult cudaProducerReturnFrame(test_cuda_producer_s *parserArg,
CUeglFrame cudaEgl, int t);
// Frees tempBuff and cudaPtr and disconnects from the EGL stream.
CUresult cudaProducerDeinit(test_cuda_producer_s *cudaProducer);
// Initialises the driver API and creates the producer's context on cudaDevId;
// exits the process with code 2 on GPUs below compute capability 6.0.
CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer);
// Defined outside cuda_producer.cpp. NOTE(review): judging by the parameter
// names it checks the frame against expectedVal and rewrites it to newVal -
// confirm against the kernel source.
cudaError_t cudaProducer_filter(CUstream cStream, char *pSrc, int width,
int height, char expectedVal, char newVal,
int frameNumber);
// Describes cudaPtr as a single-plane WIDTH x HEIGHT ARGB pitch-linear frame;
// bufferSize is not used by the implementation.
void cudaProducerPrepareFrame(CUeglFrame *cudaEgl, CUdeviceptr cudaPtr,
int bufferSize);
#endif

View File

@ -0,0 +1,423 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// DESCRIPTION: Common egl stream functions
//
#include "eglstrm_common.h"
// Process-wide EGL stream state. EGLStreamInit() fills in the producer or the
// consumer pair depending on its isConsumer argument; EGLStreamFini() destroys
// whichever streams were created.
EGLStreamKHR g_producerEglStream = EGL_NO_STREAM_KHR;
EGLStreamKHR g_consumerEglStream = EGL_NO_STREAM_KHR;
EGLDisplay g_producerEglDisplay = EGL_NO_DISPLAY;
EGLDisplay g_consumerEglDisplay = EGL_NO_DISPLAY;
// CUDA device ordinals reported by the EGL_CUDA_DEVICE_NV query in
// EGLStreamInit(); -1 until a device has been selected.
int cudaDevIndexProd = -1;
int cudaDevIndexCons = -1;
#if defined(EXTENSION_LIST)
// Define one NULL-initialised function pointer per EXTENSION_LIST entry.
EXTENSION_LIST(EXTLST_DECL)
// Generic function pointer type used to store the result of
// eglGetProcAddress() into any of the typed pointers above.
typedef void (*extlst_fnptr_t)(void);
// Lookup table walked by eglSetupExtensions(): where to store each entry
// point, and the name to look it up by.
static struct {
extlst_fnptr_t *fnptr;
char const *name;
// True when the function is needed only in the dgpu (cross-device) case.
// EXTLST_ENTRY supplies only fnptr and name, so this field is
// zero-initialised (false) for every entry generated from EXTENSION_LIST.
bool is_dgpu;
} extensionList[] = {EXTENSION_LIST(EXTLST_ENTRY)};
// Resolves the EGL extension entry points listed in extensionList through
// eglGetProcAddress() and stores them in the matching function pointers.
//
// Entries flagged is_dgpu are resolved only when isCrossDevice is true; all
// other entries are always resolved.
//
// Returns 1 when every required entry point was found, 0 as soon as one
// lookup fails (the missing name is printed).
int eglSetupExtensions(bool isCrossDevice) {
  const unsigned int entryCount = sizeof(extensionList) / sizeof(*extensionList);
  unsigned int idx = 0;

  while (idx < entryCount) {
    const bool dgpuOnly = extensionList[idx].is_dgpu;

    // Skip functions that are needed only for the cross device test when
    // that test is not being run.
    if (!dgpuOnly || isCrossDevice) {
      *extensionList[idx].fnptr = eglGetProcAddress(extensionList[idx].name);
      if (*extensionList[idx].fnptr == NULL) {
        printf("Couldn't get address of %s()\n", extensionList[idx].name);
        return 0;
      }
    }
    ++idx;
  }
  return 1;
}
// Initialises the EGL side of the stream for the consumer or the producer.
//
// Consumer (isConsumer != 0): selects the first EGL device that answers the
// EGL_CUDA_DEVICE_NV query, stores its CUDA ordinal in cudaDevIndexCons,
// initialises g_consumerEglDisplay on it, creates g_consumerEglStream in FIFO
// mode and sets the consumer latency and acquire-timeout attributes.
//
// Producer (isConsumer == 0): selects the first such device, or the second one
// when isCrossDevice is set, stores its CUDA ordinal in cudaDevIndexProd,
// initialises g_producerEglDisplay on it and creates g_producerEglStream from
// fileDesc. fileDesc is closed right after
// eglCreateStreamFromFileDescriptorKHR() has been called, whether or not the
// call succeeded; it is left open on the earlier failure paths.
//
// Returns 1 on success, 0 on any failure (a message is printed).
int EGLStreamInit(bool isCrossDevice, int isConsumer,
                  EGLNativeFileDescriptorKHR fileDesc) {
  static const EGLint streamAttrFIFOMode[] = {
      EGL_STREAM_FIFO_LENGTH_KHR, 5, EGL_SUPPORT_REUSE_NV, EGL_FALSE, EGL_NONE};
  EGLBoolean eglStatus = EGL_FALSE;
#define MAX_EGL_DEVICES 4
  EGLDeviceEXT devices[MAX_EGL_DEVICES];
  EGLint numDevices = 0;

  eglStatus = eglQueryDevicesEXT(MAX_EGL_DEVICES, devices, &numDevices);
  if (eglStatus != EGL_TRUE) {
    printf("Error querying EGL devices\n");
    goto Done;
  }
  if (numDevices == 0) {
    printf("No EGL devices found\n");
    eglStatus = EGL_FALSE;
    goto Done;
  }

  // If cross device, create discrete GPU stream first and then create the
  // integrated GPU stream to connect to it via fd. The other way round fails
  // in producer connect.
  //
  // TODO: Find out if this EGL behavior is by design.
  if (isConsumer) {
    int egl_device_id = 0;
    for (egl_device_id = 0; egl_device_id < numDevices; egl_device_id++) {
      EGLAttrib cuda_device = -1;
      eglStatus = eglQueryDeviceAttribEXT(devices[egl_device_id],
                                          EGL_CUDA_DEVICE_NV, &cuda_device);
      if (eglStatus == EGL_TRUE) {
        // We select first EGL-CUDA Capable device for consumer.
        cudaDevIndexCons = (int)cuda_device;
        printf(
            "Found EGL-CUDA Capable device with CUDA Device id = %d out of "
            "egl_device_id = %d\n",
            (int)cudaDevIndexCons, egl_device_id);
        break;
      }
    }
    if (egl_device_id >= numDevices) {
      printf("No CUDA Capable EGL Device found.. Waiving execution\n");
      // Report failure explicitly instead of relying on the status left
      // behind by the last attribute query.
      eglStatus = EGL_FALSE;
      goto Done;
    }

    g_consumerEglDisplay = eglGetPlatformDisplayEXT(
        EGL_PLATFORM_DEVICE_EXT, (void *)devices[egl_device_id], NULL);
    if (g_consumerEglDisplay == EGL_NO_DISPLAY) {
      printf("Could not get EGL display from device. \n");
      eglStatus = EGL_FALSE;
      goto Done;
    }

    eglStatus = eglInitialize(g_consumerEglDisplay, 0, 0);
    if (!eglStatus) {
      printf("EGL failed to initialize. \n");
      eglStatus = EGL_FALSE;
      goto Done;
    }

    g_consumerEglStream =
        eglCreateStreamKHR(g_consumerEglDisplay, streamAttrFIFOMode);
    if (g_consumerEglStream == EGL_NO_STREAM_KHR) {
      printf("Could not create EGL stream.\n");
      eglStatus = EGL_FALSE;
      goto Done;
    }

    eglStatus = eglStreamAttribKHR(g_consumerEglDisplay, g_consumerEglStream,
                                   EGL_CONSUMER_LATENCY_USEC_KHR, 16000);
    if (eglStatus != EGL_TRUE) {
      printf("eglStreamAttribKHR EGL_CONSUMER_LATENCY_USEC_KHR failed\n");
      goto Done;
    }

    eglStatus =
        eglStreamAttribKHR(g_consumerEglDisplay, g_consumerEglStream,
                           EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR, 16000);
    if (eglStatus != EGL_TRUE) {
      printf(
          "eglStreamAttribKHR EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR failed\n");
      goto Done;
    }
  }

  if (!isConsumer) {  // Producer
    if (fileDesc == EGL_NO_FILE_DESCRIPTOR_KHR) {
      printf("Cuda Producer received bad file descriptor\n");
      eglStatus = EGL_FALSE;
      goto Done;
    }

    int egl_device_id = 0;
    int egl_cuda_devices = 0;
    for (egl_device_id = 0; egl_device_id < numDevices; egl_device_id++) {
      EGLAttrib cuda_device = -1;
      eglStatus = eglQueryDeviceAttribEXT(devices[egl_device_id],
                                          EGL_CUDA_DEVICE_NV, &cuda_device);
      if (eglStatus == EGL_TRUE) {
        egl_cuda_devices++;
        if (isCrossDevice && (egl_cuda_devices > 1)) {
          // We select second EGL-CUDA Capable device for producer.
          cudaDevIndexProd = (int)cuda_device;
          printf(
              "Found EGL-CUDA Capable device with CUDA Device id = %d "
              "egl_device_id = %d \n",
              (int)cudaDevIndexProd, egl_device_id);
          break;
        }
        if (!isCrossDevice) {
          // We select first EGL-CUDA Capable device for producer same as
          // consumer.
          cudaDevIndexProd = (int)cuda_device;
          printf(
              "Found EGL-CUDA Capable device with CUDA Device id = %d "
              "egl_device_id = %d \n",
              (int)cudaDevIndexProd, egl_device_id);
          break;
        }
      }
    }
    if (egl_device_id >= numDevices) {
      printf("No CUDA Capable EGL Device found.. Waiving execution\n");
      // In the cross-device case the last query may have succeeded (a single
      // CUDA capable device), which would otherwise be reported as success
      // although no stream was created.
      eglStatus = EGL_FALSE;
      goto Done;
    }

    g_producerEglDisplay = eglGetPlatformDisplayEXT(
        EGL_PLATFORM_DEVICE_EXT, (void *)devices[egl_device_id], NULL);
    if (g_producerEglDisplay == EGL_NO_DISPLAY) {
      printf("Could not get EGL display from device. \n");
      eglStatus = EGL_FALSE;
      goto Done;
    }

    eglStatus = eglInitialize(g_producerEglDisplay, 0, 0);
    if (!eglStatus) {
      printf("EGL failed to initialize. \n");
      eglStatus = EGL_FALSE;
      goto Done;
    }

    g_producerEglStream =
        eglCreateStreamFromFileDescriptorKHR(g_producerEglDisplay, fileDesc);
    close(fileDesc);
    if (g_producerEglStream == EGL_NO_STREAM_KHR) {
      printf("CUDA Producer Could not create EGL stream.\n");
      eglStatus = EGL_FALSE;
      goto Done;
    } else {
      printf("Producer created EGLStream for the GPU.\n");
    }
  }

Done:
  return eglStatus == EGL_TRUE ? 1 : 0;
}
// Destroys the EGL streams created by EGLStreamInit().
// The consumer stream is destroyed only when it is a different handle from
// the producer stream, so a handle shared by both is released exactly once.
void EGLStreamFini(void) {
  const bool hasProducer = (g_producerEglStream != EGL_NO_STREAM_KHR);
  const bool hasSeparateConsumer =
      (g_consumerEglStream != g_producerEglStream) &&
      (g_consumerEglStream != EGL_NO_STREAM_KHR);

  if (hasProducer) {
    eglDestroyStreamKHR(g_producerEglDisplay, g_producerEglStream);
  }
  if (hasSeparateConsumer) {
    eglDestroyStreamKHR(g_consumerEglDisplay, g_consumerEglStream);
  }
}
// Connects to the unix domain socket named socket_name.
//
// connect() is retried once per second for up to 60 attempts while the peer
// is not yet listening; progress is printed to stdout.
//
// Returns the connected socket descriptor, or -1 when the socket cannot be
// created or the wait times out. The descriptor is closed on the timeout
// path so a failed call does not leak it.
int UnixSocketConnect(const char *socket_name) {
  int sock_fd = -1;
  struct sockaddr_un sock_addr;
  int wait_loop = 0;

  sock_fd = socket(PF_UNIX, SOCK_STREAM, 0);
  if (sock_fd < 0) {
    printf("%s: socket create failed.\n", __func__);
    return -1;
  }
  if (verbose) printf("%s: send_fd: sock_fd: %d\n", __func__, sock_fd);

  memset(&sock_addr, 0, sizeof(struct sockaddr_un));
  sock_addr.sun_family = AF_UNIX;
  // The memset above guarantees NUL termination when the name is truncated.
  strncpy(sock_addr.sun_path, socket_name, sizeof(sock_addr.sun_path) - 1);

  while (connect(sock_fd, (const struct sockaddr *)&sock_addr,
                 sizeof(struct sockaddr_un))) {
    if (wait_loop < 60) {
      if (!wait_loop)
        printf("Waiting for EGL stream producer ");
      else
        printf(".");
      fflush(stdout);
      sleep(1);
      wait_loop++;
    } else {
      printf("\n%s: Waiting timed out\n", __func__);
      close(sock_fd);
      return -1;
    }
  }
  if (wait_loop) printf("\n");
  if (verbose) printf("%s: Wait is done\n", __func__);
  return sock_fd;
}
/* Send <fd_to_send> (a file descriptor) to another process over the  */
/* already-connected unix domain socket <send_fd>.                    */
/* The descriptor travels as SCM_RIGHTS ancillary data attached to a  */
/* single payload byte. Returns 0 on success, -1 if sendmsg() fails.  */
int EGLStreamSendfd(int send_fd, int fd_to_send) {
  struct msghdr msg;
  struct iovec iov[1];
  char ctrl_buf[CMSG_SPACE(sizeof(int))];
  struct cmsghdr *cmsg = NULL;
  int res;

  memset(&msg, 0, sizeof(msg));
  iov[0].iov_len = 1;             // must send at least 1 byte
  iov[0].iov_base = (void *)"x";  // any byte value (value ignored)
  msg.msg_iov = iov;
  msg.msg_iovlen = 1;

  memset(ctrl_buf, 0, sizeof(ctrl_buf));
  msg.msg_control = ctrl_buf;
  msg.msg_controllen = sizeof(ctrl_buf);

  cmsg = CMSG_FIRSTHDR(&msg);
  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN(sizeof(int));
  // Copy instead of storing through a cast pointer: CMSG_DATA() is not
  // guaranteed to be suitably aligned for an int (see cmsg(3)).
  memcpy(CMSG_DATA(cmsg), &fd_to_send, sizeof(fd_to_send));
  msg.msg_controllen = cmsg->cmsg_len;

  res = sendmsg(send_fd, &msg, 0);
  if (res <= 0) {
    printf("%s: sendmsg failed\n", __func__);
    return -1;
  }
  return 0;
}
/* Listen on a unix domain socket named <socket_name>, wait for one   */
/* peer to connect and return the connected descriptor.               */
/* Any existing file at <socket_name> is unlinked first, and the      */
/* socket file is unlinked again once accept() has returned.          */
/* Returns -1 on failure; the listening socket is closed on every     */
/* path.                                                              */
int UnixSocketCreate(const char *socket_name) {
  int listen_fd;
  struct sockaddr_un sock_addr;
  int connect_fd;
  struct sockaddr_un connect_addr;
  // accept() expects the size of the address buffer on input.
  socklen_t connect_addr_len = sizeof(connect_addr);

  listen_fd = socket(PF_UNIX, SOCK_STREAM, 0);
  if (listen_fd < 0) {
    printf("%s: socket create failed", __func__);
    return -1;
  }
  if (verbose) printf("%s: listen_fd: %d\n", __func__, listen_fd);

  unlink(socket_name);
  memset(&sock_addr, 0, sizeof(struct sockaddr_un));
  sock_addr.sun_family = AF_UNIX;
  strncpy(sock_addr.sun_path, socket_name, sizeof(sock_addr.sun_path) - 1);

  if (bind(listen_fd, (const struct sockaddr *)&sock_addr,
           sizeof(struct sockaddr_un))) {
    printf("%s: bind error", __func__);
    close(listen_fd);
    return -1;
  }
  if (listen(listen_fd, 1)) {
    printf("%s: listen error", __func__);
    close(listen_fd);
    return -1;
  }

  connect_fd =
      accept(listen_fd, (struct sockaddr *)&connect_addr, &connect_addr_len);
  if (verbose) printf("%s: connect_fd: %d\n", __func__, connect_fd);

  // Only a single connection is served, so the listener and its socket file
  // are released whether or not accept() succeeded.
  close(listen_fd);
  unlink(socket_name);
  if (connect_fd < 0) {
    printf("%s: accept failed\n", __func__);
    return -1;
  }
  return connect_fd;
}
/* Receive a file descriptor from another process over the connected  */
/* unix domain socket <connect_fd>.                                   */
/* Returns the file descriptor, or -1 on failure. Note: the integer   */
/* value of the file descriptor may be different from the integer     */
/* value in the other process, but the file descriptors in each       */
/* process will refer to the same file object in the kernel.          */
int EGLStreamReceivefd(int connect_fd) {
  struct msghdr msg;
  struct iovec iov[1];
  char msg_buf[1];
  char ctrl_buf[CMSG_SPACE(sizeof(int))];
  struct cmsghdr *cmsg;
  int recvfd = -1;

  memset(&msg, 0, sizeof(msg));
  memset(ctrl_buf, 0, sizeof(ctrl_buf));
  iov[0].iov_base = msg_buf;
  iov[0].iov_len = sizeof(msg_buf);
  msg.msg_iov = iov;
  msg.msg_iovlen = 1;
  msg.msg_control = ctrl_buf;
  msg.msg_controllen = sizeof(ctrl_buf);

  if (recvmsg(connect_fd, &msg, 0) <= 0) {
    printf("%s: recvmsg failed\n", __func__);
    return -1;
  }
  // A truncated control message cannot be trusted to hold a whole fd.
  if (msg.msg_flags & MSG_CTRUNC) {
    printf("%s: Control message truncated\n", __func__);
    return -1;
  }

  cmsg = CMSG_FIRSTHDR(&msg);
  if (!cmsg) {
    printf("%s: NULL message header\n", __func__);
    return -1;
  }
  if (cmsg->cmsg_level != SOL_SOCKET) {
    printf("%s: Message level is not SOL_SOCKET\n", __func__);
    return -1;
  }
  if (cmsg->cmsg_type != SCM_RIGHTS) {
    printf("%s: Message type is not SCM_RIGHTS\n", __func__);
    return -1;
  }
  // Exactly one descriptor is expected; reject anything else before reading.
  if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
    printf("%s: Unexpected control message length\n", __func__);
    return -1;
  }

  // Copy instead of reading through a cast pointer: CMSG_DATA() is not
  // guaranteed to be suitably aligned for an int (see cmsg(3)).
  memcpy(&recvfd, CMSG_DATA(cmsg), sizeof(recvfd));
  return recvfd;
}
#endif

View File

@ -0,0 +1,113 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// DESCRIPTION: Common EGL stream functions header file
//
#ifndef _EGLSTRM_COMMON_H_
#define _EGLSTRM_COMMON_H_
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/un.h>
#include <time.h>
#include <unistd.h>
#include "cuda.h"
#include "cudaEGL.h"
// Elapsed time in microseconds between two struct timespec values.
#define TIME_DIFF(end, start) (getMicrosecond(end) - getMicrosecond(start))
// EGL stream and display handles for the producer and consumer sides.
extern EGLStreamKHR g_producerEglStream;
extern EGLStreamKHR g_consumerEglStream;
extern EGLDisplay g_producerEglDisplay;
extern EGLDisplay g_consumerEglDisplay;
// CUDA device ordinals selected for the consumer and the producer.
extern int cudaDevIndexCons;
extern int cudaDevIndexProd;
// Enables the extra diagnostic printf() output in the socket helpers.
extern bool verbose;
// X-macro listing every EGL extension entry point used by the sample as
// (function pointer type, function name) pairs. Expand it with one of the
// EXTLST_* macros below to generate definitions, extern declarations or
// lookup-table entries from the same list.
#define EXTENSION_LIST(T) \
T(PFNEGLCREATESTREAMKHRPROC, eglCreateStreamKHR) \
T(PFNEGLDESTROYSTREAMKHRPROC, eglDestroyStreamKHR) \
T(PFNEGLQUERYSTREAMKHRPROC, eglQueryStreamKHR) \
T(PFNEGLQUERYSTREAMU64KHRPROC, eglQueryStreamu64KHR) \
T(PFNEGLQUERYSTREAMTIMEKHRPROC, eglQueryStreamTimeKHR) \
T(PFNEGLSTREAMATTRIBKHRPROC, eglStreamAttribKHR) \
T(PFNEGLSTREAMCONSUMERACQUIREKHRPROC, eglStreamConsumerAcquireKHR) \
T(PFNEGLSTREAMCONSUMERRELEASEKHRPROC, eglStreamConsumerReleaseKHR) \
T(PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC, \
eglStreamConsumerGLTextureExternalKHR) \
T(PFNEGLQUERYDEVICESEXTPROC, eglQueryDevicesEXT) \
T(PFNEGLGETPLATFORMDISPLAYEXTPROC, eglGetPlatformDisplayEXT) \
T(PFNEGLGETSTREAMFILEDESCRIPTORKHRPROC, eglGetStreamFileDescriptorKHR) \
T(PFNEGLQUERYDEVICEATTRIBEXTPROC, eglQueryDeviceAttribEXT) \
T(PFNEGLCREATESTREAMFROMFILEDESCRIPTORKHRPROC, \
eglCreateStreamFromFileDescriptorKHR)
// Defines the function pointer `x` of type `tx`, initialised to NULL.
#define EXTLST_DECL(tx, x) tx x = NULL;
// Declares the function pointer `x` for use from another translation unit.
#define EXTLST_EXTERN(tx, x) extern tx x;
// Emits one aggregate initialiser: the address of the pointer and its name as
// a string. Only two members are supplied, so any further member of the
// target struct is zero-initialised.
#define EXTLST_ENTRY(tx, x) {(extlst_fnptr_t *)&x, #x},
#define MAX_STRING_SIZE 256
// Byte patterns used by the test.
// NOTE(review): presumably the initial fill value and the values written by
// the producer and the consumer respectively -- confirm against the callers.
#define INIT_DATA 0x01
#define PROD_DATA 0x07
#define CONS_DATA 0x04
// Filesystem path of a unix domain socket.
// NOTE(review): presumably the name passed to UnixSocketCreate() /
// UnixSocketConnect() -- confirm against the callers.
#define SOCK_PATH "/tmp/tegra_sw_egl_socket"
// Parsed command line state shared between the helpers.
typedef struct _TestArgs {
unsigned int charCnt;
bool isProducer;  // set from the -proctype option (prod -> 1, cons -> 0)
} TestArgs;
// Frame dimensions; set from the -width / -height options by parseCmdLine().
extern int WIDTH, HEIGHT;
// Resolves the EGL extension entry points; returns 1 on success, 0 on failure.
int eglSetupExtensions(bool is_dgpu);
// Creates the consumer or producer EGL stream; returns 1 on success, 0 on
// failure.
int EGLStreamInit(bool isCrossDevice, int isConsumer,
EGLNativeFileDescriptorKHR fileDesc);
// Destroys the streams created by EGLStreamInit().
void EGLStreamFini(void);
// NOTE(review): no definition of EGLStreamSetAttr is visible alongside the
// other stream helpers -- confirm it exists before relying on it.
int EGLStreamSetAttr(EGLDisplay display, EGLStreamKHR eglStream);
// Unix domain socket helpers used to pass a file descriptor between the two
// processes. All return -1 on failure.
int UnixSocketConnect(const char *socket_name);
int EGLStreamSendfd(int send_fd, int fd_to_send);
int UnixSocketCreate(const char *socket_name);
int EGLStreamReceivefd(int connect_fd);
// Clock sampled by getTime().
static clockid_t clock_id = CLOCK_MONOTONIC;  // CLOCK_PROCESS_CPUTIME_ID;

// Converts a timespec to microseconds (seconds * 1e6 + nanoseconds / 1e3).
// Declared inline, like getTime(), so that translation units which include
// this header without calling it do not get an unused-function warning.
static inline double getMicrosecond(struct timespec t) {
  return ((t.tv_sec) * 1000000.0 + (t.tv_nsec) / 1.0e3);
}

// Stores the current value of clock_id in *t.
static inline void getTime(struct timespec *t) { clock_gettime(clock_id, t); }
#endif

View File

@ -0,0 +1,160 @@
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# findegl.mk is used to find the necessary EGL Libraries for specific distributions
# this is supported on Linux
#
################################################################################
# Determine OS platform and unix distribution.
# DISTRO is assigned with ':=' so that each detection pipeline runs once, at
# the point of assignment, instead of on every later expansion of $(DISTRO).
ifeq ("$(TARGET_OS)","linux")
    # first search lsb_release
    DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
    ifeq ("$(DISTRO)","")
        # second search and parse /etc/issue
        DISTRO := $(shell more /etc/issue | awk '{print $$1}' | sed '1!d' | sed -e "/^$$/d" 2>/dev/null | tr "[:upper:]" "[:lower:]")
        # ensure data from /etc/issue is valid
        ifeq (,$(filter $(DISTRO),ubuntu fedora red rhel centos suse))
            DISTRO :=
        endif
        ifeq ("$(DISTRO)","")
            # third, we can search in /etc/os-release or /etc/{distro}-release
            DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
        endif
    endif
endif
ifeq ("$(TARGET_OS)","linux")
    # Each Linux distribution keeps its OpenGL/EGL libraries in a different
    # place. Every flag below holds the exit status of the grep that tests
    # DISTRO, so "0" means "this is the distribution". ':=' makes each test
    # run once instead of on every expansion.
    UBUNTU := $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
    FEDORA := $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
    RHEL   := $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
    CENTOS := $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
    SUSE   := $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)

    ifeq ("$(UBUNTU)","0")
        ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
            # Cross build for 32-bit ARM: use the cross toolchain libraries and,
            # when given, the target filesystem.
            GLPATH := /usr/arm-linux-gnueabihf/lib
            GLLINK := -L/usr/arm-linux-gnueabihf/lib
            ifneq ($(TARGET_FS),)
                GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf
                GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
            endif
        else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64)
            # Cross build for 64-bit ARM.
            GLPATH := /usr/aarch64-linux-gnu/lib
            GLLINK := -L/usr/aarch64-linux-gnu/lib
            ifneq ($(TARGET_FS),)
                GLPATH += $(TARGET_FS)/usr/lib
                GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu
                GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
            endif
        else
            # Native build: use the directory named after the first installed
            # nvidia-* package reported by dpkg. ':=' matters here because the
            # recursive '?=' assignments below refer to this variable.
            UBUNTU_PKG_NAME := $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1)
            ifneq ("$(UBUNTU_PKG_NAME)","")
                GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME)
                GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME)
            endif

            DFLT_PATH ?= /usr/lib
        endif
    endif

    ifeq ("$(SUSE)","0")
        GLPATH    ?= /usr/X11R6/lib64
        GLLINK    ?= -L/usr/X11R6/lib64
        DFLT_PATH ?= /usr/lib64
    endif
    ifeq ("$(FEDORA)","0")
        GLPATH    ?= /usr/lib64/nvidia
        GLLINK    ?= -L/usr/lib64/nvidia
        DFLT_PATH ?= /usr/lib64
    endif
    ifeq ("$(RHEL)","0")
        GLPATH    ?= /usr/lib64/nvidia
        GLLINK    ?= -L/usr/lib64/nvidia
        DFLT_PATH ?= /usr/lib64
    endif
    ifeq ("$(CENTOS)","0")
        GLPATH    ?= /usr/lib64/nvidia
        GLLINK    ?= -L/usr/lib64/nvidia
        DFLT_PATH ?= /usr/lib64
    endif

    # Look for libEGL.so below the candidate library directories; the sample
    # is disabled when it is not found.
    EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null)

    ifeq ("$(EGLLIB)","")
        $(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<)
        SAMPLE_ENABLED := 0
    endif

    # Look for the EGL headers; cross builds also search the cross toolchain
    # include directory.
    HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include
    ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
        HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include
    else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux)
        HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include
    endif

    EGLHEADER    := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null)
    EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null)

    ifeq ("$(EGLHEADER)","")
        $(info >>> WARNING - egl.h not found, please install egl.h <<<)
        SAMPLE_ENABLED := 0
    endif
    ifeq ("$(EGLEXTHEADER)","")
        $(info >>> WARNING - eglext.h not found, please install eglext.h <<<)
        SAMPLE_ENABLED := 0
    endif
endif
# QNX builds pass a -V option to the host compiler.
ifeq "$(TARGET_OS)" "qnx"
    HOST_CCFLAGS := -V5.4.0,gcc_ntoaarch64le
endif
# Probe the available EGL headers for the NVIDIA token EGL_SUPPORT_REUSE_NV.
# The probe is compile-only (the program is never run): test.c hits an #error
# directive when the token is defined, and the compiler output is searched for
# the text of that #error. The probe is skipped when the sample has already
# been disabled.
ifneq ($(SAMPLE_ENABLED), 0)
# Write the probe source to test.c in the current directory.
$(shell printf "#include <EGL/egl.h>\n#include <EGL/eglext.h>\nint main() {\n#ifdef EGL_SUPPORT_REUSE_NV \n #error \"Compatible EGL header found\" \n return 0;\n#endif \n return 1;\n}" > test.c; )
# EGL_DEFINES is the number of compiler output lines that contain the marker
# text; grep -c prints 0 when there is no such line.
EGL_DEFINES := $(shell $(HOST_COMPILER) $(HOST_CCFLAGS) $(CCFLAGS) $(EXTRA_CCFLAGS) -lEGL test.c -c 2>&1 | grep -ic "Compatible EGL header found";)
SHOULD_WAIVE := 0
# No marker line means the token is not defined by the headers that were used.
ifeq ($(EGL_DEFINES),0)
SHOULD_WAIVE := 1
endif
ifeq ($(SHOULD_WAIVE),1)
$(info -----------------------------------------------------------------------------------------------)
$(info WARNING - NVIDIA EGL EXTENSIONS are not available in the present EGL headers)
$(info -----------------------------------------------------------------------------------------------)
$(info This CUDA Sample cannot be built if the EGL NVIDIA EXTENSIONS like EGL_SUPPORT_REUSE_NV are not supported in EGL headers.)
$(info This will be a dry-run of the Makefile.)
$(info Please install the latest khronos EGL headers and libs to build this sample)
$(info -----------------------------------------------------------------------------------------------)
SAMPLE_ENABLED := 0
endif
# Remove the probe files; errors for files that do not exist are discarded.
$(shell rm test.o test.c 2>/dev/null)
endif

View File

@ -0,0 +1,221 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "eglstrm_common.h"
#if defined(EXTENSION_LIST)
EXTENSION_LIST(EXTLST_EXTERN)
#endif
#include <cuda.h>
// Forward declarations for the functions defined in this file.
int parseCmdLine(int argc, char *argv[], TestArgs *args);
void printUsage(void);
// Command line state written by parseCmdLine().
int NUMTRIALS = 10;  // trial count, overridden by -n
int profileAPIs = 0;  // set to 1 by -profile
bool verbose = 0;  // set to 1 by -v
bool isCrossDevice = 0;  // set to 1 by -crossdev
// Returns the value that follows option argv[*i] and advances *i onto it.
// Prints a diagnostic and returns NULL when the option is the last argument.
static const char *nextOptionValue(int argc, char *argv[], int *i) {
  if (*i + 1 >= argc) {
    printf("Missing value for option: %s\n", argv[*i]);
    return NULL;
  }
  ++(*i);
  return argv[*i];
}

// Parse the command line options, then verify that the EGL/CUDA environment
// can run the requested test.
//
// Writes NUMTRIALS, profileAPIs, isCrossDevice, WIDTH, HEIGHT, verbose and
// args->isProducer according to the options found.
//
// Returns 0 on success and a non-zero value on a parse error (-1, or 1 for an
// unrecognised -proctype value). The function does not return when -h is
// given (exit(0)), when a CUDA/EGL query fails (exit(EXIT_FAILURE)) or when
// the system cannot run the test (exit(2) / exit(EXIT_SUCCESS), "waived").
int parseCmdLine(int argc, char *argv[], TestArgs *args) {
  int i;

  for (i = 1; i < argc; i++) {
    const char *value = NULL;

    if (strcmp(argv[i], "-h") == 0) {
      printUsage();
      exit(0);
    } else if (strcmp(argv[i], "-n") == 0) {
      value = nextOptionValue(argc, argv, &i);
      if (value == NULL) {
        return -1;
      }
      if (sscanf(value, "%d", &NUMTRIALS) != 1 || NUMTRIALS <= 0) {
        printf("Invalid trial count: %s should be > 0\n", value);
        return -1;
      }
    } else if (strcmp(argv[i], "-profile") == 0) {
      profileAPIs = 1;
    } else if (strcmp(argv[i], "-crossdev") == 0 ||
               strcmp(argv[i], "-dgpu") == 0) {
      // -dgpu is the deprecated spelling advertised by printUsage().
      isCrossDevice = 1;
    } else if (strcmp(argv[i], "-width") == 0) {
      value = nextOptionValue(argc, argv, &i);
      if (value == NULL) {
        return -1;
      }
      if (sscanf(value, "%d", &WIDTH) != 1 || (WIDTH <= 0)) {
        printf("Width should be greater than 0\n");
        return -1;
      }
    } else if (strcmp(argv[i], "-height") == 0) {
      value = nextOptionValue(argc, argv, &i);
      if (value == NULL) {
        return -1;
      }
      if (sscanf(value, "%d", &HEIGHT) != 1 || (HEIGHT <= 0)) {
        printf("Height should be greater than 0\n");
        return -1;
      }
    } else if (argv[i][0] != '\0' && 0 == strcmp(&argv[i][1], "proctype")) {
      // Only the text after the first character is compared, so any leading
      // character is accepted in front of "proctype".
      value = nextOptionValue(argc, argv, &i);
      if (value == NULL) {
        return -1;
      }
      if (!strcasecmp(value, "prod")) {
        args->isProducer = 1;
      } else if (!strcasecmp(value, "cons")) {
        args->isProducer = 0;
      } else {
        printf("%s: Bad Process Type: %s\n", __func__, value);
        // NOTE(review): this path returns 1 while the other parse errors
        // return -1; kept as is because the caller's check is not visible.
        return 1;
      }
    } else if (strcmp(argv[i], "-v") == 0) {
      verbose = 1;
    } else {
      printf("Unknown option: %s\n", argv[i]);
      return -1;
    }
  }

  if (isCrossDevice) {
    // The cross-device test needs an integrated GPU next to the discrete one.
    int deviceCount = 0;

    CUresult error_id = cuInit(0);
    if (error_id != CUDA_SUCCESS) {
      printf("cuInit(0) returned %d\n", error_id);
      printf("Result = FAIL\n");
      exit(EXIT_FAILURE);
    }

    error_id = cuDeviceGetCount(&deviceCount);
    if (error_id != CUDA_SUCCESS) {
      printf("cuDeviceGetCount returned %d\n", (int)error_id);
      printf("Result = FAIL\n");
      exit(EXIT_FAILURE);
    }

    int iGPUexists = 0;
    CUdevice dev;
    for (dev = 0; dev < deviceCount; ++dev) {
      int integrated = 0;
      CUresult error_result = cuDeviceGetAttribute(
          &integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev);
      if (error_result != CUDA_SUCCESS) {
        printf("cuDeviceGetAttribute returned error : %d\n", (int)error_result);
        exit(EXIT_FAILURE);
      }
      if (integrated) {
        iGPUexists = 1;
      }
    }

    if (!iGPUexists) {
      printf("No Integrated GPU found in the system.\n");
      printf(
          "-crossdev option is only supported on systems with an Integrated "
          "GPU and a Discrete GPU\n");
      printf("Waiving the execution\n");
      exit(EXIT_SUCCESS);
    }
  }

  if (!eglSetupExtensions(isCrossDevice)) {
    printf("SetupExtentions failed \n");
    exit(EXIT_FAILURE);
  }

#define MAX_EGL_DEVICES 4
  EGLDeviceEXT devices[MAX_EGL_DEVICES];
  EGLint numDevices = 0;
  EGLBoolean eglStatus =
      eglQueryDevicesEXT(MAX_EGL_DEVICES, devices, &numDevices);
  if (eglStatus != EGL_TRUE) {
    printf("Error querying EGL devices\n");
    exit(EXIT_FAILURE);
  }
  if (numDevices == 0) {
    printf("No EGL devices found\n");
    exit(2);  // EXIT_WAIVED
  }

  // At least one EGL device must answer the EGL_CUDA_DEVICE_NV query.
  int egl_device_id = 0;
  for (egl_device_id = 0; egl_device_id < numDevices; egl_device_id++) {
    EGLAttrib cuda_device;
    eglStatus = eglQueryDeviceAttribEXT(devices[egl_device_id],
                                        EGL_CUDA_DEVICE_NV, &cuda_device);
    if (eglStatus == EGL_TRUE) {
      break;
    }
  }
  if (egl_device_id >= numDevices) {
    printf("No CUDA Capable EGL Device found.. Waiving execution\n");
    exit(2);  // EXIT_WAIVED
  }

  if (isCrossDevice) {
    if (numDevices == 1) {
      printf(
          "Found only one EGL device, cannot setup cross GPU streams. "
          "Waiving\n");
      exit(2);  // EXIT_WAIVED
    }
  }
  return 0;
}
// Starts the producer as a separate background process and marks the current
// process as the consumer (args->isProducer = 0).
//
// Only -proctype, -crossdev and -v are placed on the producer command line.
// NOTE(review): -n, -width, -height and -profile are not forwarded, so the
// producer runs with its own values for those -- confirm this is intended.
void launchProducer(TestArgs *args) {
  /* Cross-process creation of producer */
  char argsProducer[1024];
  int rc;

  strcpy(argsProducer, "./EGLStream_CUDA_CrossGPU -proctype prod ");
  if (isCrossDevice) {
    strcat(argsProducer, "-crossdev ");
  }
  if (verbose) {
    strcat(argsProducer, "-v ");
  }
  /*Make the process run in bg*/
  strcat(argsProducer, "& ");
  printf("\n%s: Crossproc Producer command: %s \n", __func__, argsProducer);

  /*Create crossproc Producer*/
  rc = system(argsProducer);
  if (rc != 0) {
    // Report the failure instead of silently dropping it; the consumer will
    // time out while waiting for the producer.
    printf("%s: launching the producer failed (system() returned %d)\n",
           __func__, rc);
  }

  /*Enable crossproc Consumer in the same process */
  args->isProducer = 0;
}
// Prints the command line options understood by parseCmdLine() to stdout.
void printUsage(void) {
  printf("Usage:\n");
  printf("  -h           Print this help message\n");
  printf("  -n n         Exit after running n trials. Set to 10 by default\n");
  printf(
      "  -profile     Profile time taken by ReleaseAPI. Not set by default\n");
  printf("  -crossdev    Run with producer on igpu and consumer on dgpu\n");
  printf("  -dgpu        (same as -crossdev, deprecated)\n");
  printf("  -width w     Frame width, must be greater than 0\n");
  printf("  -height h    Frame height, must be greater than 0\n");
  printf("  -proctype p  Process type, p is prod or cons\n");
  printf("  -v           verbose output\n");
}

View File

@ -0,0 +1,140 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// DESCRIPTION: Simple CUDA consumer rendering sample app
//
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <string.h>
#include "eglstrm_common.h"
// Set by the -crossdev command line option; defined in another source file.
extern bool isCrossDevice;
// Device-side mismatch bookkeeping shared by the check functions below:
// numErrors is incremented for each reported mismatch, and errorFound stops
// further reports once it has been set. Both are updated with plain
// (non-atomic) stores.
__device__ static unsigned int numErrors = 0, errorFound = 0;
// Producer-side check: each thread compares its own byte of `data` with
// expectedVal. On a mismatch, and only while no error has been flagged yet,
// it prints a diagnostic, increments numErrors and sets errorFound.
// `size` is not used.
__device__ void checkProducerDataGPU(char *data, int size, char expectedVal,
                                     int frameNumber) {
  const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;

  if (data[idx] == expectedVal) {
    return;
  }
  if (errorFound) {
    return;
  }

  printf("Producer FOUND:%d expected: %d at %d for trial %d %d\n", data[idx],
         expectedVal, idx, frameNumber, numErrors);
  numErrors++;
  errorFound = 1;
}
// Consumer-side check: each thread compares its own byte of `data` with
// expectedVal. On a mismatch, and only while no error has been flagged yet,
// it prints a diagnostic, increments numErrors and sets errorFound.
// `size` is not used.
__device__ void checkConsumerDataGPU(char *data, int size, char expectedVal,
                                     int frameNumber) {
  const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;

  if (data[idx] == expectedVal) {
    return;
  }
  if (errorFound) {
    return;
  }

  printf("Consumer FOUND:%d expected: %d at %d for trial %d %d\n", data[idx],
         expectedVal, idx, frameNumber, numErrors);
  numErrors++;
  errorFound = 1;
}
// Kernel: every thread stores newVal into its own byte of pSrc.
__global__ void writeDataToBuffer(char *pSrc, char newVal) {
  const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
  pSrc[idx] = newVal;
}
// Kernel wrapper around checkConsumerDataGPU().
// `size` is an int so that the width * height value passed at launch is not
// truncated to a char, matching the int parameter of the check function.
// newVal is not used by this kernel.
__global__ void testKernelConsumer(char *pSrc, int size, char expectedVal,
                                   char newVal, int frameNumber) {
  checkConsumerDataGPU(pSrc, size, expectedVal, frameNumber);
}
// Kernel wrapper around checkProducerDataGPU().
// `size` is an int so that the width * height value passed at launch is not
// truncated to a char, matching the int parameter of the check function.
// newVal is not used by this kernel.
__global__ void testKernelProducer(char *pSrc, int size, char expectedVal,
                                   char newVal, int frameNumber) {
  checkProducerDataGPU(pSrc, size, expectedVal, frameNumber);
}
// Kernel: copies the device-side mismatch counter into *numErr.
__global__ void getNumErrors(int *numErr) {
  *numErr = numErrors;
}
// Producer-side processing of one frame on stream pStream: optionally checks
// that the buffer holds expectedVal, then overwrites it with newVal.
//
// Both kernels are launched with (width * height) / 1024 blocks of 1024
// threads. The launches are not checked; the function always returns
// cudaSuccess.
cudaError_t cudaProducer_filter(cudaStream_t pStream, char *pSrc, int width,
                                int height, char expectedVal, char newVal,
                                int frameNumber) {
  const int numBytes = width * height;
  const int numBlocks = numBytes / 1024;

  // in case where consumer is on dgpu and producer is on igpu when return is
  // called the frame is not copied back to igpu. So the consumer changes is
  // not visible to producer
  if (!isCrossDevice) {
    testKernelProducer<<<numBlocks, 1024, 1, pStream>>>(
        pSrc, numBytes, expectedVal, newVal, frameNumber);
  }

  writeDataToBuffer<<<numBlocks, 1024, 1, pStream>>>(pSrc, newVal);
  return cudaSuccess;
}
// Enqueues the consumer-side work for one frame on `cStream`.
//
// The buffer is checked against `expectedVal` by testKernelConsumer and then
// overwritten with `newVal` by writeDataToBuffer. Both kernels use
// (width * height) / 1024 blocks of 1024 threads, so only whole multiples of
// 1024 bytes are covered.
//
// Returns cudaSuccess when both launches were accepted, otherwise the launch
// error reported by cudaGetLastError(). Previously launch failures were
// dropped and cudaSuccess was returned unconditionally. The kernels run
// asynchronously; execution errors surface at a later synchronisation point.
cudaError_t cudaConsumer_filter(cudaStream_t cStream, char *pSrc, int width,
                                int height, char expectedVal, char newVal,
                                int frameNumber) {
  const int numElems = width * height;
  const int threadsPerBlock = 1024;
  const int numBlocks = numElems / threadsPerBlock;

  // The third launch argument (1 byte of dynamic shared memory) is kept
  // exactly as the sample had it.
  testKernelConsumer<<<numBlocks, threadsPerBlock, 1, cStream>>>(
      pSrc, numElems, expectedVal, newVal, frameNumber);
  cudaError_t checkStatus = cudaGetLastError();
  if (checkStatus != cudaSuccess) {
    return checkStatus;
  }

  writeDataToBuffer<<<numBlocks, threadsPerBlock, 1, cStream>>>(pSrc, newVal);
  return cudaGetLastError();
}
// Reads the device-side mismatch counter back to the host.
//
// Returns:
//   cudaSuccess        no mismatches were recorded.
//   cudaErrorUnknown   at least one mismatch was recorded.
//   any other error    the CUDA call that failed while fetching the counter
//                      (a message is printed in that case).
cudaError_t cudaGetValueMismatch() {
  int numErr_h = 0;
  int *numErr_d = NULL;

  cudaError_t err = cudaMalloc(&numErr_d, sizeof(int));
  if (err != cudaSuccess) {
    printf("Cuda Main: cudaMalloc failed with %s\n", cudaGetErrorString(err));
    return err;
  }

  getNumErrors<<<1, 1>>>(numErr_d);
  err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    printf("Cuda Main: cudaDeviceSynchronize failed with %s\n",
           cudaGetErrorString(err));
    // The counter was not produced reliably: release the buffer and report
    // the failure instead of carrying on to the copy.
    cudaFree(numErr_d);
    return err;
  }

  err = cudaMemcpy(&numErr_h, numErr_d, sizeof(int), cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    printf("Cuda Main: cudaMemcpy failed with %s\n", cudaGetErrorString(err));
    cudaFree(numErr_d);
    return err;
  }

  err = cudaFree(numErr_d);
  if (err != cudaSuccess) {
    printf("Cuda Main: cudaFree failed with %s\n", cudaGetErrorString(err));
    return err;
  }

  return (numErr_h > 0) ? cudaErrorUnknown : cudaSuccess;
}

View File

@ -0,0 +1,392 @@
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cudaEGL.h"
#include "cuda_consumer.h"
#include "cuda_producer.h"
#include "eglstrm_common.h"
#include "helper.h"
#if defined(EXTENSION_LIST)
EXTENSION_LIST(EXTLST_EXTERN)
#endif
// Set to 1 by sig_handler when a signal arrives.
// NOTE(review): this is a plain bool written from a signal handler; confirm
// whether volatile sig_atomic_t is needed by the code that polls it.
bool signal_stop = 0;
// Declared extern here; the definition is not in this file.
extern bool verbose;
// Signal handler (installed for SIGINT in main): records the stop request and
// prints the signal number.
// NOTE(review): printf is not async-signal-safe - confirm this is acceptable
// for the sample.
static void sig_handler(int sig) {
signal_stop = 1;
printf("Signal: %d\n", sig);
}
// Finishes the consumer process: tears down the EGL stream, collects the
// producer's status over `send_fd`, prints the combined verdict and exits.
//
//   consumerStatus  0 when the consumer side succeeded, non-zero otherwise.
//   send_fd         socket connected to the producer, or a negative value
//                   when no connection was ever established.
//
// Never returns. The process exits with EXIT_SUCCESS only when both the
// consumer and the producer report 0.
void DoneCons(int consumerStatus, int send_fd) {
  EGLStreamFini();

  // Get the final status from the producer, combine and print. The producer
  // is assumed to have failed until a complete status word has been read.
  int producerStatus = -1;
  bool gotStatus = false;
  if (send_fd >= 0) {
    // Only touch the descriptor when it is valid; early-failure paths call
    // this function with send_fd == -1.
    ssize_t received = recv(send_fd, (void *)&producerStatus, sizeof(int), 0);
    gotStatus = (received == (ssize_t)sizeof(int));
    close(send_fd);
  }
  if (!gotStatus) {
    printf("%s: Cuda Consumer could not receive status from producer.\n",
           __func__);
    // A short read may have written part of the value; discard it.
    producerStatus = -1;
  }

  if (producerStatus == 0 && consumerStatus == 0) {
    printf("&&&& EGLStream_CUDA_CrossGPU PASSED\n");
    exit(EXIT_SUCCESS);
  } else {
    printf("&&&& EGLStream_CUDA_CrossGPU FAILED\n");
    exit(EXIT_FAILURE);
  }
}
// Finishes the producer process: tears down the EGL stream, reports
// `producerStatus` to the consumer over `connect_fd` and exits.
//
//   producerStatus  0 when the producer side succeeded, non-zero otherwise.
//   connect_fd      socket connected to the consumer, or a negative value
//                   when no connection was ever established.
//
// Never returns. The exit code is EXIT_SUCCESS when producerStatus is 0 and
// EXIT_FAILURE otherwise.
void DoneProd(int producerStatus, int connect_fd) {
  EGLStreamFini();

  bool statusSent = false;
  if (connect_fd >= 0) {
    // Only touch the descriptor when it is valid; the socket-creation failure
    // path calls this function with connect_fd == -1.
    ssize_t written =
        send(connect_fd, (void *)&producerStatus, sizeof(int), 0);
    statusSent = (written == (ssize_t)sizeof(int));
    close(connect_fd);
  }
  if (!statusSent) {
    printf("%s: Cuda Producer could not send status to consumer.\n", __func__);
  }

  exit(producerStatus == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
// Frame dimensions handed to cuEGLStreamProducerConnect; the per-frame byte
// count (args.charCnt) is derived as WIDTH * HEIGHT * 4.
int WIDTH = 8192, HEIGHT = 8192;

// Entry point for both roles of the sample. Depending on args.isProducer the
// process acts as the consumer (creates the EGL stream, launches the
// producer, sends it the stream file descriptor, then acquires/releases
// frames) or as the producer (receives the descriptor, connects and
// presents/returns frames).
//
// Every path, success or failure, ends in DoneCons()/DoneProd(), which
// exchange the final status between the two processes, print the verdict and
// exit. The trailing `return 0` is therefore not reached in practice.
int main(int argc, char **argv) {
  TestArgs args = {0, false};
  CUresult curesult = CUDA_SUCCESS;
  unsigned int j = 0;
  cudaError_t err = cudaSuccess;
  EGLNativeFileDescriptorKHR fileDescriptor = EGL_NO_FILE_DESCRIPTOR_KHR;
  CUeglFrame cudaEgl1, cudaEgl2;
  int consumerStatus = 0;
  int send_fd = -1;

  if (parseCmdLine(argc, argv, &args) < 0) {
    printUsage();
    curesult = CUDA_ERROR_UNKNOWN;
    consumerStatus = -1;
    DoneCons(consumerStatus, send_fd);
  }

  printf("Width : %u, height: %u and iterations: %u\n", WIDTH, HEIGHT,
         NUMTRIALS);

  if (!args.isProducer)  // Consumer code
  {
    test_cuda_consumer_s cudaConsumer;
    memset(&cudaConsumer, 0, sizeof(test_cuda_consumer_s));
    cudaConsumer.profileAPI = profileAPIs;

    // Hook up Ctrl-C handler
    signal(SIGINT, sig_handler);

    if (!EGLStreamInit(isCrossDevice, !args.isProducer,
                       EGL_NO_FILE_DESCRIPTOR_KHR)) {
      printf("EGLStream Init failed.\n");
      curesult = CUDA_ERROR_UNKNOWN;
      consumerStatus = -1;
      DoneCons(consumerStatus, send_fd);
    }

    cudaConsumer.cudaDevId = cudaDevIndexCons;
    curesult = cudaDeviceCreateConsumer(&cudaConsumer);
    if (curesult != CUDA_SUCCESS) {
      consumerStatus = -1;
      DoneCons(consumerStatus, send_fd);
    }

    cuCtxPushCurrent(cudaConsumer.context);
    launchProducer(&args);

    args.charCnt = WIDTH * HEIGHT * 4;
    curesult = cuda_consumer_init(&cudaConsumer, &args);
    if (curesult != CUDA_SUCCESS) {
      printf("Cuda Consumer: Init failed, status: %d\n", curesult);
      consumerStatus = -1;
      DoneCons(consumerStatus, send_fd);
    }
    cuCtxPopCurrent(&cudaConsumer.context);

    send_fd = UnixSocketConnect(SOCK_PATH);
    if (-1 == send_fd) {
      printf("%s: Cuda Consumer cannot create socket %s\n", __func__,
             SOCK_PATH);
      consumerStatus = -1;
      DoneCons(consumerStatus, send_fd);
    }

    cuCtxPushCurrent(cudaConsumer.context);
    cudaConsumer.eglStream = g_consumerEglStream;
    cudaConsumer.eglDisplay = g_consumerEglDisplay;

    // Send the EGL stream FD to producer
    fileDescriptor = eglGetStreamFileDescriptorKHR(cudaConsumer.eglDisplay,
                                                   cudaConsumer.eglStream);
    if (EGL_NO_FILE_DESCRIPTOR_KHR == fileDescriptor) {
      printf("%s: Cuda Consumer could not get EGL file descriptor.\n",
             __func__);
      eglDestroyStreamKHR(cudaConsumer.eglDisplay, cudaConsumer.eglStream);
      consumerStatus = -1;
      DoneCons(consumerStatus, send_fd);
    }
    if (verbose)
      printf("%s: Cuda Consumer EGL stream FD obtained : %d.\n", __func__,
             fileDescriptor);

    int res = -1;
    res = EGLStreamSendfd(send_fd, fileDescriptor);
    if (-1 == res) {
      printf("%s: Cuda Consumer could not send EGL file descriptor.\n",
             __func__);
      consumerStatus = -1;
      close(fileDescriptor);
      // Without the descriptor the producer cannot attach to the stream, so
      // stop here. Carrying on would also close fileDescriptor a second time
      // after the frame loop.
      DoneCons(consumerStatus, send_fd);
    }

    if (CUDA_SUCCESS !=
        (curesult = cuEGLStreamConsumerConnect(&(cudaConsumer.cudaConn),
                                               cudaConsumer.eglStream))) {
      printf("FAILED Connect CUDA consumer with error %d\n", curesult);
      consumerStatus = -1;
      DoneCons(consumerStatus, send_fd);
    }

    // Each trial acquires and releases two frames, matching the two frames
    // the producer presents per trial.
    for (j = 0; j < NUMTRIALS; j++) {
      curesult = cudaConsumerAcquireFrame(&cudaConsumer, j);
      if (curesult != CUDA_SUCCESS) {
        printf("Cuda Consumer Test failed for frame = %d\n", j + 1);
        consumerStatus = -1;
        DoneCons(consumerStatus, send_fd);
      }
      curesult = cudaConsumerReleaseFrame(&cudaConsumer, j);
      if (curesult != CUDA_SUCCESS) {
        printf("Cuda Consumer Test failed for frame = %d\n", j + 1);
        consumerStatus = -1;
        DoneCons(consumerStatus, send_fd);
      }

      curesult = cudaConsumerAcquireFrame(&cudaConsumer, j);
      if (curesult != CUDA_SUCCESS) {
        printf("Cuda Consumer Test failed for frame = %d\n", j + 1);
        consumerStatus = -1;
        DoneCons(consumerStatus, send_fd);
      }
      curesult = cudaConsumerReleaseFrame(&cudaConsumer, j);
      if (curesult != CUDA_SUCCESS) {
        printf("Cuda Consumer Test failed for frame = %d\n", j + 1);
        consumerStatus = -1;
        DoneCons(consumerStatus, send_fd);
      }
    }

    cuCtxSynchronize();
    close(fileDescriptor);

    err = cudaGetValueMismatch();
    if (err != cudaSuccess) {
      printf("Consumer: App failed with value mismatch\n");
      curesult = CUDA_ERROR_UNKNOWN;
      consumerStatus = -1;
      DoneCons(consumerStatus, send_fd);
    }

    EGLint streamState = 0;
    if (!eglQueryStreamKHR(cudaConsumer.eglDisplay, cudaConsumer.eglStream,
                           EGL_STREAM_STATE_KHR, &streamState)) {
      printf("Main, eglQueryStreamKHR EGL_STREAM_STATE_KHR failed\n");
      curesult = CUDA_ERROR_UNKNOWN;
      consumerStatus = -1;
      DoneCons(consumerStatus, send_fd);
    }
    if (streamState != EGL_STREAM_STATE_DISCONNECTED_KHR) {
      if (CUDA_SUCCESS != (curesult = cuda_consumer_Deinit(&cudaConsumer))) {
        printf("Consumer Disconnect FAILED.\n");
        consumerStatus = -1;
        DoneCons(consumerStatus, send_fd);
      }
    }

    // Success path: collect the producer's status, print the verdict and
    // exit. Without this call the PASSED line could never be printed.
    DoneCons(consumerStatus, send_fd);
  } else  // Producer
  {
    test_cuda_producer_s cudaProducer;
    memset(&cudaProducer, 0, sizeof(test_cuda_producer_s));
    cudaProducer.profileAPI = profileAPIs;
    int producerStatus = 0;

    // Third argument 0: do not overwrite a value already in the environment.
    setenv("CUDA_EGL_PRODUCER_RETURN_WAIT_TIMEOUT", "1600", 0);

    int connect_fd = -1;

    // Hook up Ctrl-C handler
    signal(SIGINT, sig_handler);

    // Create connection to Consumer
    connect_fd = UnixSocketCreate(SOCK_PATH);
    if (-1 == connect_fd) {
      printf("%s: Cuda Producer could not create socket: %s.\n", __func__,
             SOCK_PATH);
      producerStatus = -1;
      DoneProd(producerStatus, connect_fd);
    }

    // Get the file descriptor of the stream from the consumer process
    // and re-create the EGL stream from it
    fileDescriptor = EGLStreamReceivefd(connect_fd);
    if (-1 == fileDescriptor) {
      printf("%s: Cuda Producer could not receive EGL file descriptor \n",
             __func__);
      producerStatus = -1;
      DoneProd(producerStatus, connect_fd);
    }

    if (!EGLStreamInit(isCrossDevice, 0, fileDescriptor)) {
      printf("EGLStream Init failed.\n");
      producerStatus = -1;
      curesult = CUDA_ERROR_UNKNOWN;
      DoneProd(producerStatus, connect_fd);
    }

    cudaProducer.eglDisplay = g_producerEglDisplay;
    cudaProducer.eglStream = g_producerEglStream;
    cudaProducer.cudaDevId = cudaDevIndexProd;

    curesult = cudaDeviceCreateProducer(&cudaProducer);
    if (curesult != CUDA_SUCCESS) {
      producerStatus = -1;
      DoneProd(producerStatus, connect_fd);
    }

    args.charCnt = WIDTH * HEIGHT * 4;
    cuCtxPushCurrent(cudaProducer.context);
    curesult = cudaProducerInit(&cudaProducer, &args);
    if (curesult != CUDA_SUCCESS) {
      printf("Cuda Producer: Init failed, status: %d\n", curesult);
      producerStatus = -1;
      DoneProd(producerStatus, connect_fd);
    }

    // Wait for the consumer to connect first: poll the stream state once per
    // second, for at most 10 polls, until it reads CONNECTING. The query
    // result has its own variable so it no longer shadows the function-level
    // cudaError_t `err`.
    EGLBoolean queryOk = EGL_FALSE;
    int wait_loop = 0;
    EGLint streamState = 0;
    do {
      queryOk = eglQueryStreamKHR(cudaProducer.eglDisplay,
                                  cudaProducer.eglStream, EGL_STREAM_STATE_KHR,
                                  &streamState);
      if ((EGL_FALSE != queryOk) &&
          (EGL_STREAM_STATE_CONNECTING_KHR != streamState)) {
        sleep(1);
        wait_loop++;
      }
    } while ((wait_loop < 10) && (EGL_FALSE != queryOk) &&
             (streamState != EGL_STREAM_STATE_CONNECTING_KHR));

    if ((EGL_FALSE == queryOk) || (wait_loop >= 10)) {
      printf(
          "%s: Cuda Producer eglQueryStreamKHR EGL_STREAM_STATE_KHR failed.\n",
          __func__);
      producerStatus = -1;
      DoneProd(producerStatus, connect_fd);
    }

    if (CUDA_SUCCESS != (curesult = cuEGLStreamProducerConnect(
                             &(cudaProducer.cudaConn), cudaProducer.eglStream,
                             WIDTH, HEIGHT))) {
      printf("Connect CUDA producer FAILED with error %d\n", curesult);
      producerStatus = -1;
      DoneProd(producerStatus, connect_fd);
    }

    printf("main - Cuda Producer and Consumer Initialized.\n");

    cudaProducerPrepareFrame(&cudaEgl1, cudaProducer.cudaPtr, args.charCnt);
    cudaProducerPrepareFrame(&cudaEgl2, cudaProducer.cudaPtr1, args.charCnt);

    // Each trial presents both frames and then waits for both to come back.
    for (j = 0; j < NUMTRIALS; j++) {
      curesult = cudaProducerPresentFrame(&cudaProducer, cudaEgl1, j);
      if (curesult != CUDA_SUCCESS) {
        printf("Cuda Producer Test failed for frame = %d with cuda error:%d\n",
               j + 1, curesult);
        producerStatus = -1;
        DoneProd(producerStatus, connect_fd);
      }
      curesult = cudaProducerPresentFrame(&cudaProducer, cudaEgl2, j);
      if (curesult != CUDA_SUCCESS) {
        printf("Cuda Producer Test failed for frame = %d with cuda error:%d\n",
               j + 1, curesult);
        producerStatus = -1;
        DoneProd(producerStatus, connect_fd);
      }

      curesult = cudaProducerReturnFrame(&cudaProducer, cudaEgl1, j);
      if (curesult != CUDA_SUCCESS) {
        printf("Cuda Producer Test failed for frame = %d with cuda error:%d\n",
               j + 1, curesult);
        producerStatus = -1;
        DoneProd(producerStatus, connect_fd);
      }
      curesult = cudaProducerReturnFrame(&cudaProducer, cudaEgl2, j);
      if (curesult != CUDA_SUCCESS) {
        printf("Cuda Producer Test failed for frame = %d with cuda error:%d\n",
               j + 1, curesult);
        producerStatus = -1;
        DoneProd(producerStatus, connect_fd);
      }
    }

    cuCtxSynchronize();

    err = cudaGetValueMismatch();
    if (err != cudaSuccess) {
      printf("Prod: App failed with value mismatch\n");
      curesult = CUDA_ERROR_UNKNOWN;
      producerStatus = -1;
      DoneProd(producerStatus, connect_fd);
    }

    printf("Tear Down Start.....\n");
    if (!eglQueryStreamKHR(cudaProducer.eglDisplay, cudaProducer.eglStream,
                           EGL_STREAM_STATE_KHR, &streamState)) {
      printf("Main, eglQueryStreamKHR EGL_STREAM_STATE_KHR failed\n");
      curesult = CUDA_ERROR_UNKNOWN;
      producerStatus = -1;
      DoneProd(producerStatus, connect_fd);
    }
    if (streamState != EGL_STREAM_STATE_DISCONNECTED_KHR) {
      if (CUDA_SUCCESS != (curesult = cudaProducerDeinit(&cudaProducer))) {
        printf("Producer Disconnect FAILED with %d\n", curesult);
        producerStatus = -1;
        DoneProd(producerStatus, connect_fd);
      }
    }

    unsetenv("CUDA_EGL_PRODUCER_RETURN_WAIT_TIMEOUT");

    // Success path: report the status to the waiting consumer and exit.
    DoneProd(producerStatus, connect_fd);
  }
  return 0;
}

View File

@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/../../Common"
],
"defines": [],
"compilerPath": "/usr/local/cuda/bin/nvcc",
"cStandard": "gnu17",
"cppStandard": "gnu++14",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.makefile-tools"
}
],
"version": 4
}

View File

@ -0,0 +1,7 @@
{
"recommendations": [
"nvidia.nsight-vscode-edition",
"ms-vscode.cpptools",
"ms-vscode.makefile-tools"
]
}

View File

@ -0,0 +1,10 @@
{
"configurations": [
{
"name": "CUDA C++: Launch",
"type": "cuda-gdb",
"request": "launch",
"program": "${workspaceFolder}/EGLStream_CUDA_Interop"
}
]
}

View File

@ -0,0 +1,15 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "sample",
"type": "shell",
"command": "make dbg=1",
"problemMatcher": ["$nvcc"],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}

View File

@ -0,0 +1,433 @@
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project is only supported on Mac OS X and Linux platforms
#
################################################################################
# Location of the CUDA Toolkit
# (?= keeps any value already supplied by the environment or command line.)
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
# Each legacy switch below (x86_64=1, ARMv7=1, aarch64=1, ppc64le=1, GCC=...)
# prints a deprecation notice and seeds the variable that replaces it, unless
# that replacement variable has already been set.
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
# The abi variable is no longer accepted: setting it to anything aborts make.
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
# HOST_ARCH is the machine name reported by uname; TARGET_ARCH defaults to it.
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
# TARGET_SIZE is the pointer width passed to nvcc as -m<size>. When cross
# compiling it is derived from the target name; for native builds it is taken
# from the host's `getconf LONG_BIT`. Unknown TARGET_ARCH values abort make.
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
# An aarch64 host whose toolkit contains targets/sbsa-linux is treated as sbsa
# for both host and target (this overrides a user-supplied TARGET_ARCH).
ifeq ($(HOST_ARCH),aarch64)
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
HOST_ARCH := sbsa
TARGET_ARCH := sbsa
endif
endif
# Only the host-target pairs listed here may be cross compiled.
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
# HOST_OS is the lower-cased kernel name from uname; TARGET_OS defaults to it
# and must be one of linux, darwin, qnx or android.
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
# Chooses a default HOST_COMPILER for the host/target/OS combination. Every
# assignment uses ?=, so a HOST_COMPILER supplied by the user (or by the
# deprecated GCC variable) always wins; g++ is the final fallback.
ifeq ($(TARGET_OS),darwin)
# Use clang++ when the Xcode major version is 5 or newer. `$$2` passes a
# literal $2 through make to awk.
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
# Cross compilation: pick the cross toolchain for the target.
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
# QNX builds need QNX_HOST and QNX_TARGET; both are exported to sub-processes.
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),sbsa)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
# nvcc is always invoked with the selected host compiler via -ccbin.
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
# NVCCFLAGS go to nvcc directly; CCFLAGS and LDFLAGS are later wrapped with
# -Xcompiler / -Xlinker when ALL_CCFLAGS and ALL_LDFLAGS are assembled.
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
# Cross-compilation extras. TARGET_FS, when set, is used as the sysroot and as
# the root of the library/include search paths added below.
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
# --sysroot is added to the compiler flags only when the version comparison
# against 4.6 succeeds; the linker always receives it.
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
LDFLAGS += -lsocket
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
# The commas are backslash-escaped inside the quoted -Wl option, which is
# passed along as a single compiler flag.
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
ifdef TARGET_OVERRIDE
LDFLAGS += -lslog2
endif
ifneq ($(TARGET_FS),)
LDFLAGS += -L$(TARGET_FS)/usr/lib
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
CCFLAGS += -I$(TARGET_FS)/../include
endif
endif
endif
ifdef TARGET_OVERRIDE # cuda toolkit targets override
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
endif
# Install directory of different arch
# Sub-directory of the toolkit that holds the files for the selected target;
# left empty for targets without a dedicated directory (e.g. native x86_64).
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
# `make dbg=1` adds host (-g) and device (-G) debug information; BUILD_TYPE
# names the output sub-directory under ../../bin.
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
# Full compile command line: nvcc flags as they are, host-compiler flags each
# prefixed with -Xcompiler. EXTRA_NVCCFLAGS and EXTRA_CCFLAGS are not set in
# this file and are left for the user to supply.
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
# Non-empty when lsb_release reports an Ubuntu distribution. It is a recursive
# (=) variable, so the shell command runs on every expansion.
# NOTE(review): not referenced anywhere else in this file; presumably consumed
# by findegl.mk - verify.
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
# SAMPLE_ENABLED starts at 1 and is cleared by any failed platform or
# dependency check; a value of 0 waives the sample rather than failing make.
SAMPLE_ENABLED := 1
# This sample is not supported on Mac OSX
ifeq ($(TARGET_OS),darwin)
$(info >>> WARNING - EGLStream_CUDA_Interop is not supported on Mac OSX - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on ARMv7
ifeq ($(TARGET_ARCH),armv7l)
$(info >>> WARNING - EGLStream_CUDA_Interop is not supported on ARMv7 - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on android
ifeq ($(TARGET_OS),android)
$(info >>> WARNING - EGLStream_CUDA_Interop is not supported on android - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# Full link command line: everything in ALL_CCFLAGS at this point, plus the
# linker flags each prefixed with -Xlinker. ALL_LDFLAGS is a simple (:=)
# variable, so flags appended to ALL_CCFLAGS later are not picked up here.
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../Common
LIBRARIES :=
################################################################################
# Makefile include to help find EGL Libraries
# (a plain `include`: make stops if ./findegl.mk is missing)
include ./findegl.mk
# EGL specific libraries
ifneq ($(TARGET_OS),darwin)
LIBRARIES += -lEGL
endif
#Detect if installed version of GCC supports required C++11
# Linux only: builds a number from the host compiler's -dumpversion output and
# waives the sample when it is below 51000 (i.e. 5.1.0).
ifeq ($(TARGET_OS),linux)
# `space` holds a single blank; it is used by the $(subst) call below.
empty :=
space := $(empty) $(empty)
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
#Create version number without "."
# The three dot-separated fields are appended as separate words.
# NOTE(review): if -dumpversion prints a bare major number with no dots, cut
# returns the whole string for every field (e.g. "9" gives 9 9 9), so the
# result is not a true version number - confirm the threshold still behaves
# as intended for such compilers.
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
# Make sure the version number has at least 3 decimals
GCCVERSION += 00
# Remove spaces from the version number
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
#$(warning $(GCCVERSION))
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
ifeq ($(IS_MIN_VERSION), 1)
$(info >>> GCC Version is greater or equal to 5.1.0 <<<)
else
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
SAMPLE_ENABLED := 0
endif
endif
# Locate the CUDA driver library. On darwin the CUDA framework is linked; on
# every other OS a libcuda.so is searched for in the per-target stubs
# directories collected in CUDA_SEARCH_PATH. The ?= assignments leave a
# user-supplied CUDA_SEARCH_PATH in place.
ifeq ($(TARGET_OS),darwin)
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
else
ifeq ($(TARGET_ARCH),x86_64)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
# TARGET_OVERRIDE replaces the search path outright (:=), even one supplied by
# the user.
ifdef TARGET_OVERRIDE
CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
endif
endif
ifeq ($(TARGET_ARCH),ppc64le)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
endif
ifeq ($(HOST_ARCH),ppc64le)
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
endif
# Search only the top level of each directory (following symlinks). If nothing
# is found the sample is waived; otherwise CUDALIB is reduced to the directory
# of the first match and added to the link line.
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
ifeq ("$(CUDALIB)","")
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
SAMPLE_ENABLED := 0
else
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
LIBRARIES += -L$(CUDALIB) -lcuda
endif
endif
ALL_CCFLAGS += --threads 0 --std=c++11
# When the sample is waived, EXEC turns every recipe command that is prefixed
# with $(EXEC) into an echo, so the commands are printed instead of executed.
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules

# These names are commands, not files. Declaring them phony keeps them working
# even if a file called e.g. `clean` or `build` appears in this directory.
.PHONY: all build check.deps run clean clobber

# Object files that make up the sample executable.
sample_objs := cuda_consumer.o cuda_producer.o eglstrm_common.o main.o

all: build

build: EGLStream_CUDA_Interop

# Report whether the checks earlier in this file disabled the sample.
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Compile each translation unit with nvcc. A static pattern rule replaces the
# four identical per-file rules and fails loudly if a source file is missing.
$(sample_objs): %.o: %.cpp
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the executable, then copy it into the shared bin tree.
EGLStream_CUDA_Interop: $(sample_objs)
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

run: build
	$(EXEC) ./EGLStream_CUDA_Interop

# Remove only what this Makefile created.
clean:
	rm -f EGLStream_CUDA_Interop $(sample_objs)
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/EGLStream_CUDA_Interop

clobber: clean

Some files were not shown because too many files have changed in this diff Show More