mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 12:29:16 +08:00
add and update samples for CUDA 11.5
This commit is contained in:
parent
3342d604fe
commit
1f76a2d110
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
294
Common/dynlink_d3d10.h
Normal file
294
Common/dynlink_d3d10.h
Normal file
|
@ -0,0 +1,294 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// File: dynlink_d3d10.h
|
||||
//
|
||||
// Shortcut macros and functions for using DX objects
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved
|
||||
//--------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef _DYNLINK_D3D10_H_
|
||||
#define _DYNLINK_D3D10_H_
|
||||
|
||||
// Standard Windows includes
|
||||
#include <windows.h>
|
||||
#include <initguid.h>
|
||||
#include <assert.h>
|
||||
#include <wchar.h>
|
||||
#include <mmsystem.h>
|
||||
#include <commctrl.h> // for InitCommonControls()
|
||||
#include <shellapi.h> // for ExtractIcon()
|
||||
#include <new.h> // for placement new
|
||||
#include <shlobj.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// CRT's memory leak detection
|
||||
#if defined(DEBUG) || defined(_DEBUG)
|
||||
#include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
// Direct3D9 includes
|
||||
#include <d3d9.h>
|
||||
|
||||
// Direct3D10 includes
|
||||
#include <dxgi.h>
|
||||
#include <d3d10_1.h>
|
||||
#include <d3d10.h>
|
||||
|
||||
// XInput includes
|
||||
#include <xinput.h>
|
||||
|
||||
// strsafe.h deprecates old insecure string functions. If you
|
||||
// really do not want it to (not recommended), then uncomment the next line
|
||||
//#define STRSAFE_NO_DEPRECATE
|
||||
|
||||
#ifndef STRSAFE_NO_DEPRECATE
|
||||
#pragma deprecated("strncpy")
|
||||
#pragma deprecated("wcsncpy")
|
||||
#pragma deprecated("_tcsncpy")
|
||||
#pragma deprecated("wcsncat")
|
||||
#pragma deprecated("strncat")
|
||||
#pragma deprecated("_tcsncat")
|
||||
#endif
|
||||
|
||||
// Include strsafe.h with warning C4996 (use of a deprecated function) silenced.
// push/pop restores whatever C4996 state the including translation unit had
// before this header, instead of forcing the warning back to the compiler
// default (which would re-enable it for code that had deliberately disabled it).
#pragma warning( push )
#pragma warning( disable : 4996 ) // disable deprecated warning
#include <strsafe.h>
#pragma warning( pop )
|
||||
|
||||
#include <DirectXMath.h>
|
||||
|
||||
using namespace DirectX;
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Structs
|
||||
//--------------------------------------------------------------------------------------
|
||||
struct DXUTD3D9DeviceSettings
|
||||
{
|
||||
UINT AdapterOrdinal;
|
||||
D3DDEVTYPE DeviceType;
|
||||
D3DFORMAT AdapterFormat;
|
||||
DWORD BehaviorFlags;
|
||||
D3DPRESENT_PARAMETERS pp;
|
||||
};
|
||||
|
||||
struct DXUTD3D10DeviceSettings
|
||||
{
|
||||
UINT AdapterOrdinal;
|
||||
D3D10_DRIVER_TYPE DriverType;
|
||||
UINT Output;
|
||||
DXGI_SWAP_CHAIN_DESC sd;
|
||||
UINT32 CreateFlags;
|
||||
UINT32 SyncInterval;
|
||||
DWORD PresentFlags;
|
||||
bool AutoCreateDepthStencil; // DXUT will create the a depth stencil resource and view if true
|
||||
DXGI_FORMAT AutoDepthStencilFormat;
|
||||
};
|
||||
|
||||
enum DXUTDeviceVersion { DXUT_D3D9_DEVICE, DXUT_D3D10_DEVICE };
|
||||
struct DXUTDeviceSettings
|
||||
{
|
||||
DXUTDeviceVersion ver;
|
||||
union
|
||||
{
|
||||
DXUTD3D9DeviceSettings d3d9; // only valid if ver == DXUT_D3D9_DEVICE
|
||||
DXUTD3D10DeviceSettings d3d10; // only valid if ver == DXUT_D3D10_DEVICE
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// Error codes
|
||||
//--------------------------------------------------------------------------------------
|
||||
// DXUT failure HRESULTs: severity SEVERITY_ERROR, facility FACILITY_ITF,
// codes 0x0901 through 0x090A. Code 0x0909 is not defined in this header.
#define DXUTERR_NODIRECT3D              MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0901)
#define DXUTERR_NOCOMPATIBLEDEVICES     MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0902)
#define DXUTERR_MEDIANOTFOUND           MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0903)
#define DXUTERR_NONZEROREFCOUNT         MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0904)
#define DXUTERR_CREATINGDEVICE          MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0905)
#define DXUTERR_RESETTINGDEVICE         MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0906)
#define DXUTERR_CREATINGDEVICEOBJECTS   MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0907)
#define DXUTERR_RESETTINGDEVICEOBJECTS  MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x0908)
#define DXUTERR_DEVICEREMOVED           MAKE_HRESULT(SEVERITY_ERROR, FACILITY_ITF, 0x090A)
|
||||
|
||||
|
||||
typedef HRESULT(WINAPI *LPCREATEDXGIFACTORY)(REFIID, void **);
|
||||
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICE)(IDXGIAdapter *, D3D10_DRIVER_TYPE, HMODULE, UINT, UINT32,
|
||||
ID3D10Device **);
|
||||
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICE1)(IDXGIAdapter *, D3D10_DRIVER_TYPE, HMODULE, UINT,
|
||||
D3D10_FEATURE_LEVEL1, UINT, ID3D10Device1 **);
|
||||
typedef HRESULT(WINAPI *LPD3D10CREATESTATEBLOCK)(ID3D10Device *pDevice, D3D10_STATE_BLOCK_MASK *pStateBlockMask,
|
||||
ID3D10StateBlock **ppStateBlock);
|
||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKUNION)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
|
||||
D3D10_STATE_BLOCK_MASK *pResult);
|
||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKINTERSECT)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
|
||||
D3D10_STATE_BLOCK_MASK *pResult);
|
||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDIFFERENCE)(D3D10_STATE_BLOCK_MASK *pA, D3D10_STATE_BLOCK_MASK *pB,
|
||||
D3D10_STATE_BLOCK_MASK *pResult);
|
||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKENABLECAPTURE)(D3D10_STATE_BLOCK_MASK *pMask,
|
||||
D3D10_DEVICE_STATE_TYPES StateType, UINT RangeStart,
|
||||
UINT RangeLength);
|
||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDISABLECAPTURE)(D3D10_STATE_BLOCK_MASK *pMask,
|
||||
D3D10_DEVICE_STATE_TYPES StateType, UINT RangeStart,
|
||||
UINT RangeLength);
|
||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKENABLEALL)(D3D10_STATE_BLOCK_MASK *pMask);
|
||||
typedef HRESULT(WINAPI *LPD3D10STATEBLOCKMASKDISABLEALL)(D3D10_STATE_BLOCK_MASK *pMask);
|
||||
typedef BOOL (WINAPI *LPD3D10STATEBLOCKMASKGETSETTING)(D3D10_STATE_BLOCK_MASK *pMask,
|
||||
D3D10_DEVICE_STATE_TYPES StateType, UINT Entry);
|
||||
|
||||
typedef HRESULT(WINAPI *LPD3D10COMPILEEFFECTFROMMEMORY)(void *pData, SIZE_T DataLength, LPCSTR pSrcFileName,
|
||||
CONST D3D10_SHADER_MACRO *pDefines,
|
||||
ID3D10Include *pInclude, UINT HLSLFlags, UINT FXFlags,
|
||||
ID3D10Blob **ppCompiledEffect, ID3D10Blob **ppErrors);
|
||||
typedef HRESULT(WINAPI *LPD3D10CREATEEFFECTFROMMEMORY)(void *pData, SIZE_T DataLength, UINT FXFlags,
|
||||
ID3D10Device *pDevice,
|
||||
ID3D10EffectPool *pEffectPool,
|
||||
ID3D10Effect **ppEffect);
|
||||
typedef HRESULT(WINAPI *LPD3D10CREATEEFFECTPOOLFROMMEMORY)(void *pData, SIZE_T DataLength, UINT FXFlags,
|
||||
ID3D10Device *pDevice, ID3D10EffectPool **ppEffectPool);
|
||||
|
||||
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICEANDSWAPCHAIN)(IDXGIAdapter *pAdapter,
|
||||
D3D10_DRIVER_TYPE DriverType,
|
||||
HMODULE Software,
|
||||
UINT Flags,
|
||||
UINT SDKVersion,
|
||||
DXGI_SWAP_CHAIN_DESC *pSwapChainDesc,
|
||||
IDXGISwapChain **ppSwapChain,
|
||||
ID3D10Device **ppDevice);
|
||||
|
||||
typedef HRESULT(WINAPI *LPD3D10CREATEDEVICEANDSWAPCHAIN1)(IDXGIAdapter *pAdapter,
|
||||
D3D10_DRIVER_TYPE DriverType,
|
||||
HMODULE Software,
|
||||
UINT Flags,
|
||||
D3D10_FEATURE_LEVEL1 HardwareLevel,
|
||||
UINT SDKVersion,
|
||||
DXGI_SWAP_CHAIN_DESC *pSwapChainDesc,
|
||||
IDXGISwapChain **ppSwapChain,
|
||||
ID3D10Device1 **ppDevice);
|
||||
|
||||
// Module and function pointers
|
||||
static HMODULE g_hModDXGI = NULL;
|
||||
static HMODULE g_hModD3D10 = NULL;
|
||||
static HMODULE g_hModD3D101 = NULL;
|
||||
static LPCREATEDXGIFACTORY sFnPtr_CreateDXGIFactory = NULL;
|
||||
static LPD3D10CREATESTATEBLOCK sFnPtr_D3D10CreateStateBlock = NULL;
|
||||
static LPD3D10CREATEDEVICE sFnPtr_D3D10CreateDevice = NULL;
|
||||
static LPD3D10CREATEDEVICE1 sFnPtr_D3D10CreateDevice1 = NULL;
|
||||
static LPD3D10STATEBLOCKMASKUNION sFnPtr_D3D10StateBlockMaskUnion = NULL;
|
||||
static LPD3D10STATEBLOCKMASKINTERSECT sFnPtr_D3D10StateBlockMaskIntersect = NULL;
|
||||
static LPD3D10STATEBLOCKMASKDIFFERENCE sFnPtr_D3D10StateBlockMaskDifference = NULL;
|
||||
static LPD3D10STATEBLOCKMASKENABLECAPTURE sFnPtr_D3D10StateBlockMaskEnableCapture = NULL;
|
||||
static LPD3D10STATEBLOCKMASKDISABLECAPTURE sFnPtr_D3D10StateBlockMaskDisableCapture = NULL;
|
||||
static LPD3D10STATEBLOCKMASKENABLEALL sFnPtr_D3D10StateBlockMaskEnableAll = NULL;
|
||||
static LPD3D10STATEBLOCKMASKDISABLEALL sFnPtr_D3D10StateBlockMaskDisableAll = NULL;
|
||||
static LPD3D10STATEBLOCKMASKGETSETTING sFnPtr_D3D10StateBlockMaskGetSetting = NULL;
|
||||
static LPD3D10COMPILEEFFECTFROMMEMORY sFnPtr_D3D10CompileEffectFromMemory = NULL;
|
||||
static LPD3D10CREATEEFFECTFROMMEMORY sFnPtr_D3D10CreateEffectFromMemory = NULL;
|
||||
static LPD3D10CREATEEFFECTPOOLFROMMEMORY sFnPtr_D3D10CreateEffectPoolFromMemory = NULL;
|
||||
static LPD3D10CREATEDEVICEANDSWAPCHAIN sFnPtr_D3D10CreateDeviceAndSwapChain = NULL;
|
||||
static LPD3D10CREATEDEVICEANDSWAPCHAIN1 sFnPtr_D3D10CreateDeviceAndSwapChain1 = NULL;
|
||||
|
||||
// unload the D3D10 DLLs
|
||||
static bool dynlinkUnloadD3D10API(void)
|
||||
{
|
||||
if (g_hModD3D10)
|
||||
{
|
||||
FreeLibrary(g_hModD3D10);
|
||||
g_hModD3D10 = NULL;
|
||||
}
|
||||
|
||||
if (g_hModDXGI)
|
||||
{
|
||||
FreeLibrary(g_hModDXGI);
|
||||
g_hModDXGI = NULL;
|
||||
}
|
||||
|
||||
if (g_hModD3D101)
|
||||
{
|
||||
FreeLibrary(g_hModD3D101);
|
||||
g_hModD3D101 = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Dynamically load the D3D10 DLLs loaded and map the function pointers
|
||||
static bool dynlinkLoadD3D10API(void)
|
||||
{
|
||||
// First check to see if the D3D10 Library is present.
|
||||
// if it succeeds, then we can call GetProcAddress to grab all of the DX10 functions
|
||||
g_hModD3D10 = LoadLibrary("d3d10.dll");
|
||||
|
||||
if (g_hModD3D10 != NULL)
|
||||
{
|
||||
sFnPtr_D3D10CreateStateBlock = (LPD3D10CREATESTATEBLOCK) GetProcAddress(g_hModD3D10, "D3D10CreateStateBlock");
|
||||
sFnPtr_D3D10CreateDevice = (LPD3D10CREATEDEVICE) GetProcAddress(g_hModD3D10, "D3D10CreateDevice");
|
||||
|
||||
sFnPtr_D3D10StateBlockMaskUnion = (LPD3D10STATEBLOCKMASKUNION) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskUnion");
|
||||
sFnPtr_D3D10StateBlockMaskIntersect = (LPD3D10STATEBLOCKMASKINTERSECT) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskIntersect");
|
||||
sFnPtr_D3D10StateBlockMaskDifference = (LPD3D10STATEBLOCKMASKDIFFERENCE) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDifference");
|
||||
sFnPtr_D3D10StateBlockMaskEnableCapture = (LPD3D10STATEBLOCKMASKENABLECAPTURE) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskEnableCapture");
|
||||
sFnPtr_D3D10StateBlockMaskDisableCapture = (LPD3D10STATEBLOCKMASKDISABLECAPTURE)GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDisableCapture");
|
||||
|
||||
sFnPtr_D3D10StateBlockMaskEnableAll = (LPD3D10STATEBLOCKMASKENABLEALL) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskEnableAll");
|
||||
sFnPtr_D3D10StateBlockMaskDisableAll = (LPD3D10STATEBLOCKMASKDISABLEALL) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskDisableAll");
|
||||
sFnPtr_D3D10StateBlockMaskGetSetting = (LPD3D10STATEBLOCKMASKGETSETTING) GetProcAddress(g_hModD3D10, "D3D10StateBlockMaskGetSetting");
|
||||
|
||||
sFnPtr_D3D10CompileEffectFromMemory = (LPD3D10COMPILEEFFECTFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CompileEffectFromMemory");
|
||||
sFnPtr_D3D10CreateEffectFromMemory = (LPD3D10CREATEEFFECTFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CreateEffectFromMemory");
|
||||
sFnPtr_D3D10CreateEffectPoolFromMemory = (LPD3D10CREATEEFFECTPOOLFROMMEMORY) GetProcAddress(g_hModD3D10, "D3D10CreateEffectPoolFromMemory");
|
||||
|
||||
sFnPtr_D3D10CreateDeviceAndSwapChain = (LPD3D10CREATEDEVICEANDSWAPCHAIN) GetProcAddress(g_hModD3D10, "D3D10CreateDeviceAndSwapChain");
|
||||
}
|
||||
|
||||
g_hModDXGI = LoadLibrary("dxgi.dll");
|
||||
|
||||
if (g_hModDXGI)
|
||||
{
|
||||
sFnPtr_CreateDXGIFactory = (LPCREATEDXGIFACTORY) GetProcAddress(g_hModDXGI , "CreateDXGIFactory");
|
||||
}
|
||||
|
||||
// This may fail if this machine isn't Windows Vista SP1 or later
|
||||
g_hModD3D101 = LoadLibrary("d3d10_1.dll");
|
||||
|
||||
if (g_hModD3D101 != NULL)
|
||||
{
|
||||
sFnPtr_D3D10CreateDevice1 = (LPD3D10CREATEDEVICE1) GetProcAddress(g_hModD3D101, "D3D10CreateDevice1");
|
||||
sFnPtr_D3D10CreateDeviceAndSwapChain1 = (LPD3D10CREATEDEVICEANDSWAPCHAIN1) GetProcAddress(g_hModD3D101, "D3D10CreateDeviceAndSwapChain1");
|
||||
}
|
||||
|
||||
if (g_hModD3D10 == NULL || g_hModDXGI == NULL || g_hModD3D101 == NULL)
|
||||
{
|
||||
dynlinkUnloadD3D10API();
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
78
Common/multithreading.cpp
Normal file
78
Common/multithreading.cpp
Normal file
|
@ -0,0 +1,78 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <multithreading.h>
|
||||
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
// Create thread
|
||||
CUTThread cutStartThread(CUT_THREADROUTINE func, void *data) {
|
||||
return CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, data, 0, NULL);
|
||||
}
|
||||
|
||||
// Wait for thread to finish
|
||||
void cutEndThread(CUTThread thread) {
|
||||
WaitForSingleObject(thread, INFINITE);
|
||||
CloseHandle(thread);
|
||||
}
|
||||
|
||||
// Destroy thread
|
||||
void cutDestroyThread(CUTThread thread) {
|
||||
TerminateThread(thread, 0);
|
||||
CloseHandle(thread);
|
||||
}
|
||||
|
||||
// Wait for multiple threads
|
||||
void cutWaitForThreads(const CUTThread *threads, int num) {
|
||||
WaitForMultipleObjects(num, threads, true, INFINITE);
|
||||
|
||||
for (int i = 0; i < num; i++) {
|
||||
CloseHandle(threads[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
// Create thread
|
||||
CUTThread cutStartThread(CUT_THREADROUTINE func, void *data) {
|
||||
pthread_t thread;
|
||||
pthread_create(&thread, NULL, func, data);
|
||||
return thread;
|
||||
}
|
||||
|
||||
// Wait for thread to finish
|
||||
void cutEndThread(CUTThread thread) { pthread_join(thread, NULL); }
|
||||
|
||||
// Destroy thread
|
||||
void cutDestroyThread(CUTThread thread) { pthread_cancel(thread); }
|
||||
|
||||
// Wait for multiple threads
|
||||
void cutWaitForThreads(const CUTThread *threads, int num) {
|
||||
for (int i = 0; i < num; i++) {
|
||||
cutEndThread(threads[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
76
Common/multithreading.h
Normal file
76
Common/multithreading.h
Normal file
|
@ -0,0 +1,76 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef MULTITHREADING_H
|
||||
#define MULTITHREADING_H
|
||||
|
||||
|
||||
//Simple portable thread library.
|
||||
|
||||
// Platform selection for the thread handle type, the thread-routine pointer
// type and the two helper macros:
//   CUTThread         - thread handle type
//   CUT_THREADROUTINE - pointer type accepted by cutStartThread()
//   CUT_THREADPROC    - return-type (and calling-convention) prefix for
//                       writing a thread routine
//   CUT_THREADEND     - statement that ends a thread routine
#if defined(_WIN64) || defined(WIN64) || defined(_WIN32) || defined(WIN32)
// Windows threads.
#include <windows.h>

#define CUT_THREADPROC unsigned WINAPI
#define CUT_THREADEND return 0

typedef HANDLE CUTThread;
typedef unsigned(WINAPI *CUT_THREADROUTINE)(void *);

#else
// POSIX threads.
#include <pthread.h>

// NOTE(review): CUT_THREADPROC expands to `void`, while CUT_THREADROUTINE is
// `void *(*)(void *)`. A routine declared with CUT_THREADPROC therefore does
// not have the pointed-to function type -- confirm that callers are aware.
#define CUT_THREADPROC void
#define CUT_THREADEND

typedef pthread_t CUTThread;
typedef void *(*CUT_THREADROUTINE)(void *);

#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//Create thread.
|
||||
CUTThread cutStartThread(CUT_THREADROUTINE, void *data);
|
||||
|
||||
//Wait for thread to finish.
|
||||
void cutEndThread(CUTThread thread);
|
||||
|
||||
//Destroy thread.
|
||||
void cutDestroyThread(CUTThread thread);
|
||||
|
||||
//Wait for multiple threads.
|
||||
void cutWaitForThreads(const CUTThread *threads, int num);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} //extern "C"
|
||||
#endif
|
||||
|
||||
#endif //MULTITHREADING_H
|
111
Common/nvMath.h
Normal file
111
Common/nvMath.h
Normal file
|
@ -0,0 +1,111 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// Template math library for common 3D functionality
|
||||
//
|
||||
// This code is in part derived from glh, a cross platform glut helper library.
|
||||
// The copyright for glh follows this notice.
|
||||
//
|
||||
// Copyright (c) NVIDIA Corporation. All rights reserved.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
Copyright (c) 2000 Cass Everitt
|
||||
Copyright (c) 2000 NVIDIA Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or
|
||||
without modification, are permitted provided that the following
|
||||
conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
* The names of contributors to this software may not be used
|
||||
to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Cass Everitt - cass@r3.nu
|
||||
*/
|
||||
|
||||
#ifndef NV_MATH_H
|
||||
#define NV_MATH_H
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <nvVector.h>
|
||||
#include <nvMatrix.h>
|
||||
#include <nvQuaternion.h>
|
||||
|
||||
#define NV_PI float(3.1415926535897932384626433832795)
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
typedef vec2<float> vec2f;
|
||||
typedef vec3<float> vec3f;
|
||||
typedef vec3<int> vec3i;
|
||||
typedef vec3<unsigned int> vec3ui;
|
||||
typedef vec4<float> vec4f;
|
||||
typedef matrix4<float> matrix4f;
|
||||
typedef quaternion<float> quaternionf;
|
||||
|
||||
|
||||
inline void applyRotation(const quaternionf &r)
|
||||
{
|
||||
float angle;
|
||||
vec3f axis;
|
||||
r.get_value(axis, angle);
|
||||
glRotatef(angle/3.1415926f * 180.0f, axis[0], axis[1], axis[2]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif
|
540
Common/nvMatrix.h
Normal file
540
Common/nvMatrix.h
Normal file
|
@ -0,0 +1,540 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// Template math library for common 3D functionality
|
||||
//
|
||||
// nvMatrix.h - template matrix code
|
||||
//
|
||||
// This code is in part derived from glh, a cross platform glut helper library.
|
||||
// The copyright for glh follows this notice.
|
||||
//
|
||||
// Copyright (c) NVIDIA Corporation. All rights reserved.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
Copyright (c) 2000 Cass Everitt
|
||||
Copyright (c) 2000 NVIDIA Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or
|
||||
without modification, are permitted provided that the following
|
||||
conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
* The names of contributors to this software may not be used
|
||||
to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Cass Everitt - cass@r3.nu
|
||||
*/
|
||||
|
||||
#ifndef NV_MATRIX_H
|
||||
#define NV_MATRIX_H
|
||||
|
||||
namespace nv
{

    template <class T> class vec2;
    template <class T> class vec3;
    template <class T> class vec4;

    ////////////////////////////////////////////////////////////////////////////////
    //
    // Matrix
    //
    // 4x4 matrix of T. element(row, col) addresses _array[row | (col << 2)],
    // i.e. the sixteen values are stored one column after another.
    //
    ////////////////////////////////////////////////////////////////////////////////
    template<class T>
    class matrix4
    {

    public:

        // Constructs the identity matrix.
        matrix4()
        {
            make_identity();
        }

        // Constructs a matrix with every element set to t.
        matrix4(T t)
        {
            set_value(t);
        }

        // Constructs a matrix from 16 values given in _array (column-by-column) order.
        matrix4(const T *m)
        {
            set_value(m);
        }

        // Constructs a matrix from 16 individual values, assigned in argument
        // order to _11, _12, ... _44.
        //
        // NOTE(review): assuming the anonymous struct overlays _array without
        // padding, the argument named aRC lands in _array[4*R + C], which
        // element() addresses as (row C, column R) - the transpose of what the
        // argument names suggest. Left unchanged because callers may rely on it;
        // confirm the intended convention.
        matrix4(T a00, T a01, T a02, T a03,
                T a10, T a11, T a12, T a13,
                T a20, T a21, T a22, T a23,
                T a30, T a31, T a32, T a33) :
            _11(a00), _12(a01), _13(a02), _14(a03),
            _21(a10), _22(a11), _23(a12), _24(a13),
            _31(a20), _32(a21), _33(a22), _34(a23),
            _41(a30), _42(a31), _43(a32), _44(a33)
        {}

        // Copies all 16 elements into mp, column by column (same order as _array).
        void get_value(T *mp) const
        {
            int c = 0;

            for (int j = 0; j < 4; j++)
                for (int i = 0; i < 4; i++)
                {
                    mp[c++] = element(i,j);
                }
        }

        // Returns a pointer to the 16 stored elements.
        const T *get_value() const
        {
            return _array;
        }

        // Loads all 16 elements from mp, column by column (same order as _array).
        // Takes a pointer-to-const so that it can be used by matrix4(const T *)
        // and with read-only source data; non-const pointers still convert.
        void set_value(const T *mp)
        {
            int c = 0;

            for (int j = 0; j < 4; j++)
                for (int i = 0; i < 4; i++)
                {
                    element(i,j) = mp[c++];
                }
        }

        // Sets every element to r.
        void set_value(T r)
        {
            for (int i = 0; i < 4; i++)
                for (int j = 0; j < 4; j++)
                {
                    element(i,j) = r;
                }
        }

        // Overwrites this matrix with the identity.
        void make_identity()
        {
            element(0,0) = 1.0;
            element(0,1) = 0.0;
            element(0,2) = 0.0;
            element(0,3) = 0.0;

            element(1,0) = 0.0;
            element(1,1) = 1.0;
            element(1,2) = 0.0;
            element(1,3) = 0.0;

            element(2,0) = 0.0;
            element(2,1) = 0.0;
            element(2,2) = 1.0;
            element(2,3) = 0.0;

            element(3,0) = 0.0;
            element(3,1) = 0.0;
            element(3,2) = 0.0;
            element(3,3) = 1.0;
        }

        // set a uniform scale
        // Only the first three diagonal elements are written; everything else
        // keeps its current value.
        void set_scale(T s)
        {
            element(0,0) = s;
            element(1,1) = s;
            element(2,2) = s;
        }

        // Sets a per-axis scale: writes s[0..2] to the first three diagonal
        // elements and leaves the rest of the matrix untouched.
        void set_scale(const vec3<T> &s)
        {
            for (int i = 0; i < 3; i++)
            {
                element(i,i) = s[i];
            }
        }

        // Writes t[0..2] into rows 0..2 of column 3; other elements are untouched.
        void set_translate(const vec3<T> &t)
        {
            for (int i = 0; i < 3; i++)
            {
                element(i,3) = t[i];
            }
        }

        // Replaces row r with t. r is not range-checked.
        void set_row(int r, const vec4<T> &t)
        {
            for (int i = 0; i < 4; i++)
            {
                element(r,i) = t[i];
            }
        }

        // Replaces column c with t. c is not range-checked.
        void set_column(int c, const vec4<T> &t)
        {
            for (int i = 0; i < 4; i++)
            {
                element(i,c) = t[i];
            }
        }

        // Returns a copy of row r. r is not range-checked.
        vec4<T> get_row(int r) const
        {
            vec4<T> v;

            for (int i = 0; i < 4; i++)
            {
                v[i] = element(r,i);
            }

            return v;
        }

        // Returns a copy of column c. c is not range-checked.
        vec4<T> get_column(int c) const
        {
            vec4<T> v;

            for (int i = 0; i < 4; i++)
            {
                v[i] = element(i,c);
            }

            return v;
        }

        // Returns the inverse of m, computed by Gaussian elimination with
        // scaled partial pivoting on the augmented matrix [m | I].
        //
        // If m is detected to be singular (a row of zeros, or a zero pivot at
        // any elimination step) the identity matrix is returned instead.
        friend matrix4 inverse(const matrix4 &m)
        {
            matrix4 minv;   // default-constructed identity: the "singular" result

            // Four augmented rows of 8 values; s[] holds row pointers so that
            // pivoting only has to swap pointers.
            T r1[8], r2[8], r3[8], r4[8];
            T *s[4], *tmprow;

            s[0] = &r1[0];
            s[1] = &r2[0];
            s[2] = &r3[0];
            s[3] = &r4[0];

            // Plain ints: the 'register' storage class was removed in C++17.
            int i, j, p, jj;

            // Build [m | I].
            for (i = 0; i < 4; i++)
            {
                for (j = 0; j < 4; j++)
                {
                    s[i][j] = m.element(i,j);

                    if (i == j)
                    {
                        s[i][j+4] = 1.0;
                    }
                    else
                    {
                        s[i][j+4] = 0.0;
                    }
                }
            }

            // scp[i] = largest magnitude in row i, used to scale pivot candidates.
            T scp[4];

            for (i = 0; i < 4; i++)
            {
                scp[i] = T(fabs(s[i][0]));

                for (j = 1; j < 4; j++)
                {
                    if (T(fabs(s[i][j])) > scp[i])
                    {
                        scp[i] = T(fabs(s[i][j]));
                    }
                }

                if (scp[i] == 0.0)
                {
                    return minv; // singular matrix!
                }
            }

            int pivot_to;
            T scp_max;

            for (i = 0; i < 4; i++)
            {
                // select pivot row
                pivot_to = i;
                scp_max = T(fabs(s[i][i]/scp[i]));

                // find out which row should be on top
                for (p = i+1; p < 4; p++)
                {
                    if (T(fabs(s[p][i]/scp[p])) > scp_max)
                    {
                        scp_max = T(fabs(s[p][i]/scp[p]));
                        pivot_to = p;
                    }
                }

                // Pivot if necessary
                if (pivot_to != i)
                {
                    tmprow = s[i];
                    s[i] = s[pivot_to];
                    s[pivot_to] = tmprow;

                    T tmpscp;
                    tmpscp = scp[i];
                    scp[i] = scp[pivot_to];
                    scp[pivot_to] = tmpscp;
                }

                // Even the best available pivot is zero: the matrix is
                // singular. Bail out before dividing by it. For i == 3 this
                // is the final-diagonal check.
                if (s[i][i] == T(0))
                {
                    return minv; // singular matrix!
                }

                T mji;

                // perform gaussian elimination
                for (j = i+1; j < 4; j++)
                {
                    mji = s[j][i]/s[i][i];
                    s[j][i] = 0.0;

                    for (jj = i+1; jj < 8; jj++)
                    {
                        s[j][jj] -= mji*s[i][jj];
                    }
                }
            }

            //
            // Now we have an upper triangular matrix.
            //
            //  x x x x | y y y y
            //  0 x x x | y y y y
            //  0 0 x x | y y y y
            //  0 0 0 x | y y y y
            //
            //  we'll back substitute to get the inverse
            //
            //  1 0 0 0 | z z z z
            //  0 1 0 0 | z z z z
            //  0 0 1 0 | z z z z
            //  0 0 0 1 | z z z z
            //

            T mij;

            for (i = 3; i > 0; i--)
            {
                for (j = i-1; j > -1; j--)
                {
                    mij = s[j][i]/s[i][i];

                    for (jj = j+1; jj < 8; jj++)
                    {
                        s[j][jj] -= mij*s[i][jj];
                    }
                }
            }

            // The left half is now diagonal; dividing each right-half row by
            // its diagonal entry yields the inverse.
            for (i = 0; i < 4; i++)
                for (j = 0; j < 4; j++)
                {
                    minv(i,j) = s[i][j+4] / s[i][i];
                }

            return minv;
        }

        // Returns the transpose of m.
        friend matrix4 transpose(const matrix4 &m)
        {
            matrix4 mtrans;

            for (int i = 0; i < 4; i++)
                for (int j = 0; j < 4; j++)
                {
                    mtrans(i,j) = m.element(j,i);
                }

            return mtrans;
        }

        // *this = *this * rhs (matrix product).
        matrix4 &operator *= (const matrix4 &rhs)
        {
            matrix4 mt(*this);   // snapshot of the left operand; *this is the accumulator
            set_value(T(0));

            for (int i = 0; i < 4; i++)
                for (int j = 0; j < 4; j++)
                    for (int c = 0; c < 4; c++)
                    {
                        element(i,j) += mt(i,c) * rhs(c,j);
                    }

            return *this;
        }

        // Returns the matrix product lhs * rhs.
        friend matrix4 operator * (const matrix4 &lhs, const matrix4 &rhs)
        {
            matrix4 r(T(0));

            for (int i = 0; i < 4; i++)
                for (int j = 0; j < 4; j++)
                    for (int c = 0; c < 4; c++)
                    {
                        r.element(i,j) += lhs(i,c) * rhs(c,j);
                    }

            return r;
        }

        // dst = M * src
        vec4<T> operator *(const vec4<T> &src) const
        {
            vec4<T> r;

            for (int i = 0; i < 4; i++)
                r[i] = (src[0] * element(i,0) + src[1] * element(i,1) +
                        src[2] * element(i,2) + src[3] * element(i,3));

            return r;
        }

        // dst = src * M
        friend vec4<T> operator *(const vec4<T> &lhs, const matrix4 &rhs)
        {
            vec4<T> r;

            for (int i = 0; i < 4; i++)
                r[i] = (lhs[0] * rhs.element(0,i) + lhs[1] * rhs.element(1,i) +
                        lhs[2] * rhs.element(2,i) + lhs[3] * rhs.element(3,i));

            return r;
        }

        // Element access by (row, col); same as element(). Not range-checked.
        T &operator()(int row, int col)
        {
            return element(row,col);
        }

        const T &operator()(int row, int col) const
        {
            return element(row,col);
        }

        // Element access by (row, col). For indices in 0..3, row | (col<<2)
        // equals row + 4*col. Indices are not range-checked.
        T &element(int row, int col)
        {
            return _array[row | (col<<2)];
        }

        const T &element(int row, int col) const
        {
            return _array[row | (col<<2)];
        }

        // Multiplies every element by the scalar r.
        matrix4 &operator *= (const T &r)
        {
            for (int i = 0; i < 4; ++i)
            {
                element(0,i) *= r;
                element(1,i) *= r;
                element(2,i) *= r;
                element(3,i) *= r;
            }

            return *this;
        }

        // Element-wise addition of mat into *this.
        matrix4 &operator += (const matrix4 &mat)
        {
            for (int i = 0; i < 4; ++i)
            {
                element(0,i) += mat.element(0,i);
                element(1,i) += mat.element(1,i);
                element(2,i) += mat.element(2,i);
                element(3,i) += mat.element(3,i);
            }

            return *this;
        }

        // True when all 16 elements compare equal (exact comparison).
        friend bool operator == (const matrix4 &lhs, const matrix4 &rhs)
        {
            bool r = true;

            for (int i = 0; i < 16; i++)
            {
                r &= lhs._array[i] == rhs._array[i];
            }

            return r;
        }

        // True when at least one element differs, i.e. the exact negation of
        // operator==. (Previously this required *every* element to differ.)
        friend bool operator != (const matrix4 &lhs, const matrix4 &rhs)
        {
            return !(lhs == rhs);
        }

        // The same 16 values viewed either as named members or as a flat array.
        union
        {
            struct
            {
                T _11, _12, _13, _14;   // standard names for components
                T _21, _22, _23, _24;   // standard names for components
                T _31, _32, _33, _34;   // standard names for components
                T _41, _42, _43, _44;   // standard names for components
            };
            T _array[16];     // array access
        };
    };

};
}
|
||||
|
||||
// set a uniform scale
|
||||
void set_scale(T s)
|
||||
{
|
||||
element(0,0) = s;
|
||||
element(1,1) = s;
|
||||
element(2,2) = s;
|
||||
}
|
||||
|
||||
void set_scale(const vec3<T> &s)
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
element(i,i) = s[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void set_translate(const vec3<T> &t)
|
||||
{
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
element(i,3) = t[i];
|
||||
}
|
||||
}
|
||||
|
||||
void set_row(int r, const vec4<T> &t)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
element(r,i) = t[i];
|
||||
}
|
||||
}
|
||||
|
||||
void set_column(int c, const vec4<T> &t)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
element(i,c) = t[i];
|
||||
}
|
||||
}
|
||||
|
||||
vec4<T> get_row(int r) const
|
||||
{
|
||||
vec4<T> v;
|
||||
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
v[i] = element(r,i);
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
vec4<T> get_column(int c) const
|
||||
{
|
||||
vec4<T> v;
|
||||
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
v[i] = element(i,c);
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
friend matrix4 inverse(const matrix4 &m)
|
||||
{
|
||||
matrix4 minv;
|
||||
|
||||
T r1[8], r2[8], r3[8], r4[8];
|
||||
T *s[4], *tmprow;
|
||||
|
||||
s[0] = &r1[0];
|
||||
s[1] = &r2[0];
|
||||
s[2] = &r3[0];
|
||||
s[3] = &r4[0];
|
||||
|
||||
register int i,j,p,jj;
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
for (j=0; j<4; j++)
|
||||
{
|
||||
s[i][j] = m.element(i,j);
|
||||
|
||||
if (i==j)
|
||||
{
|
||||
s[i][j+4] = 1.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
s[i][j+4] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
T scp[4];
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
scp[i] = T(fabs(s[i][0]));
|
||||
|
||||
for (j=1; j<4; j++)
|
||||
if (T(fabs(s[i][j])) > scp[i])
|
||||
{
|
||||
scp[i] = T(fabs(s[i][j]));
|
||||
}
|
||||
|
||||
if (scp[i] == 0.0)
|
||||
{
|
||||
return minv; // singular matrix!
|
||||
}
|
||||
}
|
||||
|
||||
int pivot_to;
|
||||
T scp_max;
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
// select pivot row
|
||||
pivot_to = i;
|
||||
scp_max = T(fabs(s[i][i]/scp[i]));
|
||||
|
||||
// find out which row should be on top
|
||||
for (p=i+1; p<4; p++)
|
||||
if (T(fabs(s[p][i]/scp[p])) > scp_max)
|
||||
{
|
||||
scp_max = T(fabs(s[p][i]/scp[p]));
|
||||
pivot_to = p;
|
||||
}
|
||||
|
||||
// Pivot if necessary
|
||||
if (pivot_to != i)
|
||||
{
|
||||
tmprow = s[i];
|
||||
s[i] = s[pivot_to];
|
||||
s[pivot_to] = tmprow;
|
||||
T tmpscp;
|
||||
tmpscp = scp[i];
|
||||
scp[i] = scp[pivot_to];
|
||||
scp[pivot_to] = tmpscp;
|
||||
}
|
||||
|
||||
T mji;
|
||||
|
||||
// perform gaussian elimination
|
||||
for (j=i+1; j<4; j++)
|
||||
{
|
||||
mji = s[j][i]/s[i][i];
|
||||
s[j][i] = 0.0;
|
||||
|
||||
for (jj=i+1; jj<8; jj++)
|
||||
{
|
||||
s[j][jj] -= mji*s[i][jj];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (s[3][3] == 0.0)
|
||||
{
|
||||
return minv; // singular matrix!
|
||||
}
|
||||
|
||||
//
|
||||
// Now we have an upper triangular matrix.
|
||||
//
|
||||
// x x x x | y y y y
|
||||
// 0 x x x | y y y y
|
||||
// 0 0 x x | y y y y
|
||||
// 0 0 0 x | y y y y
|
||||
//
|
||||
// we'll back substitute to get the inverse
|
||||
//
|
||||
// 1 0 0 0 | z z z z
|
||||
// 0 1 0 0 | z z z z
|
||||
// 0 0 1 0 | z z z z
|
||||
// 0 0 0 1 | z z z z
|
||||
//
|
||||
|
||||
T mij;
|
||||
|
||||
for (i=3; i>0; i--)
|
||||
{
|
||||
for (j=i-1; j > -1; j--)
|
||||
{
|
||||
mij = s[j][i]/s[i][i];
|
||||
|
||||
for (jj=j+1; jj<8; jj++)
|
||||
{
|
||||
s[j][jj] -= mij*s[i][jj];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
for (j=0; j<4; j++)
|
||||
{
|
||||
minv(i,j) = s[i][j+4] / s[i][i];
|
||||
}
|
||||
|
||||
return minv;
|
||||
}
|
||||
|
||||
|
||||
friend matrix4 transpose(const matrix4 &m)
|
||||
{
|
||||
matrix4 mtrans;
|
||||
|
||||
for (int i=0; i<4; i++)
|
||||
for (int j=0; j<4; j++)
|
||||
{
|
||||
mtrans(i,j) = m.element(j,i);
|
||||
}
|
||||
|
||||
return mtrans;
|
||||
}
|
||||
|
||||
matrix4 &operator *= (const matrix4 &rhs)
|
||||
{
|
||||
matrix4 mt(*this);
|
||||
set_value(T(0));
|
||||
|
||||
for (int i=0; i < 4; i++)
|
||||
for (int j=0; j < 4; j++)
|
||||
for (int c=0; c < 4; c++)
|
||||
{
|
||||
element(i,j) += mt(i,c) * rhs(c,j);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend matrix4 operator * (const matrix4 &lhs, const matrix4 &rhs)
|
||||
{
|
||||
matrix4 r(T(0));
|
||||
|
||||
for (int i=0; i < 4; i++)
|
||||
for (int j=0; j < 4; j++)
|
||||
for (int c=0; c < 4; c++)
|
||||
{
|
||||
r.element(i,j) += lhs(i,c) * rhs(c,j);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
// dst = M * src
|
||||
vec4<T> operator *(const vec4<T> &src) const
|
||||
{
|
||||
vec4<T> r;
|
||||
|
||||
for (int i = 0; i < 4; i++)
|
||||
r[i] = (src[0] * element(i,0) + src[1] * element(i,1) +
|
||||
src[2] * element(i,2) + src[3] * element(i,3));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
// dst = src * M
|
||||
friend vec4<T> operator *(const vec4<T> &lhs, const matrix4 &rhs)
|
||||
{
|
||||
vec4<T> r;
|
||||
|
||||
for (int i = 0; i < 4; i++)
|
||||
r[i] = (lhs[0] * rhs.element(0,i) + lhs[1] * rhs.element(1,i) +
|
||||
lhs[2] * rhs.element(2,i) + lhs[3] * rhs.element(3,i));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
T &operator()(int row, int col)
|
||||
{
|
||||
return element(row,col);
|
||||
}
|
||||
|
||||
const T &operator()(int row, int col) const
|
||||
{
|
||||
return element(row,col);
|
||||
}
|
||||
|
||||
T &element(int row, int col)
|
||||
{
|
||||
return _array[row | (col<<2)];
|
||||
}
|
||||
|
||||
const T &element(int row, int col) const
|
||||
{
|
||||
return _array[row | (col<<2)];
|
||||
}
|
||||
|
||||
matrix4 &operator *= (const T &r)
|
||||
{
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
element(0,i) *= r;
|
||||
element(1,i) *= r;
|
||||
element(2,i) *= r;
|
||||
element(3,i) *= r;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
matrix4 &operator += (const matrix4 &mat)
|
||||
{
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
element(0,i) += mat.element(0,i);
|
||||
element(1,i) += mat.element(1,i);
|
||||
element(2,i) += mat.element(2,i);
|
||||
element(3,i) += mat.element(3,i);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
friend bool operator == (const matrix4 &lhs, const matrix4 &rhs)
|
||||
{
|
||||
bool r = true;
|
||||
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
r &= lhs._array[i] == rhs._array[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
friend bool operator != (const matrix4 &lhs, const matrix4 &rhs)
|
||||
{
|
||||
bool r = true;
|
||||
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
r &= lhs._array[i] != rhs._array[i];
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
T _11, _12, _13, _14; // standard names for components
|
||||
T _21, _22, _23, _24; // standard names for components
|
||||
T _31, _32, _33, _34; // standard names for components
|
||||
T _41, _42, _43, _44; // standard names for components
|
||||
};
|
||||
T _array[16]; // array access
|
||||
};
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
530
Common/nvQuaternion.h
Normal file
530
Common/nvQuaternion.h
Normal file
|
@ -0,0 +1,530 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// Template math library for common 3D functionality
|
||||
//
|
||||
// nvQuaternion.h - quaternion template and utility functions
|
||||
//
|
||||
// This code is in part derived from glh, a cross platform glut helper library.
|
||||
// The copyright for glh follows this notice.
|
||||
//
|
||||
// Copyright (c) NVIDIA Corporation. All rights reserved.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
Copyright (c) 2000 Cass Everitt
|
||||
Copyright (c) 2000 NVIDIA Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or
|
||||
without modification, are permitted provided that the following
|
||||
conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
* The names of contributors to this software may not be used
|
||||
to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Cass Everitt - cass@r3.nu
|
||||
*/
|
||||
#ifndef NV_QUATERNION_H
|
||||
#define NV_QUATERNION_H
|
||||
|
||||
namespace nv
{

    template <class T> class vec2;
    template <class T> class vec3;
    template <class T> class vec4;
    // matrix4 is used in signatures below; declare it here so this header
    // does not depend on nvMatrix.h having been included first.
    template <class T> class matrix4;

    ////////////////////////////////////////////////////////////////////////////////
    //
    // Quaternion
    //
    // Components are stored as (x, y, z, w) == _array[0..3]; w is the scalar part.
    //
    ////////////////////////////////////////////////////////////////////////////////

    template< class T>
    class quaternion
    {
    public:

        // Constructs the all-zero quaternion (note: this is NOT the identity
        // rotation, which would be (0, 0, 0, 1)).
        quaternion() : x(0.0), y(0.0), z(0.0), w(0.0)
        {
        }

        // Constructs from four values in (x, y, z, w) order.
        quaternion(const T v[4])
        {
            set_value(v);
        }


        // Constructs from four values in (x, y, z, w) order.
        quaternion(T q0, T q1, T q2, T q3)
        {
            set_value(q0, q1, q2, q3);
        }


        // Constructs from a rotation matrix; see set_value(const matrix4<T> &).
        quaternion(const matrix4<T> &m)
        {
            set_value(m);
        }


        // Constructs a rotation of 'radians' about 'axis'.
        quaternion(const vec3<T> &axis, T radians)
        {
            set_value(axis, radians);
        }


        // Constructs the rotation that takes rotateFrom onto rotateTo.
        quaternion(const vec3<T> &rotateFrom, const vec3<T> &rotateTo)
        {
            set_value(rotateFrom, rotateTo);
        }

        // Constructs the rotation that takes a (look, up) pair onto another.
        quaternion(const vec3<T> &from_look, const vec3<T> &from_up,
                   const vec3<T> &to_look, const vec3<T> &to_up)
        {
            set_value(from_look, from_up, to_look, to_up);
        }

        // Returns a pointer to the four components in (x, y, z, w) order.
        const T *get_value() const
        {
            return &_array[0];
        }

        // Copies the components out in (x, y, z, w) order.
        void get_value(T &q0, T &q1, T &q2, T &q3) const
        {
            q0 = _array[0];
            q1 = _array[1];
            q2 = _array[2];
            q3 = _array[3];
        }

        // Sets the components in (x, y, z, w) order. Returns *this.
        quaternion &set_value(T q0, T q1, T q2, T q3)
        {
            _array[0] = q0;
            _array[1] = q1;
            _array[2] = q2;
            _array[3] = q3;
            return *this;
        }

        // Extracts the rotation as an axis and an angle in radians.
        // The angle is 2*acos(w); when it is exactly zero the axis is reported
        // as (0, 0, 1), otherwise as the normalized (x, y, z) part.
        // NOTE(review): acos() is applied to w unclamped, so this presumably
        // expects a unit quaternion - confirm with callers.
        void get_value(vec3<T> &axis, T &radians) const
        {
            radians = T(acos(_array[3]) * T(2.0));

            if (radians == T(0.0))
            {
                axis = vec3<T>(0.0, 0.0, 1.0);
            }
            else
            {
                axis[0] = _array[0];
                axis[1] = _array[1];
                axis[2] = _array[2];
                axis = normalize(axis);
            }
        }

        // Writes the rotation matrix for this quaternion into m. The squared
        // norm is divided out, so the quaternion need not be unit length; an
        // all-zero quaternion produces the identity matrix.
        void get_value(matrix4<T> &m) const
        {
            T s, xs, ys, zs, wx, wy, wz, xx, xy, xz, yy, yz, zz;

            T norm = _array[0] * _array[0] + _array[1] * _array[1] + _array[2] * _array[2] + _array[3] * _array[3];

            s = (norm == T(0.0)) ? T(0.0) : (T(2.0) / norm);

            xs = _array[0] * s;
            ys = _array[1] * s;
            zs = _array[2] * s;

            wx = _array[3] * xs;
            wy = _array[3] * ys;
            wz = _array[3] * zs;

            xx = _array[0] * xs;
            xy = _array[0] * ys;
            xz = _array[0] * zs;

            yy = _array[1] * ys;
            yz = _array[1] * zs;
            zz = _array[2] * zs;

            m(0,0) = T(T(1.0) - (yy + zz));
            m(1,0) = T(xy + wz);
            m(2,0) = T(xz - wy);

            m(0,1) = T(xy - wz);
            m(1,1) = T(T(1.0) - (xx + zz));
            m(2,1) = T(yz + wx);

            m(0,2) = T(xz + wy);
            m(1,2) = T(yz - wx);
            m(2,2) = T(T(1.0) - (xx + yy));

            m(3,0) = m(3,1) = m(3,2) = m(0,3) = m(1,3) = m(2,3) = T(0.0);
            m(3,3) = T(1.0);
        }

        // Sets the components from qp[0..3] in (x, y, z, w) order. Returns *this.
        quaternion &set_value(const T *qp)
        {
            for (int i = 0; i < 4; i++)
            {
                _array[i] = qp[i];
            }

            return *this;
        }

        // Sets this quaternion from the upper-left 3x3 of m (plus m(3,3)).
        //
        // If the trace is positive, w is recovered first; otherwise the
        // largest diagonal element selects which of x/y/z is recovered first,
        // which keeps the divisor s away from zero.
        //
        // NOTE(review): the off-diagonal differences are taken as
        // m(1,2) - m(2,1) etc., whereas get_value(matrix4<T> &) writes
        // m(2,1) = yz + wx and m(1,2) = yz - wx. A get_value -> set_value
        // round trip therefore appears to yield the conjugate. Left as is,
        // since callers may depend on it - confirm the intended convention.
        quaternion &set_value(const matrix4<T> &m)
        {
            T tr, s;
            int i, j, k;
            const int nxt[3] = { 1, 2, 0 };

            tr = m(0,0) + m(1,1) + m(2,2);

            if (tr > T(0))
            {
                s = T(sqrt(tr + m(3,3)));
                _array[3] = T(s * 0.5);
                s = T(0.5) / s;

                _array[0] = T((m(1,2) - m(2,1)) * s);
                _array[1] = T((m(2,0) - m(0,2)) * s);
                _array[2] = T((m(0,1) - m(1,0)) * s);
            }
            else
            {
                // i = index of the largest diagonal element, (j, k) the other
                // two in cyclic order.
                i = 0;

                if (m(1,1) > m(0,0))
                {
                    i = 1;
                }

                if (m(2,2) > m(i,i))
                {
                    i = 2;
                }

                j = nxt[i];
                k = nxt[j];

                // Uses the diagonal term m(i,i); the previous code read the
                // off-diagonal m(i,j) here, which gave a wrong magnitude.
                s = T(sqrt((m(i,i) - (m(j,j) + m(k,k))) + T(1.0)));

                _array[i] = T(s * 0.5);
                s = T(0.5 / s);

                _array[3] = T((m(j,k) - m(k,j)) * s);
                _array[j] = T((m(i,j) + m(j,i)) * s);
                _array[k] = T((m(i,k) + m(k,i)) * s);
            }

            return *this;
        }

        // Sets this quaternion to a rotation of theta radians about axis.
        // The axis need not be normalized; a zero-length axis yields the
        // identity rotation (0, 0, 0, 1). Returns *this.
        quaternion &set_value(const vec3<T> &axis, T theta)
        {
            T sqnorm = square_norm(axis);

            if (sqnorm == T(0.0))
            {
                // axis too small.
                x = y = z = T(0.0);
                w = T(1.0);
            }
            else
            {
                theta *= T(0.5);
                T sin_theta = T(sin(theta));

                // Fold the axis normalization into the sine factor.
                if (sqnorm != T(1))
                {
                    sin_theta /= T(sqrt(sqnorm));
                }

                x = sin_theta * axis[0];
                y = sin_theta * axis[1];
                z = sin_theta * axis[2];
                w = T(cos(theta));
            }

            return *this;
        }

        // Sets this quaternion to the rotation that takes the direction of
        // rotateFrom onto the direction of rotateTo. Returns *this.
        quaternion &set_value(const vec3<T> &rotateFrom, const vec3<T> &rotateTo)
        {
            vec3<T> p1, p2;
            T alpha;

            p1 = normalize(rotateFrom);
            p2 = normalize(rotateTo);

            alpha = dot(p1, p2);

            if (alpha == T(1.0))
            {
                // Parallel vectors: no rotation needed. Use the identity
                // rotation explicitly - the default constructor yields the
                // all-zero quaternion, which is not a rotation.
                return set_value(T(0.0), T(0.0), T(0.0), T(1.0));
            }

            // ensures that the anti-parallel case leads to a positive dot
            if (alpha == T(-1.0))
            {
                // Any axis perpendicular to p1 will do: build a vector that
                // is not parallel to p1, then remove its p1 component.
                vec3<T> v;

                if (p1[0] != p1[1] || p1[0] != p1[2])
                {
                    v = vec3<T>(p1[1], p1[2], p1[0]);
                }
                else
                {
                    v = vec3<T>(-p1[0], p1[1], p1[2]);
                }

                v -= p1 * dot(p1, v);
                v = normalize(v);

                set_value(v, T(3.1415926));
                return *this;
            }

            p1 = normalize(cross(p1, p2));

            set_value(p1,T(acos(alpha)));

            return *this;
        }

        // Sets this quaternion to the rotation that takes from_look onto
        // to_look and then twists so that the rotated from_up meets to_up.
        quaternion &set_value(const vec3<T> &from_look, const vec3<T> &from_up,
                              const vec3<T> &to_look, const vec3<T> &to_up)
        {
            quaternion r_look = quaternion(from_look, to_look);

            vec3<T> rotated_from_up(from_up);
            r_look.mult_vec(rotated_from_up);

            quaternion r_twist = quaternion(rotated_from_up, to_up);

            *this = r_twist;
            *this *= r_look;
            return *this;
        }

        // *this = *this * qr (quaternion product).
        quaternion &operator *= (const quaternion<T> &qr)
        {
            quaternion ql(*this);   // snapshot of the left operand

            w = ql.w * qr.w - ql.x * qr.x - ql.y * qr.y - ql.z * qr.z;
            x = ql.w * qr.x + ql.x * qr.w + ql.y * qr.z - ql.z * qr.y;
            y = ql.w * qr.y + ql.y * qr.w + ql.z * qr.x - ql.x * qr.z;
            z = ql.w * qr.z + ql.z * qr.w + ql.x * qr.y - ql.y * qr.x;

            return *this;
        }

        // Returns q scaled to unit length. q must not be all zeros (the norm
        // is divided by without a check).
        friend quaternion normalize(const quaternion<T> &q)
        {
            quaternion r(q);
            T rnorm = T(1.0) / T(sqrt(q.w * q.w + q.x * q.x + q.y * q.y + q.z * q.z));

            r.x *= rnorm;
            r.y *= rnorm;
            r.z *= rnorm;
            r.w *= rnorm;

            // The original fell off the end of this non-void function.
            return r;
        }

        // Returns q with its x, y and z components negated.
        friend quaternion<T> conjugate(const quaternion<T> &q)
        {
            quaternion<T> r(q);
            r._array[0] *= T(-1.0);
            r._array[1] *= T(-1.0);
            r._array[2] *= T(-1.0);
            return r;
        }

        // Returns conjugate(q), which is the inverse only when q has unit length.
        friend quaternion<T> inverse(const quaternion<T> &q)
        {
            return conjugate(q);
        }

        //
        // Quaternion multiplication with cartesian vector
        // v' = q*v*q(star)
        //
        void mult_vec(const vec3<T> &src, vec3<T> &dst) const
        {
            T v_coef = w * w - x * x - y * y - z * z;
            T u_coef = T(2.0) * (src[0] * x + src[1] * y + src[2] * z);
            T c_coef = T(2.0) * w;

            dst.v[0] = v_coef * src.v[0] + u_coef * x + c_coef * (y * src.v[2] - z * src.v[1]);
            dst.v[1] = v_coef * src.v[1] + u_coef * y + c_coef * (z * src.v[0] - x * src.v[2]);
            dst.v[2] = v_coef * src.v[2] + u_coef * z + c_coef * (x * src.v[1] - y * src.v[0]);
        }

        // In-place variant: works on a copy of the input so that source and
        // destination do not alias.
        void mult_vec(vec3<T> &src_and_dst) const
        {
            mult_vec(vec3<T>(src_and_dst), src_and_dst);
        }

        // Multiplies the rotation angle by scaleFactor, keeping the axis.
        void scale_angle(T scaleFactor)
        {
            vec3<T> axis;
            T radians;

            get_value(axis, radians);
            radians *= scaleFactor;
            set_value(axis, radians);
        }

        // Spherical linear interpolation from p (alpha == 0) to q (alpha == 1).
        // If the 4-component dot product of p and q is >= 1 in magnitude, p is
        // returned unchanged.
        friend quaternion<T> slerp(const quaternion<T> &p, const quaternion<T> &q, T alpha)
        {
            quaternion r;

            T cos_omega = p.x * q.x + p.y * q.y + p.z * q.z + p.w * q.w;

            // if B is on opposite hemisphere from A, use -B instead
            const bool bflip = (cos_omega < T(0));

            if (bflip)
            {
                cos_omega = -cos_omega;
            }

            // complementary interpolation parameter
            T beta = T(1) - alpha;

            if (cos_omega >= T(1))
            {
                return p;
            }

            T omega = T(acos(cos_omega));
            T one_over_sin_omega = T(1.0) / T(sin(omega));

            beta = T(sin(omega*beta) * one_over_sin_omega);
            alpha = T(sin(omega*alpha) * one_over_sin_omega);

            if (bflip)
            {
                alpha = -alpha;
            }

            r.x = beta * p._array[0]+ alpha * q._array[0];
            r.y = beta * p._array[1]+ alpha * q._array[1];
            r.z = beta * p._array[2]+ alpha * q._array[2];
            r.w = beta * p._array[3]+ alpha * q._array[3];
            return r;
        }

        // Component access, 0..3 == x, y, z, w. Not range-checked.
        T &operator [](int i)
        {
            return _array[i];
        }

        const T &operator [](int i) const
        {
            return _array[i];
        }

        // True when all four components compare equal (exact comparison).
        friend bool operator == (const quaternion<T> &lhs, const quaternion<T> &rhs)
        {
            bool r = true;

            for (int i = 0; i < 4; i++)
            {
                r &= lhs._array[i] == rhs._array[i];
            }

            return r;
        }

        // True when at least one component differs, i.e. the exact negation
        // of operator==. (Previously this duplicated operator== and returned
        // true for equal quaternions.)
        friend bool operator != (const quaternion<T> &lhs, const quaternion<T> &rhs)
        {
            return !(lhs == rhs);
        }

        // Returns the quaternion product lhs * rhs.
        friend quaternion<T> operator * (const quaternion<T> &lhs, const quaternion<T> &rhs)
        {
            quaternion r(lhs);
            r *= rhs;
            return r;
        }

        // The same four values viewed either as named members or as an array.
        union
        {
            struct
            {
                T x;
                T y;
                T z;
                T w;
            };
            T _array[4];
        };

    };

};
|
||||
|
||||
#endif
|
260
Common/nvShaderUtils.h
Normal file
260
Common/nvShaderUtils.h
Normal file
|
@ -0,0 +1,260 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Utility functions for compiling shaders and programs
|
||||
*
|
||||
* Author: Evan Hart
|
||||
* Copyright (c) NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef NV_SHADER_UTILS_H
|
||||
#define NV_SHADER_UTILS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace nv {
|
||||
|
||||
//
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
// Compiles one GLSL shader of type `target` from the NUL-terminated source
// string `shader`.
//
// Returns the shader object name on success. Returns 0 when the shader object
// could not be created or when compilation fails; in the latter case the
// shader object is deleted first. When NV_REPORT_COMPILE_ERRORS is defined,
// up to 256 bytes of the info log are written to stderr on failure.
inline GLuint CompileGLSLShader(GLenum target, const char *shader) {
  const GLuint shaderId = glCreateShader(target);

  if (shaderId == 0) {
    return shaderId;
  }

  glShaderSource(shaderId, 1, &shader, NULL);
  glCompileShader(shaderId);

  // query the compile status
  GLint status = 0;
  glGetShaderiv(shaderId, GL_COMPILE_STATUS, &status);

  if (status) {
    return shaderId;
  }

#ifdef NV_REPORT_COMPILE_ERRORS
  char log[256] = "";
  glGetShaderInfoLog(shaderId, 256, NULL, log);
  fprintf(stderr, "Compile failed:\n%s\n", log);
#endif
  glDeleteShader(shaderId);
  return 0;
}
|
||||
|
||||
//
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
// Loads a GLSL shader source file and compiles it with CompileGLSLShader().
//
// target   - shader type forwarded to CompileGLSLShader()
// filename - path of the source file; read in binary mode
//
// Returns the shader object name, or 0 when the file cannot be opened, its
// length cannot be determined, it cannot be read completely, or compilation
// fails.
inline GLuint CompileGLSLShaderFromFile(GLenum target, const char *filename) {
  FILE *shaderFile = NULL;

  // read files as binary to prevent problems from newline translation
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

  if (fopen_s(&shaderFile, filename, "rb") != 0)
#else
  if ((shaderFile = fopen(filename, "rb")) == 0)
#endif
  {
    return 0;
  }

  // Get the length of the file. ftell() reports failure as -1, which must
  // not be used as an allocation or read size.
  long size = -1;

  if (fseek(shaderFile, 0, SEEK_END) == 0) {
    size = ftell(shaderFile);
  }

  if (size < 0 || fseek(shaderFile, 0, SEEK_SET) != 0) {
    fclose(shaderFile);
    return 0;
  }

  // Read the file contents from the start, then close file and add a null
  // terminator
  char *text = new char[(size_t)size + 1];
  size_t fsize = fread(text, 1, (size_t)size, shaderFile);
  fclose(shaderFile);

  if (fsize != (size_t)size) {
    // A short read would leave part of the buffer uninitialised, so do not
    // hand it to the compiler.
    printf("CompileGLSLShaderFromFile(), error... fsize = 0\n");
    delete[] text;
    return 0;
  }

  text[size] = '\0';

  GLuint object = CompileGLSLShader(target, text);

  delete[] text;

  return object;
}
|
||||
|
||||
// Create a program composed of vertex and fragment shaders.
|
||||
// Creates a program object, attaches the given vertex and fragment shaders
// and links it.
//
// Returns the program object name, or 0 when linking fails (the program
// object is deleted in that case). When NV_REPORT_COMPILE_ERRORS is defined
// the program info log, if any, is printed to stdout after linking.
inline GLuint LinkGLSLProgram(GLuint vertexShader, GLuint fragmentShader) {
  GLuint program = glCreateProgram();
  glAttachShader(program, vertexShader);
  glAttachShader(program, fragmentShader);
  glLinkProgram(program);

#ifdef NV_REPORT_COMPILE_ERRORS
  // Get error log.
  GLint charsWritten = 0, infoLogLength = 0;
  glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infoLogLength);

  // Only fetch the log when the driver reports one; a zero length would
  // otherwise give an empty buffer that is then printed.
  if (infoLogLength > 0) {
    char *infoLog = new char[infoLogLength];
    infoLog[0] = '\0';
    glGetProgramInfoLog(program, infoLogLength, &charsWritten, infoLog);
    // The log is data, not a format string.
    printf("%s", infoLog);
    delete[] infoLog;
  }
#endif

  // Test linker result.
  GLint linkSucceed = GL_FALSE;
  glGetProgramiv(program, GL_LINK_STATUS, &linkSucceed);

  if (linkSucceed == GL_FALSE) {
    glDeleteProgram(program);
    return 0;
  }

  return program;
}
|
||||
|
||||
// Create a program composed of vertex, geometry and fragment shaders.
|
||||
// Creates a program object, attaches the given vertex, geometry and fragment
// shaders, sets the EXT geometry-shader program parameters and links it.
//
// inputType  - value for GL_GEOMETRY_INPUT_TYPE_EXT
// vertexOut  - value for GL_GEOMETRY_VERTICES_OUT_EXT
// outputType - value for GL_GEOMETRY_OUTPUT_TYPE_EXT
//
// Returns the program object name, or 0 when linking fails (the program
// object is deleted in that case). When NV_REPORT_COMPILE_ERRORS is defined
// the program info log, if any, is printed to stdout after linking.
inline GLuint LinkGLSLProgram(GLuint vertexShader, GLuint geometryShader,
                              GLint inputType, GLint vertexOut,
                              GLint outputType, GLuint fragmentShader) {
  GLuint program = glCreateProgram();
  glAttachShader(program, vertexShader);
  glAttachShader(program, geometryShader);
  glProgramParameteriEXT(program, GL_GEOMETRY_INPUT_TYPE_EXT, inputType);
  glProgramParameteriEXT(program, GL_GEOMETRY_VERTICES_OUT_EXT, vertexOut);
  glProgramParameteriEXT(program, GL_GEOMETRY_OUTPUT_TYPE_EXT, outputType);
  glAttachShader(program, fragmentShader);
  glLinkProgram(program);

#ifdef NV_REPORT_COMPILE_ERRORS
  // Get error log.
  GLint charsWritten = 0, infoLogLength = 0;
  glGetProgramiv(program, GL_INFO_LOG_LENGTH, &infoLogLength);

  // Only fetch the log when the driver reports one; a zero length would
  // otherwise give an empty buffer that is then printed.
  if (infoLogLength > 0) {
    char *infoLog = new char[infoLogLength];
    infoLog[0] = '\0';
    glGetProgramInfoLog(program, infoLogLength, &charsWritten, infoLog);
    // The log is data, not a format string.
    printf("%s", infoLog);
    delete[] infoLog;
  }
#endif

  // Test linker result.
  GLint linkSucceed = GL_FALSE;
  glGetProgramiv(program, GL_LINK_STATUS, &linkSucceed);

  if (linkSucceed == GL_FALSE) {
    glDeleteProgram(program);
    return 0;
  }

  return program;
}
|
||||
|
||||
//
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
// Creates an ARB assembly program of type `program_type` from the
// NUL-terminated ASCII source `code`.
//
// The new program object is bound to `program_type` and loaded with
// glProgramStringARB. Returns the program name on success. If the driver
// reports an error position other than -1 the program object is deleted and
// 0 is returned; with NV_REPORT_COMPILE_ERRORS defined the error position
// and error string are written to stderr first.
inline GLuint CompileASMShader(GLenum program_type, const char *code) {
  GLuint program_id;
  glGenProgramsARB(1, &program_id);
  glBindProgramARB(program_type, program_id);
  glProgramStringARB(program_type, GL_PROGRAM_FORMAT_ASCII_ARB,
                     (GLsizei)strlen(code), (const GLubyte *)code);

  GLint error_pos;
  glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);

  if (error_pos != -1) {
#ifdef NV_REPORT_COMPILE_ERRORS
    const GLubyte *error_string;
    error_string = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
    fprintf(stderr, "Program error at position: %d\n%s\n", (int)error_pos,
            error_string);
#endif
    // The caller only receives 0 and can never free the failed program, so
    // release it here.
    glDeleteProgramsARB(1, &program_id);
    return 0;
  }

  return program_id;
}
|
||||
|
||||
//
|
||||
//
|
||||
////////////////////////////////////////////////////////////
|
||||
// Loads an ARB assembly program source file and compiles it with
// CompileASMShader().
//
// target   - program type forwarded to CompileASMShader()
// filename - path of the source file; read in binary mode
//
// Returns the program name, or 0 when the file cannot be opened, its length
// cannot be determined, it cannot be read completely, or compilation fails.
inline GLuint CompileASMShaderFromFile(GLenum target, const char *filename) {
  FILE *shaderFile = NULL;

  // read files as binary to prevent problems from newline translation
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)

  if (fopen_s(&shaderFile, filename, "rb") != 0)
#else
  if ((shaderFile = fopen(filename, "rb")) == 0)
#endif
  {
    return 0;
  }

  // Get the length of the file. ftell() reports failure as -1, which must
  // not be used as an allocation or read size.
  long size = -1;

  if (fseek(shaderFile, 0, SEEK_END) == 0) {
    size = ftell(shaderFile);
  }

  if (size < 0 || fseek(shaderFile, 0, SEEK_SET) != 0) {
    fclose(shaderFile);
    return 0;
  }

  // Read the file contents from the start, then close file and add a null
  // terminator
  char *text = new char[(size_t)size + 1];
  size_t fsize = fread(text, 1, (size_t)size, shaderFile);
  fclose(shaderFile);

  if (fsize != (size_t)size) {
    // A short read would leave part of the buffer uninitialised, so do not
    // hand it to the compiler. The message names this function.
    printf("CompileASMShaderFromFile(), error... fsize = 0\n");
    delete[] text;
    return 0;
  }

  text[size] = '\0';

  GLuint program_id = CompileASMShader(target, text);

  delete[] text;

  return program_id;
}
|
||||
|
||||
} // namespace nv
|
||||
#endif
|
1074
Common/nvVector.h
Normal file
1074
Common/nvVector.h
Normal file
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
236
Common/param.h
Normal file
236
Common/param.h
Normal file
|
@ -0,0 +1,236 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Simple parameter system
|
||||
sgreen@nvidia.com 4/2001
|
||||
*/
|
||||
|
||||
#ifndef PARAM_H
|
||||
#define PARAM_H
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// base class for named parameter
|
||||
// Abstract interface shared by every named parameter. It stores only the
// name; value access, stepping, percentage mapping and stream (de)serialisation
// are all left to the derived classes.
class ParamBase {
 public:
  ParamBase(const char *name) : m_name(name) {}
  virtual ~ParamBase() {}

  // Mutable access to the stored name.
  std::string &GetName() { return m_name; }

  // --- value queries -------------------------------------------------------
  virtual float GetFloatValue() = 0;
  virtual int GetIntValue() = 0;
  virtual std::string GetValueString() = 0;

  // --- stepping ------------------------------------------------------------
  virtual void Reset() = 0;
  virtual void Increment() = 0;
  virtual void Decrement() = 0;

  // --- percentage mapping --------------------------------------------------
  virtual float GetPercentage() = 0;
  virtual void SetPercentage(float p) = 0;

  // --- stream I/O ----------------------------------------------------------
  virtual void Write(std::ostream &stream) = 0;
  virtual void Read(std::istream &stream) = 0;

  // Lets callers tell a parameter list apart from a single parameter.
  virtual bool IsList() = 0;

 protected:
  std::string m_name;
};
|
||||
|
||||
// derived class for single-valued parameter
|
||||
template <class T>
|
||||
class Param : public ParamBase {
|
||||
public:
|
||||
Param(const char *name, T value = 0, T min = 0, T max = 10000, T step = 1,
|
||||
T *ptr = 0)
|
||||
: ParamBase(name),
|
||||
m_default(value),
|
||||
m_min(min),
|
||||
m_max(max),
|
||||
m_step(step),
|
||||
m_precision(3) {
|
||||
if (ptr) {
|
||||
m_ptr = ptr;
|
||||
} else {
|
||||
m_ptr = &m_value;
|
||||
}
|
||||
|
||||
*m_ptr = value;
|
||||
}
|
||||
~Param() {}
|
||||
|
||||
T GetValue() const { return *m_ptr; }
|
||||
T SetValue(const T value) { *m_ptr = value; }
|
||||
|
||||
float GetFloatValue() { return (float)*m_ptr; }
|
||||
int GetIntValue() { return (int)*m_ptr; }
|
||||
|
||||
std::string GetValueString() {
|
||||
std::ostringstream ost;
|
||||
ost << std::setprecision(m_precision) << std::fixed;
|
||||
ost << *m_ptr;
|
||||
return ost.str();
|
||||
}
|
||||
|
||||
void SetPrecision(int x) { m_precision = x; }
|
||||
|
||||
float GetPercentage() { return (*m_ptr - m_min) / (float)(m_max - m_min); }
|
||||
|
||||
void SetPercentage(float p) { *m_ptr = (T)(m_min + p * (m_max - m_min)); }
|
||||
|
||||
void Reset() { *m_ptr = m_default; }
|
||||
|
||||
void Increment() {
|
||||
*m_ptr += m_step;
|
||||
|
||||
if (*m_ptr > m_max) {
|
||||
*m_ptr = m_max;
|
||||
}
|
||||
}
|
||||
|
||||
void Decrement() {
|
||||
*m_ptr -= m_step;
|
||||
|
||||
if (*m_ptr < m_min) {
|
||||
*m_ptr = m_min;
|
||||
}
|
||||
}
|
||||
|
||||
void Write(std::ostream &stream) {
|
||||
stream << m_name << " " << *m_ptr << '\n';
|
||||
}
|
||||
void Read(std::istream &stream) { stream >> m_name >> *m_ptr; }
|
||||
|
||||
bool IsList() { return false; }
|
||||
|
||||
private:
|
||||
T m_value;
|
||||
T *m_ptr; // pointer to value declared elsewhere
|
||||
T m_default, m_min, m_max, m_step;
|
||||
int m_precision; // number of digits after decimal point in string output
|
||||
};
|
||||
|
||||
// Placeholder integer parameter named "error", built with the Param defaults.
// ParamList::GetParam(name) hands out its address when a lookup finds nothing.
const Param<int> dummy("error");
|
||||
|
||||
// list of parameters
|
||||
class ParamList : public ParamBase {
|
||||
public:
|
||||
ParamList(const char *name = "") : ParamBase(name) { active = true; }
|
||||
~ParamList() {}
|
||||
|
||||
float GetFloatValue() { return 0.0f; }
|
||||
int GetIntValue() { return 0; }
|
||||
|
||||
void AddParam(ParamBase *param) {
|
||||
m_params.push_back(param);
|
||||
m_map[param->GetName()] = param;
|
||||
m_current = m_params.begin();
|
||||
}
|
||||
|
||||
// look-up parameter based on name
|
||||
ParamBase *GetParam(char *name) {
|
||||
ParamBase *p = m_map[name];
|
||||
|
||||
if (p) {
|
||||
return p;
|
||||
} else {
|
||||
return (ParamBase *)&dummy;
|
||||
}
|
||||
}
|
||||
|
||||
ParamBase *GetParam(int i) { return m_params[i]; }
|
||||
|
||||
ParamBase *GetCurrent() { return *m_current; }
|
||||
|
||||
int GetSize() { return (int)m_params.size(); }
|
||||
|
||||
std::string GetValueString() { return m_name; }
|
||||
|
||||
// functions to traverse list
|
||||
void Reset() { m_current = m_params.begin(); }
|
||||
|
||||
void Increment() {
|
||||
m_current++;
|
||||
|
||||
if (m_current == m_params.end()) {
|
||||
m_current = m_params.begin();
|
||||
}
|
||||
}
|
||||
|
||||
void Decrement() {
|
||||
if (m_current == m_params.begin()) {
|
||||
m_current = m_params.end() - 1;
|
||||
} else {
|
||||
m_current--;
|
||||
}
|
||||
}
|
||||
|
||||
float GetPercentage() { return 0.0f; }
|
||||
void SetPercentage(float /*p*/) {}
|
||||
|
||||
void Write(std::ostream &stream) {
|
||||
stream << m_name << '\n';
|
||||
|
||||
for (std::vector<ParamBase *>::const_iterator p = m_params.begin();
|
||||
p != m_params.end(); ++p) {
|
||||
(*p)->Write(stream);
|
||||
}
|
||||
}
|
||||
|
||||
void Read(std::istream &stream) {
|
||||
stream >> m_name;
|
||||
|
||||
for (std::vector<ParamBase *>::const_iterator p = m_params.begin();
|
||||
p != m_params.end(); ++p) {
|
||||
(*p)->Read(stream);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsList() { return true; }
|
||||
|
||||
void ResetAll() {
|
||||
for (std::vector<ParamBase *>::const_iterator p = m_params.begin();
|
||||
p != m_params.end(); ++p) {
|
||||
(*p)->Reset();
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
bool active;
|
||||
std::vector<ParamBase *> m_params;
|
||||
std::map<std::string, ParamBase *> m_map;
|
||||
std::vector<ParamBase *>::const_iterator m_current;
|
||||
};
|
||||
|
||||
#endif
|
307
Common/paramgl.h
Normal file
307
Common/paramgl.h
Normal file
|
@ -0,0 +1,307 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
ParamListGL
|
||||
- class derived from ParamList to do simple OpenGL rendering of a parameter
|
||||
list sgg 8/2001
|
||||
*/
|
||||
|
||||
#ifndef PARAMGL_H
|
||||
#define PARAMGL_H
|
||||
|
||||
#if defined(__APPLE__) || defined(MACOSX)
|
||||
#include <GLUT/glut.h>
|
||||
#else
|
||||
#include <GL/freeglut.h>
|
||||
#endif
|
||||
|
||||
#include <param.h>
|
||||
#include <string.h>
|
||||
|
||||
inline void beginWinCoords(void) {
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
glPushMatrix();
|
||||
glLoadIdentity();
|
||||
glTranslatef(0.0, (GLfloat)(glutGet(GLUT_WINDOW_HEIGHT) - 1.0), 0.0);
|
||||
glScalef(1.0, -1.0, 1.0);
|
||||
|
||||
glMatrixMode(GL_PROJECTION);
|
||||
glPushMatrix();
|
||||
glLoadIdentity();
|
||||
glOrtho(0, glutGet(GLUT_WINDOW_WIDTH), 0, glutGet(GLUT_WINDOW_HEIGHT), -1, 1);
|
||||
|
||||
glMatrixMode(GL_MODELVIEW);
|
||||
}
|
||||
|
||||
// Pops one matrix from the projection stack and then one from the modelview
// stack, leaving GL_MODELVIEW selected.
inline void endWinCoords(void) {
  // projection first ...
  glMatrixMode(GL_PROJECTION);
  glPopMatrix();

  // ... then modelview, which stays the active matrix mode
  glMatrixMode(GL_MODELVIEW);
  glPopMatrix();
}
|
||||
|
||||
inline void glPrint(int x, int y, const char *s, void *font) {
|
||||
glRasterPos2f((GLfloat)x, (GLfloat)y);
|
||||
int len = (int)strlen(s);
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
glutBitmapCharacter(font, s[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Draws `s` twice with glPrint(): first in black at (x - 1, y - 1), then at
// (x, y) in the colour read from `color` via glColor3fv (three floats).
inline void glPrintShadowed(int x, int y, const char *s, void *font,
                            float *color) {
  // shadow pass
  const int shadowX = x - 1;
  const int shadowY = y - 1;
  glColor3f(0.0, 0.0, 0.0);
  glPrint(shadowX, shadowY, s, font);

  // foreground pass
  glColor3fv((GLfloat *)color);
  glPrint(x, y, s, font);
}
|
||||
|
||||
class ParamListGL : public ParamList {
|
||||
public:
|
||||
ParamListGL(const char *name = "")
|
||||
: ParamList(name),
|
||||
m_active(true),
|
||||
m_text_color_selected(1.0, 1.0, 1.0),
|
||||
m_text_color_unselected(0.75, 0.75, 0.75),
|
||||
m_text_color_shadow(0.0, 0.0, 0.0),
|
||||
m_bar_color_outer(0.25, 0.25, 0.25),
|
||||
m_bar_color_inner(1.0, 1.0, 1.0) {
|
||||
m_font = (void *)GLUT_BITMAP_9_BY_15; // GLUT_BITMAP_8_BY_13;
|
||||
m_font_h = 15;
|
||||
m_bar_x = 260;
|
||||
m_bar_w = 250;
|
||||
m_bar_h = 10;
|
||||
m_bar_offset = 5;
|
||||
m_text_x = 5;
|
||||
m_separation = 15;
|
||||
m_value_x = 200;
|
||||
m_start_x = 0;
|
||||
m_start_y = 0;
|
||||
}
|
||||
|
||||
void Render(int x, int y, bool shadow = false) {
|
||||
beginWinCoords();
|
||||
|
||||
m_start_x = x;
|
||||
m_start_y = y;
|
||||
|
||||
for (std::vector<ParamBase *>::const_iterator p = m_params.begin();
|
||||
p != m_params.end(); ++p) {
|
||||
if ((*p)->IsList()) {
|
||||
ParamListGL *list = (ParamListGL *)(*p);
|
||||
list->Render(x + 10, y);
|
||||
y += m_separation * list->GetSize();
|
||||
} else {
|
||||
if (p == m_current) {
|
||||
glColor3fv(&m_text_color_selected.r);
|
||||
} else {
|
||||
glColor3fv(&m_text_color_unselected.r);
|
||||
}
|
||||
|
||||
if (shadow) {
|
||||
glPrintShadowed(x + m_text_x, y + m_font_h, (*p)->GetName().c_str(),
|
||||
m_font,
|
||||
(p == m_current) ? &m_text_color_selected.r
|
||||
: &m_text_color_unselected.r);
|
||||
glPrintShadowed(x + m_value_x, y + m_font_h,
|
||||
(*p)->GetValueString().c_str(), m_font,
|
||||
(p == m_current) ? &m_text_color_selected.r
|
||||
: &m_text_color_unselected.r);
|
||||
} else {
|
||||
glPrint(x + m_text_x, y + m_font_h, (*p)->GetName().c_str(), m_font);
|
||||
glPrint(x + m_value_x, y + m_font_h, (*p)->GetValueString().c_str(),
|
||||
m_font);
|
||||
}
|
||||
|
||||
glColor3fv((GLfloat *)&m_bar_color_outer.r);
|
||||
glBegin(GL_LINE_LOOP);
|
||||
glVertex2f((GLfloat)(x + m_bar_x), (GLfloat)(y + m_bar_offset));
|
||||
glVertex2f((GLfloat)(x + m_bar_x + m_bar_w),
|
||||
(GLfloat)(y + m_bar_offset));
|
||||
glVertex2f((GLfloat)(x + m_bar_x + m_bar_w),
|
||||
(GLfloat)(y + m_bar_offset + m_bar_h));
|
||||
glVertex2f((GLfloat)(x + m_bar_x),
|
||||
(GLfloat)(y + m_bar_offset + m_bar_h));
|
||||
glEnd();
|
||||
|
||||
glColor3fv((GLfloat *)&m_bar_color_inner.r);
|
||||
glRectf(
|
||||
(GLfloat)(x + m_bar_x), (GLfloat)(y + m_bar_offset + m_bar_h),
|
||||
(GLfloat)(x + m_bar_x + ((m_bar_w - 1) * (*p)->GetPercentage())),
|
||||
(GLfloat)(y + m_bar_offset + 1));
|
||||
|
||||
y += m_separation;
|
||||
}
|
||||
}
|
||||
|
||||
endWinCoords();
|
||||
}
|
||||
|
||||
bool Mouse(int x, int y, int button = GLUT_LEFT_BUTTON,
|
||||
int state = GLUT_DOWN) {
|
||||
if ((y < m_start_y) ||
|
||||
(y > (int)(m_start_y + (m_separation * m_params.size()) - 1))) {
|
||||
m_active = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
m_active = true;
|
||||
|
||||
int i = (y - m_start_y) / m_separation;
|
||||
|
||||
if ((button == GLUT_LEFT_BUTTON) && (state == GLUT_DOWN)) {
|
||||
#if defined(__GNUC__) && (__GNUC__ < 3)
|
||||
m_current = &m_params[i];
|
||||
#else
|
||||
|
||||
// MJH: workaround since the version of vector::at used here is
|
||||
// non-standard
|
||||
for (m_current = m_params.begin(); m_current != m_params.end() && i > 0;
|
||||
m_current++, i--)
|
||||
;
|
||||
|
||||
// m_current = (std::vector<ParamBase
|
||||
// *>::const_iterator)&m_params.at(i);
|
||||
#endif
|
||||
|
||||
if ((x > m_bar_x) && (x < m_bar_x + m_bar_w)) {
|
||||
Motion(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Motion(int x, int y) {
|
||||
if ((y < m_start_y) ||
|
||||
(y > m_start_y + (m_separation * (int)m_params.size()) - 1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (x < m_bar_x) {
|
||||
(*m_current)->SetPercentage(0.0);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (x > m_bar_x + m_bar_w) {
|
||||
(*m_current)->SetPercentage(1.0);
|
||||
return true;
|
||||
}
|
||||
|
||||
(*m_current)->SetPercentage((x - m_bar_x) / (float)m_bar_w);
|
||||
return true;
|
||||
}
|
||||
|
||||
void Special(int key, int x, int y) {
|
||||
if (!m_active) return;
|
||||
|
||||
switch (key) {
|
||||
case GLUT_KEY_DOWN:
|
||||
Increment();
|
||||
break;
|
||||
|
||||
case GLUT_KEY_UP:
|
||||
Decrement();
|
||||
break;
|
||||
|
||||
case GLUT_KEY_RIGHT:
|
||||
GetCurrent()->Increment();
|
||||
break;
|
||||
|
||||
case GLUT_KEY_LEFT:
|
||||
GetCurrent()->Decrement();
|
||||
break;
|
||||
|
||||
case GLUT_KEY_HOME:
|
||||
GetCurrent()->Reset();
|
||||
break;
|
||||
|
||||
case GLUT_KEY_END:
|
||||
GetCurrent()->SetPercentage(1.0);
|
||||
break;
|
||||
}
|
||||
|
||||
glutPostRedisplay();
|
||||
}
|
||||
|
||||
void SetFont(void *font, int height) {
|
||||
m_font = font;
|
||||
m_font_h = height;
|
||||
}
|
||||
|
||||
void SetSelectedColor(float r, float g, float b) {
|
||||
m_text_color_selected = Color(r, g, b);
|
||||
}
|
||||
void SetUnSelectedColor(float r, float g, float b) {
|
||||
m_text_color_unselected = Color(r, g, b);
|
||||
}
|
||||
void SetBarColorInner(float r, float g, float b) {
|
||||
m_bar_color_inner = Color(r, g, b);
|
||||
}
|
||||
void SetBarColorOuter(float r, float g, float b) {
|
||||
m_bar_color_outer = Color(r, g, b);
|
||||
}
|
||||
|
||||
void SetActive(bool b) { m_active = b; }
|
||||
|
||||
private:
|
||||
void *m_font;
|
||||
int m_font_h; // font height
|
||||
|
||||
int m_bar_x; // bar start x position
|
||||
int m_bar_w; // bar width
|
||||
int m_bar_h; // bar height
|
||||
int m_text_x; // text start x position
|
||||
int m_separation; // bar separation in y
|
||||
int m_value_x; // value text x position
|
||||
int m_bar_offset; // bar offset in y
|
||||
|
||||
int m_start_x, m_start_y;
|
||||
|
||||
bool m_active;
|
||||
|
||||
struct Color {
|
||||
Color(float _r, float _g, float _b) {
|
||||
r = _r;
|
||||
g = _g;
|
||||
b = _b;
|
||||
}
|
||||
float r, g, b;
|
||||
};
|
||||
|
||||
Color m_text_color_selected;
|
||||
Color m_text_color_unselected;
|
||||
Color m_text_color_shadow;
|
||||
Color m_bar_color_outer;
|
||||
Color m_bar_color_inner;
|
||||
};
|
||||
|
||||
#endif
|
128
Common/rendercheck_d3d10.cpp
Normal file
128
Common/rendercheck_d3d10.cpp
Normal file
|
@ -0,0 +1,128 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Utility funcs to wrap up saving a surface or the back buffer as a PPM file
|
||||
// In addition, wraps up a threshold comparison of two PPMs.
|
||||
//
|
||||
// These functions are designed to be used to implement an automated QA testing
|
||||
// for SDK samples.
|
||||
//
|
||||
// Author: Bryan Dudash
|
||||
// Email: sdkfeedback@nvidia.com
|
||||
//
|
||||
// Copyright (c) NVIDIA Corporation. All rights reserved.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <helper_functions.h>
|
||||
#include <rendercheck_d3d10.h>
|
||||
|
||||
// Saves the resource behind the device's first bound render target to
// `zFileName` through ResourceToPPM().
//
// Returns the result of ResourceToPPM(), or E_FAIL when no render target view
// is bound or the view has no resource. The view and resource references
// obtained here are released before returning.
HRESULT CheckRenderD3D10::ActiveRenderTargetToPPM(ID3D10Device *pDevice,
                                                  const char *zFileName) {
  ID3D10RenderTargetView *pRTV = NULL;
  pDevice->OMGetRenderTargets(1, &pRTV, NULL);

  if (pRTV == NULL) {
    // nothing is bound to render-target slot 0
    return E_FAIL;
  }

  ID3D10Resource *pSourceResource = NULL;
  pRTV->GetResource(&pSourceResource);

  HRESULT hr = E_FAIL;

  if (pSourceResource != NULL) {
    hr = ResourceToPPM(pDevice, pSourceResource, zFileName);
    // GetResource() handed us a counted reference
    pSourceResource->Release();
  }

  // OMGetRenderTargets() handed us a counted reference
  pRTV->Release();

  return hr;
}
|
||||
|
||||
// Copies a 2D texture resource into a CPU-readable staging texture, repacks
// its rows tightly and writes them to `zFileName` with sdkSavePPM4ub().
//
// Returns S_OK after the save call, or E_FAIL when the resource is not a 2D
// texture, the staging texture cannot be created, or it cannot be mapped.
//
// NOTE(review): rows are copied as Width * 4 bytes, i.e. this assumes a
// 4-byte-per-texel format; the texture format is not checked here.
HRESULT CheckRenderD3D10::ResourceToPPM(ID3D10Device *pDevice,
                                        ID3D10Resource *pResource,
                                        const char *zFileName) {
  D3D10_RESOURCE_DIMENSION rType;
  pResource->GetType(&rType);

  if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE2D) {
    printf("SurfaceToPPM: pResource is not a 2D texture! Aborting...\n");
    return E_FAIL;
  }

  ID3D10Texture2D *pSourceTexture = (ID3D10Texture2D *)pResource;
  ID3D10Texture2D *pTargetTexture = NULL;

  // Same description as the source, but as a CPU-readable staging texture.
  D3D10_TEXTURE2D_DESC desc;
  pSourceTexture->GetDesc(&desc);
  desc.BindFlags = 0;
  desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ;
  desc.Usage = D3D10_USAGE_STAGING;

  if (FAILED(pDevice->CreateTexture2D(&desc, NULL, &pTargetTexture))) {
    printf(
        "SurfaceToPPM: Unable to create target Texture resoruce! Aborting... "
        "\n");
    return E_FAIL;
  }

  pDevice->CopyResource(pTargetTexture, pSourceTexture);

  D3D10_MAPPED_TEXTURE2D mappedTex2D;

  // A failed Map() leaves mappedTex2D unusable, so stop before reading it.
  if (FAILED(pTargetTexture->Map(0, D3D10_MAP_READ, 0, &mappedTex2D))) {
    printf("SurfaceToPPM: Unable to map staging texture! Aborting...\n");
    pTargetTexture->Release();
    return E_FAIL;
  }

  // Need to convert from dx pitch to pitch=width
  const size_t rowBytes = (size_t)desc.Width * 4;
  unsigned char *pPPMData = new unsigned char[rowBytes * desc.Height];

  for (unsigned int iHeight = 0; iHeight < desc.Height; iHeight++) {
    memcpy(
        &(pPPMData[iHeight * rowBytes]),
        (unsigned char *)(mappedTex2D.pData) + iHeight * mappedTex2D.RowPitch,
        rowBytes);
  }

  pTargetTexture->Unmap(0);

  // Prepends the PPM header info and bumps byte data afterwards
  sdkSavePPM4ub(zFileName, pPPMData, desc.Width, desc.Height);

  delete[] pPPMData;
  pTargetTexture->Release();

  return S_OK;
}
|
||||
|
||||
bool CheckRenderD3D10::PPMvsPPM(const char *src_file, const char *ref_file,
|
||||
const char *exec_path, const float epsilon,
|
||||
const float threshold) {
|
||||
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
|
||||
|
||||
if (ref_file_path == NULL) {
|
||||
printf(
|
||||
"CheckRenderD3D10::PPMvsPPM unable to find <%s> in <%s> Aborting "
|
||||
"comparison!\n",
|
||||
ref_file, exec_path);
|
||||
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
|
||||
ref_file);
|
||||
printf("Aborting comparison!\n");
|
||||
printf(" FAILURE!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return (sdkComparePPM(src_file, ref_file_path, epsilon, threshold, true) ==
|
||||
true);
|
||||
}
|
53
Common/rendercheck_d3d10.h
Normal file
53
Common/rendercheck_d3d10.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef _RENDERCHECK_D3D10_H_
|
||||
#define _RENDERCHECK_D3D10_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <d3d10.h>
|
||||
|
||||
class CheckRenderD3D10 {
|
||||
public:
|
||||
CheckRenderD3D10() {}
|
||||
|
||||
static HRESULT ActiveRenderTargetToPPM(ID3D10Device *pDevice,
|
||||
const char *zFileName);
|
||||
static HRESULT ResourceToPPM(ID3D10Device *pDevice, ID3D10Resource *pResource,
|
||||
const char *zFileName);
|
||||
|
||||
static bool PPMvsPPM(const char *src_file, const char *ref_file,
|
||||
const char *exec_path, const float epsilon,
|
||||
const float threshold = 0.0f);
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
|
|
167
Common/rendercheck_d3d9.cpp
Normal file
167
Common/rendercheck_d3d9.cpp
Normal file
|
@ -0,0 +1,167 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Utility funcs to wrap up saving a surface or the back buffer as a PPM file
|
||||
// In addition, wraps up a threshold comparison of two PPMs.
|
||||
//
|
||||
// These functions are designed to be used to implement an automated QA testing
|
||||
// for SDK samples.
|
||||
//
|
||||
// Author: Bryan Dudash
|
||||
// Email: sdkfeedback@nvidia.com
|
||||
//
|
||||
// Copyright (c) NVIDIA Corporation. All rights reserved.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <helper_functions.h>
|
||||
#include <rendercheck_d3d9.h>
|
||||
|
||||
// originally copied from checkrender_gl.cpp and slightly modified
|
||||
bool CheckRenderD3D9::PPMvsPPM(const char *src_file, const char *ref_file,
|
||||
const char *exec_path, const float epsilon,
|
||||
const float threshold) {
|
||||
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
|
||||
|
||||
if (ref_file_path == NULL) {
|
||||
printf(
|
||||
"CheckRenderD3D9::PPMvsPPM unable to find <%s> in <%s> Aborting "
|
||||
"comparison!\n",
|
||||
ref_file, exec_path);
|
||||
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n",
|
||||
ref_file);
|
||||
printf("Aborting comparison!\n");
|
||||
printf(" FAILURE!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return (sdkComparePPM(src_file, ref_file_path, epsilon, threshold, true) ==
|
||||
true);
|
||||
};
|
||||
|
||||
// Saves the device's back buffer to the PPM file `zFileName`.
//
// The back buffer is obtained with
// GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, ...) and handed to
// SurfaceToPPM(); the surface reference is released before returning.
//
// Returns E_FAIL when the back buffer cannot be obtained, otherwise whatever
// SurfaceToPPM() returned.
HRESULT CheckRenderD3D9::BackbufferToPPM(IDirect3DDevice9 *pDevice,
                                         const char *zFileName) {
  IDirect3DSurface9 *pBackBuffer = NULL;
  const HRESULT fetchResult =
      pDevice->GetBackBuffer(0, 0, D3DBACKBUFFER_TYPE_MONO, &pBackBuffer);

  if (FAILED(fetchResult)) {
    printf("Unable to get the back buffer. Aborting...\n");
    return E_FAIL;
  }

  // D3DXSaveSurfaceToFile("C:\\bing.dds",D3DXIFF_DDS,pSurface,NULL,NULL);

  const HRESULT saveResult = SurfaceToPPM(pDevice, pBackBuffer, zFileName);

  // Drop the reference acquired by GetBackBuffer().
  pBackBuffer->Release();

  return saveResult;
}
|
||||
|
||||
// Writes the contents of a Direct3D 9 surface to a PPM file.
//
// The surface is copied with GetRenderTargetData() into level 0 of a
// D3DPOOL_SYSTEMMEM texture created here, locked, repacked into a tightly
// packed buffer of 4 bytes per pixel (row stride = Width * 4) and handed to
// sdkSavePPM4ub().
//
// Parameters:
//   pDevice   - device used to create the staging texture and do the copy.
//   pSurface  - source surface; must be D3DFMT_A8R8G8B8 or D3DFMT_X8R8G8B8.
//   zFileName - output path passed to sdkSavePPM4ub().
//
// Returns:
//   E_INVALIDARG for any other surface format, E_FAIL when creating the
//   staging texture, fetching its surface, copying or locking fails, and S_OK
//   otherwise. The return value of sdkSavePPM4ub() is not inspected.
//
// Every failure path releases the COM references acquired up to that point.
HRESULT CheckRenderD3D9::SurfaceToPPM(IDirect3DDevice9 *pDevice,
                                      IDirect3DSurface9 *pSurface,
                                      const char *zFileName) {
  D3DSURFACE_DESC pDesc;
  pSurface->GetDesc(&pDesc);

  // $$ For now only support common 8bit formats.  TODO: support for more
  // complex formats via conversion?
  if (!(pDesc.Format == D3DFMT_A8R8G8B8 || pDesc.Format == D3DFMT_X8R8G8B8)) {
    return E_INVALIDARG;
  }

  IDirect3DTexture9 *pTargetTex = NULL;

  if (FAILED(pDevice->CreateTexture(pDesc.Width, pDesc.Height, 1,
                                    D3DUSAGE_DYNAMIC, pDesc.Format,
                                    D3DPOOL_SYSTEMMEM, &pTargetTex, NULL))) {
    printf("Unable to create texture for surface transfer! Aborting...\n");
    return E_FAIL;
  }

  IDirect3DSurface9 *pTargetSurface = NULL;

  if (FAILED(pTargetTex->GetSurfaceLevel(0, &pTargetSurface))) {
    printf("Unable to get surface for surface transfer! Aborting...\n");
    pTargetTex->Release();
    return E_FAIL;
  }

  // This is required because we cannot lock a D3DPOOL_DEFAULT surface
  // directly. So, we copy to our sysmem surface.
  if (FAILED(pDevice->GetRenderTargetData(pSurface, pTargetSurface))) {
    printf(
        "Unable to GetRenderTargetData() for surface transfer! Aborting...\n");
    pTargetSurface->Release();
    pTargetTex->Release();
    return E_FAIL;
  }

  // Do not touch lockedRect unless the lock actually succeeded.
  D3DLOCKED_RECT lockedRect;

  if (FAILED(pTargetSurface->LockRect(&lockedRect, NULL, 0))) {
    printf("Unable to lock surface for surface transfer! Aborting...\n");
    pTargetSurface->Release();
    pTargetTex->Release();
    return E_FAIL;
  }

  // Need to convert from dx pitch to pitch=width.
  //
  // The locked pixels have red and blue swapped relative to what the saved
  // image needs (a straight row copy looks "funny" in a viewer), so each
  // 32-bit pixel is swizzled while it is repacked.
  //
  // Sizes and offsets are computed in size_t so large surfaces do not wrap
  // 32-bit unsigned arithmetic.
  const size_t width = pDesc.Width;
  const size_t height = pDesc.Height;
  unsigned char *pPPMData = new unsigned char[width * height * 4];

  for (size_t iHeight = 0; iHeight < height; iHeight++) {
    const unsigned char *pSrcRow =
        (const unsigned char *)(lockedRect.pBits) + iHeight * lockedRect.Pitch;

    for (size_t iWidth = 0; iWidth < width; iWidth++) {
      DWORD color;
      memcpy(&color, pSrcRow + iWidth * 4, 4);

      // R<->B, [7:0] <-> [23:16], swizzle
      color = ((color & 0xFF) << 16) | (color & 0xFF00) |
              ((color & 0xFF0000) >> 16) | (color & 0xFF000000);

      memcpy(&(pPPMData[(iHeight * width + iWidth) * 4]),
             (unsigned char *)&color, 4);
    }
  }

  pTargetSurface->UnlockRect();

  // Prepends the PPM header info and bumps byte data afterwards
  sdkSavePPM4ub(zFileName, pPPMData, pDesc.Width, pDesc.Height);

  delete[] pPPMData;
  pTargetSurface->Release();
  pTargetTex->Release();

  return S_OK;
}
|
54
Common/rendercheck_d3d9.h
Normal file
54
Common/rendercheck_d3d9.h
Normal file
|
@ -0,0 +1,54 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef _RENDERCHECK_D3D9_H_
|
||||
#define _RENDERCHECK_D3D9_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <d3d9.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
class CheckRenderD3D9 {
|
||||
public:
|
||||
CheckRenderD3D9() {}
|
||||
|
||||
static HRESULT BackbufferToPPM(IDirect3DDevice9 *pDevice,
|
||||
const char *zFileName);
|
||||
static HRESULT SurfaceToPPM(IDirect3DDevice9 *pDevice,
|
||||
IDirect3DSurface9 *pSurface,
|
||||
const char *zFileName);
|
||||
|
||||
static bool PPMvsPPM(const char *src_file, const char *ref_file,
|
||||
const char *exec_path, const float epsilon,
|
||||
const float threshold = 0.0f);
|
||||
};
|
||||
|
||||
#endif
|
1347
Common/rendercheck_gl.h
Normal file
1347
Common/rendercheck_gl.h
Normal file
File diff suppressed because it is too large
Load Diff
5
Makefile
5
Makefile
|
@ -50,6 +50,9 @@ PROJECTS := $(filter-out $(FILTER_OUT),$(PROJECTS))
|
|||
# Per-project helper: "<path>.ph_clobber" re-invokes make in the directory part
# of the stem ($(dir $*)) with the `clobber` goal, passing $(USE_DEVICE)
# (defined outside this excerpt) through. `+` keeps the sub-make running under
# `make -n`; `@` suppresses echoing of the command.
%.ph_clobber :
|
||||
+@$(MAKE) -C $(dir $*) clobber $(USE_DEVICE)
|
||||
|
||||
# Per-project helper: "<path>.ph_run" re-invokes make in the directory part of
# the stem with the `run` goal.
%.ph_run :
|
||||
+@$(MAKE) -C $(dir $*) run
|
||||
|
||||
# Default aggregate goal: requires the ".ph_build" helper of every entry in
# $(PROJECTS), then prints a completion message.
all: $(addsuffix .ph_build,$(PROJECTS))
|
||||
@echo "Finished building CUDA samples"
|
||||
|
||||
|
@ -62,3 +65,5 @@ tidy:
|
|||
# clean: runs `tidy` plus the ".ph_clean" helper of every entry in $(PROJECTS).
clean: tidy $(addsuffix .ph_clean,$(PROJECTS))
|
||||
|
||||
# clobber: runs `clean` plus the ".ph_clobber" helper of every project.
clobber: clean $(addsuffix .ph_clobber,$(PROJECTS))
|
||||
|
||||
# run: triggers the ".ph_run" helper (sub-make `run` goal) of every project.
run: $(addsuffix .ph_run,$(PROJECTS))
|
||||
|
|
137
README.md
137
README.md
|
@ -1,11 +1,19 @@
|
|||
# CUDA Samples
|
||||
|
||||
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.4 update 1](https://developer.nvidia.com/cuda-downloads).
|
||||
Samples for CUDA Developers which demonstrate features in CUDA Toolkit. This version supports [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads).
|
||||
|
||||
## Release Notes
|
||||
|
||||
This section describes the release notes for the CUDA Samples on GitHub only.
|
||||
|
||||
### CUDA 11.5
|
||||
* Added `cuDLAHybridMode`. Demonstrates usage of cuDLA in hybrid mode.
|
||||
* Added `cuDLAStandaloneMode`. Demonstrates usage of cuDLA in standalone mode.
|
||||
* Added `cuDLAErrorReporting`. Demonstrates DLA error detection via CUDA.
|
||||
* Added `graphMemoryNodes`. Demonstrates memory allocations and frees within CUDA graphs using Graph APIs and Stream Capture APIs.
|
||||
* Added `graphMemoryFootprint`. Demonstrates how graph memory nodes re-use virtual addresses and physical memory.
|
||||
* All samples from CUDA toolkit are now available on [GitHub](https://github.com/nvidia/cuda-samples).
|
||||
|
||||
### CUDA 11.4 update 1
|
||||
* Added support for VS Code on linux platform.
|
||||
|
||||
|
@ -116,7 +124,7 @@ This is the first release of CUDA Samples on GitHub:
|
|||
|
||||
### Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.4](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
For system requirements and installation instructions for the CUDA Toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
|
||||
|
||||
### Getting the CUDA Samples
|
||||
|
@ -173,39 +181,104 @@ The samples makefiles can take advantage of certain options:
|
|||
### Samples by OS
|
||||
|
||||
#### Linux
|
||||
**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[shfl_scan](./Samples/shfl_scan)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[concurrentKernels](./Samples/concurrentKernels)** |
|
||||
**[simpleAssert_nvrtc](./Samples/simpleAssert_nvrtc)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[graphMemoryFootprint](./Samples/graphMemoryFootprint)** | **[MC_EstimatePiQ](./Samples/MC_EstimatePiQ)** |
|
||||
---|---|---|---|
|
||||
**[streamOrderedAllocationIPC](./Samples/streamOrderedAllocationIPC)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** |
|
||||
**[nvJPEG](./Samples/nvJPEG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[boxFilterNPP](./Samples/boxFilterNPP)** |
|
||||
**[matrixMul](./Samples/matrixMul)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
|
||||
**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaNvSci](./Samples/cudaNvSci)** |
|
||||
**[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||
**[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** |
|
||||
**[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** |
|
||||
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[simpleGL](./Samples/simpleGL)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
|
||||
**[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[deviceQuery](./Samples/deviceQuery)** | **[systemWideAtomics](./Samples/systemWideAtomics)** | **[matrixMulDrv](./Samples/matrixMulDrv)** |
|
||||
**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** |
|
||||
**[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[cudaNvSciNvMedia](./Samples/cudaNvSciNvMedia)** |
|
||||
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[cdpQuadtree](./Samples/cdpQuadtree)** |
|
||||
**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[reduction](./Samples/reduction)** |
|
||||
**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
||||
**[reductionMultiBlockCG](./Samples/reductionMultiBlockCG)** | **[cuDLAStandaloneMode](./Samples/cuDLAStandaloneMode)** | **[conjugateGradientPrecond](./Samples/conjugateGradientPrecond)** | **[ptxjit](./Samples/ptxjit)** |
|
||||
**[threadMigration](./Samples/threadMigration)** | **[EGLStream_CUDA_CrossGPU](./Samples/EGLStream_CUDA_CrossGPU)** | **[threadFenceReduction](./Samples/threadFenceReduction)** | **[simpleAtomicIntrinsics_nvrtc](./Samples/simpleAtomicIntrinsics_nvrtc)** |
|
||||
**[shfl_scan](./Samples/shfl_scan)** | **[clock](./Samples/clock)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[MC_EstimatePiP](./Samples/MC_EstimatePiP)** |
|
||||
**[transpose](./Samples/transpose)** | **[simpleMultiCopy](./Samples/simpleMultiCopy)** | **[cuDLAErrorReporting](./Samples/cuDLAErrorReporting)** | **[concurrentKernels](./Samples/concurrentKernels)** |
|
||||
**[streamOrderedAllocationIPC](./Samples/streamOrderedAllocationIPC)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cppIntegration](./Samples/cppIntegration)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** |
|
||||
**[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[nvJPEG](./Samples/nvJPEG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[MC_EstimatePiInlineP](./Samples/MC_EstimatePiInlineP)** |
|
||||
**[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[simpleAssert](./Samples/simpleAssert)** | **[simpleTemplates](./Samples/simpleTemplates)** |
|
||||
**[cuHook](./Samples/cuHook)** | **[simpleCUDA2GL](./Samples/simpleCUDA2GL)** | **[matrixMul](./Samples/matrixMul)** | **[quasirandomGenerator_nvrtc](./Samples/quasirandomGenerator_nvrtc)** |
|
||||
**[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleTextureDrv](./Samples/simpleTextureDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
|
||||
**[simpleCallback](./Samples/simpleCallback)** | **[batchCUBLAS](./Samples/batchCUBLAS)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleAtomicIntrinsics](./Samples/simpleAtomicIntrinsics)** |
|
||||
**[newdelete](./Samples/newdelete)** | **[bicubicTexture](./Samples/bicubicTexture)** | **[dxtc](./Samples/dxtc)** | **[cudaOpenMP](./Samples/cudaOpenMP)** |
|
||||
**[cdpBezierTessellation](./Samples/cdpBezierTessellation)** | **[randomFog](./Samples/randomFog)** | **[bilateralFilter](./Samples/bilateralFilter)** | **[conjugateGradient](./Samples/conjugateGradient)** |
|
||||
**[particles](./Samples/particles)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[Mandelbrot](./Samples/Mandelbrot)** | **[binomialOptions_nvrtc](./Samples/binomialOptions_nvrtc)** |
|
||||
**[cudaNvSci](./Samples/cudaNvSci)** | **[mergeSort](./Samples/mergeSort)** | **[HSOpticalFlow](./Samples/HSOpticalFlow)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** |
|
||||
**[convolutionTexture](./Samples/convolutionTexture)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||
**[eigenvalues](./Samples/eigenvalues)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[cuSolverSp_LowlevelCholesky](./Samples/cuSolverSp_LowlevelCholesky)** | **[topologyQuery](./Samples/topologyQuery)** |
|
||||
**[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[volumeRender](./Samples/volumeRender)** | **[stereoDisparity](./Samples/stereoDisparity)** | **[simpleTexture](./Samples/simpleTexture)** |
|
||||
**[simpleStreams](./Samples/simpleStreams)** | **[smokeParticles](./Samples/smokeParticles)** | **[simpleMultiGPU](./Samples/simpleMultiGPU)** | **[deviceQueryDrv](./Samples/deviceQueryDrv)** |
|
||||
**[fastWalshTransform](./Samples/fastWalshTransform)** | **[quasirandomGenerator](./Samples/quasirandomGenerator)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
|
||||
**[conjugateGradientUM](./Samples/conjugateGradientUM)** | **[simpleVoteIntrinsics_nvrtc](./Samples/simpleVoteIntrinsics_nvrtc)** | **[simpleLayeredTexture](./Samples/simpleLayeredTexture)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** |
|
||||
**[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[matrixMulCUBLAS](./Samples/matrixMulCUBLAS)** | **[histEqualizationNPP](./Samples/histEqualizationNPP)** |
|
||||
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[recursiveGaussian](./Samples/recursiveGaussian)** | **[imageDenoising](./Samples/imageDenoising)** | **[FunctionPointers](./Samples/FunctionPointers)** |
|
||||
**[simpleGL](./Samples/simpleGL)** | **[segmentationTreeThrust](./Samples/segmentationTreeThrust)** | **[scalarProd](./Samples/scalarProd)** | **[SobolQRNG](./Samples/SobolQRNG)** |
|
||||
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simplePitchLinearTexture](./Samples/simplePitchLinearTexture)** | **[freeImageInteropNPP](./Samples/freeImageInteropNPP)** |
|
||||
**[template](./Samples/template)** | **[dwtHaar1D](./Samples/dwtHaar1D)** | **[postProcessGL](./Samples/postProcessGL)** | **[BlackScholes](./Samples/BlackScholes)** |
|
||||
**[volumeFiltering](./Samples/volumeFiltering)** | **[simpleCUFFT_callback](./Samples/simpleCUFFT_callback)** | **[UnifiedMemoryStreams](./Samples/UnifiedMemoryStreams)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** |
|
||||
**[deviceQuery](./Samples/deviceQuery)** | **[simpleHyperQ](./Samples/simpleHyperQ)** | **[systemWideAtomics](./Samples/systemWideAtomics)** | **[cuSolverSp_LowlevelQR](./Samples/cuSolverSp_LowlevelQR)** |
|
||||
**[inlinePTX](./Samples/inlinePTX)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[cuDLAHybridMode](./Samples/cuDLAHybridMode)** |
|
||||
**[asyncAPI](./Samples/asyncAPI)** | **[MC_EstimatePiInlineQ](./Samples/MC_EstimatePiInlineQ)** | **[scan](./Samples/scan)** | **[simpleCooperativeGroups](./Samples/simpleCooperativeGroups)** |
|
||||
**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleTemplates_nvrtc](./Samples/simpleTemplates_nvrtc)** | **[simpleTexture3D](./Samples/simpleTexture3D)** | **[lineOfSight](./Samples/lineOfSight)** |
|
||||
**[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[binomialOptions](./Samples/binomialOptions)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[bindlessTexture](./Samples/bindlessTexture)** |
|
||||
**[simpleCUFFT_2d_MGPU](./Samples/simpleCUFFT_2d_MGPU)** | **[simplePrintf](./Samples/simplePrintf)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[histogram](./Samples/histogram)** |
|
||||
**[matrixMulDynlinkJIT](./Samples/matrixMulDynlinkJIT)** | **[simpleP2P](./Samples/simpleP2P)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[vectorAddDrv](./Samples/vectorAddDrv)** |
|
||||
**[sortingNetworks](./Samples/sortingNetworks)** | **[alignedTypes](./Samples/alignedTypes)** | **[inlinePTX_nvrtc](./Samples/inlinePTX_nvrtc)** | **[simpleCubemapTexture](./Samples/simpleCubemapTexture)** |
|
||||
**[simpleIPC](./Samples/simpleIPC)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[radixSortThrust](./Samples/radixSortThrust)** | **[MonteCarloMultiGPU](./Samples/MonteCarloMultiGPU)** |
|
||||
**[cudaNvSciNvMedia](./Samples/cudaNvSciNvMedia)** | **[vectorAdd](./Samples/vectorAdd)** | **[cdpSimplePrint](./Samples/cdpSimplePrint)** | **[FilterBorderControlNPP](./Samples/FilterBorderControlNPP)** |
|
||||
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[convolutionSeparable](./Samples/convolutionSeparable)** | **[nbody](./Samples/nbody)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** |
|
||||
**[simpleSeparateCompilation](./Samples/simpleSeparateCompilation)** | **[c++11_cuda](./Samples/c++11_cuda)** | **[fluidsGL](./Samples/fluidsGL)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
||||
**[clock_nvrtc](./Samples/clock_nvrtc)** | **[graphMemoryNodes](./Samples/graphMemoryNodes)** | **[cdpQuadtree](./Samples/cdpQuadtree)** | **[interval](./Samples/interval)** |
|
||||
**[boxFilter](./Samples/boxFilter)** | **[matrixMul_nvrtc](./Samples/matrixMul_nvrtc)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[cppOverload](./Samples/cppOverload)** |
|
||||
**[marchingCubes](./Samples/marchingCubes)** | **[cuSolverRf](./Samples/cuSolverRf)** | **[BlackScholes_nvrtc](./Samples/BlackScholes_nvrtc)** | **[cdpAdvancedQuicksort](./Samples/cdpAdvancedQuicksort)** |
|
||||
**[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[cdpSimpleQuicksort](./Samples/cdpSimpleQuicksort)** | **[simpleOccupancy](./Samples/simpleOccupancy)** | **[simpleSurfaceWrite](./Samples/simpleSurfaceWrite)** |
|
||||
**[simpleCUFFT_MGPU](./Samples/simpleCUFFT_MGPU)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[convolutionFFT2D](./Samples/convolutionFFT2D)** | **[reduction](./Samples/reduction)** |
|
||||
**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[SobelFilter](./Samples/SobelFilter)** | **[dct8x8](./Samples/dct8x8)** | **[fp16ScalarProduct](./Samples/fp16ScalarProduct)** |
|
||||
**[FDTD3d](./Samples/FDTD3d)** | **[oceanFFT](./Samples/oceanFFT)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[StreamPriorities](./Samples/StreamPriorities)** |
|
||||
**[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[MC_SingleAsianOptionP](./Samples/MC_SingleAsianOptionP)** | **[simpleMPI](./Samples/simpleMPI)** |
|
||||
|
||||
#### Windows
|
||||
**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[shfl_scan](./Samples/shfl_scan)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[concurrentKernels](./Samples/concurrentKernels)** |
|
||||
**[simpleAssert_nvrtc](./Samples/simpleAssert_nvrtc)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[graphMemoryFootprint](./Samples/graphMemoryFootprint)** | **[MC_EstimatePiQ](./Samples/MC_EstimatePiQ)** |
|
||||
---|---|---|---|
|
||||
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[nvJPEG](./Samples/nvJPEG)** |
|
||||
**[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[matrixMul](./Samples/matrixMul)** |
|
||||
**[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** |
|
||||
**[cudaOpenMP](./Samples/cudaOpenMP)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** |
|
||||
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** |
|
||||
**[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** |
|
||||
**[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[simpleGL](./Samples/simpleGL)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
|
||||
**[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simpleD3D11](./Samples/simpleD3D11)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[deviceQuery](./Samples/deviceQuery)** |
|
||||
**[matrixMulDrv](./Samples/matrixMulDrv)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[simpleAttributes](./Samples/simpleAttributes)** |
|
||||
**[simpleD3D12](./Samples/simpleD3D12)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[simpleIPC](./Samples/simpleIPC)** |
|
||||
**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
||||
**[cdpQuadtree](./Samples/cdpQuadtree)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** |
|
||||
**[reduction](./Samples/reduction)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
||||
**[reductionMultiBlockCG](./Samples/reductionMultiBlockCG)** | **[conjugateGradientPrecond](./Samples/conjugateGradientPrecond)** | **[ptxjit](./Samples/ptxjit)** | **[threadMigration](./Samples/threadMigration)** |
|
||||
**[threadFenceReduction](./Samples/threadFenceReduction)** | **[simpleAtomicIntrinsics_nvrtc](./Samples/simpleAtomicIntrinsics_nvrtc)** | **[shfl_scan](./Samples/shfl_scan)** | **[clock](./Samples/clock)** |
|
||||
**[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[MC_EstimatePiP](./Samples/MC_EstimatePiP)** | **[transpose](./Samples/transpose)** | **[simpleMultiCopy](./Samples/simpleMultiCopy)** |
|
||||
**[concurrentKernels](./Samples/concurrentKernels)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cppIntegration](./Samples/cppIntegration)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** |
|
||||
**[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[nvJPEG](./Samples/nvJPEG)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** | **[MC_EstimatePiInlineP](./Samples/MC_EstimatePiInlineP)** |
|
||||
**[simpleD3D10](./Samples/simpleD3D10)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[fluidsD3D9](./Samples/fluidsD3D9)** | **[boxFilterNPP](./Samples/boxFilterNPP)** |
|
||||
**[simpleAssert](./Samples/simpleAssert)** | **[simpleTemplates](./Samples/simpleTemplates)** | **[simpleCUDA2GL](./Samples/simpleCUDA2GL)** | **[matrixMul](./Samples/matrixMul)** |
|
||||
**[quasirandomGenerator_nvrtc](./Samples/quasirandomGenerator_nvrtc)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[simpleTextureDrv](./Samples/simpleTextureDrv)** |
|
||||
**[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleCallback](./Samples/simpleCallback)** | **[SLID3D10Texture](./Samples/SLID3D10Texture)** | **[batchCUBLAS](./Samples/batchCUBLAS)** |
|
||||
**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleAtomicIntrinsics](./Samples/simpleAtomicIntrinsics)** | **[newdelete](./Samples/newdelete)** | **[bicubicTexture](./Samples/bicubicTexture)** |
|
||||
**[dxtc](./Samples/dxtc)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[cdpBezierTessellation](./Samples/cdpBezierTessellation)** | **[randomFog](./Samples/randomFog)** |
|
||||
**[bilateralFilter](./Samples/bilateralFilter)** | **[conjugateGradient](./Samples/conjugateGradient)** | **[particles](./Samples/particles)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
|
||||
**[Mandelbrot](./Samples/Mandelbrot)** | **[binomialOptions_nvrtc](./Samples/binomialOptions_nvrtc)** | **[simpleD3D10RenderTarget](./Samples/simpleD3D10RenderTarget)** | **[mergeSort](./Samples/mergeSort)** |
|
||||
**[HSOpticalFlow](./Samples/HSOpticalFlow)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[convolutionTexture](./Samples/convolutionTexture)** | **[simpleVulkan](./Samples/simpleVulkan)** |
|
||||
**[simpleD3D9Texture](./Samples/simpleD3D9Texture)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[eigenvalues](./Samples/eigenvalues)** |
|
||||
**[simpleD3D10Texture](./Samples/simpleD3D10Texture)** | **[streamOrderedAllocationP2P](./Samples/streamOrderedAllocationP2P)** | **[cuSolverSp_LowlevelCholesky](./Samples/cuSolverSp_LowlevelCholesky)** | **[topologyQuery](./Samples/topologyQuery)** |
|
||||
**[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** | **[volumeRender](./Samples/volumeRender)** | **[stereoDisparity](./Samples/stereoDisparity)** | **[simpleTexture](./Samples/simpleTexture)** |
|
||||
**[simpleStreams](./Samples/simpleStreams)** | **[smokeParticles](./Samples/smokeParticles)** | **[simpleMultiGPU](./Samples/simpleMultiGPU)** | **[deviceQueryDrv](./Samples/deviceQueryDrv)** |
|
||||
**[fastWalshTransform](./Samples/fastWalshTransform)** | **[quasirandomGenerator](./Samples/quasirandomGenerator)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
|
||||
**[conjugateGradientUM](./Samples/conjugateGradientUM)** | **[simpleVoteIntrinsics_nvrtc](./Samples/simpleVoteIntrinsics_nvrtc)** | **[simpleLayeredTexture](./Samples/simpleLayeredTexture)** | **[streamOrderedAllocation](./Samples/streamOrderedAllocation)** |
|
||||
**[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[matrixMulCUBLAS](./Samples/matrixMulCUBLAS)** | **[histEqualizationNPP](./Samples/histEqualizationNPP)** |
|
||||
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[recursiveGaussian](./Samples/recursiveGaussian)** | **[imageDenoising](./Samples/imageDenoising)** | **[FunctionPointers](./Samples/FunctionPointers)** |
|
||||
**[simpleGL](./Samples/simpleGL)** | **[segmentationTreeThrust](./Samples/segmentationTreeThrust)** | **[scalarProd](./Samples/scalarProd)** | **[SobolQRNG](./Samples/SobolQRNG)** |
|
||||
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[simplePitchLinearTexture](./Samples/simplePitchLinearTexture)** | **[freeImageInteropNPP](./Samples/freeImageInteropNPP)** |
|
||||
**[template](./Samples/template)** | **[dwtHaar1D](./Samples/dwtHaar1D)** | **[simpleD3D11Texture](./Samples/simpleD3D11Texture)** | **[postProcessGL](./Samples/postProcessGL)** |
|
||||
**[BlackScholes](./Samples/BlackScholes)** | **[simpleD3D11](./Samples/simpleD3D11)** | **[volumeFiltering](./Samples/volumeFiltering)** | **[UnifiedMemoryStreams](./Samples/UnifiedMemoryStreams)** |
|
||||
**[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleHyperQ](./Samples/simpleHyperQ)** | **[cuSolverSp_LowlevelQR](./Samples/cuSolverSp_LowlevelQR)** |
|
||||
**[inlinePTX](./Samples/inlinePTX)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[asyncAPI](./Samples/asyncAPI)** | **[MC_EstimatePiInlineQ](./Samples/MC_EstimatePiInlineQ)** |
|
||||
**[scan](./Samples/scan)** | **[simpleCooperativeGroups](./Samples/simpleCooperativeGroups)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleTemplates_nvrtc](./Samples/simpleTemplates_nvrtc)** |
|
||||
**[simpleTexture3D](./Samples/simpleTexture3D)** | **[lineOfSight](./Samples/lineOfSight)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[binomialOptions](./Samples/binomialOptions)** |
|
||||
**[simpleAttributes](./Samples/simpleAttributes)** | **[bindlessTexture](./Samples/bindlessTexture)** | **[simpleD3D12](./Samples/simpleD3D12)** | **[simpleCUFFT_2d_MGPU](./Samples/simpleCUFFT_2d_MGPU)** |
|
||||
**[simplePrintf](./Samples/simplePrintf)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[histogram](./Samples/histogram)** | **[matrixMulDynlinkJIT](./Samples/matrixMulDynlinkJIT)** |
|
||||
**[simpleP2P](./Samples/simpleP2P)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[vectorAddDrv](./Samples/vectorAddDrv)** | **[sortingNetworks](./Samples/sortingNetworks)** |
|
||||
**[alignedTypes](./Samples/alignedTypes)** | **[inlinePTX_nvrtc](./Samples/inlinePTX_nvrtc)** | **[simpleCubemapTexture](./Samples/simpleCubemapTexture)** | **[simpleIPC](./Samples/simpleIPC)** |
|
||||
**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[radixSortThrust](./Samples/radixSortThrust)** | **[MonteCarloMultiGPU](./Samples/MonteCarloMultiGPU)** | **[vectorAdd](./Samples/vectorAdd)** |
|
||||
**[VFlockingD3D10](./Samples/VFlockingD3D10)** | **[simpleD3D9](./Samples/simpleD3D9)** | **[cdpSimplePrint](./Samples/cdpSimplePrint)** | **[FilterBorderControlNPP](./Samples/FilterBorderControlNPP)** |
|
||||
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[convolutionSeparable](./Samples/convolutionSeparable)** | **[nbody](./Samples/nbody)** | **[simpleCUBLAS_LU](./Samples/simpleCUBLAS_LU)** |
|
||||
**[simpleSeparateCompilation](./Samples/simpleSeparateCompilation)** | **[c++11_cuda](./Samples/c++11_cuda)** | **[fluidsGL](./Samples/fluidsGL)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
||||
**[clock_nvrtc](./Samples/clock_nvrtc)** | **[graphMemoryNodes](./Samples/graphMemoryNodes)** | **[cdpQuadtree](./Samples/cdpQuadtree)** | **[interval](./Samples/interval)** |
|
||||
**[boxFilter](./Samples/boxFilter)** | **[matrixMul_nvrtc](./Samples/matrixMul_nvrtc)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[cppOverload](./Samples/cppOverload)** |
|
||||
**[marchingCubes](./Samples/marchingCubes)** | **[cuSolverRf](./Samples/cuSolverRf)** | **[BlackScholes_nvrtc](./Samples/BlackScholes_nvrtc)** | **[cdpAdvancedQuicksort](./Samples/cdpAdvancedQuicksort)** |
|
||||
**[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[cdpSimpleQuicksort](./Samples/cdpSimpleQuicksort)** | **[simpleOccupancy](./Samples/simpleOccupancy)** | **[simpleSurfaceWrite](./Samples/simpleSurfaceWrite)** |
|
||||
**[simpleCUFFT_MGPU](./Samples/simpleCUFFT_MGPU)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[convolutionFFT2D](./Samples/convolutionFFT2D)** | **[reduction](./Samples/reduction)** |
|
||||
**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[SobelFilter](./Samples/SobelFilter)** | **[dct8x8](./Samples/dct8x8)** | **[fp16ScalarProduct](./Samples/fp16ScalarProduct)** |
|
||||
**[FDTD3d](./Samples/FDTD3d)** | **[oceanFFT](./Samples/oceanFFT)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
||||
**[MC_SingleAsianOptionP](./Samples/MC_SingleAsianOptionP)** | **[simpleMPI](./Samples/simpleMPI)** |
|
||||
|
||||
## Dependencies
|
||||
|
||||
|
@ -374,5 +447,5 @@ Answers to frequently asked questions about CUDA can be found at http://develope
|
|||
## References
|
||||
|
||||
* [CUDA Programming Guide](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html)
|
||||
* [Accelerated Computing Blog](https://devblogs.nvidia.com/category/accelerated-computing/)
|
||||
* [Accelerated Computing Blog](https://developer.nvidia.com/blog/?tags=accelerated-computing)
|
||||
|
||||
|
|
18
Samples/BlackScholes/.vscode/c_cpp_properties.json
vendored
Normal file
18
Samples/BlackScholes/.vscode/c_cpp_properties.json
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"${workspaceFolder}/../../Common"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/local/cuda/bin/nvcc",
|
||||
"cStandard": "gnu17",
|
||||
"cppStandard": "gnu++14",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"configurationProvider": "ms-vscode.makefile-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
7
Samples/BlackScholes/.vscode/extensions.json
vendored
Normal file
7
Samples/BlackScholes/.vscode/extensions.json
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"recommendations": [
|
||||
"nvidia.nsight-vscode-edition",
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.makefile-tools"
|
||||
]
|
||||
}
|
10
Samples/BlackScholes/.vscode/launch.json
vendored
Normal file
10
Samples/BlackScholes/.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "CUDA C++: Launch",
|
||||
"type": "cuda-gdb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/BlackScholes"
|
||||
}
|
||||
]
|
||||
}
|
15
Samples/BlackScholes/.vscode/tasks.json
vendored
Normal file
15
Samples/BlackScholes/.vscode/tasks.json
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "sample",
|
||||
"type": "shell",
|
||||
"command": "make dbg=1",
|
||||
"problemMatcher": ["$nvcc"],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
243
Samples/BlackScholes/BlackScholes.cu
Normal file
243
Samples/BlackScholes/BlackScholes.cu
Normal file
|
@ -0,0 +1,243 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This sample evaluates fair call and put prices for a
|
||||
* given set of European options by Black-Scholes formula.
|
||||
* See supplied whitepaper for more explanations.
|
||||
*/
|
||||
|
||||
#include <helper_functions.h> // helper functions for string parsing
|
||||
#include <helper_cuda.h> // helper functions for CUDA error checking and initialization
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of optN options on CPU
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
|
||||
float *h_StockPrice, float *h_OptionStrike,
|
||||
float *h_OptionYears, float Riskfree,
|
||||
float Volatility, int optN);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of OptN options on GPU
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#include "BlackScholes_kernel.cuh"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
// Draw a pseudo-random float between low and high using rand().
// The rand() result is scaled by RAND_MAX into a blend factor, which is then
// used to linearly interpolate between the two bounds.
////////////////////////////////////////////////////////////////////////////////
float RandFloat(float low, float high) {
  // Blend factor: 0 selects low, 1 selects high
  const float blend = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);

  return (1.0f - blend) * low + blend * high;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
// Data configuration
////////////////////////////////////////////////////////////////////////////////
// Number of options held in each input/output array
const int OPT_N = 4000000;
// Number of kernel launches averaged for the timing measurement
const int NUM_ITERATIONS = 512;

// Size in bytes of one array of OPT_N floats. Kept as size_t (the type of the
// sizeof expression and of the allocation/copy size parameters) so the value is
// not narrowed to int.
const size_t OPT_SZ = OPT_N * sizeof(float);
// Riskless rate handed to the CPU and GPU Black-Scholes implementations
const float RISKFREE = 0.02f;
// Volatility rate handed to the CPU and GPU Black-Scholes implementations
const float VOLATILITY = 0.30f;

// Integer division of a by b rounded up (intended for a >= 0 and b > 0)
#define DIV_UP(a, b) (((a) + (b)-1) / (b))
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
// Main program
//
// Generates OPT_N random options on the host, prices them NUM_ITERATIONS times
// with the BlackScholesGPU kernel (timed), prices them once with
// BlackScholesCPU, and compares the call prices of both runs.
// Exits with EXIT_SUCCESS when the relative L1 error of the call prices is at
// most 1e-6, and with EXIT_FAILURE otherwise or when a host allocation fails.
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
  // Start logs
  printf("[%s] - Starting...\n", argv[0]);

  //'h_' prefix - CPU (host) memory space
  float
      // Results calculated by CPU for reference
      *h_CallResultCPU,
      *h_PutResultCPU,
      // CPU copy of GPU results
      *h_CallResultGPU, *h_PutResultGPU,
      // CPU instance of input data
      *h_StockPrice, *h_OptionStrike, *h_OptionYears;

  //'d_' prefix - GPU (device) memory space
  float
      // Results calculated by GPU
      *d_CallResult,
      *d_PutResult,
      // GPU instance of input data
      *d_StockPrice, *d_OptionStrike, *d_OptionYears;

  double delta, ref, sum_delta, sum_ref, max_delta, L1norm, gpuTime;

  StopWatchInterface *hTimer = NULL;
  int i;

  findCudaDevice(argc, (const char **)argv);

  sdkCreateTimer(&hTimer);

  printf("Initializing data...\n");
  printf("...allocating CPU memory for options.\n");
  h_CallResultCPU = (float *)malloc(OPT_SZ);
  h_PutResultCPU = (float *)malloc(OPT_SZ);
  h_CallResultGPU = (float *)malloc(OPT_SZ);
  h_PutResultGPU = (float *)malloc(OPT_SZ);
  h_StockPrice = (float *)malloc(OPT_SZ);
  h_OptionStrike = (float *)malloc(OPT_SZ);
  h_OptionYears = (float *)malloc(OPT_SZ);

  // The host buffers are written unconditionally below, so stop here instead
  // of dereferencing a NULL pointer if any allocation failed.
  if (h_CallResultCPU == NULL || h_PutResultCPU == NULL ||
      h_CallResultGPU == NULL || h_PutResultGPU == NULL ||
      h_StockPrice == NULL || h_OptionStrike == NULL ||
      h_OptionYears == NULL) {
    fprintf(stderr, "Failed to allocate CPU memory for options.\n");
    exit(EXIT_FAILURE);
  }

  printf("...allocating GPU memory for options.\n");
  checkCudaErrors(cudaMalloc((void **)&d_CallResult, OPT_SZ));
  checkCudaErrors(cudaMalloc((void **)&d_PutResult, OPT_SZ));
  checkCudaErrors(cudaMalloc((void **)&d_StockPrice, OPT_SZ));
  checkCudaErrors(cudaMalloc((void **)&d_OptionStrike, OPT_SZ));
  checkCudaErrors(cudaMalloc((void **)&d_OptionYears, OPT_SZ));

  printf("...generating input data in CPU mem.\n");
  // Fixed seed, so every run generates the same option set
  srand(5347);

  // Generate options set
  for (i = 0; i < OPT_N; i++) {
    h_CallResultCPU[i] = 0.0f;
    h_PutResultCPU[i] = -1.0f;
    h_StockPrice[i] = RandFloat(5.0f, 30.0f);
    h_OptionStrike[i] = RandFloat(1.0f, 100.0f);
    h_OptionYears[i] = RandFloat(0.25f, 10.0f);
  }

  printf("...copying input data to GPU mem.\n");
  // Copy options data to GPU memory for further processing
  checkCudaErrors(
      cudaMemcpy(d_StockPrice, h_StockPrice, OPT_SZ, cudaMemcpyHostToDevice));
  checkCudaErrors(cudaMemcpy(d_OptionStrike, h_OptionStrike, OPT_SZ,
                             cudaMemcpyHostToDevice));
  checkCudaErrors(
      cudaMemcpy(d_OptionYears, h_OptionYears, OPT_SZ, cudaMemcpyHostToDevice));
  printf("Data init done.\n\n");

  printf("Executing Black-Scholes GPU kernel (%i iterations)...\n",
         NUM_ITERATIONS);
  // Synchronize before starting the timer so pending work is not measured
  checkCudaErrors(cudaDeviceSynchronize());
  sdkResetTimer(&hTimer);
  sdkStartTimer(&hTimer);

  // Each thread prices two options (one float2), hence OPT_N / 2 threads in
  // blocks of 128
  for (i = 0; i < NUM_ITERATIONS; i++) {
    BlackScholesGPU<<<DIV_UP((OPT_N / 2), 128), 128 /*480, 128*/>>>(
        (float2 *)d_CallResult, (float2 *)d_PutResult, (float2 *)d_StockPrice,
        (float2 *)d_OptionStrike, (float2 *)d_OptionYears, RISKFREE, VOLATILITY,
        OPT_N);
    getLastCudaError("BlackScholesGPU() execution failed\n");
  }

  checkCudaErrors(cudaDeviceSynchronize());
  sdkStopTimer(&hTimer);
  // Average time of a single kernel launch
  gpuTime = sdkGetTimerValue(&hTimer) / NUM_ITERATIONS;

  // Both call and put are calculated
  printf("Options count : %i \n", 2 * OPT_N);
  printf("BlackScholesGPU() time : %f msec\n", gpuTime);
  // 5 arrays of OPT_N floats are touched per launch (3 inputs, 2 outputs)
  printf("Effective memory bandwidth: %f GB/s\n",
         ((double)(5 * OPT_N * sizeof(float)) * 1E-9) / (gpuTime * 1E-3));
  printf("Gigaoptions per second : %f \n\n",
         ((double)(2 * OPT_N) * 1E-9) / (gpuTime * 1E-3));

  // The %u conversions expect unsigned arguments, so pass them as unsigned
  printf(
      "BlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u "
      "options, NumDevsUsed = %u, Workgroup = %u\n",
      (((double)(2.0 * OPT_N) * 1.0E-9) / (gpuTime * 1.0E-3)), gpuTime * 1e-3,
      (unsigned int)(2 * OPT_N), 1u, 128u);

  printf("\nReading back GPU results...\n");
  // Read back GPU results to compare them to CPU results
  checkCudaErrors(cudaMemcpy(h_CallResultGPU, d_CallResult, OPT_SZ,
                             cudaMemcpyDeviceToHost));
  checkCudaErrors(
      cudaMemcpy(h_PutResultGPU, d_PutResult, OPT_SZ, cudaMemcpyDeviceToHost));

  printf("Checking the results...\n");
  printf("...running CPU calculations.\n\n");
  // Calculate options values on CPU
  BlackScholesCPU(h_CallResultCPU, h_PutResultCPU, h_StockPrice, h_OptionStrike,
                  h_OptionYears, RISKFREE, VOLATILITY, OPT_N);

  printf("Comparing the results...\n");
  // Calculate max absolute difference and L1 distance
  // between CPU and GPU results
  // NOTE(review): only the call prices are compared here; the put prices are
  // read back from the GPU but are not part of the check.
  sum_delta = 0;
  sum_ref = 0;
  max_delta = 0;

  for (i = 0; i < OPT_N; i++) {
    ref = h_CallResultCPU[i];
    delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]);

    if (delta > max_delta) {
      max_delta = delta;
    }

    sum_delta += delta;
    sum_ref += fabs(ref);
  }

  // Relative L1 error: summed absolute difference over summed reference value
  L1norm = sum_delta / sum_ref;
  printf("L1 norm: %E\n", L1norm);
  printf("Max absolute error: %E\n\n", max_delta);

  printf("Shutting down...\n");
  printf("...releasing GPU memory.\n");
  checkCudaErrors(cudaFree(d_OptionYears));
  checkCudaErrors(cudaFree(d_OptionStrike));
  checkCudaErrors(cudaFree(d_StockPrice));
  checkCudaErrors(cudaFree(d_PutResult));
  checkCudaErrors(cudaFree(d_CallResult));

  printf("...releasing CPU memory.\n");
  free(h_OptionYears);
  free(h_OptionStrike);
  free(h_StockPrice);
  free(h_PutResultGPU);
  free(h_CallResultGPU);
  free(h_PutResultCPU);
  free(h_CallResultCPU);
  sdkDeleteTimer(&hTimer);
  printf("Shutdown done.\n");

  printf("\n[BlackScholes] - Test Summary\n");

  if (L1norm > 1e-6) {
    printf("Test failed!\n");
    exit(EXIT_FAILURE);
  }

  printf(
      "\nNOTE: The CUDA Samples are not meant for performance measurements. "
      "Results may vary when GPU Boost is enabled.\n\n");
  printf("Test passed\n");
  exit(EXIT_SUCCESS);
}
|
86
Samples/BlackScholes/BlackScholes_gold.cpp
Normal file
86
Samples/BlackScholes/BlackScholes_gold.cpp
Normal file
|
@ -0,0 +1,86 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
// Cumulative normal distribution function, evaluated in double precision
// through a five-coefficient polynomial approximation
////////////////////////////////////////////////////////////////////////////////
static double CND(double d) {
  // Polynomial coefficients A5..A1, highest order first for Horner evaluation
  static const double kCoeffs[5] = {1.330274429, -1.821255978, 1.781477937,
                                    -0.356563782, 0.31938153};
  const double kInvSqrt2Pi = 0.39894228040143267793994605993438;

  const double k = 1.0 / (1.0 + 0.2316419 * fabs(d));

  // Horner scheme for A1 + k * (A2 + k * (A3 + k * (A4 + k * A5)))
  double series = kCoeffs[0];
  for (int i = 1; i < 5; ++i) {
    series = kCoeffs[i] + k * series;
  }

  const double tail = kInvSqrt2Pi * exp(-0.5 * d * d) * (k * series);

  // tail depends only on |d|; for positive d the result is its complement
  return (d > 0) ? 1.0 - tail : tail;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Black-Scholes formula for both call and put
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
static void BlackScholesBodyCPU(float &callResult, float &putResult,
|
||||
float Sf, // Stock price
|
||||
float Xf, // Option strike
|
||||
float Tf, // Option years
|
||||
float Rf, // Riskless rate
|
||||
float Vf // Volatility rate
|
||||
) {
|
||||
double S = Sf, X = Xf, T = Tf, R = Rf, V = Vf;
|
||||
|
||||
double sqrtT = sqrt(T);
|
||||
double d1 = (log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT);
|
||||
double d2 = d1 - V * sqrtT;
|
||||
double CNDD1 = CND(d1);
|
||||
double CNDD2 = CND(d2);
|
||||
|
||||
// Calculate Call and Put simultaneously
|
||||
double expRT = exp(-R * T);
|
||||
callResult = (float)(S * CNDD1 - X * expRT * CNDD2);
|
||||
putResult = (float)(X * expRT * (1.0 - CNDD2) - S * (1.0 - CNDD1));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of optN options
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
|
||||
float *h_StockPrice, float *h_OptionStrike,
|
||||
float *h_OptionYears, float Riskfree,
|
||||
float Volatility, int optN) {
|
||||
for (int opt = 0; opt < optN; opt++)
|
||||
BlackScholesBodyCPU(h_CallResult[opt], h_PutResult[opt], h_StockPrice[opt],
|
||||
h_OptionStrike[opt], h_OptionYears[opt], Riskfree,
|
||||
Volatility);
|
||||
}
|
106
Samples/BlackScholes/BlackScholes_kernel.cuh
Normal file
106
Samples/BlackScholes/BlackScholes_kernel.cuh
Normal file
|
@ -0,0 +1,106 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
// Cumulative normal distribution function for device code, evaluated in single
// precision through a five-coefficient polynomial approximation using the
// __fdividef and __expf intrinsics
////////////////////////////////////////////////////////////////////////////////
__device__ inline float cndGPU(float d) {
  const float A1 = 0.31938153f;
  const float A2 = -0.356563782f;
  const float A3 = 1.781477937f;
  const float A4 = -1.821255978f;
  const float A5 = 1.330274429f;
  const float RSQRT2PI = 0.39894228040143267793994605993438f;

  const float k = __fdividef(1.0f, (1.0f + 0.2316419f * fabsf(d)));

  // Horner scheme for A1 + k * (A2 + k * (A3 + k * (A4 + k * A5)))
  float series = A4 + k * A5;
  series = A3 + k * series;
  series = A2 + k * series;
  series = A1 + k * series;

  const float tail = RSQRT2PI * __expf(-0.5f * d * d) * (k * series);

  // tail depends only on |d|; for positive d the result is its complement
  return (d > 0) ? 1.0f - tail : tail;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
// Price one option with the Black-Scholes formula in device code.
// Single precision throughout, using the __fdividef, __logf and __expf
// intrinsics; the call and put prices are written to CallResult and PutResult.
////////////////////////////////////////////////////////////////////////////////
__device__ inline void BlackScholesBodyGPU(float &CallResult, float &PutResult,
                                           float S,  // Stock price
                                           float X,  // Option strike
                                           float T,  // Option years
                                           float R,  // Riskless rate
                                           float V   // Volatility rate
) {
  // sqrt(T) obtained as the reciprocal of rsqrtf(T)
  const float rootT = __fdividef(1.0F, rsqrtf(T));
  // V * sqrt(T) appears in both d1 and d2
  const float volRootT = V * rootT;

  const float d1 =
      __fdividef(__logf(S / X) + (R + 0.5f * V * V) * T, volRootT);
  const float d2 = d1 - volRootT;

  const float cndD1 = cndGPU(d1);
  const float cndD2 = cndGPU(d2);

  // Strike multiplied by exp(-R * T); shared by call and put
  const float discountedStrike = X * __expf(-R * T);

  CallResult = S * cndD1 - discountedStrike * cndD2;
  PutResult = discountedStrike * (1.0f - cndD2) - S * (1.0f - cndD1);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
// Price an array of optN options on the GPU.
// The arrays are accessed as float2, so each thread prices two options (to
// increase instruction level parallelism). Only optN / 2 elements are
// processed: threads at or beyond that index do nothing, and a trailing option
// is not computed when optN is odd.
////////////////////////////////////////////////////////////////////////////////
__launch_bounds__(128) __global__
    void BlackScholesGPU(float2 *__restrict d_CallResult,
                         float2 *__restrict d_PutResult,
                         float2 *__restrict d_StockPrice,
                         float2 *__restrict d_OptionStrike,
                         float2 *__restrict d_OptionYears, float Riskfree,
                         float Volatility, int optN) {
  // Index of the float2 element (pair of options) owned by this thread
  const int pairIdx = blockDim.x * blockIdx.x + threadIdx.x;

  if (pairIdx >= (optN / 2)) {
    return;
  }

  const float2 stock = d_StockPrice[pairIdx];
  const float2 strike = d_OptionStrike[pairIdx];
  const float2 years = d_OptionYears[pairIdx];

  float2 call;
  float2 put;

  // First option of the pair (.x), then the second (.y)
  BlackScholesBodyGPU(call.x, put.x, stock.x, strike.x, years.x, Riskfree,
                      Volatility);
  BlackScholesBodyGPU(call.y, put.y, stock.y, strike.y, years.y, Riskfree,
                      Volatility);

  d_CallResult[pairIdx] = call;
  d_PutResult[pairIdx] = put;
}
|
20
Samples/BlackScholes/BlackScholes_vs2017.sln
Normal file
20
Samples/BlackScholes/BlackScholes_vs2017.sln
Normal file
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2017
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BlackScholes", "BlackScholes_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
114
Samples/BlackScholes/BlackScholes_vs2017.vcxproj
Normal file
114
Samples/BlackScholes/BlackScholes_vs2017.vcxproj
Normal file
|
@ -0,0 +1,114 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>BlackScholes_vs2017</RootNamespace>
|
||||
<ProjectName>BlackScholes</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<!-- All extra nvcc options live in ONE AdditionalOptions element. A second element that does not reference %(AdditionalOptions) would replace this value and drop the /wd 4819 suppression. -->
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="BlackScholes.cu" />
|
||||
<ClCompile Include="BlackScholes_gold.cpp" />
|
||||
<None Include="BlackScholes_kernel.cuh" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
20
Samples/BlackScholes/BlackScholes_vs2019.sln
Normal file
20
Samples/BlackScholes/BlackScholes_vs2019.sln
Normal file
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2019
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BlackScholes", "BlackScholes_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
110
Samples/BlackScholes/BlackScholes_vs2019.vcxproj
Normal file
110
Samples/BlackScholes/BlackScholes_vs2019.vcxproj
Normal file
|
@ -0,0 +1,110 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>BlackScholes_vs2019</RootNamespace>
|
||||
<ProjectName>BlackScholes</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/BlackScholes.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<!-- All extra nvcc options live in ONE AdditionalOptions element. A second element that does not reference %(AdditionalOptions) would replace this value and drop the /wd 4819 suppression. -->
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="BlackScholes.cu" />
|
||||
<ClCompile Include="BlackScholes_gold.cpp" />
|
||||
<None Include="BlackScholes_kernel.cuh" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
365
Samples/BlackScholes/Makefile
Normal file
365
Samples/BlackScholes/Makefile
Normal file
|
@ -0,0 +1,365 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux platforms
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
|
||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||
ifeq ($(HOST_ARCH),aarch64)
|
||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
||||
HOST_ARCH := sbsa
|
||||
TARGET_ARCH := sbsa
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),sbsa)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||
LDFLAGS += -lsocket
|
||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
||||
ifdef TARGET_OVERRIDE
|
||||
LDFLAGS += -lslog2
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_FS),)
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
||||
CCFLAGS += -I$(TARGET_FS)/../include
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
#Detect if installed version of GCC supports required C++11
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
empty :=
|
||||
space := $(empty) $(empty)
|
||||
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
|
||||
#Create version number without "."
|
||||
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
|
||||
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
|
||||
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
|
||||
# Make sure the version number has at least 3 decimals
|
||||
GCCVERSION += 00
|
||||
# Remove spaces from the version number
|
||||
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
|
||||
#$(warning $(GCCVERSION))
|
||||
|
||||
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
|
||||
|
||||
ifeq ($(IS_MIN_VERSION), 1)
|
||||
$(info >>> GCC Version is greater or equal to 5.1.0 <<<)
|
||||
else
|
||||
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
endif
|
||||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS += -maxrregcount=16 --threads 0 --std=c++11
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
# These targets are commands, not files; declaring them phony keeps a stray
|
||||
# file named e.g. "clean" or "build" from making the target look up to date.
|
||||
.PHONY: all build check.deps run clean clobber
|
||||
|
||||
# Default goal: build the sample.
|
||||
all: build
|
||||
|
||||
build: BlackScholes
|
||||
|
||||
# Report whether the dependency checks above left the sample enabled.
|
||||
check.deps:
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
	@echo "Sample will be waived due to the above missing dependencies"
|
||||
else
|
||||
	@echo "Sample is ready - all dependencies have been met"
|
||||
endif
|
||||
|
||||
# When the sample is waived, $(EXEC) is an echo prefix, so the recipes below
|
||||
# only print the command instead of running it.
|
||||
BlackScholes.o:BlackScholes.cu
|
||||
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
BlackScholes_gold.o:BlackScholes_gold.cpp
|
||||
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
# Link the executable, then copy it into the shared bin tree.
|
||||
BlackScholes: BlackScholes.o BlackScholes_gold.o
|
||||
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
||||
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
run: build
|
||||
	$(EXEC) ./BlackScholes
|
||||
|
||||
# Remove the local build products and the copy placed in the bin tree.
|
||||
clean:
|
||||
	rm -f BlackScholes BlackScholes.o BlackScholes_gold.o
|
||||
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/BlackScholes
|
||||
|
||||
clobber: clean
|
71
Samples/BlackScholes/NsightEclipse.xml
Normal file
71
Samples/BlackScholes/NsightEclipse.xml
Normal file
|
@ -0,0 +1,71 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>BlackScholes</name>
|
||||
<cflags>
|
||||
<flag>-maxrregcount=16</flag>
|
||||
</cflags>
|
||||
<description><![CDATA[This sample evaluates fair call and put prices for a given set of European options by Black-Scholes formula.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../Common</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="basic">Computational Finance</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>CUDA</keyword>
|
||||
<keyword>Computational Finance</keyword>
|
||||
<keyword>option pricing</keyword>
|
||||
<keyword>Black-Scholes</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>BlackScholes.cu</primary_file>
|
||||
<scopes>
|
||||
<scope>1:CUDA Basic Topics</scope>
|
||||
<scope>3:Computational Finance</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm35</sm-arch>
|
||||
<sm-arch>sm37</sm-arch>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<platform>windows7</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>macosx</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>arm</arch>
|
||||
</env>
|
||||
<env>
|
||||
<arch>ppc64le</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>Black-Scholes Option Pricing</title>
|
||||
<type>exe</type>
|
||||
<whitepaper>doc\BlackScholes.pdf</whitepaper>
|
||||
</entry>
|
67
Samples/BlackScholes/README.md
Normal file
67
Samples/BlackScholes/README.md
Normal file
|
@ -0,0 +1,67 @@
|
|||
# BlackScholes - Black-Scholes Option Pricing
|
||||
|
||||
## Description
|
||||
|
||||
This sample evaluates fair call and put prices for a given set of European options using the Black-Scholes formula.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Computational Finance
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Linux, Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64, ppc64le, armv7l
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
The samples makefiles can take advantage of certain options:
|
||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=g++
|
||||
```
|
||||
|
||||
## References (for more details)
|
||||
|
BIN
Samples/BlackScholes/doc/BlackScholes.doc
Normal file
BIN
Samples/BlackScholes/doc/BlackScholes.doc
Normal file
Binary file not shown.
BIN
Samples/BlackScholes/doc/BlackScholes.pdf
Normal file
BIN
Samples/BlackScholes/doc/BlackScholes.pdf
Normal file
Binary file not shown.
18
Samples/BlackScholes_nvrtc/.vscode/c_cpp_properties.json
vendored
Normal file
18
Samples/BlackScholes_nvrtc/.vscode/c_cpp_properties.json
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"${workspaceFolder}/../../Common"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/local/cuda/bin/nvcc",
|
||||
"cStandard": "gnu17",
|
||||
"cppStandard": "gnu++14",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"configurationProvider": "ms-vscode.makefile-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
7
Samples/BlackScholes_nvrtc/.vscode/extensions.json
vendored
Normal file
7
Samples/BlackScholes_nvrtc/.vscode/extensions.json
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"recommendations": [
|
||||
"nvidia.nsight-vscode-edition",
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.makefile-tools"
|
||||
]
|
||||
}
|
10
Samples/BlackScholes_nvrtc/.vscode/launch.json
vendored
Normal file
10
Samples/BlackScholes_nvrtc/.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "CUDA C++: Launch",
|
||||
"type": "cuda-gdb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/BlackScholes_nvrtc"
|
||||
}
|
||||
]
|
||||
}
|
15
Samples/BlackScholes_nvrtc/.vscode/tasks.json
vendored
Normal file
15
Samples/BlackScholes_nvrtc/.vscode/tasks.json
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "sample",
|
||||
"type": "shell",
|
||||
"command": "make dbg=1",
|
||||
"problemMatcher": ["$nvcc"],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
269
Samples/BlackScholes_nvrtc/BlackScholes.cpp
Normal file
269
Samples/BlackScholes_nvrtc/BlackScholes.cpp
Normal file
|
@ -0,0 +1,269 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This sample evaluates fair call and put prices for a
|
||||
* given set of European options by Black-Scholes formula.
|
||||
* See supplied whitepaper for more explanations.
|
||||
*/
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <nvrtc_helper.h>
|
||||
|
||||
#include <helper_functions.h> // helper functions for string parsing
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of optN options on CPU
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
|
||||
float *h_StockPrice, float *h_OptionStrike,
|
||||
float *h_OptionYears, float Riskfree,
|
||||
float Volatility, int optN);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of OptN options on GPU
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Helper function, returning uniformly distributed
|
||||
// random float in [low, high] range
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
float RandFloat(float low, float high) {
  // Map the next rand() value onto [0, 1] and use it as the blend weight
  // between the two bounds (weight 0 yields low, weight 1 yields high).
  const float weight = (float)rand() / (float)RAND_MAX;

  return (1.0f - weight) * low + weight * high;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Data configuration
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Number of options priced per run.
const int OPT_N = 4000000;
// Number of back-to-back kernel launches used for timing.
const int NUM_ITERATIONS = 512;

// Size in bytes of one float array holding OPT_N values.
const int OPT_SZ = OPT_N * sizeof(float);

// Risk-free rate and volatility shared by every option.
const float RISKFREE = 0.02f;
const float VOLATILITY = 0.30f;

// Integer division of a by b, rounded up.
#define DIV_UP(a, b) (((a) + (b) - 1) / (b))
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Main program
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
// Start logs
|
||||
printf("[%s] - Starting...\n", argv[0]);
|
||||
|
||||
//'h_' prefix - CPU (host) memory space
|
||||
float
|
||||
// Results calculated by CPU for reference
|
||||
*h_CallResultCPU,
|
||||
*h_PutResultCPU,
|
||||
// CPU copy of GPU results
|
||||
*h_CallResultGPU, *h_PutResultGPU,
|
||||
// CPU instance of input data
|
||||
*h_StockPrice, *h_OptionStrike, *h_OptionYears;
|
||||
|
||||
//'d_' prefix - GPU (device) memory space
|
||||
CUdeviceptr
|
||||
// Results calculated by GPU
|
||||
d_CallResult,
|
||||
d_PutResult,
|
||||
|
||||
// GPU instance of input data
|
||||
d_StockPrice, d_OptionStrike, d_OptionYears;
|
||||
|
||||
double delta, ref, sum_delta, sum_ref, max_delta, L1norm, gpuTime;
|
||||
|
||||
StopWatchInterface *hTimer = NULL;
|
||||
int i;
|
||||
|
||||
sdkCreateTimer(&hTimer);
|
||||
|
||||
printf("Initializing data...\n");
|
||||
printf("...allocating CPU memory for options.\n");
|
||||
|
||||
h_CallResultCPU = (float *)malloc(OPT_SZ);
|
||||
h_PutResultCPU = (float *)malloc(OPT_SZ);
|
||||
h_CallResultGPU = (float *)malloc(OPT_SZ);
|
||||
h_PutResultGPU = (float *)malloc(OPT_SZ);
|
||||
h_StockPrice = (float *)malloc(OPT_SZ);
|
||||
h_OptionStrike = (float *)malloc(OPT_SZ);
|
||||
h_OptionYears = (float *)malloc(OPT_SZ);
|
||||
|
||||
char *cubin, *kernel_file;
|
||||
size_t cubinSize;
|
||||
kernel_file = sdkFindFilePath("BlackScholes_kernel.cuh", argv[0]);
|
||||
|
||||
// Compile the kernel BlackScholes_kernel.
|
||||
compileFileToCUBIN(kernel_file, argc, argv, &cubin, &cubinSize, 0);
|
||||
CUmodule module = loadCUBIN(cubin, argc, argv);
|
||||
|
||||
CUfunction kernel_addr;
|
||||
checkCudaErrors(cuModuleGetFunction(&kernel_addr, module, "BlackScholesGPU"));
|
||||
|
||||
printf("...allocating GPU memory for options.\n");
|
||||
checkCudaErrors(cuMemAlloc(&d_CallResult, OPT_SZ));
|
||||
checkCudaErrors(cuMemAlloc(&d_PutResult, OPT_SZ));
|
||||
checkCudaErrors(cuMemAlloc(&d_StockPrice, OPT_SZ));
|
||||
checkCudaErrors(cuMemAlloc(&d_OptionStrike, OPT_SZ));
|
||||
checkCudaErrors(cuMemAlloc(&d_OptionYears, OPT_SZ));
|
||||
|
||||
printf("...generating input data in CPU mem.\n");
|
||||
srand(5347);
|
||||
|
||||
// Generate options set
|
||||
for (i = 0; i < OPT_N; i++) {
|
||||
h_CallResultCPU[i] = 0.0f;
|
||||
h_PutResultCPU[i] = -1.0f;
|
||||
h_StockPrice[i] = RandFloat(5.0f, 30.0f);
|
||||
h_OptionStrike[i] = RandFloat(1.0f, 100.0f);
|
||||
h_OptionYears[i] = RandFloat(0.25f, 10.0f);
|
||||
}
|
||||
|
||||
printf("...copying input data to GPU mem.\n");
|
||||
// Copy options data to GPU memory for further processing
|
||||
checkCudaErrors(cuMemcpyHtoD(d_StockPrice, h_StockPrice, OPT_SZ));
|
||||
checkCudaErrors(cuMemcpyHtoD(d_OptionStrike, h_OptionStrike, OPT_SZ));
|
||||
checkCudaErrors(cuMemcpyHtoD(d_OptionYears, h_OptionYears, OPT_SZ));
|
||||
|
||||
printf("Data init done.\n\n");
|
||||
printf("Executing Black-Scholes GPU kernel (%i iterations)...\n",
|
||||
NUM_ITERATIONS);
|
||||
|
||||
sdkResetTimer(&hTimer);
|
||||
sdkStartTimer(&hTimer);
|
||||
|
||||
dim3 cudaBlockSize(128, 1, 1);
|
||||
dim3 cudaGridSize(DIV_UP(OPT_N / 2, 128), 1, 1);
|
||||
|
||||
float risk = RISKFREE;
|
||||
float volatility = VOLATILITY;
|
||||
int optval = OPT_N;
|
||||
|
||||
void *arr[] = {(void *)&d_CallResult, (void *)&d_PutResult,
|
||||
(void *)&d_StockPrice, (void *)&d_OptionStrike,
|
||||
(void *)&d_OptionYears, (void *)&risk,
|
||||
(void *)&volatility, (void *)&optval};
|
||||
|
||||
for (i = 0; i < NUM_ITERATIONS; i++) {
|
||||
checkCudaErrors(cuLaunchKernel(kernel_addr, cudaGridSize.x, cudaGridSize.y,
|
||||
cudaGridSize.z, /* grid dim */
|
||||
cudaBlockSize.x, cudaBlockSize.y,
|
||||
cudaBlockSize.z, /* block dim */
|
||||
0, 0, /* shared mem, stream */
|
||||
&arr[0], /* arguments */
|
||||
0));
|
||||
}
|
||||
|
||||
checkCudaErrors(cuCtxSynchronize());
|
||||
|
||||
sdkStopTimer(&hTimer);
|
||||
gpuTime = sdkGetTimerValue(&hTimer) / NUM_ITERATIONS;
|
||||
|
||||
// Both call and put is calculated
|
||||
printf("Options count : %i \n", 2 * OPT_N);
|
||||
printf("BlackScholesGPU() time : %f msec\n", gpuTime);
|
||||
printf("Effective memory bandwidth: %f GB/s\n",
|
||||
((double)(5 * OPT_N * sizeof(float)) * 1E-9) / (gpuTime * 1E-3));
|
||||
printf("Gigaoptions per second : %f \n\n",
|
||||
((double)(2 * OPT_N) * 1E-9) / (gpuTime * 1E-3));
|
||||
printf(
|
||||
"BlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u "
|
||||
"options, NumDevsUsed = %u, Workgroup = %u\n",
|
||||
(((double)(2.0 * OPT_N) * 1.0E-9) / (gpuTime * 1.0E-3)), gpuTime * 1e-3,
|
||||
(2 * OPT_N), 1, 128);
|
||||
|
||||
printf("\nReading back GPU results...\n");
|
||||
|
||||
// Read back GPU results to compare them to CPU results
|
||||
checkCudaErrors(cuMemcpyDtoH(h_CallResultGPU, d_CallResult, OPT_SZ));
|
||||
checkCudaErrors(cuMemcpyDtoH(h_PutResultGPU, d_PutResult, OPT_SZ));
|
||||
|
||||
printf("Checking the results...\n");
|
||||
printf("...running CPU calculations.\n\n");
|
||||
|
||||
// Calculate options values on CPU
|
||||
BlackScholesCPU(h_CallResultCPU, h_PutResultCPU, h_StockPrice, h_OptionStrike,
|
||||
h_OptionYears, RISKFREE, VOLATILITY, OPT_N);
|
||||
|
||||
printf("Comparing the results...\n");
|
||||
// Calculate max absolute difference and L1 distance
|
||||
// between CPU and GPU results
|
||||
sum_delta = 0;
|
||||
sum_ref = 0;
|
||||
max_delta = 0;
|
||||
|
||||
for (i = 0; i < OPT_N; i++) {
|
||||
ref = h_CallResultCPU[i];
|
||||
delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]);
|
||||
|
||||
if (delta > max_delta) {
|
||||
max_delta = delta;
|
||||
}
|
||||
|
||||
sum_delta += delta;
|
||||
sum_ref += fabs(ref);
|
||||
}
|
||||
|
||||
L1norm = sum_delta / sum_ref;
|
||||
printf("L1 norm: %E\n", L1norm);
|
||||
printf("Max absolute error: %E\n\n", max_delta);
|
||||
|
||||
printf("Shutting down...\n");
|
||||
printf("...releasing GPU memory.\n");
|
||||
|
||||
checkCudaErrors(cuMemFree(d_OptionYears));
|
||||
checkCudaErrors(cuMemFree(d_OptionStrike));
|
||||
checkCudaErrors(cuMemFree(d_StockPrice));
|
||||
checkCudaErrors(cuMemFree(d_PutResult));
|
||||
checkCudaErrors(cuMemFree(d_CallResult));
|
||||
|
||||
printf("...releasing CPU memory.\n");
|
||||
|
||||
free(h_OptionYears);
|
||||
free(h_OptionStrike);
|
||||
free(h_StockPrice);
|
||||
free(h_PutResultGPU);
|
||||
free(h_CallResultGPU);
|
||||
free(h_PutResultCPU);
|
||||
free(h_CallResultCPU);
|
||||
|
||||
sdkDeleteTimer(&hTimer);
|
||||
printf("Shutdown done.\n");
|
||||
|
||||
printf("\n[%s] - Test Summary\n", argv[0]);
|
||||
|
||||
if (L1norm > 1e-6) {
|
||||
printf("Test failed!\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
printf("Test passed\n");
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
88
Samples/BlackScholes_nvrtc/BlackScholes_gold.cpp
Normal file
88
Samples/BlackScholes_nvrtc/BlackScholes_gold.cpp
Normal file
|
@ -0,0 +1,88 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Polynomial approximation of cumulative normal distribution function
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static double CND(double d) {
  // Coefficients of the approximating polynomial.
  const double A1 = 0.31938153;
  const double A2 = -0.356563782;
  const double A3 = 1.781477937;
  const double A4 = -1.821255978;
  const double A5 = 1.330274429;
  const double RSQRT2PI = 0.39894228040143267793994605993438;

  // The polynomial is evaluated in k, which depends only on |d|.
  const double k = 1.0 / (1.0 + 0.2316419 * fabs(d));
  const double poly = k * (A1 + k * (A2 + k * (A3 + k * (A4 + k * A5))));

  const double tail = RSQRT2PI * exp(-0.5 * d * d) * poly;

  // For positive d the result is the complement of the value computed above.
  return (d > 0) ? 1.0 - tail : tail;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Black-Scholes formula for both call and put
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
static void BlackScholesBodyCPU(float &callResult, float &putResult,
|
||||
float Sf, // Stock price
|
||||
float Xf, // Option strike
|
||||
float Tf, // Option years
|
||||
float Rf, // Riskless rate
|
||||
float Vf // Volatility rate
|
||||
) {
|
||||
double S = Sf, X = Xf, T = Tf, R = Rf, V = Vf;
|
||||
double sqrtT = sqrt(T);
|
||||
double d1 = (log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT);
|
||||
double d2 = d1 - V * sqrtT;
|
||||
double CNDD1 = CND(d1);
|
||||
double CNDD2 = CND(d2);
|
||||
|
||||
// Calculate Call and Put simultaneously
|
||||
double expRT = exp(-R * T);
|
||||
|
||||
callResult = (float)(S * CNDD1 - X * expRT * CNDD2);
|
||||
putResult = (float)(X * expRT * (1.0 - CNDD2) - S * (1.0 - CNDD1));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of optN options
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
|
||||
float *h_StockPrice, float *h_OptionStrike,
|
||||
float *h_OptionYears, float Riskfree,
|
||||
float Volatility, int optN) {
|
||||
for (int opt = 0; opt < optN; opt++)
|
||||
BlackScholesBodyCPU(h_CallResult[opt], h_PutResult[opt], h_StockPrice[opt],
|
||||
h_OptionStrike[opt], h_OptionYears[opt], Riskfree,
|
||||
Volatility);
|
||||
}
|
103
Samples/BlackScholes_nvrtc/BlackScholes_kernel.cuh
Normal file
103
Samples/BlackScholes_nvrtc/BlackScholes_kernel.cuh
Normal file
|
@ -0,0 +1,103 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Polynomial approximation of cumulative normal distribution function
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__device__ inline float cndGPU(float d) {
  // Coefficients of the approximating polynomial.
  const float A1 = 0.31938153f;
  const float A2 = -0.356563782f;
  const float A3 = 1.781477937f;
  const float A4 = -1.821255978f;
  const float A5 = 1.330274429f;
  const float RSQRT2PI = 0.39894228040143267793994605993438f;

  // The polynomial is evaluated in k, which depends only on |d|.
  const float k = __fdividef(1.0f, (1.0f + 0.2316419f * fabsf(d)));

  const float tail = RSQRT2PI * __expf(-0.5f * d * d) *
                     (k * (A1 + k * (A2 + k * (A3 + k * (A4 + k * A5)))));

  // For positive d the result is the complement of the value computed above.
  return (d > 0) ? 1.0f - tail : tail;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Black-Scholes formula for both call and put
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
__device__ inline void BlackScholesBodyGPU(float &CallResult, float &PutResult,
                                           float S,  // Stock price
                                           float X,  // Option strike
                                           float T,  // Option years
                                           float R,  // Riskless rate
                                           float V   // Volatility rate
                                           ) {
  // The square root of T is formed as the reciprocal of rsqrtf(T).
  const float rootT = __fdividef(1.0F, rsqrtf(T));

  const float d1 =
      __fdividef(__logf(S / X) + (R + 0.5f * V * V) * T, V * rootT);
  const float d2 = d1 - V * rootT;

  const float cndD1 = cndGPU(d1);
  const float cndD2 = cndGPU(d2);

  // The discount factor is shared by the call and the put price.
  const float discount = __expf(-R * T);

  CallResult = S * cndD1 - X * discount * cndD2;
  PutResult = X * discount * (1.0f - cndD2) - S * (1.0f - cndD1);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Process an array of optN options on GPU
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern "C" __launch_bounds__(128) __global__
    void BlackScholesGPU(float2 *__restrict d_CallResult,
                         float2 *__restrict d_PutResult,
                         float2 *__restrict d_StockPrice,
                         float2 *__restrict d_OptionStrike,
                         float2 *__restrict d_OptionYears, float Riskfree,
                         float Volatility, int optN) {
  // Each thread handles one float2 element, i.e. two options, to increase
  // ILP (instruction level parallelism).
  const int pairIdx = blockDim.x * blockIdx.x + threadIdx.x;

  // Threads beyond the optN / 2 pairs have nothing to do.
  if (pairIdx >= (optN / 2)) {
    return;
  }

  const float2 price = d_StockPrice[pairIdx];
  const float2 strike = d_OptionStrike[pairIdx];
  const float2 years = d_OptionYears[pairIdx];

  float2 call;
  float2 put;

  BlackScholesBodyGPU(call.x, put.x, price.x, strike.x, years.x, Riskfree,
                      Volatility);
  BlackScholesBodyGPU(call.y, put.y, price.y, strike.y, years.y, Riskfree,
                      Volatility);

  d_CallResult[pairIdx] = call;
  d_PutResult[pairIdx] = put;
}
|
20
Samples/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.sln
Normal file
20
Samples/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.sln
Normal file
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2017
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BlackScholes_nvrtc", "BlackScholes_nvrtc_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
114
Samples/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj
Normal file
114
Samples/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2017.vcxproj
Normal file
|
@ -0,0 +1,114 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>BlackScholes_nvrtc_vs2017</RootNamespace>
|
||||
<ProjectName>BlackScholes_nvrtc</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);$(CUDA_PATH)/include;</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cuda.lib;nvrtc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/BlackScholes_nvrtc.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
  <CodeGeneration></CodeGeneration>
  <!-- AdditionalOptions is defined once only: a second definition of the same
       metadata replaces the earlier one instead of appending to it, which
       would drop the /wd 4819 warning suppression. -->
  <AdditionalOptions>-Xcompiler "/wd 4819" --threads 0 %(AdditionalOptions)</AdditionalOptions>
  <Include>./;../../Common</Include>
  <Defines>WIN32</Defines>
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="BlackScholes.cpp" />
|
||||
<ClCompile Include="BlackScholes_gold.cpp" />
|
||||
<None Include="BlackScholes_kernel.cuh" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
20
Samples/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.sln
Normal file
20
Samples/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.sln
Normal file
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2019
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BlackScholes_nvrtc", "BlackScholes_nvrtc_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
110
Samples/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj
Normal file
110
Samples/BlackScholes_nvrtc/BlackScholes_nvrtc_vs2019.vcxproj
Normal file
|
@ -0,0 +1,110 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>BlackScholes_nvrtc_vs2019</RootNamespace>
|
||||
<ProjectName>BlackScholes_nvrtc</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.5.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);$(CUDA_PATH)/include;</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cuda.lib;nvrtc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/BlackScholes_nvrtc.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration></CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
<!-- Append to the AdditionalOptions defined earlier in this element instead of overriding them. -->
<AdditionalOptions>--threads 0 %(AdditionalOptions)</AdditionalOptions>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="BlackScholes.cpp" />
|
||||
<ClCompile Include="BlackScholes_gold.cpp" />
|
||||
<None Include="BlackScholes_kernel.cuh" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.5.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
422
Samples/BlackScholes_nvrtc/Makefile
Normal file
422
Samples/BlackScholes_nvrtc/Makefile
Normal file
|
@ -0,0 +1,422 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux platforms
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
|
||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||
ifeq ($(HOST_ARCH),aarch64)
|
||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
||||
HOST_ARCH := sbsa
|
||||
TARGET_ARCH := sbsa
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),sbsa)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||
LDFLAGS += -lsocket
|
||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
||||
ifdef TARGET_OVERRIDE
|
||||
LDFLAGS += -lslog2
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_FS),)
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
||||
CCFLAGS += -I$(TARGET_FS)/../include
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
CCFLAGS += -g
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
# This sample is not supported on ARMv7
|
||||
ifeq ($(TARGET_ARCH),armv7l)
|
||||
$(info >>> WARNING - BlackScholes_nvrtc is not supported on ARMv7 - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on QNX
|
||||
ifeq ($(TARGET_OS),qnx)
|
||||
$(info >>> WARNING - BlackScholes_nvrtc is not supported on QNX - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
# libNVRTC specific libraries
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -L$(CUDA_PATH)/lib -F/Library/Frameworks -framework CUDA
|
||||
endif
|
||||
|
||||
#Detect if installed version of GCC supports required C++11
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
empty :=
|
||||
space := $(empty) $(empty)
|
||||
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
|
||||
#Create version number without "."
|
||||
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
|
||||
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
|
||||
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
|
||||
# Make sure the version number has at least 3 decimals
|
||||
GCCVERSION += 00
|
||||
# Remove spaces from the version number
|
||||
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
|
||||
#$(warning $(GCCVERSION))
|
||||
|
||||
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
|
||||
|
||||
ifeq ($(IS_MIN_VERSION), 1)
|
||||
$(info >>> GCC Version is greater or equal to 5.1.0 <<<)
|
||||
else
|
||||
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
|
||||
else
|
||||
ifeq ($(TARGET_ARCH),x86_64)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
|
||||
ifdef TARGET_OVERRIDE
|
||||
CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(HOST_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
|
||||
endif
|
||||
|
||||
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
|
||||
ifeq ("$(CUDALIB)","")
|
||||
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
else
|
||||
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
|
||||
LIBRARIES += -L$(CUDALIB) -lcuda
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
||||
|
||||
INCLUDES += -I$(CUDA_PATH)/include
|
||||
|
||||
LIBRARIES += -lnvrtc
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
# These targets are commands, not files; declare them phony so a stray file
# named e.g. "build" or "clean" cannot make them appear up to date.
.PHONY: all build check.deps run clean clobber
all: build
|
||||
|
||||
build: BlackScholes_nvrtc
|
||||
|
||||
check.deps:
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
@echo "Sample will be waived due to the above missing dependencies"
|
||||
else
|
||||
@echo "Sample is ready - all dependencies have been met"
|
||||
endif
|
||||
|
||||
BlackScholes.o:BlackScholes.cpp
|
||||
$(EXEC) $(HOST_COMPILER) $(INCLUDES) $(CCFLAGS) $(EXTRA_CCFLAGS) -o $@ -c $<
|
||||
|
||||
BlackScholes_gold.o:BlackScholes_gold.cpp
|
||||
$(EXEC) $(HOST_COMPILER) $(INCLUDES) $(CCFLAGS) $(EXTRA_CCFLAGS) -o $@ -c $<
|
||||
|
||||
BlackScholes_nvrtc: BlackScholes.o BlackScholes_gold.o
|
||||
$(EXEC) $(HOST_COMPILER) $(LDFLAGS) -o $@ $+ $(LIBRARIES)
|
||||
$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
run: build
|
||||
$(EXEC) ./BlackScholes_nvrtc
|
||||
|
||||
clean:
|
||||
rm -f BlackScholes_nvrtc BlackScholes.o BlackScholes_gold.o
|
||||
rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/BlackScholes_nvrtc
|
||||
|
||||
clobber: clean
|
71
Samples/BlackScholes_nvrtc/README.md
Normal file
71
Samples/BlackScholes_nvrtc/README.md
Normal file
|
@ -0,0 +1,71 @@
|
|||
# BlackScholes_nvrtc - Black-Scholes Option Pricing with libNVRTC
|
||||
|
||||
## Description
|
||||
|
||||
This sample evaluates fair call and put prices for a given set of European options by Black-Scholes formula, compiling the CUDA kernels involved at runtime using NVRTC.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Computational Finance, Runtime Compilation
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Linux, Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64, ppc64le, aarch64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[NVRTC](../../README.md#nvrtc)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory.
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details.
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
The samples makefiles can take advantage of certain options:
|
||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, aarch64.
|
||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=aarch64` <br/>
|
||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=g++
|
||||
```
|
||||
|
||||
## References (for more details)
|
||||
|
18
Samples/EGLStream_CUDA_CrossGPU/.vscode/c_cpp_properties.json
vendored
Normal file
18
Samples/EGLStream_CUDA_CrossGPU/.vscode/c_cpp_properties.json
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"${workspaceFolder}/../../Common"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/local/cuda/bin/nvcc",
|
||||
"cStandard": "gnu17",
|
||||
"cppStandard": "gnu++14",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"configurationProvider": "ms-vscode.makefile-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
7
Samples/EGLStream_CUDA_CrossGPU/.vscode/extensions.json
vendored
Normal file
7
Samples/EGLStream_CUDA_CrossGPU/.vscode/extensions.json
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"recommendations": [
|
||||
"nvidia.nsight-vscode-edition",
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.makefile-tools"
|
||||
]
|
||||
}
|
10
Samples/EGLStream_CUDA_CrossGPU/.vscode/launch.json
vendored
Normal file
10
Samples/EGLStream_CUDA_CrossGPU/.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "CUDA C++: Launch",
|
||||
"type": "cuda-gdb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/EGLStream_CUDA_CrossGPU"
|
||||
}
|
||||
]
|
||||
}
|
15
Samples/EGLStream_CUDA_CrossGPU/.vscode/tasks.json
vendored
Normal file
15
Samples/EGLStream_CUDA_CrossGPU/.vscode/tasks.json
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "sample",
|
||||
"type": "shell",
|
||||
"command": "make dbg=1",
|
||||
"problemMatcher": ["$nvcc"],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
453
Samples/EGLStream_CUDA_CrossGPU/Makefile
Normal file
453
Samples/EGLStream_CUDA_CrossGPU/Makefile
Normal file
|
@ -0,0 +1,453 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux platforms
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
|
||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||
ifeq ($(HOST_ARCH),aarch64)
|
||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
||||
HOST_ARCH := sbsa
|
||||
TARGET_ARCH := sbsa
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),sbsa)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||
LDFLAGS += -lsocket
|
||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
||||
ifdef TARGET_OVERRIDE
|
||||
LDFLAGS += -lslog2
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_FS),)
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
||||
CCFLAGS += -I$(TARGET_FS)/../include
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
# This sample is not supported on Mac OSX
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
$(info >>> WARNING - EGLStream_CUDA_CrossGPU is not supported on Mac OSX - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on android
|
||||
ifeq ($(TARGET_OS),android)
|
||||
$(info >>> WARNING - EGLStream_CUDA_CrossGPU is not supported on android - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
# Makefile include to help find EGL Libraries
|
||||
include ./findegl.mk
|
||||
|
||||
# EGL specific libraries
|
||||
ifneq ($(TARGET_OS),darwin)
|
||||
LIBRARIES += -lEGL
|
||||
endif
|
||||
|
||||
#Detect if installed version of GCC supports required C++11
|
||||
ifeq ($(TARGET_OS),linux)
    # Version string reported by the host compiler, e.g. "9.4.0", or just "9"
    # on toolchains that report the major version only.
    GCCVERSIONSTRING := $(shell $(HOST_COMPILER) -dumpversion)

    # Split the string on "." so major and minor can be compared as numbers.
    # (Concatenating the fields into one integer is not a monotonic encoding:
    # the number of digits varies with each component.)
    GCCVERSIONWORDS := $(subst ., ,$(GCCVERSIONSTRING))
    # An empty/unavailable version compares as 0.x and waives the sample.
    GCC_MAJOR := $(or $(word 1,$(GCCVERSIONWORDS)),0)
    # A major-only string carries no minor component; assume ".1" so that a
    # bare "5" still satisfies the 5.1.0 minimum, as it did before.
    GCC_MINOR := $(or $(word 2,$(GCCVERSIONWORDS)),1)

    # 1 when the compiler is >= 5.1, 0 otherwise.
    IS_MIN_VERSION := $(shell expr $(GCC_MAJOR) \> 5 \| \( $(GCC_MAJOR) = 5 \& $(GCC_MINOR) \>= 1 \))

    ifeq ($(IS_MIN_VERSION), 1)
        $(info >>> GCC Version is greater or equal to 5.1.0 <<<)
    else
        $(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
        SAMPLE_ENABLED := 0
    endif
endif
|
||||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
|
||||
else
|
||||
ifeq ($(TARGET_ARCH),x86_64)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
|
||||
ifdef TARGET_OVERRIDE
|
||||
CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(HOST_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
|
||||
endif
|
||||
|
||||
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
|
||||
ifeq ("$(CUDALIB)","")
|
||||
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
else
|
||||
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
|
||||
LIBRARIES += -L$(CUDALIB) -lcuda
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
# These targets are commands, not files. Declaring them phony keeps a stray
# file named e.g. "clean" or "build" from silently disabling the rule.
.PHONY: all build check.deps run clean clobber

# Objects that make up the sample, split by source language.
SAMPLE_CPP_OBJECTS := cuda_consumer.o cuda_producer.o eglstrm_common.o main.o
SAMPLE_CU_OBJECTS := kernel.o
# Link order is kept identical to the original explicit prerequisite list.
SAMPLE_OBJECTS := cuda_consumer.o cuda_producer.o eglstrm_common.o kernel.o main.o

# Default target.
all: build

build: EGLStream_CUDA_CrossGPU

# Report whether the dependency checks above waived the sample.
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Compile each host source with nvcc. When the sample is waived, $(EXEC)
# expands to an echo, so the command is printed instead of executed.
$(SAMPLE_CPP_OBJECTS): %.o: %.cpp
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Compile each CUDA source with nvcc.
$(SAMPLE_CU_OBJECTS): %.o: %.cu
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the sample, then copy the binary into the shared bin tree.
EGLStream_CUDA_CrossGPU: $(SAMPLE_OBJECTS)
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

run: build
	$(EXEC) ./EGLStream_CUDA_CrossGPU

# Remove only the files this Makefile creates.
clean:
	rm -f EGLStream_CUDA_CrossGPU $(SAMPLE_OBJECTS)
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/EGLStream_CUDA_CrossGPU

clobber: clean
|
78
Samples/EGLStream_CUDA_CrossGPU/NsightEclipse.xml
Normal file
78
Samples/EGLStream_CUDA_CrossGPU/NsightEclipse.xml
Normal file
|
@ -0,0 +1,78 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>EGLStream_CUDA_CrossGPU</name>
|
||||
<cuda_api_list>
|
||||
<driver>cuDeviceGet</driver>
|
||||
<driver>cuDeviceGetAttribute</driver>
|
||||
<driver>cuDeviceComputeCapability</driver>
|
||||
<driver>cuDeviceGetCount</driver>
|
||||
<driver>cuDeviceGetName</driver>
|
||||
<driver>cuGraphicsResourceGetMappedEglFrame</driver>
|
||||
<driver>cuEGLStreamConsumerAcquireFrame</driver>
|
||||
<driver>cuEGLStreamConsumerReleaseFrame</driver>
|
||||
<driver>cuEGLStreamProducerReturnFrame</driver>
|
||||
<driver>cuEGLStreamProducerPresentFrame</driver>
|
||||
<driver>cuCtxCreate</driver>
|
||||
<driver>cuMemAlloc</driver>
|
||||
<driver>cuMemFree</driver>
|
||||
<driver>cuMemcpy3D</driver>
|
||||
<driver>cuStreamCreate</driver>
|
||||
<driver>cuCtxPushCurrent</driver>
|
||||
<driver>cuCtxPopCurrent</driver>
|
||||
<driver>cuCtxDestroy</driver>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[Demonstrates CUDA and EGL Streams interop, where consumer's EGL Stream is on one GPU and producer's on other and both consumer-producer are different processes.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../Common</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="basic">EGLStreams Interop</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>EGL Streams</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
<library os="linux">cuda</library>
|
||||
<library framework="true" os="macosx">CUDA</library>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>main.cpp</primary_file>
|
||||
<required_dependencies>
|
||||
<dependency>EGL</dependency>
|
||||
</required_dependencies>
|
||||
<scopes>
|
||||
<scope>1:CUDA Advanced Topics</scope>
|
||||
<scope>2:Graphics Interop</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm35</sm-arch>
|
||||
<sm-arch>sm37</sm-arch>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>arm</arch>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>EGLStream_CUDA_CrossGPU</title>
|
||||
<type>exe</type>
|
||||
</entry>
|
64
Samples/EGLStream_CUDA_CrossGPU/README.md
Normal file
64
Samples/EGLStream_CUDA_CrossGPU/README.md
Normal file
|
@ -0,0 +1,64 @@
|
|||
# EGLStream_CUDA_CrossGPU - EGLStream_CUDA_CrossGPU
|
||||
|
||||
## Description
|
||||
|
||||
Demonstrates CUDA and EGL Streams interop, where the consumer's EGL Stream is on one GPU and the producer's is on another, and the consumer and producer run as separate processes.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
EGLStreams Interop
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Linux
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64, armv7l
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Driver API](http://docs.nvidia.com/cuda/cuda-driver-api/index.html)
|
||||
cuDeviceGet, cuDeviceGetAttribute, cuDeviceComputeCapability, cuDeviceGetCount, cuDeviceGetName, cuGraphicsResourceGetMappedEglFrame, cuEGLStreamConsumerAcquireFrame, cuEGLStreamConsumerReleaseFrame, cuEGLStreamProducerReturnFrame, cuEGLStreamProducerPresentFrame, cuCtxCreate, cuMemAlloc, cuMemFree, cuMemcpy3D, cuStreamCreate, cuCtxPushCurrent, cuCtxPopCurrent, cuCtxDestroy
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[EGL](../../README.md#egl)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
The samples makefiles can take advantage of certain options:
|
||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, armv7l.
|
||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=g++
|
||||
```
|
||||
|
||||
## References (for more details)
|
||||
|
258
Samples/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp
Normal file
258
Samples/EGLStream_CUDA_CrossGPU/cuda_consumer.cpp
Normal file
|
@ -0,0 +1,258 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// DESCRIPTION: Simple CUDA consumer rendering sample app
|
||||
//
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "cuda_consumer.h"
|
||||
#include "eglstrm_common.h"
|
||||
#include <math.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#if defined(EXTENSION_LIST)
|
||||
EXTENSION_LIST(EXTLST_EXTERN)
|
||||
#endif
|
||||
CUgraphicsResource cudaResource;
|
||||
|
||||
static int count_acq = 0;
|
||||
static double acquire_time[25000] = {0}, total_time_acq = 0;
|
||||
|
||||
static int count_rel = 0;
|
||||
static double rel_time[25000] = {0}, total_time_rel = 0;
|
||||
|
||||
void acquireApiStat(void);
|
||||
void acquireApiStat(void) {
|
||||
int i = 0;
|
||||
double min = 10000000, max = 0;
|
||||
double average_launch_time = 0, standard_deviation = 0;
|
||||
if (count_acq == 0) return;
|
||||
// lets compute the standard deviation
|
||||
min = max = acquire_time[1];
|
||||
average_launch_time = (total_time_acq - acquire_time[0]) / count_acq;
|
||||
for (i = 1; i < count_acq; i++) {
|
||||
standard_deviation += (acquire_time[i] - average_launch_time) *
|
||||
(acquire_time[i] - average_launch_time);
|
||||
if (acquire_time[i] < min) min = acquire_time[i];
|
||||
if (acquire_time[i] > max) max = acquire_time[i];
|
||||
}
|
||||
standard_deviation = sqrt(standard_deviation / count_acq);
|
||||
printf("acquire Avg: %lf\n", average_launch_time);
|
||||
printf("acquire SD: %lf\n", standard_deviation);
|
||||
printf("acquire min: %lf\n", min);
|
||||
printf("acquire max: %lf\n", max);
|
||||
|
||||
min = max = rel_time[1];
|
||||
average_launch_time = (total_time_rel - rel_time[0]) / count_rel;
|
||||
for (i = 1; i < count_rel; i++) {
|
||||
standard_deviation += (rel_time[i] - average_launch_time) *
|
||||
(rel_time[i] - average_launch_time);
|
||||
if (rel_time[i] < min) min = rel_time[i];
|
||||
if (rel_time[i] > max) max = rel_time[i];
|
||||
}
|
||||
standard_deviation = sqrt(standard_deviation / count_rel);
|
||||
printf("release Avg: %lf\n", average_launch_time);
|
||||
printf("release SD: %lf\n", standard_deviation);
|
||||
printf("release min: %lf\n", min);
|
||||
printf("release max: %lf\n", max);
|
||||
}
|
||||
// Waits for a new frame on the consumer's EGL stream, acquires it through
// CUDA and runs the consumer filter kernel on it.
//
//   cudaConsumer - consumer state (EGL display/stream, CUDA connection and
//                  stream); must not be NULL
//   frameNumber  - index of the frame, used to derive the expected producer
//                  and consumer data values passed to the filter
//
// Returns CUDA_SUCCESS when the frame was acquired and filtered,
// CUDA_ERROR_INVALID_VALUE for a NULL consumer, CUDA_ERROR_UNKNOWN when the
// stream query fails, the stream is disconnected or the filter fails, or
// the error reported by the CUDA driver call that failed.
CUresult cudaConsumerAcquireFrame(test_cuda_consumer_s *cudaConsumer,
                                  int frameNumber) {
  CUresult cuStatus = CUDA_SUCCESS;
  CUeglFrame cudaEgl;
  struct timespec start, end;
  EGLint streamState = 0;
  double curTime;

  if (!cudaConsumer) {
    printf("%s: Bad parameter\n", __func__);
    // Report the bad argument instead of pretending the acquire succeeded.
    return CUDA_ERROR_INVALID_VALUE;
  }

  // Poll the stream state until the producer has posted a new frame or the
  // stream goes away.
  while (1) {
    if (!eglQueryStreamKHR(cudaConsumer->eglDisplay, cudaConsumer->eglStream,
                           EGL_STREAM_STATE_KHR, &streamState)) {
      printf("Cuda Consumer: eglQueryStreamKHR EGL_STREAM_STATE_KHR failed\n");
      return CUDA_ERROR_UNKNOWN;
    }
    if (streamState == EGL_STREAM_STATE_DISCONNECTED_KHR) {
      printf("Cuda Consumer: EGL_STREAM_STATE_DISCONNECTED_KHR received\n");
      return CUDA_ERROR_UNKNOWN;
    }

    if (streamState == EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR) {
      break;
    }
  }

  if (cudaConsumer->profileAPI) {
    getTime(&start);
  }
  cuStatus =
      cuEGLStreamConsumerAcquireFrame(&(cudaConsumer->cudaConn), &cudaResource,
                                      &cudaConsumer->consCudaStream, 16000);
  if (cudaConsumer->profileAPI) {
    getTime(&end);
    curTime = TIME_DIFF(end, start);
    acquire_time[count_acq++] = curTime;
    // The sample buffer holds 25000 entries; wrap instead of overflowing.
    if (count_acq == 25000) count_acq = 0;
    total_time_acq += curTime;
  }
  if (cuStatus != CUDA_SUCCESS) {
    return cuStatus;
  }

  cuStatus = cuGraphicsResourceGetMappedEglFrame(&cudaEgl, cudaResource, 0, 0);
  if (cuStatus != CUDA_SUCCESS) {
    printf("Cuda get resource failed with %d\n", cuStatus);
    return cuStatus;
  }

  CUdeviceptr pDevPtr = (CUdeviceptr)cudaEgl.frame.pPitch[0];

  cudaError_t err = cudaConsumer_filter(
      cudaConsumer->consCudaStream, (char *)pDevPtr, WIDTH * 4, HEIGHT,
      PROD_DATA + frameNumber, CONS_DATA + frameNumber, frameNumber);
  if (err != cudaSuccess) {
    printf("Cuda Consumer: kernel failed with: %s\n", cudaGetErrorString(err));
    // Propagate the failure; the driver-API status is still CUDA_SUCCESS at
    // this point, so map the runtime error to a generic driver error.
    return CUDA_ERROR_UNKNOWN;
  }

  return cuStatus;
}
|
||||
|
||||
// Releases the frame previously acquired by cudaConsumerAcquireFrame() back
// to the EGL stream.
//
//   cudaConsumer - consumer state; must not be NULL
//   frameNumber  - unused; kept so the signature matches the acquire call
//
// Returns CUDA_ERROR_INVALID_VALUE for a NULL consumer, otherwise the status
// of cuEGLStreamConsumerReleaseFrame().
CUresult cudaConsumerReleaseFrame(test_cuda_consumer_s *cudaConsumer,
                                  int frameNumber) {
  CUresult cuStatus = CUDA_SUCCESS;
  struct timespec start, end;
  double curTime;

  (void)frameNumber;

  if (!cudaConsumer) {
    printf("%s: Bad parameter\n", __func__);
    // Report the bad argument instead of returning CUDA_SUCCESS.
    return CUDA_ERROR_INVALID_VALUE;
  }

  if (cudaConsumer->profileAPI) {
    getTime(&start);
  }
  cuStatus = cuEGLStreamConsumerReleaseFrame(
      &cudaConsumer->cudaConn, cudaResource, &cudaConsumer->consCudaStream);
  if (cudaConsumer->profileAPI) {
    getTime(&end);
    curTime = TIME_DIFF(end, start);
    rel_time[count_rel++] = curTime;
    // The sample buffer holds 25000 entries; wrap instead of overflowing.
    if (count_rel == 25000) count_rel = 0;
    total_time_rel += curTime;
  }

  if (cuStatus != CUDA_SUCCESS) {
    printf("cuEGLStreamConsumerReleaseFrame failed, status:%d\n", cuStatus);
  }

  return cuStatus;
}
|
||||
|
||||
// Initializes the CUDA driver, creates a context on the consumer's device
// (cudaConsumer->cudaDevId) and stores it in cudaConsumer->context. The
// context is popped from the calling thread before returning.
//
// Exits the process with code 2 (EXIT_WAIVED) when the device's compute
// capability major version is below 6.
//
// Returns CUDA_SUCCESS on success, CUDA_ERROR_INVALID_VALUE for a NULL
// consumer, or the status of the driver call that failed. If a device query
// fails after the context was created, the context is destroyed and
// cudaConsumer->context is reset to NULL.
CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer) {
  CUdevice device;
  CUresult status = CUDA_SUCCESS;
  int major = 0, minor = 0;
  char deviceName[256];

  if (!cudaConsumer) {
    printf("%s: Bad parameter\n", __func__);
    return CUDA_ERROR_INVALID_VALUE;
  }

  if (CUDA_SUCCESS != (status = cuInit(0))) {
    printf("Failed to initialize CUDA\n");
    return status;
  }

  if (CUDA_SUCCESS !=
      (status = cuDeviceGet(&device, cudaConsumer->cudaDevId))) {
    printf("failed to get CUDA device\n");
    return status;
  }

  if (CUDA_SUCCESS !=
      (status = cuCtxCreate(&cudaConsumer->context, 0, device))) {
    printf("failed to create CUDA context\n");
    return status;
  }

  // Check every query: a silently failed query would leave major == 0 and
  // trigger a misleading "requires SM 6.0" exit below.
  if (CUDA_SUCCESS !=
          (status = cuDeviceGetAttribute(
               &major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
               device)) ||
      CUDA_SUCCESS !=
          (status = cuDeviceGetAttribute(
               &minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
               device)) ||
      CUDA_SUCCESS !=
          (status = cuDeviceGetName(deviceName, sizeof(deviceName), device))) {
    printf("failed to query CUDA device properties\n");
    // Do not leak the context that was just created.
    cuCtxDestroy(cudaConsumer->context);
    cudaConsumer->context = NULL;
    return status;
  }

  printf(
      "CUDA Consumer on GPU Device %d: \"%s\" with compute capability "
      "%d.%d\n\n",
      device, deviceName, major, minor);

  if (CUDA_SUCCESS != (status = cuCtxPopCurrent(&cudaConsumer->context))) {
    printf("failed to pop CUDA context\n");
    return status;
  }

  if (major < 6) {
    printf(
        "EGLStream_CUDA_CrossGPU requires SM 6.0 or higher arch GPU. "
        "Exiting...\n");
    exit(2);  // EXIT_WAIVED
  }

  return status;
}
|
||||
|
||||
/**
 * Prepares the consumer's host-side resources.
 *
 * Copies args->charCnt into cudaConsumer->charCnt, allocates a host buffer of
 * that many bytes (pCudaCopyMem), creates the consumer CUDA stream, and
 * registers acquireApiStat() to run at process exit.
 *
 * @param cudaConsumer Consumer state to fill in.
 * @param args         Test arguments; only charCnt is read here.
 * @return CUDA_SUCCESS on success, CUDA_ERROR_OUT_OF_MEMORY if the host buffer
 *         cannot be allocated, or the status returned by cuStreamCreate().
 */
CUresult cuda_consumer_init(test_cuda_consumer_s *cudaConsumer,
                            TestArgs *args) {
  CUresult status = CUDA_SUCCESS;
  int bufferSize;

  cudaConsumer->charCnt = args->charCnt;
  bufferSize = args->charCnt;

  cudaConsumer->pCudaCopyMem = (unsigned char *)malloc(bufferSize);
  if (cudaConsumer->pCudaCopyMem == NULL) {
    printf("Cuda Consumer: malloc failed\n");
    // Report the failure; this path used to return CUDA_SUCCESS, unlike
    // cudaProducerInit(), which returns an error when its malloc fails.
    status = CUDA_ERROR_OUT_OF_MEMORY;
    goto done;
  }

  status = cuStreamCreate(&cudaConsumer->consCudaStream, 0);
  if (status != CUDA_SUCCESS) {
    printf("Cuda Consumer: cuStreamCreate failed, status:%d\n", status);
    // pCudaCopyMem is left allocated here; cuda_consumer_Deinit() frees it.
    goto done;
  }

  atexit(acquireApiStat);

done:
  return status;
}
|
||||
|
||||
/**
 * Releases the consumer's host buffer and disconnects it from the EGLStream.
 *
 * @param cudaConsumer Consumer state previously set up by cuda_consumer_init().
 * @return The status returned by cuEGLStreamConsumerDisconnect().
 */
CUresult cuda_consumer_Deinit(test_cuda_consumer_s *cudaConsumer) {
  // free(NULL) is a no-op, so no guard is needed. Clearing the pointer makes
  // a repeated Deinit harmless instead of a double free.
  free(cudaConsumer->pCudaCopyMem);
  cudaConsumer->pCudaCopyMem = NULL;

  return cuEGLStreamConsumerDisconnect(&cudaConsumer->cudaConn);
}
|
66
Samples/EGLStream_CUDA_CrossGPU/cuda_consumer.h
Normal file
66
Samples/EGLStream_CUDA_CrossGPU/cuda_consumer.h
Normal file
|
@ -0,0 +1,66 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// DESCRIPTION: CUDA consumer header file
|
||||
//
|
||||
|
||||
#ifndef _CUDA_CONSUMER_H_
|
||||
#define _CUDA_CONSUMER_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "cudaEGL.h"
|
||||
#include "eglstrm_common.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda.h>
|
||||
|
||||
// State of the CUDA EGLStream consumer used by the functions in
// cuda_consumer.cpp.
typedef struct _test_cuda_consumer_s {
|
||||
CUcontext context;  // written by cuCtxCreate()/cuCtxPopCurrent() in cudaDeviceCreateConsumer()
|
||||
CUeglStreamConnection cudaConn;  // connection handle passed to the cuEGLStreamConsumer* calls
|
||||
int cudaDevId;  // device ordinal passed to cuDeviceGet()
|
||||
EGLDisplay eglDisplay;  // NOTE(review): not referenced by the visible consumer code; presumably the consumer's EGL display - confirm
|
||||
EGLStreamKHR eglStream;  // NOTE(review): not referenced by the visible consumer code; presumably the consumer's EGL stream - confirm
|
||||
unsigned int charCnt;  // copied from args->charCnt in cuda_consumer_init(); also the size used for pCudaCopyMem
|
||||
char *cudaBuf;  // NOTE(review): purpose not evident from the visible code - confirm
|
||||
bool profileAPI;  // when true, cudaConsumerReleaseFrame() records call timings
|
||||
unsigned char *pCudaCopyMem;  // host buffer malloc'd in cuda_consumer_init(), freed in cuda_consumer_Deinit()
|
||||
CUstream consCudaStream;  // stream created by cuStreamCreate() in cuda_consumer_init()
|
||||
} test_cuda_consumer_s;
|
||||
|
||||
CUresult cuda_consumer_init(test_cuda_consumer_s *cudaConsumer, TestArgs *args);
|
||||
CUresult cuda_consumer_Deinit(test_cuda_consumer_s *cudaConsumer);
|
||||
CUresult cudaConsumerAcquireFrame(test_cuda_consumer_s *data, int frameNumber);
|
||||
CUresult cudaConsumerReleaseFrame(test_cuda_consumer_s *data, int frameNumber);
|
||||
CUresult cudaDeviceCreateConsumer(test_cuda_consumer_s *cudaConsumer);
|
||||
cudaError_t cudaConsumer_filter(CUstream cStream, char *pSrc, int width,
|
||||
int height, char expectedVal, char newVal,
|
||||
int frameNumber);
|
||||
cudaError_t cudaGetValueMismatch(void);
|
||||
|
||||
#endif
|
288
Samples/EGLStream_CUDA_CrossGPU/cuda_producer.cpp
Normal file
288
Samples/EGLStream_CUDA_CrossGPU/cuda_producer.cpp
Normal file
|
@ -0,0 +1,288 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// DESCRIPTION: Simple cuda EGL stream producer app
|
||||
//
|
||||
|
||||
#include "cudaEGL.h"
|
||||
#include "cuda_producer.h"
|
||||
#include "eglstrm_common.h"
|
||||
#include <cuda_runtime.h>
|
||||
#if defined(EXTENSION_LIST)
|
||||
EXTENSION_LIST(EXTLST_EXTERN)
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <string.h>
|
||||
#include "cuda_runtime.h"
|
||||
#include "math.h"
|
||||
|
||||
int cudaPresentReturnData = INIT_DATA;
|
||||
int fakePresent = 0;
|
||||
CUeglFrame fakeFrame;
|
||||
CUdeviceptr cudaPtrFake;
|
||||
extern bool isCrossDevice;
|
||||
|
||||
/**
 * Fills a CUeglFrame descriptor for a single-plane pitch-linear ARGB frame of
 * WIDTH x HEIGHT pixels backed by the given device pointer.
 *
 * @param cudaEgl    Frame descriptor to populate.
 * @param cudaPtr    Device pointer stored as plane 0 of the frame.
 * @param bufferSize Not used by this function.
 */
void cudaProducerPrepareFrame(CUeglFrame *cudaEgl, CUdeviceptr cudaPtr,
                              int bufferSize) {
  (void)bufferSize;

  // Backing storage: one pitch-linear plane.
  cudaEgl->frameType = CU_EGL_FRAME_TYPE_PITCH;
  cudaEgl->planeCount = 1;
  cudaEgl->frame.pPitch[0] = (void *)cudaPtr;

  // Geometry: 4 bytes per pixel, so one row is WIDTH * 4 bytes.
  cudaEgl->width = WIDTH;
  cudaEgl->height = HEIGHT;
  cudaEgl->depth = 0;
  cudaEgl->pitch = WIDTH * 4;

  // Pixel format: four unsigned 8-bit channels, ARGB.
  cudaEgl->numChannels = 4;
  cudaEgl->cuFormat = CU_AD_FORMAT_UNSIGNED_INT8;
  cudaEgl->eglColorFormat = CU_EGL_COLOR_FORMAT_ARGB;
}
|
||||
|
||||
// Timing samples recorded by cudaProducerPresentFrame() and
// cudaProducerReturnFrame(). The count_* write indices wrap to 0 after 25000
// samples while the total_* sums keep growing, so the totals can cover more
// samples than the arrays currently describe.
static int count_present = 0, count_return = 0;
static double present_time[25000] = {0}, total_time_present = 0;
static double return_time[25000] = {0}, total_time_return = 0;

// Prints mean, population standard deviation, minimum and maximum of
// samples[first .. count-1] as "<label> Avg/SD/min/max" lines. Prints nothing
// when that range is empty.
static void printApiStat(const char *label, const double *samples, int first,
                         int count) {
  const int n = count - first;
  if (n <= 0) return;

  double sum = 0;
  double lo = samples[first];
  double hi = samples[first];
  for (int i = first; i < count; i++) {
    sum += samples[i];
    if (samples[i] < lo) lo = samples[i];
    if (samples[i] > hi) hi = samples[i];
  }
  const double mean = sum / n;

  // Fresh accumulator per call, so one series never leaks into the next.
  double sqDiff = 0;
  for (int i = first; i < count; i++) {
    sqDiff += (samples[i] - mean) * (samples[i] - mean);
  }

  printf("%s Avg: %lf\n", label, mean);
  printf("%s SD: %lf\n", label, sqrt(sqDiff / n));
  printf("%s min: %lf\n", label, lo);
  printf("%s max: %lf\n", label, hi);
}

void presentApiStat(void);

/**
 * Prints timing statistics for the present and return calls; registered with
 * atexit() by cudaProducerInit().
 *
 * Every figure is computed from the stored samples, with the same sample
 * range used for the mean, the deviation and the min/max. This fixes three
 * faults in the earlier version: the deviation accumulator was carried over
 * from the present series into the return series, the divisors did not match
 * the samples actually summed, and index 1 was read even when only one sample
 * existed.
 *
 * Nothing is printed when no present sample was recorded.
 */
void presentApiStat(void) {
  if (count_present == 0) return;

  // cudaProducerPresentFrame() only starts recording after its first ten
  // calls, so every stored present sample is used.
  printApiStat("present", present_time, 0, count_present);

  // The first return sample is left out, as before (the earlier code
  // subtracted return_time[0] from the total and started its loop at 1).
  printApiStat("return", return_time, 1, count_return);
}
|
||||
/**
 * Runs the producer filter kernel on the frame's plane-0 buffer and presents
 * the frame to the EGLStream.
 *
 * When cudaProducer->profileAPI is set, the duration of the present call is
 * recorded in present_time[] / total_time_present, but only once more than
 * ten presents have succeeded.
 *
 * @param cudaProducer Producer state (stream, connection, profiling flag).
 * @param cudaEgl      Frame to present; plane 0 must hold the device pointer.
 * @param t            Frame index, forwarded to the filter and added to
 *                     PROD_DATA.
 * @return The status of cuEGLStreamProducerPresentFrame().
 */
CUresult cudaProducerPresentFrame(test_cuda_producer_s *cudaProducer,
                                  CUeglFrame cudaEgl, int t) {
  // Number of successful presents so far; persists across calls.
  static int presentCalls = 0;
  struct timespec start, end;

  const CUdeviceptr framePtr = (CUdeviceptr)cudaEgl.frame.pPitch[0];
  cudaProducer_filter(cudaProducer->prodCudaStream, (char *)framePtr,
                      WIDTH * 4, HEIGHT, cudaPresentReturnData, PROD_DATA + t,
                      t);

  if (cudaProducer->profileAPI) {
    getTime(&start);
  }

  const CUresult status = cuEGLStreamProducerPresentFrame(
      &cudaProducer->cudaConn, cudaEgl, &cudaProducer->prodCudaStream);
  if (status != CUDA_SUCCESS) {
    printf("Cuda Producer: Present frame failed, status:%d\n", status);
    return status;
  }

  presentCalls++;
  if (cudaProducer->profileAPI && presentCalls > 10) {
    getTime(&end);
    const double elapsed = TIME_DIFF(end, start);
    present_time[count_present++] = elapsed;
    if (count_present == 25000) count_present = 0;
    total_time_present += elapsed;
  }

  return status;
}
|
||||
|
||||
int flag = 0;
|
||||
/**
 * Waits for a frame to come back from the EGLStream, then runs the producer
 * filter kernel on the caller's buffer.
 *
 * cuEGLStreamProducerReturnFrame() is retried for as long as it reports
 * CUDA_ERROR_LAUNCH_TIMEOUT. When cudaProducer->profileAPI is set, the time
 * spent in that retry loop is recorded in return_time[] / total_time_return.
 * On every other call (even values of the file-level counter `flag`)
 * cudaPresentReturnData is incremented before the filter runs.
 *
 * @param cudaProducer Producer state (stream, connection, profiling flag).
 * @param cudaEgl      Frame whose plane-0 pointer is filtered afterwards.
 * @param t            Frame index, forwarded to the filter and added to
 *                     CONS_DATA.
 * @return The final status of cuEGLStreamProducerReturnFrame().
 */
CUresult cudaProducerReturnFrame(test_cuda_producer_s *cudaProducer,
                                 CUeglFrame cudaEgl, int t) {
  struct timespec start, end;

  // Read the pointer from the caller's frame before cudaEgl is handed to
  // cuEGLStreamProducerReturnFrame() by address.
  const CUdeviceptr framePtr = (CUdeviceptr)cudaEgl.frame.pPitch[0];

  if (cudaProducer->profileAPI) {
    getTime(&start);
  }

  CUresult status;
  do {
    status = cuEGLStreamProducerReturnFrame(&cudaProducer->cudaConn, &cudaEgl,
                                            &cudaProducer->prodCudaStream);
  } while (status == CUDA_ERROR_LAUNCH_TIMEOUT);

  if (status != CUDA_SUCCESS) {
    printf("Cuda Producer: Return frame failed, status:%d\n", status);
    return status;
  }

  if (cudaProducer->profileAPI) {
    getTime(&end);
    const double elapsed = TIME_DIFF(end, start);
    return_time[count_return++] = elapsed;
    if (count_return == 25000) count_return = 0;
    total_time_return += elapsed;
  }

  if (flag % 2 == 0) {
    cudaPresentReturnData++;
  }
  cudaProducer_filter(cudaProducer->prodCudaStream, (char *)framePtr,
                      WIDTH * 4, HEIGHT, CONS_DATA + t, cudaPresentReturnData,
                      t);
  flag++;

  return status;
}
|
||||
|
||||
/**
 * Sets up the CUDA driver state for the producer.
 *
 * Calls cuInit(), looks up the device numbered cudaProducer->cudaDevId,
 * creates a context on it (stored in cudaProducer->context), prints the
 * device name and compute capability, and pops the context again.
 *
 * The process exits with code 2 when the reported compute-capability major
 * version is below 6.
 *
 * @param cudaProducer Producer state; cudaDevId is read, context is written.
 * @return CUDA_SUCCESS, or the status of the first of cuInit / cuDeviceGet /
 *         cuCtxCreate that failed.
 */
CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer) {
  CUdevice dev;
  CUresult rc;
  int ccMajor = 0;
  int ccMinor = 0;
  char devName[256];

  rc = cuInit(0);
  if (rc != CUDA_SUCCESS) {
    printf("Failed to initialize CUDA\n");
    return rc;
  }

  rc = cuDeviceGet(&dev, cudaProducer->cudaDevId);
  if (rc != CUDA_SUCCESS) {
    printf("failed to get CUDA device\n");
    return rc;
  }

  rc = cuCtxCreate(&cudaProducer->context, 0, dev);
  if (rc != CUDA_SUCCESS) {
    printf("failed to create CUDA context\n");
    return rc;
  }

  // The results of these informational queries are not checked.
  cuDeviceGetAttribute(&ccMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
                       dev);
  cuDeviceGetAttribute(&ccMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
                       dev);
  cuDeviceGetName(devName, 256, dev);
  printf(
      "CUDA Producer on GPU Device %d: \"%s\" with compute capability "
      "%d.%d\n\n",
      dev, devName, ccMajor, ccMinor);

  cuCtxPopCurrent(&cudaProducer->context);

  if (ccMajor < 6) {
    printf(
        "EGLStream_CUDA_CrossGPU requires SM 6.0 or higher arch GPU. "
        "Exiting...\n");
    exit(2);  // EXIT_WAIVED
  }

  return rc;
}
|
||||
|
||||
// Allocates `size` bytes of device memory into *devPtr and copies `size`
// bytes from hostSrc into it. Logs and returns the first failing status,
// otherwise returns CUDA_SUCCESS.
static CUresult cudaProducerAllocAndUpload(CUdeviceptr *devPtr, void *hostSrc,
                                           int size) {
  CUresult rc = cuMemAlloc(devPtr, size);
  if (rc != CUDA_SUCCESS) {
    printf("Cuda Producer: cuda Malloc failed, status:%d\n", rc);
    return rc;
  }

  rc = cuMemcpyHtoD(*devPtr, hostSrc, size);
  if (rc != CUDA_SUCCESS) {
    printf("Cuda Producer: cuMemCpy failed, status:%d\n", rc);
  }
  return rc;
}

/**
 * Allocates and initializes the producer's buffers and stream.
 *
 * Steps, in order: copy args->charCnt into cudaProducer->charCnt; malloc a
 * host staging buffer of that size and fill it with INIT_DATA; allocate the
 * two device buffers cudaPtr and cudaPtr1 and upload the staging buffer into
 * each; create the producer stream; allocate the 100-byte cudaPtrFake buffer;
 * register presentApiStat() with atexit().
 *
 * Resources acquired before a failing step are not released here.
 *
 * @param cudaProducer Producer state to fill in.
 * @param args         Test arguments; only charCnt is read here.
 * @return CUDA_SUCCESS, CUDA_ERROR_UNKNOWN if the host malloc fails, or the
 *         status of the first CUDA call that failed.
 */
CUresult cudaProducerInit(test_cuda_producer_s *cudaProducer, TestArgs *args) {
  CUresult status = CUDA_SUCCESS;

  cudaProducer->charCnt = args->charCnt;
  const int bufferSize = cudaProducer->charCnt;

  // Host staging buffer holding the initial frame contents.
  cudaProducer->tempBuff = (char *)malloc(bufferSize);
  if (!cudaProducer->tempBuff) {
    printf("Cuda Producer: Failed to allocate image buffer\n");
    return CUDA_ERROR_UNKNOWN;
  }
  memset((void *)cudaProducer->tempBuff, INIT_DATA, cudaProducer->charCnt);

  // Both device frame buffers start out as copies of the staging buffer.
  status = cudaProducerAllocAndUpload(&cudaProducer->cudaPtr,
                                      (void *)(cudaProducer->tempBuff),
                                      bufferSize);
  if (status != CUDA_SUCCESS) {
    return status;
  }

  status = cudaProducerAllocAndUpload(&cudaProducer->cudaPtr1,
                                      (void *)(cudaProducer->tempBuff),
                                      bufferSize);
  if (status != CUDA_SUCCESS) {
    return status;
  }

  status = cuStreamCreate(&cudaProducer->prodCudaStream, 0);
  if (status != CUDA_SUCCESS) {
    printf("Cuda Producer: cuStreamCreate failed, status:%d\n", status);
    return status;
  }

  status = cuMemAlloc(&cudaPtrFake, 100);
  if (status != CUDA_SUCCESS) {
    printf("Cuda Producer: cuda Malloc failed, status:%d\n", status);
    return status;
  }

  atexit(presentApiStat);
  return status;
}
|
||||
|
||||
/**
 * Releases the buffers allocated by cudaProducerInit() and disconnects the
 * producer from the EGLStream.
 *
 * Besides tempBuff and cudaPtr, this also frees cudaPtr1 and the file-level
 * cudaPtrFake buffer, which cudaProducerInit() allocates but which were
 * previously never released. Every handle is cleared after release so a
 * repeated call does not free it twice.
 *
 * NOTE(review): prodCudaStream is still not destroyed here; whether it may be
 * destroyed before the disconnect depends on driver rules not visible in
 * this file - confirm before adding cuStreamDestroy().
 *
 * @param cudaProducer Producer state previously set up by cudaProducerInit().
 * @return The status returned by cuEGLStreamProducerDisconnect().
 */
CUresult cudaProducerDeinit(test_cuda_producer_s *cudaProducer) {
  // free(NULL) is a no-op, so no guard is needed.
  free(cudaProducer->tempBuff);
  cudaProducer->tempBuff = NULL;

  if (cudaProducer->cudaPtr) {
    cuMemFree(cudaProducer->cudaPtr);
    cudaProducer->cudaPtr = 0;
  }
  if (cudaProducer->cudaPtr1) {
    cuMemFree(cudaProducer->cudaPtr1);
    cudaProducer->cudaPtr1 = 0;
  }
  if (cudaPtrFake) {
    cuMemFree(cudaPtrFake);
    cudaPtrFake = 0;
  }

  return cuEGLStreamProducerDisconnect(&cudaProducer->cudaConn);
}
|
68
Samples/EGLStream_CUDA_CrossGPU/cuda_producer.h
Normal file
68
Samples/EGLStream_CUDA_CrossGPU/cuda_producer.h
Normal file
|
@ -0,0 +1,68 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// DESCRIPTION: Simple cuda producer header file
|
||||
//
|
||||
|
||||
#ifndef _CUDA_PRODUCER_H_
|
||||
#define _CUDA_PRODUCER_H_
|
||||
#include <EGL/egl.h>
|
||||
#include <EGL/eglext.h>
|
||||
#include "cudaEGL.h"
|
||||
#include "eglstrm_common.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda.h>
|
||||
|
||||
// State of the CUDA EGLStream producer used by the functions in
// cuda_producer.cpp.
typedef struct _test_cuda_producer_s {
|
||||
// Stream params
|
||||
CUcontext context;  // written by cuCtxCreate()/cuCtxPopCurrent() in cudaDeviceCreateProducer()
|
||||
CUeglStreamConnection cudaConn;  // connection handle passed to the cuEGLStreamProducer* calls
|
||||
int cudaDevId;  // device ordinal passed to cuDeviceGet()
|
||||
EGLStreamKHR eglStream;  // NOTE(review): not referenced by the visible producer code; presumably the producer's EGL stream - confirm
|
||||
EGLDisplay eglDisplay;  // NOTE(review): not referenced by the visible producer code; presumably the producer's EGL display - confirm
|
||||
unsigned int charCnt;  // copied from args->charCnt in cudaProducerInit(); size in bytes of tempBuff, cudaPtr and cudaPtr1
|
||||
bool profileAPI;  // when true, present/return call timings are recorded
|
||||
char *tempBuff;  // host staging buffer filled with INIT_DATA in cudaProducerInit()
|
||||
CUdeviceptr cudaPtr;  // first device buffer, initialized from tempBuff
|
||||
CUdeviceptr cudaPtr1;  // second device buffer, initialized from tempBuff
|
||||
CUstream prodCudaStream;  // stream created by cuStreamCreate() in cudaProducerInit()
|
||||
} test_cuda_producer_s;
|
||||
|
||||
CUresult cudaProducerInit(test_cuda_producer_s *cudaProducer, TestArgs *args);
|
||||
CUresult cudaProducerPresentFrame(test_cuda_producer_s *parserArg,
|
||||
CUeglFrame cudaEgl, int t);
|
||||
CUresult cudaProducerReturnFrame(test_cuda_producer_s *parserArg,
|
||||
CUeglFrame cudaEgl, int t);
|
||||
CUresult cudaProducerDeinit(test_cuda_producer_s *cudaProducer);
|
||||
CUresult cudaDeviceCreateProducer(test_cuda_producer_s *cudaProducer);
|
||||
cudaError_t cudaProducer_filter(CUstream cStream, char *pSrc, int width,
|
||||
int height, char expectedVal, char newVal,
|
||||
int frameNumber);
|
||||
void cudaProducerPrepareFrame(CUeglFrame *cudaEgl, CUdeviceptr cudaPtr,
|
||||
int bufferSize);
|
||||
#endif
|
423
Samples/EGLStream_CUDA_CrossGPU/eglstrm_common.cpp
Normal file
423
Samples/EGLStream_CUDA_CrossGPU/eglstrm_common.cpp
Normal file
|
@ -0,0 +1,423 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// DESCRIPTION: Common egl stream functions
|
||||
//
|
||||
|
||||
#include "eglstrm_common.h"
|
||||
|
||||
EGLStreamKHR g_producerEglStream = EGL_NO_STREAM_KHR;
|
||||
EGLStreamKHR g_consumerEglStream = EGL_NO_STREAM_KHR;
|
||||
EGLDisplay g_producerEglDisplay = EGL_NO_DISPLAY;
|
||||
EGLDisplay g_consumerEglDisplay = EGL_NO_DISPLAY;
|
||||
int cudaDevIndexProd = -1;
|
||||
int cudaDevIndexCons = -1;
|
||||
|
||||
#if defined(EXTENSION_LIST)
|
||||
EXTENSION_LIST(EXTLST_DECL)
|
||||
// Generic function-pointer type used to store addresses returned by
// eglGetProcAddress().
typedef void (*extlst_fnptr_t)(void);
|
||||
// Table of EGL extension entry points, generated from EXTENSION_LIST.
// eglSetupExtensions() walks it and fills in each *fnptr.
static struct {
|
||||
extlst_fnptr_t *fnptr;  // where the resolved address is stored
|
||||
char const *name;  // entry-point name passed to eglGetProcAddress()
|
||||
bool is_dgpu;  // true when this function is needed only in the dGPU (cross-device) case
|
||||
} extensionList[] = {EXTENSION_LIST(EXTLST_ENTRY)};
|
||||
|
||||
int eglSetupExtensions(bool isCrossDevice) {
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < (sizeof(extensionList) / sizeof(*extensionList)); i++) {
|
||||
// load the dgpu function only if we are running cross device test
|
||||
if ((!extensionList[i].is_dgpu) ||
|
||||
(extensionList[i].is_dgpu == isCrossDevice)) {
|
||||
*extensionList[i].fnptr = eglGetProcAddress(extensionList[i].name);
|
||||
if (*extensionList[i].fnptr == NULL) {
|
||||
printf("Couldn't get address of %s()\n", extensionList[i].name);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int EGLStreamInit(bool isCrossDevice, int isConsumer,
|
||||
EGLNativeFileDescriptorKHR fileDesc) {
|
||||
static const EGLint streamAttrFIFOMode[] = {
|
||||
EGL_STREAM_FIFO_LENGTH_KHR, 5, EGL_SUPPORT_REUSE_NV, EGL_FALSE, EGL_NONE};
|
||||
EGLDisplay eglDisplay[2] = {0};
|
||||
EGLStreamKHR eglStream[2] = {0};
|
||||
EGLBoolean eglStatus;
|
||||
|
||||
#define MAX_EGL_DEVICES 4
|
||||
|
||||
EGLDeviceEXT devices[MAX_EGL_DEVICES];
|
||||
EGLint numDevices = 0;
|
||||
|
||||
eglStatus = eglQueryDevicesEXT(MAX_EGL_DEVICES, devices, &numDevices);
|
||||
if (eglStatus != EGL_TRUE) {
|
||||
printf("Error querying EGL devices\n");
|
||||
goto Done;
|
||||
}
|
||||
|
||||
if (numDevices == 0) {
|
||||
printf("No EGL devices found\n");
|
||||
eglStatus = EGL_FALSE;
|
||||
goto Done;
|
||||
}
|
||||
|
||||
// If cross device, create discrete GPU stream first and then create the
|
||||
// integrated GPU stream to connect to it via fd. The other way round fails
|
||||
// in producer connect.
|
||||
//
|
||||
// TODO: Find out if this EGL behavior is by design.
|
||||
if (isConsumer) {
|
||||
int egl_device_id = 0;
|
||||
for (egl_device_id = 0; egl_device_id < numDevices; egl_device_id++) {
|
||||
EGLAttrib cuda_device;
|
||||
eglStatus = eglQueryDeviceAttribEXT(devices[egl_device_id],
|
||||
EGL_CUDA_DEVICE_NV, &cuda_device);
|
||||
if (eglStatus == EGL_TRUE) {
|
||||
cudaDevIndexCons = cuda_device; // We select first EGL-CUDA Capable
|
||||
// device for consumer.
|
||||
printf(
|
||||
"Found EGL-CUDA Capable device with CUDA Device id = %d out of "
|
||||
"egl_device_id = %d\n",
|
||||
(int)cudaDevIndexCons, egl_device_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (egl_device_id >= numDevices) {
|
||||
printf("No CUDA Capable EGL Device found.. Waiving execution\n");
|
||||
goto Done;
|
||||
}
|
||||
|
||||
g_consumerEglDisplay = eglGetPlatformDisplayEXT(
|
||||
EGL_PLATFORM_DEVICE_EXT, (void *)devices[egl_device_id], NULL);
|
||||
if (g_consumerEglDisplay == EGL_NO_DISPLAY) {
|
||||
printf("Could not get EGL display from device. \n");
|
||||
eglStatus = EGL_FALSE;
|
||||
goto Done;
|
||||
}
|
||||
|
||||
eglStatus = eglInitialize(g_consumerEglDisplay, 0, 0);
|
||||
if (!eglStatus) {
|
||||
printf("EGL failed to initialize. \n");
|
||||
eglStatus = EGL_FALSE;
|
||||
goto Done;
|
||||
}
|
||||
|
||||
g_consumerEglStream =
|
||||
eglCreateStreamKHR(g_consumerEglDisplay, streamAttrFIFOMode);
|
||||
if (g_consumerEglStream == EGL_NO_STREAM_KHR) {
|
||||
printf("Could not create EGL stream.\n");
|
||||
eglStatus = EGL_FALSE;
|
||||
goto Done;
|
||||
}
|
||||
|
||||
eglStatus = eglStreamAttribKHR(g_consumerEglDisplay, g_consumerEglStream,
|
||||
EGL_CONSUMER_LATENCY_USEC_KHR, 16000);
|
||||
if (eglStatus != EGL_TRUE) {
|
||||
printf("eglStreamAttribKHR EGL_CONSUMER_LATENCY_USEC_KHR failed\n");
|
||||
goto Done;
|
||||
}
|
||||
|
||||
eglStatus =
|
||||
eglStreamAttribKHR(g_consumerEglDisplay, g_consumerEglStream,
|
||||
EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR, 16000);
|
||||
if (eglStatus != EGL_TRUE) {
|
||||
printf(
|
||||
"eglStreamAttribKHR EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR failed\n");
|
||||
goto Done;
|
||||
}
|
||||
}
|
||||
|
||||
if (!isConsumer) { // Producer
|
||||
|
||||
if (fileDesc == EGL_NO_FILE_DESCRIPTOR_KHR) {
|
||||
printf("Cuda Producer received bad file descriptor\n");
|
||||
eglStatus = EGL_FALSE;
|
||||
goto Done;
|
||||
}
|
||||
|
||||
int egl_device_id = 0;
|
||||
int egl_cuda_devices = 0;
|
||||
for (egl_device_id = 0; egl_device_id < numDevices; egl_device_id++) {
|
||||
EGLAttrib cuda_device = -1;
|
||||
eglStatus = eglQueryDeviceAttribEXT(devices[egl_device_id],
|
||||
EGL_CUDA_DEVICE_NV, &cuda_device);
|
||||
if (eglStatus == EGL_TRUE) {
|
||||
egl_cuda_devices++;
|
||||
if (isCrossDevice && (egl_cuda_devices > 1)) {
|
||||
// We select second EGL-CUDA Capable device for producer.
|
||||
cudaDevIndexProd = (int)cuda_device;
|
||||
printf(
|
||||
"Found EGL-CUDA Capable device with CUDA Device id = %d "
|
||||
"egl_device_id = %d \n",
|
||||
(int)cudaDevIndexProd, egl_device_id);
|
||||
break;
|
||||
}
|
||||
if (!isCrossDevice) {
|
||||
// We select first EGL-CUDA Capable device for producer same as
|
||||
// consumer.
|
||||
cudaDevIndexProd = (int)cuda_device;
|
||||
printf(
|
||||
"Found EGL-CUDA Capable device with CUDA Device id = %d "
|
||||
"egl_device_id = %d \n",
|
||||
(int)cudaDevIndexProd, egl_device_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (egl_device_id >= numDevices) {
|
||||
printf("No CUDA Capable EGL Device found.. Waiving execution\n");
|
||||
goto Done;
|
||||
}
|
||||
|
||||
g_producerEglDisplay = eglGetPlatformDisplayEXT(
|
||||
EGL_PLATFORM_DEVICE_EXT, (void *)devices[egl_device_id], NULL);
|
||||
if (g_producerEglDisplay == EGL_NO_DISPLAY) {
|
||||
printf("Could not get EGL display from device. \n");
|
||||
eglStatus = EGL_FALSE;
|
||||
goto Done;
|
||||
}
|
||||
|
||||
eglStatus = eglInitialize(g_producerEglDisplay, 0, 0);
|
||||
if (!eglStatus) {
|
||||
printf("EGL failed to initialize. \n");
|
||||
eglStatus = EGL_FALSE;
|
||||
goto Done;
|
||||
}
|
||||
|
||||
g_producerEglStream =
|
||||
eglCreateStreamFromFileDescriptorKHR(g_producerEglDisplay, fileDesc);
|
||||
close(fileDesc);
|
||||
|
||||
if (g_producerEglStream == EGL_NO_STREAM_KHR) {
|
||||
printf("CUDA Producer Could not create EGL stream.\n");
|
||||
eglStatus = EGL_FALSE;
|
||||
goto Done;
|
||||
} else {
|
||||
printf("Producer created EGLStream for the GPU.\n");
|
||||
}
|
||||
}
|
||||
|
||||
Done:
|
||||
return eglStatus == EGL_TRUE ? 1 : 0;
|
||||
}
|
||||
|
||||
void EGLStreamFini(void) {
|
||||
if (g_producerEglStream != EGL_NO_STREAM_KHR) {
|
||||
eglDestroyStreamKHR(g_producerEglDisplay, g_producerEglStream);
|
||||
}
|
||||
if (g_consumerEglStream != g_producerEglStream) {
|
||||
if (g_consumerEglStream != EGL_NO_STREAM_KHR) {
|
||||
eglDestroyStreamKHR(g_consumerEglDisplay, g_consumerEglStream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int UnixSocketConnect(const char *socket_name) {
|
||||
int sock_fd = -1;
|
||||
struct sockaddr_un sock_addr;
|
||||
int wait_loop = 0;
|
||||
|
||||
sock_fd = socket(PF_UNIX, SOCK_STREAM, 0);
|
||||
if (sock_fd < 0) {
|
||||
printf("%s: socket create failed.\n", __func__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (verbose) printf("%s: send_fd: sock_fd: %d\n", __func__, sock_fd);
|
||||
|
||||
memset(&sock_addr, 0, sizeof(struct sockaddr_un));
|
||||
sock_addr.sun_family = AF_UNIX;
|
||||
strncpy(sock_addr.sun_path, socket_name, sizeof(sock_addr.sun_path) - 1);
|
||||
|
||||
while (connect(sock_fd, (const struct sockaddr *)&sock_addr,
|
||||
sizeof(struct sockaddr_un))) {
|
||||
if (wait_loop < 60) {
|
||||
if (!wait_loop)
|
||||
printf("Waiting for EGL stream producer ");
|
||||
else
|
||||
printf(".");
|
||||
fflush(stdout);
|
||||
sleep(1);
|
||||
wait_loop++;
|
||||
} else {
|
||||
printf("\n%s: Waiting timed out\n", __func__);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (wait_loop) printf("\n");
|
||||
|
||||
if (verbose) printf("%s: Wait is done\n", __func__);
|
||||
|
||||
return sock_fd;
|
||||
}
|
||||
|
||||
/* Send <fd_to_send> (a file descriptor) to another process over the      */
/* already-connected unix domain socket <send_fd>. The descriptor travels */
/* as an SCM_RIGHTS control message attached to a one-byte payload.       */
/* Returns 0 on success, -1 if sendmsg() fails.                           */
int EGLStreamSendfd(int send_fd, int fd_to_send) {
  char payload = 'x';  // at least one data byte must be sent; value ignored
  char control[CMSG_SPACE(sizeof(int))];
  struct iovec vec;
  struct msghdr hdr;

  memset(&hdr, 0, sizeof(hdr));
  memset(control, 0, sizeof(control));

  vec.iov_base = &payload;
  vec.iov_len = 1;
  hdr.msg_iov = &vec;
  hdr.msg_iovlen = 1;
  hdr.msg_control = control;
  hdr.msg_controllen = sizeof(control);

  // Single control message carrying the descriptor.
  struct cmsghdr *rights = CMSG_FIRSTHDR(&hdr);
  rights->cmsg_level = SOL_SOCKET;
  rights->cmsg_type = SCM_RIGHTS;
  rights->cmsg_len = CMSG_LEN(sizeof(int));
  memcpy(CMSG_DATA(rights), &fd_to_send, sizeof(int));

  // Advertise only the bytes actually used by the control message.
  hdr.msg_controllen = rights->cmsg_len;

  if (sendmsg(send_fd, &hdr, 0) <= 0) {
    printf("%s: sendmsg failed", __func__);
    return -1;
  }
  return 0;
}
|
||||
|
||||
/* Listen on a unix domain socket named <socket_name>. */
|
||||
/* Connect to it and return connect_fd */
|
||||
int UnixSocketCreate(const char *socket_name) {
|
||||
int listen_fd;
|
||||
struct sockaddr_un sock_addr;
|
||||
int connect_fd;
|
||||
struct sockaddr_un connect_addr;
|
||||
socklen_t connect_addr_len = 0;
|
||||
|
||||
listen_fd = socket(PF_UNIX, SOCK_STREAM, 0);
|
||||
if (listen_fd < 0) {
|
||||
printf("%s: socket create failed", __func__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (verbose) printf("%s: listen_fd: %d\n", __func__, listen_fd);
|
||||
|
||||
unlink(socket_name);
|
||||
|
||||
memset(&sock_addr, 0, sizeof(struct sockaddr_un));
|
||||
sock_addr.sun_family = AF_UNIX;
|
||||
strncpy(sock_addr.sun_path, socket_name, sizeof(sock_addr.sun_path) - 1);
|
||||
|
||||
if (bind(listen_fd, (const struct sockaddr *)&sock_addr,
|
||||
sizeof(struct sockaddr_un))) {
|
||||
printf("i%s: bind error", __func__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (listen(listen_fd, 1)) {
|
||||
printf("%s: listen error", __func__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
connect_fd =
|
||||
accept(listen_fd, (struct sockaddr *)&connect_addr, &connect_addr_len);
|
||||
|
||||
if (verbose) printf("%s: connect_fd: %d\n", __func__, connect_fd);
|
||||
|
||||
close(listen_fd);
|
||||
unlink(socket_name);
|
||||
if (connect_fd < 0) {
|
||||
printf("%s: accept failed\n", __func__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return connect_fd;
|
||||
}
|
||||
|
||||
/* Receive a file descriptor from another process over the connected      */
/* unix domain socket <connect_fd>.                                       */
/* Returns the file descriptor, or -1 on failure. Note: the integer value */
/* of the file descriptor may be different from the integer value in the  */
/* other process, but the file descriptors in each process will refer to  */
/* the same file object in the kernel.                                    */
int EGLStreamReceivefd(int connect_fd) {
  struct msghdr msg;
  struct iovec iov[1];
  char msg_buf[1];
  char ctrl_buf[CMSG_SPACE(sizeof(int))];
  struct cmsghdr *cmsg;
  int recvfd = -1;

  memset(&msg, 0, sizeof(msg));
  memset(ctrl_buf, 0, sizeof(ctrl_buf));

  iov[0].iov_base = msg_buf;
  iov[0].iov_len = sizeof(msg_buf);
  msg.msg_iov = iov;
  msg.msg_iovlen = 1;

  msg.msg_control = ctrl_buf;
  msg.msg_controllen = sizeof(ctrl_buf);

  if (recvmsg(connect_fd, &msg, 0) <= 0) {
    printf("%s: recvmsg failed", __func__);
    return -1;
  }

  cmsg = CMSG_FIRSTHDR(&msg);
  if (!cmsg) {
    printf("%s: NULL message header\n", __func__);
    return -1;
  }
  if (cmsg->cmsg_level != SOL_SOCKET) {
    printf("%s: Message level is not SOL_SOCKET\n", __func__);
    return -1;
  }
  if (cmsg->cmsg_type != SCM_RIGHTS) {
    printf("%s: Message type is not SCM_RIGHTS\n", __func__);
    return -1;
  }
  // Make sure the control message really carries a whole descriptor before
  // reading it.
  if (cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
    printf("%s: Control message too short\n", __func__);
    return -1;
  }

  // memcpy avoids reading the payload through a type-punned pointer.
  memcpy(&recvfd, CMSG_DATA(cmsg), sizeof(int));

  return recvfd;
}
|
||||
|
||||
#endif
|
113
Samples/EGLStream_CUDA_CrossGPU/eglstrm_common.h
Normal file
113
Samples/EGLStream_CUDA_CrossGPU/eglstrm_common.h
Normal file
|
@ -0,0 +1,113 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// DESCRIPTION: Common EGL stream functions header file
|
||||
//
|
||||
|
||||
#ifndef _EGLSTRM_COMMON_H_
|
||||
#define _EGLSTRM_COMMON_H_
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/un.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "cuda.h"
|
||||
#include "cudaEGL.h"
|
||||
#define TIME_DIFF(end, start) (getMicrosecond(end) - getMicrosecond(start))
|
||||
|
||||
extern EGLStreamKHR g_producerEglStream;
|
||||
extern EGLStreamKHR g_consumerEglStream;
|
||||
extern EGLDisplay g_producerEglDisplay;
|
||||
extern EGLDisplay g_consumerEglDisplay;
|
||||
extern int cudaDevIndexCons;
|
||||
extern int cudaDevIndexProd;
|
||||
extern bool verbose;
|
||||
|
||||
#define EXTENSION_LIST(T) \
|
||||
T(PFNEGLCREATESTREAMKHRPROC, eglCreateStreamKHR) \
|
||||
T(PFNEGLDESTROYSTREAMKHRPROC, eglDestroyStreamKHR) \
|
||||
T(PFNEGLQUERYSTREAMKHRPROC, eglQueryStreamKHR) \
|
||||
T(PFNEGLQUERYSTREAMU64KHRPROC, eglQueryStreamu64KHR) \
|
||||
T(PFNEGLQUERYSTREAMTIMEKHRPROC, eglQueryStreamTimeKHR) \
|
||||
T(PFNEGLSTREAMATTRIBKHRPROC, eglStreamAttribKHR) \
|
||||
T(PFNEGLSTREAMCONSUMERACQUIREKHRPROC, eglStreamConsumerAcquireKHR) \
|
||||
T(PFNEGLSTREAMCONSUMERRELEASEKHRPROC, eglStreamConsumerReleaseKHR) \
|
||||
T(PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC, \
|
||||
eglStreamConsumerGLTextureExternalKHR) \
|
||||
T(PFNEGLQUERYDEVICESEXTPROC, eglQueryDevicesEXT) \
|
||||
T(PFNEGLGETPLATFORMDISPLAYEXTPROC, eglGetPlatformDisplayEXT) \
|
||||
T(PFNEGLGETSTREAMFILEDESCRIPTORKHRPROC, eglGetStreamFileDescriptorKHR) \
|
||||
T(PFNEGLQUERYDEVICEATTRIBEXTPROC, eglQueryDeviceAttribEXT) \
|
||||
T(PFNEGLCREATESTREAMFROMFILEDESCRIPTORKHRPROC, \
|
||||
eglCreateStreamFromFileDescriptorKHR)
|
||||
|
||||
#define EXTLST_DECL(tx, x) tx x = NULL;
|
||||
#define EXTLST_EXTERN(tx, x) extern tx x;
|
||||
#define EXTLST_ENTRY(tx, x) {(extlst_fnptr_t *)&x, #x},
|
||||
|
||||
#define MAX_STRING_SIZE 256
|
||||
#define INIT_DATA 0x01
|
||||
#define PROD_DATA 0x07
|
||||
#define CONS_DATA 0x04
|
||||
|
||||
#define SOCK_PATH "/tmp/tegra_sw_egl_socket"
|
||||
|
||||
typedef struct _TestArgs {
|
||||
unsigned int charCnt;
|
||||
bool isProducer;
|
||||
} TestArgs;
|
||||
|
||||
extern int WIDTH, HEIGHT;
|
||||
|
||||
int eglSetupExtensions(bool is_dgpu);
|
||||
int EGLStreamInit(bool isCrossDevice, int isConsumer,
|
||||
EGLNativeFileDescriptorKHR fileDesc);
|
||||
void EGLStreamFini(void);
|
||||
|
||||
int EGLStreamSetAttr(EGLDisplay display, EGLStreamKHR eglStream);
|
||||
int UnixSocketConnect(const char *socket_name);
|
||||
int EGLStreamSendfd(int send_fd, int fd_to_send);
|
||||
int UnixSocketCreate(const char *socket_name);
|
||||
int EGLStreamReceivefd(int connect_fd);
|
||||
|
||||
/* Clock sampled by getTime(). CLOCK_PROCESS_CPUTIME_ID is the alternative
 * left as a note by the original author. */
static clockid_t clock_id = CLOCK_MONOTONIC;

/* Convert a struct timespec to microseconds.
 * Declared inline, like getTime() below, so that translation units which
 * include this header without calling it do not get unused-function
 * warnings. */
static inline double getMicrosecond(struct timespec t) {
  return ((t.tv_sec) * 1000000.0 + (t.tv_nsec) / 1.0e3);
}

/* Store the current reading of clock_id in *t. */
static inline void getTime(struct timespec *t) { clock_gettime(clock_id, t); }
|
||||
#endif
|
160
Samples/EGLStream_CUDA_CrossGPU/findegl.mk
Normal file
160
Samples/EGLStream_CUDA_CrossGPU/findegl.mk
Normal file
|
@ -0,0 +1,160 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# findegl.mk is used to find the necessary EGL Libraries for specific distributions
|
||||
# this is supported on Linux
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Determine OS platform and unix distribution.
#
# DISTRO ends up holding a lower-case distribution identifier, or stays empty
# when none of the three probes below produces one. Every probe uses ":=" so
# that its shell pipeline runs once, here, instead of on each later expansion
# of $(DISTRO).
ifeq ("$(TARGET_OS)","linux")
    # first search lsb_release
    DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
    ifeq ("$(DISTRO)","")
        # second search and parse /etc/issue: first word of the first line.
        # awk reads the file directly (no pager involved) and its stderr is
        # discarded so that a missing /etc/issue stays silent.
        DISTRO := $(shell awk 'NR==1 {print $$1}' /etc/issue 2>/dev/null | tr "[:upper:]" "[:lower:]")
        # ensure data from /etc/issue is valid
        ifeq (,$(filter $(DISTRO),ubuntu fedora red rhel centos suse))
            DISTRO :=
        endif
        ifeq ("$(DISTRO)","")
            # third, we can search in /etc/os-release or /etc/{distro}-release
            DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
        endif
    endif
endif
|
||||
|
||||
# Locate libEGL.so and the EGL headers for Linux targets.
# Sets GLPATH / GLLINK / DFLT_PATH (unless already provided) and EGLLIB,
# EGLHEADER, EGLEXTHEADER; forces SAMPLE_ENABLED to 0 when the library or
# either header cannot be found.
ifeq ("$(TARGET_OS)","linux")
    # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside.
    # Each flag below holds the exit status of grep, so "0" means DISTRO matched.
    # ":=" runs every probe exactly once.
    UBUNTU := $(shell echo $(DISTRO) | grep -i ubuntu      >/dev/null 2>&1; echo $$?)
    FEDORA := $(shell echo $(DISTRO) | grep -i fedora      >/dev/null 2>&1; echo $$?)
    RHEL   := $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
    CENTOS := $(shell echo $(DISTRO) | grep -i centos      >/dev/null 2>&1; echo $$?)
    SUSE   := $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
    ifeq ("$(UBUNTU)","0")
        ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
            GLPATH := /usr/arm-linux-gnueabihf/lib
            GLLINK := -L/usr/arm-linux-gnueabihf/lib
            ifneq ($(TARGET_FS),)
                GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf
                GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
            endif
        else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64)
            GLPATH := /usr/aarch64-linux-gnu/lib
            GLLINK := -L/usr/aarch64-linux-gnu/lib
            ifneq ($(TARGET_FS),)
                GLPATH += $(TARGET_FS)/usr/lib
                GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu
                GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
            endif
        else
            # Native build: name of the first installed nvidia-* package, if any.
            UBUNTU_PKG_NAME := $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1)
            ifneq ("$(UBUNTU_PKG_NAME)","")
                GLPATH    ?= /usr/lib/$(UBUNTU_PKG_NAME)
                GLLINK    ?= -L/usr/lib/$(UBUNTU_PKG_NAME)
            endif

            DFLT_PATH ?= /usr/lib
        endif
    endif
    ifeq ("$(SUSE)","0")
        GLPATH    ?= /usr/X11R6/lib64
        GLLINK    ?= -L/usr/X11R6/lib64
        DFLT_PATH ?= /usr/lib64
    endif
    ifeq ("$(FEDORA)","0")
        GLPATH    ?= /usr/lib64/nvidia
        GLLINK    ?= -L/usr/lib64/nvidia
        DFLT_PATH ?= /usr/lib64
    endif
    ifeq ("$(RHEL)","0")
        GLPATH    ?= /usr/lib64/nvidia
        GLLINK    ?= -L/usr/lib64/nvidia
        DFLT_PATH ?= /usr/lib64
    endif
    ifeq ("$(CENTOS)","0")
        GLPATH    ?= /usr/lib64/nvidia
        GLLINK    ?= -L/usr/lib64/nvidia
        DFLT_PATH ?= /usr/lib64
    endif

    EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null)

    ifeq ("$(EGLLIB)","")
        $(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<)
        SAMPLE_ENABLED := 0
    endif

    HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include
    ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
        HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include
    else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux)
        HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include
    endif

    EGLHEADER    := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null)
    EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null)

    ifeq ("$(EGLHEADER)","")
        $(info >>> WARNING - egl.h not found, please install egl.h <<<)
        SAMPLE_ENABLED := 0
    endif
    ifeq ("$(EGLEXTHEADER)","")
        $(info >>> WARNING - eglext.h not found, please install eglext.h <<<)
        SAMPLE_ENABLED := 0
    endif
endif
|
||||
|
||||
ifeq ("$(TARGET_OS)","qnx")
    HOST_CCFLAGS := -V5.4.0,gcc_ntoaarch64le
endif

# Compile (never run) a minimal EGL translation unit to check whether
# EGL_SUPPORT_REUSE_NV is defined by the EGL headers available. When it is,
# the probe's #error fires and its message shows up in the compiler output;
# EGL_DEFINES holds the number of output lines containing that message.
ifneq ($(SAMPLE_ENABLED), 0)
    # A literal number sign held in a variable: GNU Make releases before 4.3
    # treat an unescaped "#" inside a function call as the start of a comment,
    # while 4.3 and later pass a backslash-escaped one through verbatim.
    findegl_hash := \#
    # The whole probe runs in one shell so that its scratch files can carry
    # that shell's PID and are always removed again; fixed names such as
    # test.c could overwrite or delete a user's own files.
    EGL_DEFINES := $(shell \
        probe=findegl_probe_$$$$; \
        printf "$(findegl_hash)include <EGL/egl.h>\n$(findegl_hash)include <EGL/eglext.h>\nint main() {\n$(findegl_hash)ifdef EGL_SUPPORT_REUSE_NV \n $(findegl_hash)error \"Compatible EGL header found\" \n return 0;\n$(findegl_hash)endif \n return 1;\n}" > $$probe.c; \
        $(HOST_COMPILER) $(HOST_CCFLAGS) $(CCFLAGS) $(EXTRA_CCFLAGS) -lEGL $$probe.c -c -o $$probe.o 2>&1 | grep -ic "Compatible EGL header found"; \
        rm -f $$probe.c $$probe.o)
    SHOULD_WAIVE := 0
    ifeq ($(EGL_DEFINES),0)
        SHOULD_WAIVE := 1
    endif
    ifeq ($(SHOULD_WAIVE),1)
        $(info -----------------------------------------------------------------------------------------------)
        $(info WARNING - NVIDIA EGL EXTENSIONS are not available in the present EGL headers)
        $(info -----------------------------------------------------------------------------------------------)
        $(info   This CUDA Sample cannot be built if the EGL NVIDIA EXTENSIONS like EGL_SUPPORT_REUSE_NV are not supported in EGL headers.)
        $(info   This will be a dry-run of the Makefile.)
        $(info   Please install the latest khronos EGL headers and libs to build this sample)
        $(info -----------------------------------------------------------------------------------------------)
        SAMPLE_ENABLED := 0
    endif
endif
|
||||
|
221
Samples/EGLStream_CUDA_CrossGPU/helper.h
Normal file
221
Samples/EGLStream_CUDA_CrossGPU/helper.h
Normal file
|
@ -0,0 +1,221 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "eglstrm_common.h"
|
||||
#if defined(EXTENSION_LIST)
|
||||
EXTENSION_LIST(EXTLST_EXTERN)
|
||||
#endif
|
||||
#include <cuda.h>
|
||||
|
||||
// Forward declarations of the helpers defined further down in this header.
void printUsage(void);
int parseCmdLine(int argc, char *argv[], TestArgs *args);

// Run-time options; parseCmdLine() overwrites them from the command line.
int NUMTRIALS = 10;          // -n
int profileAPIs = 0;         // -profile
bool isCrossDevice = false;  // -crossdev
bool verbose = false;        // -v
|
||||
|
||||
// Parse the command line options. Returns FAILURE on a parse error, SUCCESS
|
||||
// otherwise.
|
||||
int parseCmdLine(int argc, char *argv[], TestArgs *args) {
|
||||
int i;
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "-h") == 0) {
|
||||
printUsage();
|
||||
exit(0);
|
||||
} else if (strcmp(argv[i], "-n") == 0) {
|
||||
++i;
|
||||
if (sscanf(argv[i], "%d", &NUMTRIALS) != 1 || NUMTRIALS <= 0) {
|
||||
printf("Invalid trial count: %s should be > 0\n", argv[i]);
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(argv[i], "-profile") == 0) {
|
||||
profileAPIs = 1;
|
||||
} else if (strcmp(argv[i], "-crossdev") == 0) {
|
||||
isCrossDevice = 1;
|
||||
} else if (strcmp(argv[i], "-width") == 0) {
|
||||
++i;
|
||||
if (sscanf(argv[i], "%d", &WIDTH) != 1 || (WIDTH <= 0)) {
|
||||
printf("Width should be greater than 0\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(argv[i], "-height") == 0) {
|
||||
++i;
|
||||
if (sscanf(argv[i], "%d", &HEIGHT) != 1 || (HEIGHT <= 0)) {
|
||||
printf("Width should be greater than 0\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (0 == strcmp(&argv[i][1], "proctype")) {
|
||||
++i;
|
||||
if (!strcasecmp(argv[i], "prod")) {
|
||||
args->isProducer = 1;
|
||||
} else if (!strcasecmp(argv[i], "cons")) {
|
||||
args->isProducer = 0;
|
||||
} else {
|
||||
printf("%s: Bad Process Type: %s\n", __func__, argv[i]);
|
||||
return 1;
|
||||
}
|
||||
} else if (strcmp(argv[i], "-v") == 0) {
|
||||
verbose = 1;
|
||||
} else {
|
||||
printf("Unknown option: %s\n", argv[i]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (isCrossDevice) {
|
||||
int deviceCount = 0;
|
||||
|
||||
CUresult error_id = cuInit(0);
|
||||
if (error_id != CUDA_SUCCESS) {
|
||||
printf("cuInit(0) returned %d\n", error_id);
|
||||
printf("Result = FAIL\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
error_id = cuDeviceGetCount(&deviceCount);
|
||||
if (error_id != CUDA_SUCCESS) {
|
||||
printf("cuDeviceGetCount returned %d\n", (int)error_id);
|
||||
printf("Result = FAIL\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
int iGPUexists = 0;
|
||||
CUdevice dev;
|
||||
for (dev = 0; dev < deviceCount; ++dev) {
|
||||
int integrated = 0;
|
||||
CUresult error_result = cuDeviceGetAttribute(
|
||||
&integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev);
|
||||
|
||||
if (error_result != CUDA_SUCCESS) {
|
||||
printf("cuDeviceGetAttribute returned error : %d\n", (int)error_result);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (integrated) {
|
||||
iGPUexists = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!iGPUexists) {
|
||||
printf("No Integrated GPU found in the system.\n");
|
||||
printf(
|
||||
"-crossdev option is only supported on systems with an Integrated "
|
||||
"GPU and a Discrete GPU\n");
|
||||
printf("Waiving the execution\n");
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
if (!eglSetupExtensions(isCrossDevice)) {
|
||||
printf("SetupExtentions failed \n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
#define MAX_EGL_DEVICES 4
|
||||
EGLDeviceEXT devices[MAX_EGL_DEVICES];
|
||||
EGLint numDevices = 0;
|
||||
EGLBoolean eglStatus =
|
||||
eglQueryDevicesEXT(MAX_EGL_DEVICES, devices, &numDevices);
|
||||
if (eglStatus != EGL_TRUE) {
|
||||
printf("Error querying EGL devices\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (numDevices == 0) {
|
||||
printf("No EGL devices found\n");
|
||||
eglStatus = EGL_FALSE;
|
||||
exit(2); // EXIT_WAIVED
|
||||
}
|
||||
|
||||
int egl_device_id = 0;
|
||||
for (egl_device_id = 0; egl_device_id < numDevices; egl_device_id++) {
|
||||
EGLAttrib cuda_device;
|
||||
eglStatus = eglQueryDeviceAttribEXT(devices[egl_device_id],
|
||||
EGL_CUDA_DEVICE_NV, &cuda_device);
|
||||
if (eglStatus == EGL_TRUE) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (egl_device_id >= numDevices) {
|
||||
printf("No CUDA Capable EGL Device found.. Waiving execution\n");
|
||||
exit(2); // EXIT_WAIVED
|
||||
}
|
||||
|
||||
if (isCrossDevice) {
|
||||
if (numDevices == 1) {
|
||||
printf(
|
||||
"Found only one EGL device, cannot setup cross GPU streams. "
|
||||
"Waiving\n");
|
||||
eglStatus = EGL_FALSE;
|
||||
exit(2); // EXIT_WAIVED
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Start the producer as a separate background process and mark the calling
// process as the consumer.
//
// The producer is this same program re-invoked with "-proctype prod". The
// trial count and frame dimensions are forwarded so that both processes work
// with the same values; -crossdev and -v are forwarded when set.
//
// NOTE(review): the executable is addressed as ./EGLStream_CUDA_CrossGPU, so
// this only works when the current directory contains it.
void launchProducer(TestArgs *args) {
  /* Cross-process creation of producer */
  char argsProducer[1024];
  int written;

  /* The trailing '&' makes the shell run the producer in the background. */
  written = snprintf(argsProducer, sizeof(argsProducer),
                     "./EGLStream_CUDA_CrossGPU -proctype prod "
                     "-n %d -width %d -height %d %s%s& ",
                     NUMTRIALS, WIDTH, HEIGHT,
                     isCrossDevice ? "-crossdev " : "", verbose ? "-v " : "");
  if (written < 0 || (size_t)written >= sizeof(argsProducer)) {
    printf("%s: Crossproc Producer command could not be built\n", __func__);
    /*Enable crossproc Consumer in the same process */
    args->isProducer = 0;
    return;
  }

  printf("\n%s: Crossproc Producer command: %s \n", __func__, argsProducer);

  /*Create crossproc Producer*/
  if (system(argsProducer) == -1) {
    printf("%s: Crossproc Producer could not be started\n", __func__);
  }

  /*Enable crossproc Consumer in the same process */
  args->isProducer = 0;
}
|
||||
|
||||
// Print the command line options understood by parseCmdLine() to stdout.
void printUsage(void) {
  printf("Usage:\n");
  printf(" -h Print this help message\n");
  printf(" -n n Exit after running n trials. Set to 10 by default\n");
  printf(
      " -profile Profile time taken by ReleaseAPI. Not set by default\n");
  printf(" -crossdev Run with producer on idgpu and consumer on dgpu\n");
  printf(" -dgpu (same as -crossdev, deprecated)\n");
  printf(" -width w Frame width, must be > 0. Set to 8192 by default\n");
  printf(" -height h Frame height, must be > 0. Set to 8192 by default\n");
  printf(" -proctype prod|cons Run as the producer or the consumer process\n");
  printf(" -v verbose output\n");
}
|
140
Samples/EGLStream_CUDA_CrossGPU/kernel.cu
Normal file
140
Samples/EGLStream_CUDA_CrossGPU/kernel.cu
Normal file
|
@ -0,0 +1,140 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//
|
||||
// DESCRIPTION: Simple CUDA consumer rendering sample app
|
||||
//
|
||||
|
||||
#include <EGL/egl.h>
|
||||
#include <EGL/eglext.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "eglstrm_common.h"
|
||||
|
||||
extern bool isCrossDevice;
|
||||
|
||||
// Device-side mismatch bookkeeping shared by the producer and consumer checks.
// errorFound latches to 1 at the first mismatch (nothing visible here resets
// it); numErrors counts the mismatches that were reported.
__device__ static unsigned int numErrors = 0, errorFound = 0;

// Try to become the single thread that reports a mismatch.
// Returns true for the thread that flips errorFound from 0 to 1 and stores
// the value numErrors had before this report in *priorErrors. The atomics
// replace a plain read-then-write, which let several threads report at once
// and lose increments.
__device__ static bool claimFirstError(unsigned int *priorErrors) {
  if (atomicCAS(&errorFound, 0u, 1u) != 0u) {
    return false;
  }
  *priorErrors = atomicAdd(&numErrors, 1u);
  return true;
}

// Compare the byte owned by this thread with expectedVal and report the
// first mismatch seen on the producer side. size is accepted but not used.
__device__ void checkProducerDataGPU(char *data, int size, char expectedVal,
                                     int frameNumber) {
  const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
  unsigned int priorErrors = 0;

  if ((data[idx] != expectedVal) && claimFirstError(&priorErrors)) {
    printf("Producer FOUND:%d expected: %d at %d for trial %d %d\n",
           data[idx], expectedVal, idx, frameNumber, priorErrors);
  }
}

// Compare the byte owned by this thread with expectedVal and report the
// first mismatch seen on the consumer side. size is accepted but not used.
__device__ void checkConsumerDataGPU(char *data, int size, char expectedVal,
                                     int frameNumber) {
  const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
  unsigned int priorErrors = 0;

  if ((data[idx] != expectedVal) && claimFirstError(&priorErrors)) {
    printf("Consumer FOUND:%d expected: %d at %d for trial %d %d\n",
           data[idx], expectedVal, idx, frameNumber, priorErrors);
  }
}
|
||||
|
||||
// Store newVal in the byte addressed by this thread's global index.
__global__ void writeDataToBuffer(char *pSrc, char newVal) {
  pSrc[blockDim.x * blockIdx.x + threadIdx.x] = newVal;
}

// Consumer-side check of one byte per thread against expectedVal.
// size is an int so that the element count passed by the host (width *
// height) is not truncated to a char; newVal is accepted but not used.
__global__ void testKernelConsumer(char *pSrc, int size, char expectedVal,
                                   char newVal, int frameNumber) {
  checkConsumerDataGPU(pSrc, size, expectedVal, frameNumber);
}

// Producer-side check of one byte per thread against expectedVal.
// size is an int for the same reason as above; newVal is accepted but not
// used.
__global__ void testKernelProducer(char *pSrc, int size, char expectedVal,
                                   char newVal, int frameNumber) {
  checkProducerDataGPU(pSrc, size, expectedVal, frameNumber);
}

// Copy the device-side error counter to *numErr.
__global__ void getNumErrors(int *numErr) { *numErr = numErrors; }
|
||||
|
||||
// Producer step for one frame: optionally verify that the buffer holds
// expectedVal, then overwrite it with newVal. Both kernels are queued on
// pStream with 1024 threads per block and (width * height) / 1024 blocks, so
// any remainder of width * height beyond a multiple of 1024 is not touched.
//
// Returns the result of cudaGetLastError() after the launches, so that a
// rejected launch configuration is reported instead of being lost.
cudaError_t cudaProducer_filter(cudaStream_t pStream, char *pSrc, int width,
                                int height, char expectedVal, char newVal,
                                int frameNumber) {
  const int numElements = width * height;
  const int numBlocks = numElements / 1024;

  // in case where consumer is on dgpu and producer is on igpu when return is
  // called the frame is not copied back to igpu. So the consumer changes is not
  // visible to producer
  if (isCrossDevice == 0) {
    // No dynamic shared memory is requested: the kernels do not use any.
    testKernelProducer<<<numBlocks, 1024, 0, pStream>>>(
        pSrc, numElements, expectedVal, newVal, frameNumber);
  }
  writeDataToBuffer<<<numBlocks, 1024, 0, pStream>>>(pSrc, newVal);
  return cudaGetLastError();
}

// Consumer step for one frame: verify that the buffer holds expectedVal,
// then overwrite it with newVal. Launch geometry and return value are the
// same as for cudaProducer_filter().
cudaError_t cudaConsumer_filter(cudaStream_t cStream, char *pSrc, int width,
                                int height, char expectedVal, char newVal,
                                int frameNumber) {
  const int numElements = width * height;
  const int numBlocks = numElements / 1024;

  testKernelConsumer<<<numBlocks, 1024, 0, cStream>>>(
      pSrc, numElements, expectedVal, newVal, frameNumber);
  writeDataToBuffer<<<numBlocks, 1024, 0, cStream>>>(pSrc, newVal);
  return cudaGetLastError();
}
|
||||
|
||||
// Fetch the device-side mismatch counter and turn it into a status.
//
// Returns cudaSuccess when no mismatch was recorded, cudaErrorUnknown when at
// least one was, or the error of the first CUDA runtime call that failed
// while fetching the counter.
cudaError_t cudaGetValueMismatch() {
  int numErr_h = 0;
  int *numErr_d = NULL;
  cudaError_t err = cudaSuccess;

  err = cudaMalloc(&numErr_d, sizeof(int));
  if (err != cudaSuccess) {
    printf("Cuda Main: cudaMalloc failed with %s\n", cudaGetErrorString(err));
    return err;
  }

  getNumErrors<<<1, 1>>>(numErr_d);
  err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    printf("Cuda Main: cudaDeviceSynchronize failed with %s\n",
           cudaGetErrorString(err));
    // The counter was not produced; do not go on to copy it.
    cudaFree(numErr_d);
    return err;
  }

  err = cudaMemcpy(&numErr_h, numErr_d, sizeof(int), cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    printf("Cuda Main: cudaMemcpy failed with %s\n", cudaGetErrorString(err));
    cudaFree(numErr_d);
    return err;
  }

  err = cudaFree(numErr_d);
  if (err != cudaSuccess) {
    printf("Cuda Main: cudaFree failed with %s\n", cudaGetErrorString(err));
    return err;
  }

  if (numErr_h > 0) {
    return cudaErrorUnknown;
  }
  return cudaSuccess;
}
|
392
Samples/EGLStream_CUDA_CrossGPU/main.cpp
Normal file
392
Samples/EGLStream_CUDA_CrossGPU/main.cpp
Normal file
|
@ -0,0 +1,392 @@
|
|||
/* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cudaEGL.h"
|
||||
#include "cuda_consumer.h"
|
||||
#include "cuda_producer.h"
|
||||
#include "eglstrm_common.h"
|
||||
#include "helper.h"
|
||||
#if defined(EXTENSION_LIST)
|
||||
EXTENSION_LIST(EXTLST_EXTERN)
|
||||
#endif
|
||||
|
||||
bool signal_stop = 0;
|
||||
extern bool verbose;
|
||||
|
||||
// Signal handler: request a stop and print "Signal: <number>".
// The line is assembled by hand and emitted with write(), because printf()
// is not async-signal-safe and must not be called from a handler.
static void sig_handler(int sig) {
  static const char prefix[] = "Signal: ";
  char msg[sizeof(prefix) + 12];  // prefix + sign + 10 digits + newline
  char digits[10];
  unsigned int magnitude =
      (sig < 0) ? 0u - (unsigned int)sig : (unsigned int)sig;
  size_t len = 0;
  size_t numDigits = 0;
  size_t k;

  signal_stop = 1;

  for (k = 0; k + 1 < sizeof(prefix); k++) {
    msg[len++] = prefix[k];
  }
  if (sig < 0) {
    msg[len++] = '-';
  }
  // Digits come out least significant first; append them in reverse.
  do {
    digits[numDigits++] = (char)('0' + magnitude % 10u);
    magnitude /= 10u;
  } while (magnitude != 0u);
  while (numDigits > 0) {
    msg[len++] = digits[--numDigits];
  }
  msg[len++] = '\n';

  if (write(STDOUT_FILENO, msg, len) < 0) {
    // Nothing safe can be done about a failed write inside a handler.
  }
}
|
||||
|
||||
// Finish the consumer process: tear down the EGL stream, collect the
// producer's final status over send_fd, print the combined verdict and exit.
//
// consumerStatus: 0 when the consumer side succeeded.
// send_fd:        socket connected to the producer, or a negative value when
//                 no connection was ever made.
//
// Never returns: exits with EXIT_SUCCESS only when both sides report 0.
void DoneCons(int consumerStatus, int send_fd) {
  EGLStreamFini();
  // get the final status from producer, combine and print
  int producerStatus = -1;
  // Without a connection, or without a complete int, the producer counts as
  // failed.
  if (send_fd < 0 ||
      recv(send_fd, (void *)&producerStatus, sizeof(int), 0) !=
          (ssize_t)sizeof(int)) {
    printf("%s: Cuda Consumer could not receive status from producer.\n",
           __func__);
    producerStatus = -1;
  }
  if (send_fd >= 0) {
    close(send_fd);
  }

  if (producerStatus == 0 && consumerStatus == 0) {
    printf("&&&& EGLStream_CUDA_CrossGPU PASSED\n");
    exit(EXIT_SUCCESS);
  } else {
    printf("&&&& EGLStream_CUDA_CrossGPU FAILED\n");
    exit(EXIT_FAILURE);
  }
}
|
||||
|
||||
// Finish the producer process: tear down the EGL stream, send the producer's
// final status to the consumer over connect_fd and exit.
//
// producerStatus: 0 when the producer side succeeded.
// connect_fd:     socket connected to the consumer, or a negative value when
//                 no connection was ever made.
//
// Never returns: exits with EXIT_SUCCESS when producerStatus is 0 and with
// EXIT_FAILURE otherwise.
void DoneProd(int producerStatus, int connect_fd) {
  EGLStreamFini();
  if (connect_fd < 0 ||
      send(connect_fd, (void *)&producerStatus, sizeof(int), 0) !=
          (ssize_t)sizeof(int)) {
    printf("%s: Cuda Producer could not send status to consumer.\n", __func__);
  }
  if (connect_fd >= 0) {
    close(connect_fd);
  }
  if (producerStatus == 0) {
    exit(EXIT_SUCCESS);
  } else {
    exit(EXIT_FAILURE);
  }
}
|
||||
|
||||
int WIDTH = 8192, HEIGHT = 8192;
|
||||
int main(int argc, char **argv) {
|
||||
TestArgs args = {0, false};
|
||||
CUresult curesult = CUDA_SUCCESS;
|
||||
unsigned int j = 0;
|
||||
cudaError_t err = cudaSuccess;
|
||||
EGLNativeFileDescriptorKHR fileDescriptor = EGL_NO_FILE_DESCRIPTOR_KHR;
|
||||
struct timespec start, end;
|
||||
CUeglFrame cudaEgl1, cudaEgl2;
|
||||
int consumerStatus = 0;
|
||||
int send_fd = -1;
|
||||
|
||||
if (parseCmdLine(argc, argv, &args) < 0) {
|
||||
printUsage();
|
||||
curesult = CUDA_ERROR_UNKNOWN;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
printf("Width : %u, height: %u and iterations: %u\n", WIDTH, HEIGHT,
|
||||
NUMTRIALS);
|
||||
|
||||
if (!args.isProducer) // Consumer code
|
||||
{
|
||||
test_cuda_consumer_s cudaConsumer;
|
||||
memset(&cudaConsumer, 0, sizeof(test_cuda_consumer_s));
|
||||
cudaConsumer.profileAPI = profileAPIs;
|
||||
|
||||
// Hook up Ctrl-C handler
|
||||
signal(SIGINT, sig_handler);
|
||||
|
||||
if (!EGLStreamInit(isCrossDevice, !args.isProducer,
|
||||
EGL_NO_FILE_DESCRIPTOR_KHR)) {
|
||||
printf("EGLStream Init failed.\n");
|
||||
curesult = CUDA_ERROR_UNKNOWN;
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
cudaConsumer.cudaDevId = cudaDevIndexCons;
|
||||
curesult = cudaDeviceCreateConsumer(&cudaConsumer);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
cuCtxPushCurrent(cudaConsumer.context);
|
||||
|
||||
launchProducer(&args);
|
||||
|
||||
args.charCnt = WIDTH * HEIGHT * 4;
|
||||
|
||||
curesult = cuda_consumer_init(&cudaConsumer, &args);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Consumer: Init failed, status: %d\n", curesult);
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
cuCtxPopCurrent(&cudaConsumer.context);
|
||||
|
||||
send_fd = UnixSocketConnect(SOCK_PATH);
|
||||
if (-1 == send_fd) {
|
||||
printf("%s: Cuda Consumer cannot create socket %s\n", __func__,
|
||||
SOCK_PATH);
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
cuCtxPushCurrent(cudaConsumer.context);
|
||||
cudaConsumer.eglStream = g_consumerEglStream;
|
||||
cudaConsumer.eglDisplay = g_consumerEglDisplay;
|
||||
|
||||
// Send the EGL stream FD to producer
|
||||
fileDescriptor = eglGetStreamFileDescriptorKHR(cudaConsumer.eglDisplay,
|
||||
cudaConsumer.eglStream);
|
||||
if (EGL_NO_FILE_DESCRIPTOR_KHR == fileDescriptor) {
|
||||
printf("%s: Cuda Consumer could not get EGL file descriptor.\n",
|
||||
__func__);
|
||||
eglDestroyStreamKHR(cudaConsumer.eglDisplay, cudaConsumer.eglStream);
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
if (verbose)
|
||||
printf("%s: Cuda Consumer EGL stream FD obtained : %d.\n", __func__,
|
||||
fileDescriptor);
|
||||
|
||||
int res = -1;
|
||||
res = EGLStreamSendfd(send_fd, fileDescriptor);
|
||||
if (-1 == res) {
|
||||
printf("%s: Cuda Consumer could not send EGL file descriptor.\n",
|
||||
__func__);
|
||||
consumerStatus = -1;
|
||||
close(fileDescriptor);
|
||||
}
|
||||
|
||||
if (CUDA_SUCCESS !=
|
||||
(curesult = cuEGLStreamConsumerConnect(&(cudaConsumer.cudaConn),
|
||||
cudaConsumer.eglStream))) {
|
||||
printf("FAILED Connect CUDA consumer with error %d\n", curesult);
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
j = 0;
|
||||
for (j = 0; j < NUMTRIALS; j++) {
|
||||
curesult = cudaConsumerAcquireFrame(&cudaConsumer, j);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Consumer Test failed for frame = %d\n", j + 1);
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
curesult = cudaConsumerReleaseFrame(&cudaConsumer, j);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Consumer Test failed for frame = %d\n", j + 1);
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
curesult = cudaConsumerAcquireFrame(&cudaConsumer, j);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Consumer Test failed for frame = %d\n", j + 1);
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
curesult = cudaConsumerReleaseFrame(&cudaConsumer, j);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Consumer Test failed for frame = %d\n", j + 1);
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
}
|
||||
cuCtxSynchronize();
|
||||
close(fileDescriptor);
|
||||
err = cudaGetValueMismatch();
|
||||
if (err != cudaSuccess) {
|
||||
printf("Consumer: App failed with value mismatch\n");
|
||||
curesult = CUDA_ERROR_UNKNOWN;
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
EGLint streamState = 0;
|
||||
if (!eglQueryStreamKHR(cudaConsumer.eglDisplay, cudaConsumer.eglStream,
|
||||
EGL_STREAM_STATE_KHR, &streamState)) {
|
||||
printf("Main, eglQueryStreamKHR EGL_STREAM_STATE_KHR failed\n");
|
||||
curesult = CUDA_ERROR_UNKNOWN;
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
|
||||
if (streamState != EGL_STREAM_STATE_DISCONNECTED_KHR) {
|
||||
if (CUDA_SUCCESS != (curesult = cuda_consumer_Deinit(&cudaConsumer))) {
|
||||
printf("Consumer Disconnect FAILED.\n");
|
||||
consumerStatus = -1;
|
||||
DoneCons(consumerStatus, send_fd);
|
||||
}
|
||||
}
|
||||
} else // Producer
|
||||
{
|
||||
test_cuda_producer_s cudaProducer;
|
||||
memset(&cudaProducer, 0, sizeof(test_cuda_producer_s));
|
||||
cudaProducer.profileAPI = profileAPIs;
|
||||
int producerStatus = 0;
|
||||
|
||||
setenv("CUDA_EGL_PRODUCER_RETURN_WAIT_TIMEOUT", "1600", 0);
|
||||
|
||||
int connect_fd = -1;
|
||||
// Hook up Ctrl-C handler
|
||||
signal(SIGINT, sig_handler);
|
||||
|
||||
// Create connection to Consumer
|
||||
connect_fd = UnixSocketCreate(SOCK_PATH);
|
||||
if (-1 == connect_fd) {
|
||||
printf("%s: Cuda Producer could not create socket: %s.\n", __func__,
|
||||
SOCK_PATH);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
// Get the file descriptor of the stream from the consumer process
|
||||
// and re-create the EGL stream from it
|
||||
fileDescriptor = EGLStreamReceivefd(connect_fd);
|
||||
if (-1 == fileDescriptor) {
|
||||
printf("%s: Cuda Producer could not receive EGL file descriptor \n",
|
||||
__func__);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
if (!EGLStreamInit(isCrossDevice, 0, fileDescriptor)) {
|
||||
printf("EGLStream Init failed.\n");
|
||||
producerStatus = -1;
|
||||
curesult = CUDA_ERROR_UNKNOWN;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
cudaProducer.eglDisplay = g_producerEglDisplay;
|
||||
cudaProducer.eglStream = g_producerEglStream;
|
||||
cudaProducer.cudaDevId = cudaDevIndexProd;
|
||||
|
||||
curesult = cudaDeviceCreateProducer(&cudaProducer);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
args.charCnt = WIDTH * HEIGHT * 4;
|
||||
cuCtxPushCurrent(cudaProducer.context);
|
||||
curesult = cudaProducerInit(&cudaProducer, &args);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Producer: Init failed, status: %d\n", curesult);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
// wait for consumer to connect first
|
||||
int err = 0;
|
||||
int wait_loop = 0;
|
||||
EGLint streamState = 0;
|
||||
do {
|
||||
err = eglQueryStreamKHR(cudaProducer.eglDisplay, cudaProducer.eglStream,
|
||||
EGL_STREAM_STATE_KHR, &streamState);
|
||||
if ((0 != err) && (EGL_STREAM_STATE_CONNECTING_KHR != streamState)) {
|
||||
sleep(1);
|
||||
wait_loop++;
|
||||
}
|
||||
} while ((wait_loop < 10) && (0 != err) &&
|
||||
(streamState != EGL_STREAM_STATE_CONNECTING_KHR));
|
||||
|
||||
if ((0 == err) || (wait_loop >= 10)) {
|
||||
printf(
|
||||
"%s: Cuda Producer eglQueryStreamKHR EGL_STREAM_STATE_KHR failed.\n",
|
||||
__func__);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
if (CUDA_SUCCESS != (curesult = cuEGLStreamProducerConnect(
|
||||
&(cudaProducer.cudaConn), cudaProducer.eglStream,
|
||||
WIDTH, HEIGHT))) {
|
||||
printf("Connect CUDA producer FAILED with error %d\n", curesult);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
printf("main - Cuda Producer and Consumer Initialized.\n");
|
||||
|
||||
cudaProducerPrepareFrame(&cudaEgl1, cudaProducer.cudaPtr, args.charCnt);
|
||||
cudaProducerPrepareFrame(&cudaEgl2, cudaProducer.cudaPtr1, args.charCnt);
|
||||
|
||||
j = 0;
|
||||
for (j = 0; j < NUMTRIALS; j++) {
|
||||
curesult = cudaProducerPresentFrame(&cudaProducer, cudaEgl1, j);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Producer Test failed for frame = %d with cuda error:%d\n",
|
||||
j + 1, curesult);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
curesult = cudaProducerPresentFrame(&cudaProducer, cudaEgl2, j);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Producer Test failed for frame = %d with cuda error:%d\n",
|
||||
j + 1, curesult);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
curesult = cudaProducerReturnFrame(&cudaProducer, cudaEgl1, j);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Producer Test failed for frame = %d with cuda error:%d\n",
|
||||
j + 1, curesult);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
curesult = cudaProducerReturnFrame(&cudaProducer, cudaEgl2, j);
|
||||
if (curesult != CUDA_SUCCESS) {
|
||||
printf("Cuda Producer Test failed for frame = %d with cuda error:%d\n",
|
||||
j + 1, curesult);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
}
|
||||
|
||||
cuCtxSynchronize();
|
||||
err = cudaGetValueMismatch();
|
||||
if (err != cudaSuccess) {
|
||||
printf("Prod: App failed with value mismatch\n");
|
||||
curesult = CUDA_ERROR_UNKNOWN;
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
printf("Tear Down Start.....\n");
|
||||
if (!eglQueryStreamKHR(cudaProducer.eglDisplay, cudaProducer.eglStream,
|
||||
EGL_STREAM_STATE_KHR, &streamState)) {
|
||||
printf("Main, eglQueryStreamKHR EGL_STREAM_STATE_KHR failed\n");
|
||||
curesult = CUDA_ERROR_UNKNOWN;
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
|
||||
if (streamState != EGL_STREAM_STATE_DISCONNECTED_KHR) {
|
||||
if (CUDA_SUCCESS != (curesult = cudaProducerDeinit(&cudaProducer))) {
|
||||
printf("Producer Disconnect FAILED with %d\n", curesult);
|
||||
producerStatus = -1;
|
||||
DoneProd(producerStatus, connect_fd);
|
||||
}
|
||||
}
|
||||
unsetenv("CUDA_EGL_PRODUCER_RETURN_WAIT_TIMEOUT");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
18
Samples/EGLStreams_CUDA_Interop/.vscode/c_cpp_properties.json
vendored
Normal file
18
Samples/EGLStreams_CUDA_Interop/.vscode/c_cpp_properties.json
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"${workspaceFolder}/../../Common"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/local/cuda/bin/nvcc",
|
||||
"cStandard": "gnu17",
|
||||
"cppStandard": "gnu++14",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"configurationProvider": "ms-vscode.makefile-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
7
Samples/EGLStreams_CUDA_Interop/.vscode/extensions.json
vendored
Normal file
7
Samples/EGLStreams_CUDA_Interop/.vscode/extensions.json
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"recommendations": [
|
||||
"nvidia.nsight-vscode-edition",
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.makefile-tools"
|
||||
]
|
||||
}
|
10
Samples/EGLStreams_CUDA_Interop/.vscode/launch.json
vendored
Normal file
10
Samples/EGLStreams_CUDA_Interop/.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "CUDA C++: Launch",
|
||||
"type": "cuda-gdb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/EGLStream_CUDA_Interop"
|
||||
}
|
||||
]
|
||||
}
|
15
Samples/EGLStreams_CUDA_Interop/.vscode/tasks.json
vendored
Normal file
15
Samples/EGLStreams_CUDA_Interop/.vscode/tasks.json
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "sample",
|
||||
"type": "shell",
|
||||
"command": "make dbg=1",
|
||||
"problemMatcher": ["$nvcc"],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
433
Samples/EGLStreams_CUDA_Interop/Makefile
Normal file
433
Samples/EGLStreams_CUDA_Interop/Makefile
Normal file
|
@ -0,0 +1,433 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
|
||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||
ifeq ($(HOST_ARCH),aarch64)
|
||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
||||
HOST_ARCH := sbsa
|
||||
TARGET_ARCH := sbsa
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),sbsa)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
NVCCFLAGS += --qpp-config 5.4.0,gcc_ntoaarch64le
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||
LDFLAGS += -lsocket
|
||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
||||
ifdef TARGET_OVERRIDE
|
||||
LDFLAGS += -lslog2
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_FS),)
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
||||
CCFLAGS += -I$(TARGET_FS)/../include
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
# This sample is not supported on Mac OSX
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
$(info >>> WARNING - EGLStream_CUDA_Interop is not supported on Mac OSX - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on ARMv7
|
||||
ifeq ($(TARGET_ARCH),armv7l)
|
||||
$(info >>> WARNING - EGLStream_CUDA_Interop is not supported on ARMv7 - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on android
|
||||
ifeq ($(TARGET_OS),android)
|
||||
$(info >>> WARNING - EGLStream_CUDA_Interop is not supported on android - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
# Makefile include to help find EGL Libraries
|
||||
include ./findegl.mk
|
||||
|
||||
# EGL specific libraries
|
||||
ifneq ($(TARGET_OS),darwin)
|
||||
LIBRARIES += -lEGL
|
||||
endif
|
||||
|
||||
#Detect if installed version of GCC supports required C++11
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
empty :=
|
||||
space := $(empty) $(empty)
|
||||
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
|
||||
#Create version number without "."
|
||||
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
|
||||
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
|
||||
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
|
||||
# Make sure the version number has at least 3 decimals
|
||||
GCCVERSION += 00
|
||||
# Remove spaces from the version number
|
||||
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
|
||||
#$(warning $(GCCVERSION))
|
||||
|
||||
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 51000)
|
||||
|
||||
ifeq ($(IS_MIN_VERSION), 1)
|
||||
$(info >>> GCC Version is greater or equal to 5.1.0 <<<)
|
||||
else
|
||||
$(info >>> Waiving build. Minimum GCC version required is 5.1.0<<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
|
||||
else
|
||||
ifeq ($(TARGET_ARCH),x86_64)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
|
||||
ifdef TARGET_OVERRIDE
|
||||
CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(HOST_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
|
||||
endif
|
||||
|
||||
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
|
||||
ifeq ("$(CUDALIB)","")
|
||||
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
else
|
||||
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
|
||||
LIBRARIES += -L$(CUDALIB) -lcuda
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
# None of these names is a file produced by the build. Declaring them phony
# keeps a stray file called e.g. "clean" or "build" from silently disabling
# the target.
.PHONY: all build check.deps run clean clobber

# Object files that make up the sample executable.
OBJECTS := cuda_consumer.o cuda_producer.o eglstrm_common.o main.o

# Default target.
all: build

build: EGLStream_CUDA_Interop

# Report whether the sample was waived by the dependency checks made while
# this Makefile was parsed (SAMPLE_ENABLED is set to 0 by those checks).
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Compile each object from the .cpp file of the same name. When the sample is
# waived, EXEC is set to an echo command, so the line is printed, not run.
$(OBJECTS): %.o: %.cpp
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the executable, then copy it into the shared per-architecture bin tree.
EGLStream_CUDA_Interop: $(OBJECTS)
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

run: build
	$(EXEC) ./EGLStream_CUDA_Interop

# Remove the local build products and the copy placed in the bin tree.
clean:
	rm -f EGLStream_CUDA_Interop $(OBJECTS)
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/EGLStream_CUDA_Interop

clobber: clean
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user