mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 17:29:18 +08:00
Add and update samples for cuda 10.2 support
This commit is contained in:
parent
489d9f7b1f
commit
6be514679b
160
Common/dynlink_d3d11.h
Normal file
160
Common/dynlink_d3d11.h
Normal file
|
@ -0,0 +1,160 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//--------------------------------------------------------------------------------------
|
||||
// File: dynlink_d3d11.h
|
||||
//
|
||||
// Shortcut macros and functions for using DX objects
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved
|
||||
//--------------------------------------------------------------------------------------
|
||||
|
||||
#ifndef _DYNLINK_D3D11_H_
|
||||
#define _DYNLINK_D3D11_H_
|
||||
|
||||
// Standard Windows includes
|
||||
#include <windows.h>
|
||||
#include <initguid.h>
|
||||
#include <assert.h>
|
||||
#include <wchar.h>
|
||||
#include <mmsystem.h>
|
||||
#include <commctrl.h> // for InitCommonControls()
|
||||
#include <shellapi.h> // for ExtractIcon()
|
||||
#include <new.h> // for placement new
|
||||
#include <shlobj.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// CRT's memory leak detection
|
||||
#if defined(DEBUG) || defined(_DEBUG)
|
||||
#include <crtdbg.h>
|
||||
#endif
|
||||
|
||||
// Direct3D 11 includes
|
||||
#include <dxgi.h>
|
||||
#include <d3d11.h>
|
||||
// #include <..\Samples\C++\Effects11\Inc\d3dx11effect.h>
|
||||
|
||||
// XInput includes
|
||||
#include <xinput.h>
|
||||
|
||||
// strsafe.h deprecates old unsecure string functions. If you
|
||||
// really do not want it to (not recommended), then uncomment the next line
|
||||
//#define STRSAFE_NO_DEPRECATE
|
||||
|
||||
#ifndef STRSAFE_NO_DEPRECATE
|
||||
#pragma deprecated("strncpy")
|
||||
#pragma deprecated("wcsncpy")
|
||||
#pragma deprecated("_tcsncpy")
|
||||
#pragma deprecated("wcsncat")
|
||||
#pragma deprecated("strncat")
|
||||
#pragma deprecated("_tcsncat")
|
||||
#endif
|
||||
|
||||
#pragma warning( disable : 4996 ) // disable deprecated warning
|
||||
#include <strsafe.h>
|
||||
#pragma warning( default : 4996 )
|
||||
|
||||
typedef HRESULT(WINAPI *LPCREATEDXGIFACTORY)(REFIID, void **);
|
||||
typedef HRESULT(WINAPI *LPD3D11CREATEDEVICEANDSWAPCHAIN)(__in_opt IDXGIAdapter *pAdapter, D3D_DRIVER_TYPE DriverType, HMODULE Software, UINT Flags, __in_ecount_opt(FeatureLevels) CONST D3D_FEATURE_LEVEL *pFeatureLevels, UINT FeatureLevels, UINT SDKVersion, __in_opt CONST DXGI_SWAP_CHAIN_DESC *pSwapChainDesc, __out_opt IDXGISwapChain **ppSwapChain, __out_opt ID3D11Device **ppDevice, __out_opt D3D_FEATURE_LEVEL *pFeatureLevel, __out_opt ID3D11DeviceContext **ppImmediateContext);
|
||||
typedef HRESULT(WINAPI *LPD3D11CREATEDEVICE)(IDXGIAdapter *, D3D_DRIVER_TYPE, HMODULE, UINT32, D3D_FEATURE_LEVEL *, UINT, UINT32, ID3D11Device **, D3D_FEATURE_LEVEL *, ID3D11DeviceContext **);
|
||||
|
||||
static HMODULE s_hModDXGI = NULL;
|
||||
static LPCREATEDXGIFACTORY sFnPtr_CreateDXGIFactory = NULL;
|
||||
static HMODULE s_hModD3D11 = NULL;
|
||||
static LPD3D11CREATEDEVICE sFnPtr_D3D11CreateDevice = NULL;
|
||||
static LPD3D11CREATEDEVICEANDSWAPCHAIN sFnPtr_D3D11CreateDeviceAndSwapChain = NULL;
|
||||
|
||||
// unload the D3D10 DLLs
|
||||
static bool dynlinkUnloadD3D11API(void)
|
||||
{
|
||||
if (s_hModDXGI)
|
||||
{
|
||||
FreeLibrary(s_hModDXGI);
|
||||
s_hModDXGI = NULL;
|
||||
}
|
||||
|
||||
if (s_hModD3D11)
|
||||
{
|
||||
FreeLibrary(s_hModD3D11);
|
||||
s_hModD3D11 = NULL;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Dynamically load the D3D11 DLLs loaded and map the function pointers
|
||||
static bool dynlinkLoadD3D11API(void)
|
||||
{
|
||||
// If both modules are non-NULL, this function has already been called. Note
|
||||
// that this doesn't guarantee that all ProcAddresses were found.
|
||||
if (s_hModD3D11 != NULL && s_hModDXGI != NULL)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#if 1
|
||||
// This may fail if Direct3D 11 isn't installed
|
||||
s_hModD3D11 = LoadLibrary("d3d11.dll");
|
||||
|
||||
if (s_hModD3D11 != NULL)
|
||||
{
|
||||
sFnPtr_D3D11CreateDevice = (LPD3D11CREATEDEVICE)GetProcAddress(s_hModD3D11, "D3D11CreateDevice");
|
||||
sFnPtr_D3D11CreateDeviceAndSwapChain = (LPD3D11CREATEDEVICEANDSWAPCHAIN)GetProcAddress(s_hModD3D11, "D3D11CreateDeviceAndSwapChain");
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("\nLoad d3d11.dll failed\n");
|
||||
fflush(0);
|
||||
}
|
||||
|
||||
if (!sFnPtr_CreateDXGIFactory)
|
||||
{
|
||||
s_hModDXGI = LoadLibrary("dxgi.dll");
|
||||
|
||||
if (s_hModDXGI)
|
||||
{
|
||||
sFnPtr_CreateDXGIFactory = (LPCREATEDXGIFACTORY)GetProcAddress(s_hModDXGI, "CreateDXGIFactory1");
|
||||
}
|
||||
|
||||
return (s_hModDXGI != NULL) && (s_hModD3D11 != NULL);
|
||||
}
|
||||
|
||||
return (s_hModD3D11 != NULL);
|
||||
#else
|
||||
sFnPtr_D3D11CreateDevice = (LPD3D11CREATEDEVICE)D3D11CreateDeviceAndSwapChain;
|
||||
sFnPtr_D3D11CreateDeviceAndSwapChain = (LPD3D11CREATEDEVICEANDSWAPCHAIN)D3D11CreateDeviceAndSwapChain;
|
||||
//sFnPtr_D3DX11CreateEffectFromMemory = ( LPD3DX11CREATEEFFECTFROMMEMORY )D3DX11CreateEffectFromMemory;
|
||||
sFnPtr_D3DX11CompileFromMemory = (LPD3DX11COMPILEFROMMEMORY)D3DX11CompileFromMemory;
|
||||
sFnPtr_CreateDXGIFactory = (LPCREATEDXGIFACTORY)CreateDXGIFactory;
|
||||
return true;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -579,24 +579,12 @@ static const char *_cudaGetErrorEnum(NppStatus error) {
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef __DRIVER_TYPES_H__
|
||||
#ifndef DEVICE_RESET
|
||||
#define DEVICE_RESET cudaDeviceReset();
|
||||
#endif
|
||||
#else
|
||||
#ifndef DEVICE_RESET
|
||||
#define DEVICE_RESET
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
void check(T result, char const *const func, const char *const file,
|
||||
int const line) {
|
||||
if (result) {
|
||||
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
|
||||
static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
|
||||
DEVICE_RESET
|
||||
// Make sure we call CUDA Device Reset before exiting
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
@ -619,7 +607,6 @@ inline void __getLastCudaError(const char *errorMessage, const char *file,
|
|||
" %s : (%d) %s.\n",
|
||||
file, line, errorMessage, static_cast<int>(err),
|
||||
cudaGetErrorString(err));
|
||||
DEVICE_RESET
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
@ -696,6 +683,50 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
|
|||
major, minor, nGpuArchCoresPerSM[index - 1].Cores);
|
||||
return nGpuArchCoresPerSM[index - 1].Cores;
|
||||
}
|
||||
|
||||
inline const char* _ConvertSMVer2ArchName(int major, int minor) {
  // Maps an SM version to the name of the GPU architecture it belongs to.
  // Each entry stores the version as 0xMm (hexadecimal notation), where
  // M = SM major version and m = SM minor version.
  typedef struct {
    int SM;
    const char* name;
  } ArchNameEntry;

  // The table is terminated by an entry whose SM field is -1.
  ArchNameEntry archTable[] = {
      {0x30, "Kepler"},  {0x32, "Kepler"},  {0x35, "Kepler"},
      {0x37, "Kepler"},  {0x50, "Maxwell"}, {0x52, "Maxwell"},
      {0x53, "Maxwell"}, {0x60, "Pascal"},  {0x61, "Pascal"},
      {0x62, "Pascal"},  {0x70, "Volta"},   {0x72, "Xavier"},
      {0x75, "Turing"},  {-1, "Graphics Device"}};

  const int wanted = (major << 4) + minor;

  int slot = 0;
  for (; archTable[slot].SM != -1; ++slot) {
    if (archTable[slot].SM == wanted) {
      return archTable[slot].name;
    }
  }

  // No match: `slot` now sits on the terminator, so fall back to the entry
  // just before it (the last real architecture in the table) and say so.
  const char* fallback = archTable[slot - 1].name;
  printf(
      "MapSMtoArchName for SM %d.%d is undefined."
      "  Default to use %s\n" + 0,
      major, minor, fallback);
  return fallback;
}
|
||||
// end of GPU Architecture definitions
|
||||
|
||||
#ifdef __CUDA_RUNTIME_H__
|
||||
|
@ -727,23 +758,24 @@ inline int gpuDeviceInit(int devID) {
|
|||
return -devID;
|
||||
}
|
||||
|
||||
cudaDeviceProp deviceProp;
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
|
||||
|
||||
if (deviceProp.computeMode == cudaComputeModeProhibited) {
|
||||
int computeMode = -1, major = 0, minor = 0;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
|
||||
if (computeMode == cudaComputeModeProhibited) {
|
||||
fprintf(stderr,
|
||||
"Error: device is running in <Compute Mode "
|
||||
"Prohibited>, no threads can use cudaSetDevice().\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (deviceProp.major < 1) {
|
||||
if (major < 1) {
|
||||
fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaSetDevice(devID));
|
||||
printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
|
||||
printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, _ConvertSMVer2ArchName(major, minor));
|
||||
|
||||
return devID;
|
||||
}
|
||||
|
@ -756,7 +788,6 @@ inline int gpuGetMaxGflopsDeviceId() {
|
|||
int devices_prohibited = 0;
|
||||
|
||||
uint64_t max_compute_perf = 0;
|
||||
cudaDeviceProp deviceProp;
|
||||
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
||||
|
||||
if (device_count == 0) {
|
||||
|
@ -770,20 +801,24 @@ inline int gpuGetMaxGflopsDeviceId() {
|
|||
current_device = 0;
|
||||
|
||||
while (current_device < device_count) {
|
||||
cudaGetDeviceProperties(&deviceProp, current_device);
|
||||
int computeMode = -1, major = 0, minor = 0;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
|
||||
|
||||
// If this GPU is not running on Compute Mode prohibited,
|
||||
// then we can add it to the list
|
||||
if (deviceProp.computeMode != cudaComputeModeProhibited) {
|
||||
if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
|
||||
if (computeMode != cudaComputeModeProhibited) {
|
||||
if (major == 9999 && minor == 9999) {
|
||||
sm_per_multiproc = 1;
|
||||
} else {
|
||||
sm_per_multiproc =
|
||||
_ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
|
||||
_ConvertSMVer2Cores(major, minor);
|
||||
}
|
||||
|
||||
uint64_t compute_perf = (uint64_t)deviceProp.multiProcessorCount *
|
||||
sm_per_multiproc * deviceProp.clockRate;
|
||||
int multiProcessorCount = 0, clockRate = 0;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device));
|
||||
uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;
|
||||
|
||||
if (compute_perf > max_compute_perf) {
|
||||
max_compute_perf = compute_perf;
|
||||
|
@ -808,7 +843,6 @@ inline int gpuGetMaxGflopsDeviceId() {
|
|||
|
||||
// Initialization code to find the best CUDA Device
|
||||
inline int findCudaDevice(int argc, const char **argv) {
|
||||
cudaDeviceProp deviceProp;
|
||||
int devID = 0;
|
||||
|
||||
// If the command-line has a device number specified, use it
|
||||
|
@ -830,9 +864,12 @@ inline int findCudaDevice(int argc, const char **argv) {
|
|||
// Otherwise pick the device with highest Gflops/s
|
||||
devID = gpuGetMaxGflopsDeviceId();
|
||||
checkCudaErrors(cudaSetDevice(devID));
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
|
||||
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID,
|
||||
deviceProp.name, deviceProp.major, deviceProp.minor);
|
||||
int major = 0, minor = 0;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
|
||||
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
|
||||
devID, _ConvertSMVer2ArchName(major, minor), major, minor);
|
||||
|
||||
}
|
||||
|
||||
return devID;
|
||||
|
@ -843,7 +880,6 @@ inline int findIntegratedGPU() {
|
|||
int device_count = 0;
|
||||
int devices_prohibited = 0;
|
||||
|
||||
cudaDeviceProp deviceProp;
|
||||
checkCudaErrors(cudaGetDeviceCount(&device_count));
|
||||
|
||||
if (device_count == 0) {
|
||||
|
@ -853,17 +889,19 @@ inline int findIntegratedGPU() {
|
|||
|
||||
// Find the integrated GPU which is compute capable
|
||||
while (current_device < device_count) {
|
||||
cudaGetDeviceProperties(&deviceProp, current_device);
|
||||
|
||||
int computeMode = -1, integrated = -1;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device));
|
||||
// If GPU is integrated and is not running on Compute Mode prohibited,
|
||||
// then cuda can map to GLES resource
|
||||
if (deviceProp.integrated &&
|
||||
(deviceProp.computeMode != cudaComputeModeProhibited)) {
|
||||
if (integrated && (computeMode != cudaComputeModeProhibited)) {
|
||||
checkCudaErrors(cudaSetDevice(current_device));
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
|
||||
|
||||
int major = 0, minor = 0;
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
|
||||
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
|
||||
current_device, deviceProp.name, deviceProp.major,
|
||||
deviceProp.minor);
|
||||
current_device, _ConvertSMVer2ArchName(major, minor), major, minor);
|
||||
|
||||
return current_device;
|
||||
} else {
|
||||
|
@ -885,19 +923,18 @@ inline int findIntegratedGPU() {
|
|||
|
||||
// General check for CUDA GPU SM Capabilities
|
||||
inline bool checkCudaCapabilities(int major_version, int minor_version) {
|
||||
cudaDeviceProp deviceProp;
|
||||
deviceProp.major = 0;
|
||||
deviceProp.minor = 0;
|
||||
int dev;
|
||||
int major = 0, minor = 0;
|
||||
|
||||
checkCudaErrors(cudaGetDevice(&dev));
|
||||
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));
|
||||
|
||||
if ((deviceProp.major > major_version) ||
|
||||
(deviceProp.major == major_version &&
|
||||
deviceProp.minor >= minor_version)) {
|
||||
if ((major > major_version) ||
|
||||
(major == major_version &&
|
||||
minor >= minor_version)) {
|
||||
printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
|
||||
deviceProp.name, deviceProp.major, deviceProp.minor);
|
||||
_ConvertSMVer2ArchName(major, minor), major, minor);
|
||||
return true;
|
||||
} else {
|
||||
printf(
|
||||
|
|
|
@ -29,16 +29,11 @@
|
|||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info)
|
||||
{
|
||||
int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info) {
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
info->size = sz;
|
||||
info->shmHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
|
||||
NULL,
|
||||
PAGE_READWRITE,
|
||||
0,
|
||||
(DWORD)sz,
|
||||
name);
|
||||
info->shmHandle = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
|
||||
PAGE_READWRITE, 0, (DWORD)sz, name);
|
||||
if (info->shmHandle == 0) {
|
||||
return GetLastError();
|
||||
}
|
||||
|
@ -73,8 +68,7 @@ int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info)
|
|||
#endif
|
||||
}
|
||||
|
||||
int sharedMemoryOpen(const char *name, size_t sz, sharedMemoryInfo *info)
|
||||
{
|
||||
int sharedMemoryOpen(const char *name, size_t sz, sharedMemoryInfo *info) {
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
info->size = sz;
|
||||
|
||||
|
@ -106,8 +100,7 @@ int sharedMemoryOpen(const char *name, size_t sz, sharedMemoryInfo *info)
|
|||
#endif
|
||||
}
|
||||
|
||||
void sharedMemoryClose(sharedMemoryInfo *info)
|
||||
{
|
||||
void sharedMemoryClose(sharedMemoryInfo *info) {
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
if (info->addr) {
|
||||
UnmapViewOfFile(info->addr);
|
||||
|
@ -125,8 +118,7 @@ void sharedMemoryClose(sharedMemoryInfo *info)
|
|||
#endif
|
||||
}
|
||||
|
||||
int spawnProcess(Process *process, const char *app, char * const *args)
|
||||
{
|
||||
int spawnProcess(Process *process, const char *app, char *const *args) {
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
STARTUPINFO si = {0};
|
||||
BOOL status;
|
||||
|
@ -140,7 +132,8 @@ int spawnProcess(Process *process, const char *app, char * const *args)
|
|||
args++;
|
||||
}
|
||||
|
||||
status = CreateProcess(app, LPSTR(arg_string.c_str()), NULL, NULL, FALSE, 0, NULL, NULL, &si, process);
|
||||
status = CreateProcess(app, LPSTR(arg_string.c_str()), NULL, NULL, FALSE, 0,
|
||||
NULL, NULL, &si, process);
|
||||
|
||||
return status ? 0 : GetLastError();
|
||||
#else
|
||||
|
@ -149,16 +142,14 @@ int spawnProcess(Process *process, const char *app, char * const *args)
|
|||
if (0 > execvp(app, args)) {
|
||||
return errno;
|
||||
}
|
||||
}
|
||||
else if (*process < 0) {
|
||||
} else if (*process < 0) {
|
||||
return errno;
|
||||
}
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int waitProcess(Process *process)
|
||||
{
|
||||
int waitProcess(Process *process) {
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
DWORD exitCode;
|
||||
WaitForSingleObject(process->hProcess, INFINITE);
|
||||
|
@ -176,3 +167,396 @@ int waitProcess(Process *process)
|
|||
return WEXITSTATUS(status);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__linux__)
|
||||
// Creates the server-side UNIX datagram socket and binds it to `name`.
//
// `handle` always receives a newly allocated ipcHandle; on failure its
// socket stays -1 and its socketName stays NULL. `processes` is not used by
// this implementation. Returns 0 on success and -1 on failure.
int ipcCreateSocket(ipcHandle *&handle, const char *name,
                    const std::vector<Process> &processes) {
  int server_fd;
  struct sockaddr_un servaddr;

  (void)processes;

  handle = new ipcHandle;
  memset(handle, 0, sizeof(*handle));
  handle->socket = -1;
  handle->socketName = NULL;

  // Creating socket file descriptor. socket() signals failure with -1;
  // 0 is a perfectly valid descriptor.
  if ((server_fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
    perror("IPC failure: Socket creation failed");
    return -1;
  }

  // Remove any stale socket file left behind under the same name.
  unlink(name);
  bzero(&servaddr, sizeof(servaddr));
  servaddr.sun_family = AF_UNIX;

  size_t len = strlen(name);
  if (len > (sizeof(servaddr.sun_path) - 1)) {
    // Not a system-call failure, so errno carries no information here.
    fprintf(stderr,
            "IPC failure: Cannot bind provided name to socket. Name too large\n");
    close(server_fd);
    return -1;
  }

  // servaddr was zeroed above, so the copied path stays NUL-terminated.
  memcpy(servaddr.sun_path, name, len);

  if (bind(server_fd, (struct sockaddr *)&servaddr, SUN_LEN(&servaddr)) < 0) {
    perror("IPC failure: Binding socket failed");
    close(server_fd);
    return -1;
  }

  handle->socketName = new char[len + 1];
  memcpy(handle->socketName, name, len + 1);
  handle->socket = server_fd;
  return 0;
}
|
||||
|
||||
// Creates the client-side UNIX datagram socket and binds it to a name
// derived from the calling process id.
//
// `handle` always receives a newly allocated ipcHandle; on failure its
// socket stays -1 and its socketName stays NULL. Returns 0 on success and
// -1 on failure.
int ipcOpenSocket(ipcHandle *&handle) {
  int sock = 0;
  struct sockaddr_un cliaddr;

  handle = new ipcHandle;
  memset(handle, 0, sizeof(*handle));
  // Do not leave descriptor 0 (stdin) in the handle if we fail below.
  handle->socket = -1;
  handle->socketName = NULL;

  if ((sock = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
    perror("IPC failure:Socket creation error");
    return -1;
  }

  bzero(&cliaddr, sizeof(cliaddr));
  cliaddr.sun_family = AF_UNIX;

  // Create unique name for the socket. A 32-bit value needs up to 10 digits
  // plus the terminator, so the buffer is sized with room to spare and the
  // write is bounded.
  char temp[16];
  snprintf(temp, sizeof(temp), "%u", (unsigned int)getpid());

  strcpy(cliaddr.sun_path, temp);
  if (bind(sock, (struct sockaddr *)&cliaddr, sizeof(cliaddr)) < 0) {
    perror("IPC failure: Binding socket failed");
    close(sock);
    return -1;
  }

  handle->socket = sock;
  handle->socketName = new char[strlen(temp) + 1];
  strcpy(handle->socketName, temp);

  return 0;
}
|
||||
|
||||
// Releases everything owned by `handle`: the socket file (if a name was
// recorded), the descriptor and the handle itself.
// Returns -1 when `handle` is NULL, 0 otherwise.
int ipcCloseSocket(ipcHandle *handle) {
  if (handle == NULL) {
    return -1;
  }

  char *boundName = handle->socketName;
  if (boundName != NULL) {
    unlink(boundName);
    delete[] boundName;
  }

  close(handle->socket);
  delete handle;
  return 0;
}
|
||||
|
||||
// Receives one file descriptor sent as SCM_RIGHTS ancillary data on
// handle->socket and stores it in *shHandle.
//
// Returns 0 on success. Returns -1 when recvmsg() fails or yields no data,
// or when the control message is missing, has an unexpected length, or is
// not an SOL_SOCKET/SCM_RIGHTS message.
int ipcRecvShareableHandle(ipcHandle *handle, ShareableHandle *shHandle) {
  struct msghdr msg = {0};
  struct iovec iov[1];

  // Union to guarantee alignment requirements for control array
  union {
    struct cmsghdr cm;
    char control[CMSG_SPACE(sizeof(int))];
  } control_un;

  // One byte of ordinary payload accompanies the ancillary data.
  char dummy_buffer[1];

  msg.msg_control = control_un.control;
  msg.msg_controllen = sizeof(control_un.control);

  iov[0].iov_base = (void *)dummy_buffer;
  iov[0].iov_len = sizeof(dummy_buffer);

  msg.msg_iov = iov;
  msg.msg_iovlen = 1;

  if (recvmsg(handle->socket, &msg, 0) <= 0) {
    perror("IPC failure: Receiving data over socket failed");
    return -1;
  }

  struct cmsghdr *cmptr = CMSG_FIRSTHDR(&msg);
  if (cmptr == NULL || cmptr->cmsg_len != CMSG_LEN(sizeof(int))) {
    return -1;
  }

  if ((cmptr->cmsg_level != SOL_SOCKET) || (cmptr->cmsg_type != SCM_RIGHTS)) {
    return -1;
  }

  // CMSG_DATA is not guaranteed to be suitably aligned for an int, so copy
  // the descriptor out byte-wise.
  int receivedfd;
  memmove(&receivedfd, CMSG_DATA(cmptr), sizeof(receivedfd));
  *(int *)shHandle = receivedfd;

  return 0;
}
|
||||
|
||||
// Reads one datagram of at most `size` bytes from the server socket into
// `data`. The sender's address is collected but not used.
// Returns 0 on success and -1 when recvfrom() fails.
int ipcRecvDataFromClient(ipcHandle *serverHandle, void *data, size_t size) {
  struct sockaddr_un senderAddr;
  socklen_t senderLen = sizeof(senderAddr);

  const ssize_t received =
      recvfrom(serverHandle->socket, data, size, 0,
               (struct sockaddr *)&senderAddr, &senderLen);

  if (received != -1) {
    return 0;
  }

  perror("IPC failure: Receiving data over socket failed");
  return -1;
}
|
||||
|
||||
// Sends `size` bytes from `data` as one datagram to the UNIX socket bound
// to `serverName`.
//
// Returns 0 on success and -1 when sendto() fails, so that callers wrapping
// the call in checkIpcErrors() actually notice the failure.
int ipcSendDataToServer(ipcHandle *handle, const char *serverName,
                        const void *data, size_t size) {
  ssize_t sendResult;
  struct sockaddr_un serveraddr;

  bzero(&serveraddr, sizeof(serveraddr));
  serveraddr.sun_family = AF_UNIX;
  // The structure was zeroed and at most sizeof - 1 bytes are copied, so the
  // path is always NUL-terminated.
  strncpy(serveraddr.sun_path, serverName, sizeof(serveraddr.sun_path) - 1);

  sendResult = sendto(handle->socket, data, size, 0,
                      (struct sockaddr *)&serveraddr, sizeof(serveraddr));
  if (sendResult < 0) {
    perror("IPC failure: Sending data over socket failed");
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Sends shareableHandles[data] to the client socket named after the process
// id `process`, passing the descriptor as SCM_RIGHTS ancillary data.
//
// Returns 0 on success and -1 when `data` is not a valid index or when
// sendmsg() fails.
int ipcSendShareableHandle(ipcHandle *handle,
                           const std::vector<ShareableHandle> &shareableHandles,
                           Process process, int data) {
  if (data < 0 || (size_t)data >= shareableHandles.size()) {
    fprintf(stderr, "IPC failure: Shareable handle index %d is out of range\n",
            data);
    return -1;
  }

  struct msghdr msg;
  struct iovec iov[1];

  // Union to guarantee alignment requirements for control array
  union {
    struct cmsghdr cm;
    char control[CMSG_SPACE(sizeof(int))];
  } control_un;

  // Start from fully zeroed structures so no indeterminate field or padding
  // byte is handed to sendmsg().
  memset(&msg, 0, sizeof(msg));
  memset(&control_un, 0, sizeof(control_un));

  // Construct client address to send this shareable handle to. The name must
  // be formatted the same way the client formats its own process id.
  struct sockaddr_un cliaddr;
  bzero(&cliaddr, sizeof(cliaddr));
  cliaddr.sun_family = AF_UNIX;
  char temp[16];  // up to 10 digits for a 32-bit value, plus the terminator
  snprintf(temp, sizeof(temp), "%u", (unsigned int)process);
  strcpy(cliaddr.sun_path, temp);

  // Send corresponding shareable handle to the client
  int sendfd = (int)shareableHandles[data];

  msg.msg_control = control_un.control;
  msg.msg_controllen = sizeof(control_un.control);

  struct cmsghdr *cmptr = CMSG_FIRSTHDR(&msg);
  cmptr->cmsg_len = CMSG_LEN(sizeof(int));
  cmptr->cmsg_level = SOL_SOCKET;
  cmptr->cmsg_type = SCM_RIGHTS;

  memmove(CMSG_DATA(cmptr), &sendfd, sizeof(sendfd));

  msg.msg_name = (void *)&cliaddr;
  msg.msg_namelen = sizeof(struct sockaddr_un);

  // One byte of ordinary payload accompanies the ancillary data.
  iov[0].iov_base = (void *)"";
  iov[0].iov_len = 1;
  msg.msg_iov = iov;
  msg.msg_iovlen = 1;

  ssize_t sendResult = sendmsg(handle->socket, &msg, 0);
  if (sendResult <= 0) {
    perror("IPC failure: Sending data over socket failed");
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Delivers every shareable handle to every process: handle 0 goes to all
// processes first, then handle 1, and so on. A failed send terminates the
// program through checkIpcErrors(). Returns 0.
int ipcSendShareableHandles(
    ipcHandle *handle, const std::vector<ShareableHandle> &shareableHandles,
    const std::vector<Process> &processes) {
  int handleIdx = 0;
  while (handleIdx < shareableHandles.size()) {
    int procIdx = 0;
    while (procIdx < processes.size()) {
      checkIpcErrors(ipcSendShareableHandle(handle, shareableHandles,
                                            processes[procIdx], handleIdx));
      ++procIdx;
    }
    ++handleIdx;
  }
  return 0;
}
|
||||
|
||||
// Fills each slot of `shareableHandles`, in order, with one handle received
// over the socket. The caller sizes the vector to the number of handles it
// expects. A failed receive terminates the program through
// checkIpcErrors(). Returns 0.
int ipcRecvShareableHandles(ipcHandle *handle,
                            std::vector<ShareableHandle> &shareableHandles) {
  int slot = 0;
  while (slot < shareableHandles.size()) {
    ShareableHandle *destination = &shareableHandles[slot];
    checkIpcErrors(ipcRecvShareableHandle(handle, destination));
    ++slot;
  }
  return 0;
}
|
||||
|
||||
// Closes a received shareable handle and returns the result of close().
int ipcCloseShareableHandle(ShareableHandle shHandle) {
  const int closeStatus = close(shHandle);
  return closeStatus;
}
|
||||
|
||||
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
// Generic name to build individual Mailslot names by appending process ids.
|
||||
LPTSTR SlotName = (LPTSTR)TEXT("\\\\.\\mailslot\\sample_mailslot_");
|
||||
|
||||
// Parent side: opens, for writing, the mailslot of every child process and
// stores one handle per child (same order as `processes`) in a newly
// allocated ipcHandle. Each mailslot name is SlotName followed by the
// child's process id in decimal. `name` is not used by this implementation.
// Returns 0 on success, -1 as soon as one mailslot cannot be opened.
int ipcCreateSocket(ipcHandle *&handle, const char *name,
                    const std::vector<Process> &processes) {
  handle = new ipcHandle;
  handle->hMailslot.resize(processes.size());

  // Open Mailslots of all clients and store respective handles.
  int child = 0;
  while (child < handle->hMailslot.size()) {
    char pidText[20];
    _itoa_s(processes[child].dwProcessId, pidText, 10);

    std::basic_string<TCHAR> targetSlot(SlotName);
    targetSlot += TEXT(pidText);

    HANDLE opened =
        CreateFile(TEXT(targetSlot.c_str()), GENERIC_WRITE, FILE_SHARE_READ,
                   (LPSECURITY_ATTRIBUTES)NULL, OPEN_EXISTING,
                   FILE_ATTRIBUTE_NORMAL, (HANDLE)NULL);
    if (opened == INVALID_HANDLE_VALUE) {
      printf("IPC failure: Opening Mailslot by CreateFile failed with %d\n",
             GetLastError());
      return -1;
    }

    handle->hMailslot[child] = opened;
    ++child;
  }
  return 0;
}
|
||||
|
||||
// Child side: creates this process's own mailslot, named SlotName followed
// by the current process id in decimal, and stores it as the only entry of
// a newly allocated ipcHandle. Reads on the mailslot wait forever.
// Returns 0 on success, -1 when the mailslot cannot be created.
int ipcOpenSocket(ipcHandle *&handle) {
  handle = new ipcHandle;

  char pidText[20];
  _itoa_s(GetCurrentProcessId(), pidText, 10);

  std::basic_string<TCHAR> ownSlotName(SlotName);
  ownSlotName += TEXT(pidText);

  HANDLE created =
      CreateMailslot((LPSTR)ownSlotName.c_str(), 0, MAILSLOT_WAIT_FOREVER,
                     (LPSECURITY_ATTRIBUTES)NULL);
  if (created == INVALID_HANDLE_VALUE) {
    printf("IPC failure: CreateMailslot failed for client with %d\n",
           GetLastError());
    return -1;
  }

  handle->hMailslot.push_back(created);
  return 0;
}
|
||||
|
||||
// Writes `sz` bytes from `data` to `mailslot` as a single message.
// Returns 0 on success and -1 when WriteFile() fails.
int ipcSendData(HANDLE mailslot, const void *data, size_t sz) {
  DWORD bytesWritten;

  if (WriteFile(mailslot, data, (DWORD)sz, &bytesWritten,
                (LPOVERLAPPED)NULL)) {
    return 0;
  }

  printf("IPC failure: WriteFile failed with %d.\n", GetLastError());
  return -1;
}
|
||||
|
||||
// Receives exactly one message of `sz` bytes from the handle's first
// mailslot into `data`.
//
// The read blocks until a message arrives instead of spinning on
// GetMailslotInfo(); this relies on the mailslot having been created with
// MAILSLOT_WAIT_FOREVER. Only one message is consumed per call, so messages
// that are already queued stay available for the following calls.
//
// Returns 0 on success, -1 when ReadFile() fails or delivers a message of
// unexpected size.
int ipcRecvData(ipcHandle *handle, void *data, size_t sz) {
  DWORD cbRead = 0;
  HANDLE mailslot = handle->hMailslot[0];

  if (!ReadFile(mailslot, data, (DWORD)sz, &cbRead, NULL)) {
    printf("IPC failure: ReadFile failed with %d.\n", GetLastError());
    return -1;
  }

  if ((size_t)cbRead != sz) {
    printf(
        "IPC failure: ReadFile didn't receive the expected number of bytes\n");
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Duplicates every shareable handle into every target process and sends
// each duplicated handle value to that process through its mailslot
// (handle->hMailslot[i] belongs to processes[i]).
//
// Returns 0 on success and -1 when a process cannot be opened or a handle
// cannot be duplicated. A failed mailslot write terminates the program
// through checkIpcErrors().
int ipcSendShareableHandles(
    ipcHandle *handle, const std::vector<ShareableHandle> &shareableHandles,
    const std::vector<Process> &processes) {
  // Send all shareable handles to every single process.
  for (int i = 0; i < processes.size(); i++) {
    HANDLE hProcess =
        OpenProcess(PROCESS_DUP_HANDLE, FALSE, processes[i].dwProcessId);
    // OpenProcess reports failure with NULL, not INVALID_HANDLE_VALUE.
    if (hProcess == NULL) {
      printf("IPC failure: OpenProcess failed (%d)\n", GetLastError());
      return -1;
    }

    for (int j = 0; j < shareableHandles.size(); j++) {
      HANDLE hDup = INVALID_HANDLE_VALUE;
      // Duplicate the handle into the target process's space
      if (!DuplicateHandle(GetCurrentProcess(), shareableHandles[j], hProcess,
                           &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS)) {
        // Report first: CloseHandle could overwrite the last-error value.
        printf("IPC failure: DuplicateHandle failed (%d)\n", GetLastError());
        CloseHandle(hProcess);
        return -1;
      }
      checkIpcErrors(ipcSendData(handle->hMailslot[i], &hDup, sizeof(HANDLE)));
    }
    CloseHandle(hProcess);
  }
  return 0;
}
|
||||
|
||||
// Fills every element of shareableHandles with a handle value obtained from
// ipcRecvData, one receive per element. The vector's current size decides
// how many handles are read. Returns 0; a -1 from ipcRecvData is handled by
// checkIpcErrors.
int ipcRecvShareableHandles(ipcHandle *handle,
                            std::vector<ShareableHandle> &shareableHandles) {
  const size_t count = shareableHandles.size();
  for (size_t slot = 0; slot < count; ++slot) {
    ShareableHandle *dst = &shareableHandles[slot];
    checkIpcErrors(ipcRecvData(handle, dst, sizeof(*dst)));
  }
  return 0;
}
|
||||
|
||||
// Closes every mailslot handle stored in the ipcHandle and then deletes the
// ipcHandle itself, so the pointer must not be used after this call.
// Always returns 0.
int ipcCloseSocket(ipcHandle *handle) {
  std::vector<HANDLE> &slots = handle->hMailslot;
  for (std::vector<HANDLE>::iterator it = slots.begin(); it != slots.end();
       ++it) {
    CloseHandle(*it);
  }
  delete handle;
  return 0;
}
|
||||
|
||||
// Releases a shareable handle with CloseHandle. The CloseHandle result is
// deliberately not inspected, so this function always returns 0.
int ipcCloseShareableHandle(ShareableHandle shHandle) {
  (void)CloseHandle(shHandle);
  return 0;
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -33,13 +33,26 @@
|
|||
#define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <tchar.h>
|
||||
#include <strsafe.h>
|
||||
#include <sddl.h>
|
||||
#include <aclapi.h>
|
||||
#include <winternl.h>
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <memory.h>
|
||||
#include <sys/un.h>
|
||||
#endif
|
||||
#include <vector>
|
||||
|
||||
typedef struct sharedMemoryInfo_st {
|
||||
void *addr;
|
||||
|
@ -68,4 +81,40 @@ int spawnProcess(Process *process, const char *app, char * const *args);
|
|||
|
||||
int waitProcess(Process *process);
|
||||
|
||||
// Terminates the process when an IPC helper reports failure (-1), printing
// the line and file of the failing call to stderr.
// The argument is parenthesized so that expressions containing
// lower-precedence operators are compared as a whole, and the body is
// wrapped in do { } while (0) so the macro behaves as one statement (safe
// inside an unbraced if/else). Invoke it with a trailing semicolon.
#define checkIpcErrors(ipcFuncResult)                              \
  do {                                                             \
    if ((ipcFuncResult) == -1) {                                   \
      fprintf(stderr, "Failure at %d %s\n", __LINE__, __FILE__);   \
      exit(EXIT_FAILURE);                                          \
    }                                                              \
  } while (0)
|
||||
|
||||
#if defined(__linux__)
// Linux flavour of the IPC state: an integer socket plus its name.
struct ipcHandle_st {
  int socket;
  char *socketName;
};
// On Linux a shareable handle is a plain int.
typedef int ShareableHandle;
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// Windows flavour of the IPC state: a list of mailslot handles.
struct ipcHandle_st {
  // A child holds a single handle; the parent holds one handle per child.
  std::vector<HANDLE> hMailslot;
};
// On Windows a shareable handle is a Win32 HANDLE.
typedef HANDLE ShareableHandle;
#endif
|
||||
|
||||
typedef struct ipcHandle_st ipcHandle;
|
||||
|
||||
int
|
||||
ipcCreateSocket(ipcHandle *&handle, const char *name, const std::vector<Process>& processes);
|
||||
|
||||
int
|
||||
ipcOpenSocket(ipcHandle *&handle);
|
||||
|
||||
int
|
||||
ipcCloseSocket(ipcHandle *handle);
|
||||
|
||||
int
|
||||
ipcRecvShareableHandles(ipcHandle *handle, std::vector<ShareableHandle>& shareableHandles);
|
||||
|
||||
int
|
||||
ipcSendShareableHandles(ipcHandle *handle, const std::vector<ShareableHandle>& shareableHandles, const std::vector<Process>& processes);
|
||||
|
||||
int
|
||||
ipcCloseShareableHandle(ShareableHandle shHandle);
|
||||
|
||||
#endif // HELPER_MULTIPROCESS_H
|
||||
|
|
|
@ -297,6 +297,19 @@ int writeBMPi(const char *filename, const unsigned char *d_RGB, int pitch,
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Reports whether pathname names an existing directory.
// Returns 1 when stat() succeeds and the file type is "directory";
// returns 0 when pathname is NULL, cannot be stat'ed, or names any other
// kind of file.
int inputDirExists(const char *pathname) {
  struct stat info;
  if (!pathname || stat(pathname, &info) != 0) {
    return 0;  // path is missing or cannot be queried
  }
  // Compare the whole file-type field. A bare `st_mode & S_IFDIR` test also
  // matches other file types whose encoding shares that bit (for example
  // block devices and sockets on POSIX systems).
#if defined(S_ISDIR)
  return S_ISDIR(info.st_mode) ? 1 : 0;
#else
  return ((info.st_mode & S_IFMT) == S_IFDIR) ? 1 : 0;
#endif
}
|
||||
|
||||
int readInput(const std::string &sInputPath,
|
||||
std::vector<std::string> &filelist) {
|
||||
int error_code = 1;
|
||||
|
@ -315,15 +328,18 @@ int readInput(const std::string &sInputPath,
|
|||
if (dir_handle) {
|
||||
error_code = 0;
|
||||
while ((dir = readdir(dir_handle)) != NULL) {
|
||||
if (dir->d_type == DT_REG) {
|
||||
std::string sFileName = sInputPath + dir->d_name;
|
||||
filelist.push_back(sFileName);
|
||||
} else if (dir->d_type == DT_DIR) {
|
||||
if (inputDirExists(sFileName.c_str()))
|
||||
{
|
||||
std::string sname = dir->d_name;
|
||||
if (sname != "." && sname != "..") {
|
||||
readInput(sInputPath + sname + "/", filelist);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
filelist.push_back(sFileName);
|
||||
}
|
||||
}
|
||||
closedir(dir_handle);
|
||||
} else {
|
||||
|
@ -360,18 +376,6 @@ int readInput(const std::string &sInputPath,
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Reports whether pathname names an existing directory.
// Returns 1 when stat() succeeds and the file type is "directory";
// returns 0 when pathname is NULL, cannot be stat'ed, or names any other
// kind of file.
int inputDirExists(const char *pathname) {
  struct stat info;
  if (!pathname || stat(pathname, &info) != 0) {
    return 0;  // path is missing or cannot be queried
  }
  // Compare the whole file-type field. A bare `st_mode & S_IFDIR` test also
  // matches other file types whose encoding shares that bit (for example
  // block devices and sockets on POSIX systems).
#if defined(S_ISDIR)
  return S_ISDIR(info.st_mode) ? 1 : 0;
#else
  return ((info.st_mode & S_IFMT) == S_IFDIR) ? 1 : 0;
#endif
}
|
||||
|
||||
int getInputDir(std::string &input_dir, const char *executable_path) {
|
||||
int found = 0;
|
||||
|
@ -399,6 +403,7 @@ int getInputDir(std::string &input_dir, const char *executable_path) {
|
|||
"../../../Samples/<executable_name>/images",
|
||||
"../../Samples/<executable_name>/images"};
|
||||
|
||||
|
||||
for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) {
|
||||
std::string pathname(searchPath[i]);
|
||||
size_t executable_name_pos = pathname.find("<executable_name>");
|
||||
|
|
124
Common/rendercheck_d3d11.cpp
Normal file
124
Common/rendercheck_d3d11.cpp
Normal file
|
@ -0,0 +1,124 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Utility funcs to wrap up saving a surface or the back buffer as a PPM file
|
||||
// In addition, wraps up a threshold comparison of two PPMs.
|
||||
//
|
||||
// These functions are designed to be used to implement an automated QA testing for SDK samples.
|
||||
//
|
||||
// Author: Bryan Dudash
|
||||
// Email: sdkfeedback@nvidia.com
|
||||
//
|
||||
// Copyright (c) NVIDIA Corporation. All rights reserved.
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <helper_functions.h>
|
||||
#include <rendercheck_d3d11.h>
|
||||
|
||||
// Saves the first render target currently bound to the device's immediate
// context as a PPM file.
//
//   pDevice   - device whose immediate context is queried.
//   zFileName - output file name, forwarded to ResourceToPPM.
//
// Returns the result of ResourceToPPM, or E_FAIL when the device, its
// immediate context, the bound render target or its resource is missing.
HRESULT CheckRenderD3D11::ActiveRenderTargetToPPM(ID3D11Device *pDevice, const char *zFileName)
{
    if (pDevice == NULL)
    {
        printf("ActiveRenderTargetToPPM: pDevice is NULL! Aborting...\n");
        return E_FAIL;
    }

    ID3D11DeviceContext *pDeviceCtxt = NULL;
    pDevice->GetImmediateContext(&pDeviceCtxt);

    if (pDeviceCtxt == NULL)
    {
        printf("ActiveRenderTargetToPPM: no immediate context! Aborting...\n");
        return E_FAIL;
    }

    ID3D11RenderTargetView *pRTV = NULL;
    pDeviceCtxt->OMGetRenderTargets(1,&pRTV,NULL);

    // Each getter hands back an interface with an added reference; release
    // it as soon as it is no longer needed so nothing leaks.
    pDeviceCtxt->Release();

    if (pRTV == NULL)
    {
        printf("ActiveRenderTargetToPPM: no render target is bound! Aborting...\n");
        return E_FAIL;
    }

    ID3D11Resource *pSourceResource = NULL;
    pRTV->GetResource(&pSourceResource);
    pRTV->Release();

    if (pSourceResource == NULL)
    {
        printf("ActiveRenderTargetToPPM: render target has no resource! Aborting...\n");
        return E_FAIL;
    }

    HRESULT hr = ResourceToPPM(pDevice,pSourceResource,zFileName);
    pSourceResource->Release();

    return hr;
}
|
||||
|
||||
// Copies a 2D texture resource into a CPU-readable staging texture and
// writes its contents to a PPM file through sdkSavePPM4ub.
//
//   pDevice   - device used to create the staging texture and to obtain the
//               immediate context.
//   pResource - resource to save; must report the TEXTURE2D dimension.
//   zFileName - output file name.
//
// Each row is copied as Width * 4 bytes, i.e. the code treats the texture
// as 4 bytes per pixel.
//
// Returns S_OK on success and E_FAIL when an argument is NULL, the resource
// is not a 2D texture, or the staging texture cannot be created or mapped.
HRESULT CheckRenderD3D11::ResourceToPPM(ID3D11Device *pDevice, ID3D11Resource *pResource, const char *zFileName)
{
    if (pDevice == NULL || pResource == NULL)
    {
        printf("SurfaceToPPM: NULL device or resource! Aborting...\n");
        return E_FAIL;
    }

    D3D11_RESOURCE_DIMENSION rType;
    pResource->GetType(&rType);

    if (rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D)
    {
        printf("SurfaceToPPM: pResource is not a 2D texture! Aborting...\n");
        return E_FAIL;
    }

    ID3D11Texture2D *pSourceTexture = (ID3D11Texture2D *)pResource;
    ID3D11Texture2D *pTargetTexture = NULL;

    // Same description as the source, but as a CPU-readable staging texture.
    D3D11_TEXTURE2D_DESC desc;
    pSourceTexture->GetDesc(&desc);
    desc.BindFlags = 0;
    desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    desc.Usage = D3D11_USAGE_STAGING;

    if (FAILED(pDevice->CreateTexture2D(&desc,NULL,&pTargetTexture)))
    {
        printf("SurfaceToPPM: Unable to create target Texture resoruce! Aborting... \n");
        return E_FAIL;
    }

    // The immediate context is returned with an added reference; it is
    // released on every path below.
    ID3D11DeviceContext *pDeviceCtxt = NULL;
    pDevice->GetImmediateContext(&pDeviceCtxt);

    if (pDeviceCtxt == NULL)
    {
        printf("SurfaceToPPM: Unable to get the immediate context! Aborting...\n");
        pTargetTexture->Release();
        return E_FAIL;
    }

    pDeviceCtxt->CopyResource(pTargetTexture,pSourceTexture);

    D3D11_MAPPED_SUBRESOURCE mappedTex2D;

    // Do not touch mappedTex2D.pData unless the mapping actually succeeded.
    if (FAILED(pDeviceCtxt->Map(pTargetTexture, 0, D3D11_MAP_READ,0,&mappedTex2D)))
    {
        printf("SurfaceToPPM: Unable to map the staging texture! Aborting...\n");
        pDeviceCtxt->Release();
        pTargetTexture->Release();
        return E_FAIL;
    }

    // Need to convert from dx pitch to pitch=width
    const size_t rowBytes = (size_t)desc.Width * 4;
    unsigned char *pPPMData = new unsigned char[rowBytes * desc.Height];
    const unsigned char *pMapped = (const unsigned char *)mappedTex2D.pData;

    for (unsigned int iHeight = 0; iHeight<desc.Height; iHeight++)
    {
        memcpy(pPPMData + (size_t)iHeight * rowBytes,
               pMapped + (size_t)iHeight * mappedTex2D.RowPitch,
               rowBytes);
    }

    pDeviceCtxt->Unmap(pTargetTexture, 0);
    pDeviceCtxt->Release();

    // Prepends the PPM header info and bumps byte data afterwards
    sdkSavePPM4ub(zFileName, pPPMData, desc.Width, desc.Height);

    delete [] pPPMData;
    pTargetTexture->Release();

    return S_OK;
}
|
||||
|
||||
bool CheckRenderD3D11::PPMvsPPM(const char *src_file, const char *ref_file, const char *exec_path,
|
||||
const float epsilon, const float threshold)
|
||||
{
|
||||
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
|
||||
|
||||
if (ref_file_path == NULL)
|
||||
{
|
||||
printf("CheckRenderD3D11::PPMvsPPM unable to find <%s> in <%s> Aborting comparison!\n", ref_file, exec_path);
|
||||
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n", ref_file);
|
||||
printf("Aborting comparison!\n");
|
||||
printf(" FAILURE!\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return sdkComparePPM(src_file,ref_file_path,epsilon,threshold,true) == true;
|
||||
}
|
52
Common/rendercheck_d3d11.h
Normal file
52
Common/rendercheck_d3d11.h
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef _RENDERCHECK_D3D11_H_
|
||||
#define _RENDERCHECK_D3D11_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <d3d11.h>
|
||||
|
||||
class CheckRenderD3D11
|
||||
{
|
||||
public:
|
||||
|
||||
CheckRenderD3D11() {}
|
||||
|
||||
static HRESULT ActiveRenderTargetToPPM(ID3D11Device *pDevice, const char *zFileName);
|
||||
static HRESULT ResourceToPPM(ID3D11Device *pDevice, ID3D11Resource *pResource, const char *zFileName);
|
||||
|
||||
static bool PPMvsPPM(const char *src_file, const char *ref_file, const char *exec_path,
|
||||
const float epsilon, const float threshold = 0.0f);
|
||||
};
|
||||
|
||||
#endif
|
57
README.md
57
README.md
|
@ -1,11 +1,21 @@
|
|||
# CUDA Samples
|
||||
|
||||
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 10.1 Update 2](https://developer.nvidia.com/cuda-downloads).
|
||||
Samples for CUDA Developers which demonstrate features in CUDA Toolkit. This version supports [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads).
|
||||
|
||||
## Release Notes
|
||||
|
||||
This section describes the release notes for the CUDA Samples on GitHub only.
|
||||
|
||||
### CUDA 10.2
|
||||
* Added `simpleD3D11`. Demonstrates CUDA-D3D11 External Resource Interoperability APIs for updating D3D11 buffers from CUDA and synchronization between D3D11 and CUDA with Keyed Mutexes.
|
||||
* Added `simpleDrvRuntime`. Demonstrates CUDA Driver and Runtime APIs working together to load fatbinary of a CUDA kernel.
|
||||
* Added `vectorAddMMAP`. Demonstrates how cuMemMap API allows the user to specify the physical properties of their memory while retaining the contiguous nature of their access.
|
||||
* Added `memMapIPCDrv`. Demonstrates Inter Process Communication using cuMemMap APIs.
|
||||
* Added `cudaNvSci`. Demonstrates CUDA-NvSciBuf/NvSciSync Interop.
|
||||
* Added `jacobiCudaGraphs`. Demonstrates Instantiated CUDA Graph Update with Jacobi Iterative Method using different approaches.
|
||||
* Added `cuSolverSp_LinearSolver`. Demonstrates cuSolverSP's LU, QR and Cholesky factorization.
|
||||
* Added `MersenneTwisterGP11213`. Demonstrates the Mersenne Twister random number generator GP11213 in cuRAND.
|
||||
|
||||
### CUDA 10.1 Update 2
|
||||
* Added `vulkanImageCUDA`. Demonstrates how to perform Vulkan image - CUDA Interop.
|
||||
* Added `nvJPEG_encoder`. Demonstrates encoding of jpeg images using NVJPEG Library.
|
||||
|
@ -59,7 +69,7 @@ This is the first release of CUDA Samples on GitHub:
|
|||
|
||||
### Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html), and the [Mac Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html).
|
||||
|
||||
### Getting the CUDA Samples
|
||||
|
@ -85,7 +95,6 @@ Each individual sample has its own set of solution files at:
|
|||
`<CUDA_SAMPLES_REPO>\Samples\<sample_dir>\`
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check [DirectX Dependencies](#directx) section for details."
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
|
@ -143,31 +152,36 @@ The samples makefiles can take advantage of certain options:
|
|||
#### Linux
|
||||
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** |
|
||||
---|---|---|---|
|
||||
**[simpleIPC](./Samples/simpleIPC)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[nvJPEG](./Samples/nvJPEG)** |
|
||||
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
|
||||
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** |
|
||||
**[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** |
|
||||
**[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
|
||||
**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
||||
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** | **[systemWideAtomics](./Samples/systemWideAtomics)** |
|
||||
**[simpleIPC](./Samples/simpleIPC)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[shfl_scan](./Samples/shfl_scan)** |
|
||||
**[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[nvJPEG](./Samples/nvJPEG)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** |
|
||||
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[cudaNvSci](./Samples/cudaNvSci)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** |
|
||||
**[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** |
|
||||
**[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** |
|
||||
**[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
||||
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||
**[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
|
||||
**[matrixMul](./Samples/matrixMul)** | **[systemWideAtomics](./Samples/systemWideAtomics)** |
|
||||
|
||||
#### Windows
|
||||
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleIPC](./Samples/simpleIPC)** |
|
||||
---|---|---|---|
|
||||
**[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[nvJPEG](./Samples/nvJPEG)** | **[simpleD3D12](./Samples/simpleD3D12)** |
|
||||
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
|
||||
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** |
|
||||
**[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** |
|
||||
**[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
|
||||
**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
||||
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** |
|
||||
**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
|
||||
**[nvJPEG](./Samples/nvJPEG)** | **[simpleD3D12](./Samples/simpleD3D12)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** |
|
||||
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** |
|
||||
**[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** |
|
||||
**[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** |
|
||||
**[simpleD3D11](./Samples/simpleD3D11)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
||||
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||
**[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
|
||||
**[matrixMul](./Samples/matrixMul)** |
|
||||
|
||||
#### Mac OSX
|
||||
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
|
||||
---|---|---|---|
|
||||
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
|
||||
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
|
||||
**[reduction](./Samples/reduction)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
|
||||
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** |
|
||||
**[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
|
||||
**[bandwidthTest](./Samples/bandwidthTest)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||
**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** |
|
||||
|
||||
## Dependencies
|
||||
|
@ -196,7 +210,7 @@ Some samples can only be run on a 64-bit operating system.
|
|||
|
||||
#### DirectX
|
||||
|
||||
DirectX is a collection of APIs designed to allow development of multimedia applications on Microsoft platforms. For Microsoft platforms, NVIDIA's CUDA Driver supports DirectX. Several CUDA Samples for Windows demonstrates CUDA-DirectX Interoperability, for building such samples one needs to install [Direct X SDK (June 2010 or newer)](http://www.microsoft.com/en-us/download/details.aspx?id=6812) , this is required to be installed on Windows 7, Windows 10 and Windows Server 2008, Other Windows OSes do not need to explicitly install the DirectX SDK.
|
||||
DirectX is a collection of APIs designed to allow development of multimedia applications on Microsoft platforms. For Microsoft platforms, NVIDIA's CUDA Driver supports DirectX. Several CUDA Samples for Windows demonstrate CUDA-DirectX interoperability; to build such samples, install Microsoft Visual Studio 2012 or higher, which provides the Microsoft Windows SDK for Windows 8.
|
||||
|
||||
#### DirectX12
|
||||
|
||||
|
@ -238,6 +252,9 @@ EGLOutput is a set of EGL extensions which allow EGL to render directly to the d
|
|||
|
||||
EGLSync is a set of EGL extensions which provides sync objects that are synchronization primitives, representing events whose completion can be tested or waited upon.
|
||||
|
||||
#### NVSCI
|
||||
NvSci is a set of communication interface libraries out of which CUDA interops with NvSciBuf and NvSciSync. NvSciBuf allows applications to allocate and exchange buffers in memory. NvSciSync allows applications to manage synchronization objects which coordinate when sequences of operations begin and end.
|
||||
|
||||
### CUDA Features
|
||||
|
||||
These CUDA features are needed by some CUDA samples. They are provided by either the CUDA Toolkit or CUDA Driver. Some features may not be available on your system.
|
||||
|
|
|
@ -30,7 +30,7 @@ cuDeviceGet, cuDeviceGetAttribute, cuDeviceComputeCapability, cuDeviceGetCount,
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
289
Samples/MersenneTwisterGP11213/Makefile
Normal file
289
Samples/MersenneTwisterGP11213/Makefile
Normal file
|
@ -0,0 +1,289 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project (only supported on Mac OS X and Linux platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L $(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L $(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),qnx)
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM
|
||||
LDFLAGS += -lsocket
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
# Gencode arguments
|
||||
SMS ?=
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
ifeq ($(SMS),)
|
||||
# Generate PTX code from SM 30
|
||||
GENCODE_FLAGS += -gencode arch=compute_30,code=compute_30
|
||||
endif
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
LIBRARIES += -lcurand_static -lculibos
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
# `all` only depends on `build`; it has no recipe of its own.
all: build
|
||||
|
||||
# `build` is satisfied once the sample executable exists.
build: MersenneTwisterGP11213
|
||||
|
||||
# Compile the single C++ translation unit through nvcc with the include
# paths, compiler flags and gencode flags assembled earlier in this Makefile.
MersenneTwister.o:MersenneTwister.cpp
|
||||
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
# Link the object file against $(LIBRARIES), then copy the resulting binary
# into ../../bin/<TARGET_ARCH>/<TARGET_OS>/<BUILD_TYPE>, creating that
# directory first if needed.
MersenneTwisterGP11213: MersenneTwister.o
|
||||
$(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
||||
mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
# Build (if necessary) and execute the sample from the current directory.
run: build
|
||||
./MersenneTwisterGP11213
|
||||
|
||||
# Remove the local executable and object file, plus the copy of the
# executable placed under ../../bin for the current arch/OS/build type.
clean:
|
||||
rm -f MersenneTwisterGP11213 MersenneTwister.o
|
||||
rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/MersenneTwisterGP11213
|
||||
|
||||
# `clobber` only depends on `clean`.
clobber: clean
|
184
Samples/MersenneTwisterGP11213/MersenneTwister.cpp
Normal file
184
Samples/MersenneTwisterGP11213/MersenneTwister.cpp
Normal file
|
@ -0,0 +1,184 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This sample demonstrates the use of CURAND to generate
|
||||
* random numbers on GPU and CPU.
|
||||
*/
|
||||
|
||||
// Utilities and system includes
|
||||
// includes, system
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <curand.h>
|
||||
|
||||
// Utilities and system includes
|
||||
#include <helper_functions.h>
|
||||
#include <helper_cuda.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <curand.h>
|
||||
|
||||
// Forward declaration: compares the GPU- and CPU-generated sequences and
// returns their L1 norm (defined after main()).
float compareResults(int rand_n, float *h_RandGPU, float *h_RandCPU);
|
||||
|
||||
// Number of random values generated when the "count" command-line flag is
// not given.
const int DEFAULT_RAND_N = 2400000;
|
||||
// Generator seed used when the "seed" command-line flag is not given.
const unsigned int DEFAULT_SEED = 777;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Main program
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
// Start logs
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
// initialize the GPU, either identified by --device
|
||||
// or by picking the device with highest flop rate.
|
||||
int devID = findCudaDevice(argc, (const char **)argv);
|
||||
|
||||
// parsing the number of random numbers to generate
|
||||
int rand_n = DEFAULT_RAND_N;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **) argv, "count"))
|
||||
{
|
||||
rand_n = getCmdLineArgumentInt(argc, (const char **) argv, "count");
|
||||
}
|
||||
|
||||
printf("Allocating data for %i samples...\n", rand_n);
|
||||
|
||||
// parsing the seed
|
||||
int seed = DEFAULT_SEED;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **) argv, "seed"))
|
||||
{
|
||||
seed = getCmdLineArgumentInt(argc, (const char **) argv, "seed");
|
||||
}
|
||||
|
||||
printf("Seeding with %i ...\n", seed);
|
||||
|
||||
cudaStream_t stream;
|
||||
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
|
||||
float *d_Rand;
|
||||
checkCudaErrors(cudaMalloc((void **)&d_Rand, rand_n * sizeof(float)));
|
||||
|
||||
curandGenerator_t prngGPU;
|
||||
checkCudaErrors(curandCreateGenerator(&prngGPU, CURAND_RNG_PSEUDO_MTGP32));
|
||||
checkCudaErrors(curandSetStream(prngGPU, stream));
|
||||
checkCudaErrors(curandSetPseudoRandomGeneratorSeed(prngGPU, seed));
|
||||
|
||||
curandGenerator_t prngCPU;
|
||||
checkCudaErrors(curandCreateGeneratorHost(&prngCPU, CURAND_RNG_PSEUDO_MTGP32));
|
||||
checkCudaErrors(curandSetPseudoRandomGeneratorSeed(prngCPU, seed));
|
||||
|
||||
//
|
||||
// Example 1: Compare random numbers generated on GPU and CPU
|
||||
float *h_RandGPU = (float *)malloc(rand_n * sizeof(float));
|
||||
|
||||
printf("Generating random numbers on GPU...\n\n");
|
||||
checkCudaErrors(curandGenerateUniform(prngGPU, (float *) d_Rand, rand_n));
|
||||
|
||||
printf("\nReading back the results...\n");
|
||||
checkCudaErrors(cudaMemcpyAsync(h_RandGPU, d_Rand, rand_n * sizeof(float), cudaMemcpyDeviceToHost, stream));
|
||||
|
||||
|
||||
float *h_RandCPU = (float *)malloc(rand_n * sizeof(float));
|
||||
|
||||
printf("Generating random numbers on CPU...\n\n");
|
||||
checkCudaErrors(curandGenerateUniform(prngCPU, (float *) h_RandCPU, rand_n));
|
||||
|
||||
checkCudaErrors(cudaStreamSynchronize(stream));
|
||||
printf("Comparing CPU/GPU random numbers...\n\n");
|
||||
float L1norm = compareResults(rand_n, h_RandGPU, h_RandCPU);
|
||||
|
||||
//
|
||||
// Example 2: Timing of random number generation on GPU
|
||||
const int numIterations = 10;
|
||||
int i;
|
||||
StopWatchInterface *hTimer;
|
||||
|
||||
sdkCreateTimer(&hTimer);
|
||||
sdkResetTimer(&hTimer);
|
||||
sdkStartTimer(&hTimer);
|
||||
|
||||
for (i = 0; i < numIterations; i++)
|
||||
{
|
||||
checkCudaErrors(curandGenerateUniform(prngGPU, (float *) d_Rand, rand_n));
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaStreamSynchronize(stream));
|
||||
sdkStopTimer(&hTimer);
|
||||
|
||||
double gpuTime = 1.0e-3 * sdkGetTimerValue(&hTimer)/(double)numIterations;
|
||||
|
||||
printf("MersenneTwisterGP11213, Throughput = %.4f GNumbers/s, Time = %.5f s, Size = %u Numbers\n",
|
||||
1.0e-9 * rand_n / gpuTime, gpuTime, rand_n);
|
||||
|
||||
printf("Shutting down...\n");
|
||||
|
||||
checkCudaErrors(curandDestroyGenerator(prngGPU));
|
||||
checkCudaErrors(curandDestroyGenerator(prngCPU));
|
||||
checkCudaErrors(cudaStreamDestroy(stream));
|
||||
checkCudaErrors(cudaFree(d_Rand));
|
||||
sdkDeleteTimer(&hTimer);
|
||||
free(h_RandGPU);
|
||||
free(h_RandCPU);
|
||||
|
||||
exit(L1norm < 1e-6 ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Compare a GPU-generated sequence against a CPU-generated reference.
 *
 * Prints the maximum absolute element-wise difference and the relative L1
 * norm, sum(|cpu - gpu|) / sum(|cpu|), and returns the latter.
 *
 * The two sums are accumulated in double precision: with millions of
 * samples a float accumulator stops absorbing small contributions once the
 * running total grows large, which skews the reported norm.
 *
 * @param rand_n     number of elements in each array
 * @param h_RandGPU  host copy of the values produced on the GPU
 * @param h_RandCPU  reference values produced on the CPU
 * @return the relative L1 norm as a float; when the reference sum is zero
 *         (for example rand_n == 0) the quotient is not a finite number
 */
float compareResults(int rand_n, float *h_RandGPU, float *h_RandCPU)
{
    float max_delta = 0.0f;
    double sum_delta = 0.0;
    double sum_ref = 0.0;

    for (int i = 0; i < rand_n; i++)
    {
        const float rCPU = h_RandCPU[i];
        const float rGPU = h_RandGPU[i];
        const float delta = (float)fabs(rCPU - rGPU);

        sum_delta += delta;
        sum_ref += fabs(rCPU);

        if (delta >= max_delta)
        {
            max_delta = delta;
        }
    }

    float L1norm = (float)(sum_delta / sum_ref);
    printf("Max absolute error: %E\n", max_delta);
    printf("L1 norm: %E\n\n", L1norm);

    return L1norm;
}
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,107 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>MersenneTwisterGP11213_vs2012</RootNamespace>
|
||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="MersenneTwister.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,107 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>MersenneTwisterGP11213_vs2013</RootNamespace>
|
||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="MersenneTwister.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 14.00
|
||||
# Visual Studio 2015
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2015.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,107 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>MersenneTwisterGP11213_vs2015</RootNamespace>
|
||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="MersenneTwister.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2017
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,112 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>MersenneTwisterGP11213_vs2017</RootNamespace>
|
||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="MersenneTwister.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2019
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,108 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>MersenneTwisterGP11213_vs2019</RootNamespace>
|
||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="MersenneTwister.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
73
Samples/MersenneTwisterGP11213/NsightEclipse.xml
Normal file
73
Samples/MersenneTwisterGP11213/NsightEclipse.xml
Normal file
|
@ -0,0 +1,73 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>MersenneTwisterGP11213</name>
|
||||
<description><![CDATA[This sample demonstrates the Mersenne Twister random number generator GP11213 in cuRAND.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<fallback_min_ptx>true</fallback_min_ptx>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../common/inc</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="basic">CURAND Library</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>CUDA</keyword>
|
||||
<keyword>CURAND</keyword>
|
||||
<keyword>Monte-Carlo</keyword>
|
||||
<keyword>random number generation</keyword>
|
||||
<keyword>GFSR</keyword>
|
||||
<keyword>Mersenne Twister</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
<library>curand_static</library>
|
||||
<library>culibos</library>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>MersenneTwister.cpp</primary_file>
|
||||
<required_dependencies>
|
||||
<dependency>CURAND</dependency>
|
||||
</required_dependencies>
|
||||
<scopes>
|
||||
<scope>1:CUDA Advanced Topics</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm30</sm-arch>
|
||||
<sm-arch>sm35</sm-arch>
|
||||
<sm-arch>sm37</sm-arch>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<platform>windows7</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>macosx</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>arm</arch>
|
||||
</env>
|
||||
<env>
|
||||
<arch>ppc64le</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>MersenneTwisterGP11213</title>
|
||||
<type>exe</type>
|
||||
</entry>
|
95
Samples/MersenneTwisterGP11213/README.md
Normal file
95
Samples/MersenneTwisterGP11213/README.md
Normal file
|
@ -0,0 +1,95 @@
|
|||
# MersenneTwisterGP11213 - MersenneTwisterGP11213
|
||||
|
||||
## Description
|
||||
|
||||
This sample demonstrates the Mersenne Twister random number generator GP11213 in cuRAND.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
CURAND Library
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.0 ](https://developer.nvidia.com/cuda-gpus) [SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Linux, Windows, MacOSX
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64, ppc64le, armv7l
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[CURAND](../../README.md#curand)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory.
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
The samples makefiles can take advantage of certain options:
|
||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=g++
|
||||
```
|
||||
|
||||
### Mac
|
||||
The Mac samples are built using makefiles. To use the makefiles, change directory into the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
|
||||
The samples makefiles can take advantage of certain options:
|
||||
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where "A B ..." is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use SMS="50 60".
|
||||
```
|
||||
$ make SMS="A B ..."
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default clang host compiler. See the [Mac Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=clang
|
||||
```
|
||||
|
||||
## References (for more details)
|
||||
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -108,6 +108,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -27,7 +27,7 @@ cudaMemcpy2D, cudaMallocManaged
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ cudaMallocManaged, cudaStreamAttachMemAsync, cudaMemcpyAsync, cudaMallocHost, cu
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -110,6 +110,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -106,6 +106,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -27,7 +27,7 @@ cudaSetDevice, cudaHostAlloc, cudaFree, cudaMallocHost, cudaFreeHost, cudaMemcpy
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -117,6 +117,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -113,6 +113,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -117,6 +117,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -113,6 +113,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -30,7 +30,7 @@ cudaStreamBeginCapture, cudaStreamEndCapture, cudaGraphCreate, cudaGraphLaunch,
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -27,7 +27,7 @@ x86_64, ppc64le
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -108,6 +108,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -30,7 +30,7 @@ cudaMemAdvise, cudaMemPrefetchAsync, cudaLaunchCooperativeKernelMultiDevice, cud
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -108,6 +108,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
82
Samples/cuSolverDn_LinearSolver/NsightEclipse.xml
Normal file
82
Samples/cuSolverDn_LinearSolver/NsightEclipse.xml
Normal file
|
@ -0,0 +1,82 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>cuSolverDn_LinearSolver</name>
|
||||
<description><![CDATA[A CUDA Sample that demonstrates cuSolverDN's LU, QR and Cholesky factorization.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<files>
|
||||
<file>gr_900_900_crg.mtx</file>
|
||||
<file>lap3D_7pt_n20.mtx</file>
|
||||
</files>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../common/inc</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="basic">Linear Algebra</concept>
|
||||
<concept level="basic">CUSOLVER Library</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>CUSOLVER</keyword>
|
||||
<keyword>Linear Algebra</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
<library>cusolver</library>
|
||||
<library>cublas</library>
|
||||
<library>cusparse</library>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>cuSolverDn_LinearSolver.cpp</primary_file>
|
||||
<qatests>
|
||||
<qatest>-R=qr</qatest>
|
||||
<qatest>-R=chol</qatest>
|
||||
<qatest>-R=lu</qatest>
|
||||
</qatests>
|
||||
<required_dependencies>
|
||||
<dependency>CUSOLVER</dependency>
|
||||
<dependency>CUBLAS</dependency>
|
||||
<dependency>CUSPARSE</dependency>
|
||||
</required_dependencies>
|
||||
<scopes>
|
||||
<scope>1:CUDA Basic Topics</scope>
|
||||
<scope>3:Linear Algebra</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm30</sm-arch>
|
||||
<sm-arch>sm35</sm-arch>
|
||||
<sm-arch>sm37</sm-arch>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<platform>windows7</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>macosx</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>ppc64le</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>aarch64</arch>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>cuSolverDn Linear Solver </title>
|
||||
<type>exe</type>
|
||||
</entry>
|
|
@ -27,7 +27,7 @@ x86_64, ppc64le, aarch64
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -104,6 +104,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -109,6 +109,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
316
Samples/cuSolverSp_LinearSolver/Makefile
Normal file
316
Samples/cuSolverSp_LinearSolver/Makefile
Normal file
|
@ -0,0 +1,316 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project (only supported on Mac OS X and Linux platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L $(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L $(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),qnx)
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM
|
||||
LDFLAGS += -lsocket
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
ALL_CCFLAGS += -Xcompiler \"-Wl,--no-as-needed\"
|
||||
endif
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 30 35 37 50 52 60 61 70 72 75
|
||||
else
|
||||
SMS ?= 30 35 37 50 52 60 61 70 75
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
LIBRARIES += -lcusolver -lcusparse
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
# These targets are commands, not files. Declaring them phony keeps a stray
# file or directory named e.g. "build" or "clean" from making the rule a no-op.
.PHONY: all build check.deps run clean clobber

# Default target: build the sample.
all: build

build: cuSolverSp_LinearSolver

# Report whether the sample was waived while the configuration above was evaluated.
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
	@echo "Sample will be waived due to the above missing dependencies"
else
	@echo "Sample is ready - all dependencies have been met"
endif

# Every object is compiled through nvcc; $(EXEC) turns each command into an
# echo when the sample is waived (SAMPLE_ENABLED == 0).
cuSolverSp_LinearSolver.o:cuSolverSp_LinearSolver.cpp
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

mmio.c.o:mmio.c
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

mmio_wrapper.o:mmio_wrapper.cpp
	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<

# Link the executable, then copy it into the shared per-arch/OS/build-type bin directory.
cuSolverSp_LinearSolver: cuSolverSp_LinearSolver.o mmio.c.o mmio_wrapper.o
	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
	$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
	$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)

run: build
	$(EXEC) ./cuSolverSp_LinearSolver

# Remove the local build products and the copy placed in the shared bin directory.
clean:
	rm -f cuSolverSp_LinearSolver cuSolverSp_LinearSolver.o mmio.c.o mmio_wrapper.o
	rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/cuSolverSp_LinearSolver

clobber: clean
|
83
Samples/cuSolverSp_LinearSolver/NsightEclipse.xml
Normal file
83
Samples/cuSolverSp_LinearSolver/NsightEclipse.xml
Normal file
|
@ -0,0 +1,83 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>cuSolverSp_LinearSolver</name>
|
||||
<description><![CDATA[A CUDA Sample that demonstrates cuSolverSP's LU, QR and Cholesky factorization.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<files>
|
||||
<file>lap2D_5pt_n100.mtx</file>
|
||||
<file>lap3D_7pt_n20.mtx</file>
|
||||
</files>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../common/inc</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="basic">Linear Algebra</concept>
|
||||
<concept level="basic">CUSOLVER Library</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>CUSOLVER</keyword>
|
||||
<keyword>Linear Algebra</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
<library>cusolver</library>
|
||||
<library>cusparse</library>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>cuSolverSp_LinearSolver.cpp</primary_file>
|
||||
<qatests>
|
||||
<qatest>-R=qr</qatest>
|
||||
<qatest>-R=chol</qatest>
|
||||
<qatest>-R=lu</qatest>
|
||||
<qatest>-R=qr -P=symamd</qatest>
|
||||
<qatest>-R=chol -P=symamd</qatest>
|
||||
<qatest>-R=lu -P=symamd</qatest>
|
||||
</qatests>
|
||||
<required_dependencies>
|
||||
<dependency>CUSOLVER</dependency>
|
||||
<dependency>CUSPARSE</dependency>
|
||||
</required_dependencies>
|
||||
<scopes>
|
||||
<scope>1:CUDA Basic Topics</scope>
|
||||
<scope>3:Linear Algebra</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm30</sm-arch>
|
||||
<sm-arch>sm35</sm-arch>
|
||||
<sm-arch>sm37</sm-arch>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<platform>windows7</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>macosx</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>ppc64le</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>arm</arch>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>cuSolverSp Linear Solver </title>
|
||||
<type>exe</type>
|
||||
</entry>
|
95
Samples/cuSolverSp_LinearSolver/README.md
Normal file
95
Samples/cuSolverSp_LinearSolver/README.md
Normal file
|
@ -0,0 +1,95 @@
|
|||
# cuSolverSp_LinearSolver - cuSolverSp Linear Solver
|
||||
|
||||
## Description
|
||||
|
||||
A CUDA Sample that demonstrates cuSolverSP's LU, QR and Cholesky factorization.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Linear Algebra, CUSOLVER Library
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.0 ](https://developer.nvidia.com/cuda-gpus) [SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Linux, Windows, MacOSX
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64, ppc64le, armv7l
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[CUSOLVER](../../README.md#cusolver), [CUSPARSE](../../README.md#cusparse)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory:
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
The samples makefiles can take advantage of certain options:
|
||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
||||
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=g++
|
||||
```
|
||||
|
||||
### Mac
|
||||
The Mac samples are built using makefiles. To use the makefiles, change directory into the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
|
||||
The samples makefiles can take advantage of certain options:
|
||||
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where "A B ..." is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use SMS="50 60".
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default clang host compiler. See the [Mac Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=clang
|
||||
```
|
||||
|
||||
## References (for more details)
|
||||
|
654
Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp
Normal file
654
Samples/cuSolverSp_LinearSolver/cuSolverSp_LinearSolver.cpp
Normal file
|
@ -0,0 +1,654 @@
|
|||
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
* Test three linear solvers, including Cholesky, LU and QR.
|
||||
* The user has to prepare a sparse matrix of "matrix market format" (with extension .mtx).
|
||||
* For example, the user can download matrices in Florida Sparse Matrix Collection.
|
||||
* (http://www.cise.ufl.edu/research/sparse/matrices/)
|
||||
*
|
||||
* The user needs to choose a solver by the switch -R=<solver> and
|
||||
* to provide the path of the matrix by the switch -file=<file>, then
|
||||
* the program solves
|
||||
* A*x = b
|
||||
* and reports relative error
|
||||
* |b-A*x|/(|A|*|x|+|b|)
|
||||
*
|
||||
* How does it work?
|
||||
* The example solves A*x = b by the following steps
|
||||
* step 1: B = A(Q,Q)
|
||||
* Q is the ordering to minimize zero fill-in.
|
||||
* The user can choose symrcm or symamd.
|
||||
* step 2: solve B*z = Q*b
|
||||
* step 3: x = inv(Q)*z
|
||||
*
|
||||
* Above three steps can be combined by the formula
|
||||
* (Q*A*Q')*(Q*x) = (Q*b)
|
||||
*
|
||||
* The elapsed time is also reported so the user can compare efficiency of different solvers.
|
||||
*
|
||||
* How to use
|
||||
* ./cuSolverSp_LinearSolver // Default: Cholesky, symrcm & file=lap2D_5pt_n100.mtx
|
||||
* ./cuSolverSp_LinearSolver -R=chol -file=<file> // cholesky factorization
|
||||
* ./cuSolverSp_LinearSolver -R=lu -P=symrcm -file=<file> // symrcm + LU with partial pivoting
|
||||
* ./cuSolverSp_LinearSolver -R=qr -P=symamd -file=<file> // symamd + QR factorization
|
||||
*
|
||||
*
|
||||
* Remark: the absolute error on solution x is meaningless without knowing condition number of A.
|
||||
* The relative error on residual should be close to machine zero, i.e. 1.e-15.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "cusparse.h"
|
||||
#include "cusolverSp.h"
|
||||
|
||||
#include "helper_cuda.h"
|
||||
#include "helper_cusolver.h"
|
||||
|
||||
template <typename T_ELEM>
|
||||
int loadMMSparseMatrix(char *filename, char elem_type, bool csrFormat, int *m,
|
||||
int *n, int *nnz, T_ELEM **aVal, int **aRowInd,
|
||||
int **aColInd, int extendSymMatrix);
|
||||
|
||||
/*
 * Print the command-line help text to stdout and terminate the process.
 *
 * This function never returns: it ends with exit(0), so callers that invoke it
 * after reporting a bad argument also finish with a zero exit status.
 */
void UsageSP(void) {
  /* One entry per output line, emitted in order. */
  static const char *const helpLines[] = {
      "<options>\n",
      "-h : display this help\n",
      "-R=<name> : choose a linear solver\n",
      " chol (cholesky factorization), this is default\n",
      " qr (QR factorization)\n",
      " lu (LU factorization)\n",
      "-P=<name> : choose a reordering\n",
      " symrcm (Reverse Cuthill-McKee)\n",
      " symamd (Approximate Minimum Degree)\n",
      " metis (nested dissection)\n",
      "-file=<filename> : filename containing a matrix in MM format\n",
      "-device=<device_id> : <device_id> if want to run on specific GPU\n",
  };
  const size_t lineCount = sizeof(helpLines) / sizeof(helpLines[0]);

  for (size_t i = 0; i < lineCount; ++i) {
    printf("%s", helpLines[i]);
  }

  exit(0);
}
|
||||
|
||||
void parseCommandLineArguments(int argc, char *argv[], struct testOpts &opts) {
|
||||
memset(&opts, 0, sizeof(opts));
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "-h")) {
|
||||
UsageSP();
|
||||
}
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "R")) {
|
||||
char *solverType = NULL;
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "R", &solverType);
|
||||
|
||||
if (solverType) {
|
||||
if ((STRCASECMP(solverType, "chol") != 0) &&
|
||||
(STRCASECMP(solverType, "lu") != 0) &&
|
||||
(STRCASECMP(solverType, "qr") != 0)) {
|
||||
printf("\nIncorrect argument passed to -R option\n");
|
||||
UsageSP();
|
||||
} else {
|
||||
opts.testFunc = solverType;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "P")) {
|
||||
char *reorderType = NULL;
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "P", &reorderType);
|
||||
|
||||
if (reorderType) {
|
||||
if ((STRCASECMP(reorderType, "symrcm") != 0) &&
|
||||
(STRCASECMP(reorderType, "symamd") != 0) &&
|
||||
(STRCASECMP(reorderType, "metis") != 0)) {
|
||||
printf("\nIncorrect argument passed to -P option\n");
|
||||
UsageSP();
|
||||
} else {
|
||||
opts.reorder = reorderType;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
|
||||
char *fileName = 0;
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "file", &fileName);
|
||||
|
||||
if (fileName) {
|
||||
opts.sparse_mat_filename = fileName;
|
||||
} else {
|
||||
printf("\nIncorrect filename passed to -file \n ");
|
||||
UsageSP();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
struct testOpts opts;
|
||||
cusolverSpHandle_t handle = NULL;
|
||||
cusparseHandle_t cusparseHandle = NULL; /* used in residual evaluation */
|
||||
cudaStream_t stream = NULL;
|
||||
cusparseMatDescr_t descrA = NULL;
|
||||
|
||||
int rowsA = 0; /* number of rows of A */
|
||||
int colsA = 0; /* number of columns of A */
|
||||
int nnzA = 0; /* number of nonzeros of A */
|
||||
int baseA = 0; /* base index in CSR format */
|
||||
|
||||
/* CSR(A) from I/O */
|
||||
int *h_csrRowPtrA = NULL;
|
||||
int *h_csrColIndA = NULL;
|
||||
double *h_csrValA = NULL;
|
||||
|
||||
double *h_z = NULL; /* z = B \ (Q*b) */
|
||||
double *h_x = NULL; /* x = A \ b */
|
||||
double *h_b = NULL; /* b = ones(n,1) */
|
||||
double *h_Qb = NULL; /* Q*b */
|
||||
double *h_r = NULL; /* r = b - A*x */
|
||||
|
||||
int *h_Q = NULL; /* <int> n */
|
||||
/* reorder to reduce zero fill-in */
|
||||
/* Q = symrcm(A) or Q = symamd(A) */
|
||||
/* B = Q*A*Q' or B = A(Q,Q) by MATLAB notation */
|
||||
int *h_csrRowPtrB = NULL; /* <int> n+1 */
|
||||
int *h_csrColIndB = NULL; /* <int> nnzA */
|
||||
double *h_csrValB = NULL; /* <double> nnzA */
|
||||
int *h_mapBfromA = NULL; /* <int> nnzA */
|
||||
|
||||
size_t size_perm = 0;
|
||||
void *buffer_cpu = NULL; /* working space for permutation: B = Q*A*Q^T */
|
||||
|
||||
/* device copy of A: used in residual evaluation */
|
||||
int *d_csrRowPtrA = NULL;
|
||||
int *d_csrColIndA = NULL;
|
||||
double *d_csrValA = NULL;
|
||||
|
||||
/* device copy of B: used in B*z = Q*b */
|
||||
int *d_csrRowPtrB = NULL;
|
||||
int *d_csrColIndB = NULL;
|
||||
double *d_csrValB = NULL;
|
||||
|
||||
int *d_Q = NULL; /* device copy of h_Q */
|
||||
double *d_z = NULL; /* z = B \ Q*b */
|
||||
double *d_x = NULL; /* x = A \ b */
|
||||
double *d_b = NULL; /* a copy of h_b */
|
||||
double *d_Qb = NULL; /* a copy of h_Qb */
|
||||
double *d_r = NULL; /* r = b - A*x */
|
||||
|
||||
double tol = 1.e-12;
|
||||
const int reorder = 0; /* no reordering */
|
||||
int singularity = 0; /* -1 if A is invertible under tol. */
|
||||
|
||||
/* the constants are used in residual evaluation, r = b - A*x */
|
||||
const double minus_one = -1.0;
|
||||
const double one = 1.0;
|
||||
|
||||
double b_inf = 0.0;
|
||||
double x_inf = 0.0;
|
||||
double r_inf = 0.0;
|
||||
double A_inf = 0.0;
|
||||
int errors = 0;
|
||||
int issym = 0;
|
||||
|
||||
double start, stop;
|
||||
double time_solve_cpu;
|
||||
double time_solve_gpu;
|
||||
|
||||
parseCommandLineArguments(argc, argv, opts);
|
||||
|
||||
if (NULL == opts.testFunc) {
|
||||
opts.testFunc =
|
||||
"chol"; /* By default running Cholesky as NO solver selected with -R
|
||||
option. */
|
||||
}
|
||||
|
||||
findCudaDevice(argc, (const char **)argv);
|
||||
|
||||
if (opts.sparse_mat_filename == NULL) {
|
||||
opts.sparse_mat_filename = sdkFindFilePath("lap2D_5pt_n100.mtx", argv[0]);
|
||||
if (opts.sparse_mat_filename != NULL)
|
||||
printf("Using default input file [%s]\n", opts.sparse_mat_filename);
|
||||
else
|
||||
printf("Could not find lap2D_5pt_n100.mtx\n");
|
||||
} else {
|
||||
printf("Using input file [%s]\n", opts.sparse_mat_filename);
|
||||
}
|
||||
|
||||
printf("step 1: read matrix market format\n");
|
||||
|
||||
if (opts.sparse_mat_filename == NULL) {
|
||||
fprintf(stderr, "Error: input matrix is not provided\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (loadMMSparseMatrix<double>(opts.sparse_mat_filename, 'd', true, &rowsA,
|
||||
&colsA, &nnzA, &h_csrValA, &h_csrRowPtrA,
|
||||
&h_csrColIndA, true)) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
baseA = h_csrRowPtrA[0]; // baseA = {0,1}
|
||||
printf("sparse matrix A is %d x %d with %d nonzeros, base=%d\n", rowsA, colsA,
|
||||
nnzA, baseA);
|
||||
|
||||
if (rowsA != colsA) {
|
||||
fprintf(stderr, "Error: only support square matrix\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
checkCudaErrors(cusolverSpCreate(&handle));
|
||||
checkCudaErrors(cusparseCreate(&cusparseHandle));
|
||||
|
||||
checkCudaErrors(cudaStreamCreate(&stream));
|
||||
/* bind stream to cusparse and cusolver*/
|
||||
checkCudaErrors(cusolverSpSetStream(handle, stream));
|
||||
checkCudaErrors(cusparseSetStream(cusparseHandle, stream));
|
||||
|
||||
/* configure matrix descriptor*/
|
||||
checkCudaErrors(cusparseCreateMatDescr(&descrA));
|
||||
checkCudaErrors(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
|
||||
if (baseA) {
|
||||
checkCudaErrors(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE));
|
||||
} else {
|
||||
checkCudaErrors(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
|
||||
}
|
||||
|
||||
h_z = (double *)malloc(sizeof(double) * colsA);
|
||||
h_x = (double *)malloc(sizeof(double) * colsA);
|
||||
h_b = (double *)malloc(sizeof(double) * rowsA);
|
||||
h_Qb = (double *)malloc(sizeof(double) * rowsA);
|
||||
h_r = (double *)malloc(sizeof(double) * rowsA);
|
||||
|
||||
h_Q = (int *)malloc(sizeof(int) * colsA);
|
||||
h_csrRowPtrB = (int *)malloc(sizeof(int) * (rowsA + 1));
|
||||
h_csrColIndB = (int *)malloc(sizeof(int) * nnzA);
|
||||
h_csrValB = (double *)malloc(sizeof(double) * nnzA);
|
||||
h_mapBfromA = (int *)malloc(sizeof(int) * nnzA);
|
||||
|
||||
assert(NULL != h_z);
|
||||
assert(NULL != h_x);
|
||||
assert(NULL != h_b);
|
||||
assert(NULL != h_Qb);
|
||||
assert(NULL != h_r);
|
||||
assert(NULL != h_Q);
|
||||
assert(NULL != h_csrRowPtrB);
|
||||
assert(NULL != h_csrColIndB);
|
||||
assert(NULL != h_csrValB);
|
||||
assert(NULL != h_mapBfromA);
|
||||
|
||||
checkCudaErrors(
|
||||
cudaMalloc((void **)&d_csrRowPtrA, sizeof(int) * (rowsA + 1)));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_csrColIndA, sizeof(int) * nnzA));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_csrValA, sizeof(double) * nnzA));
|
||||
checkCudaErrors(
|
||||
cudaMalloc((void **)&d_csrRowPtrB, sizeof(int) * (rowsA + 1)));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_csrColIndB, sizeof(int) * nnzA));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_csrValB, sizeof(double) * nnzA));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_Q, sizeof(int) * colsA));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_z, sizeof(double) * colsA));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_x, sizeof(double) * colsA));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_b, sizeof(double) * rowsA));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_Qb, sizeof(double) * rowsA));
|
||||
checkCudaErrors(cudaMalloc((void **)&d_r, sizeof(double) * rowsA));
|
||||
|
||||
/* verify if A has symmetric pattern or not */
|
||||
checkCudaErrors(cusolverSpXcsrissymHost(handle, rowsA, nnzA, descrA,
|
||||
h_csrRowPtrA, h_csrRowPtrA + 1,
|
||||
h_csrColIndA, &issym));
|
||||
|
||||
if (0 == strcmp(opts.testFunc, "chol")) {
|
||||
if (!issym) {
|
||||
printf("Error: A has no symmetric pattern, please use LU or QR \n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
printf("step 2: reorder the matrix A to minimize zero fill-in\n");
|
||||
printf(
|
||||
" if the user choose a reordering by -P=symrcm, -P=symamd or "
|
||||
"-P=metis\n");
|
||||
|
||||
if (NULL != opts.reorder) {
|
||||
if (0 == strcmp(opts.reorder, "symrcm")) {
|
||||
printf("step 2.1: Q = symrcm(A) \n");
|
||||
checkCudaErrors(cusolverSpXcsrsymrcmHost(
|
||||
handle, rowsA, nnzA, descrA, h_csrRowPtrA, h_csrColIndA, h_Q));
|
||||
} else if (0 == strcmp(opts.reorder, "symamd")) {
|
||||
printf("step 2.1: Q = symamd(A) \n");
|
||||
checkCudaErrors(cusolverSpXcsrsymamdHost(
|
||||
handle, rowsA, nnzA, descrA, h_csrRowPtrA, h_csrColIndA, h_Q));
|
||||
} else if (0 == strcmp(opts.reorder, "metis")) {
|
||||
printf("step 2.1: Q = metis(A) \n");
|
||||
checkCudaErrors(cusolverSpXcsrmetisndHost(handle, rowsA, nnzA, descrA,
|
||||
h_csrRowPtrA, h_csrColIndA,
|
||||
NULL, /* default setting. */
|
||||
h_Q));
|
||||
} else {
|
||||
fprintf(stderr, "Error: %s is unknown reordering\n", opts.reorder);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
printf("step 2.1: no reordering is chosen, Q = 0:n-1 \n");
|
||||
for (int j = 0; j < rowsA; j++) {
|
||||
h_Q[j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
printf("step 2.2: B = A(Q,Q) \n");
|
||||
|
||||
memcpy(h_csrRowPtrB, h_csrRowPtrA, sizeof(int) * (rowsA + 1));
|
||||
memcpy(h_csrColIndB, h_csrColIndA, sizeof(int) * nnzA);
|
||||
|
||||
checkCudaErrors(cusolverSpXcsrperm_bufferSizeHost(
|
||||
handle, rowsA, colsA, nnzA, descrA, h_csrRowPtrB, h_csrColIndB, h_Q, h_Q,
|
||||
&size_perm));
|
||||
|
||||
if (buffer_cpu) {
|
||||
free(buffer_cpu);
|
||||
}
|
||||
buffer_cpu = (void *)malloc(sizeof(char) * size_perm);
|
||||
assert(NULL != buffer_cpu);
|
||||
|
||||
/* h_mapBfromA = Identity */
|
||||
for (int j = 0; j < nnzA; j++) {
|
||||
h_mapBfromA[j] = j;
|
||||
}
|
||||
checkCudaErrors(cusolverSpXcsrpermHost(handle, rowsA, colsA, nnzA, descrA,
|
||||
h_csrRowPtrB, h_csrColIndB, h_Q, h_Q,
|
||||
h_mapBfromA, buffer_cpu));
|
||||
|
||||
/* B = A( mapBfromA ) */
|
||||
for (int j = 0; j < nnzA; j++) {
|
||||
h_csrValB[j] = h_csrValA[h_mapBfromA[j]];
|
||||
}
|
||||
|
||||
printf("step 3: b(j) = 1 + j/n \n");
|
||||
for (int row = 0; row < rowsA; row++) {
|
||||
h_b[row] = 1.0 + ((double)row) / ((double)rowsA);
|
||||
}
|
||||
|
||||
/* h_Qb = b(Q) */
|
||||
for (int row = 0; row < rowsA; row++) {
|
||||
h_Qb[row] = h_b[h_Q[row]];
|
||||
}
|
||||
|
||||
printf("step 4: prepare data on device\n");
|
||||
checkCudaErrors(cudaMemcpyAsync(d_csrRowPtrA, h_csrRowPtrA,
|
||||
sizeof(int) * (rowsA + 1),
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_csrColIndA, h_csrColIndA,
|
||||
sizeof(int) * nnzA, cudaMemcpyHostToDevice,
|
||||
stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_csrValA, h_csrValA, sizeof(double) * nnzA,
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_csrRowPtrB, h_csrRowPtrB,
|
||||
sizeof(int) * (rowsA + 1),
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_csrColIndB, h_csrColIndB,
|
||||
sizeof(int) * nnzA, cudaMemcpyHostToDevice,
|
||||
stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_csrValB, h_csrValB, sizeof(double) * nnzA,
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_b, h_b, sizeof(double) * rowsA,
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_Qb, h_Qb, sizeof(double) * rowsA,
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_Q, h_Q, sizeof(int) * rowsA,
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
|
||||
printf("step 5: solve A*x = b on CPU \n");
|
||||
start = second();
|
||||
|
||||
/* solve B*z = Q*b */
|
||||
if (0 == strcmp(opts.testFunc, "chol")) {
|
||||
checkCudaErrors(cusolverSpDcsrlsvcholHost(
|
||||
handle, rowsA, nnzA, descrA, h_csrValB, h_csrRowPtrB, h_csrColIndB,
|
||||
h_Qb, tol, reorder, h_z, &singularity));
|
||||
} else if (0 == strcmp(opts.testFunc, "lu")) {
|
||||
checkCudaErrors(cusolverSpDcsrlsvluHost(
|
||||
handle, rowsA, nnzA, descrA, h_csrValB, h_csrRowPtrB, h_csrColIndB,
|
||||
h_Qb, tol, reorder, h_z, &singularity));
|
||||
|
||||
} else if (0 == strcmp(opts.testFunc, "qr")) {
|
||||
checkCudaErrors(cusolverSpDcsrlsvqrHost(
|
||||
handle, rowsA, nnzA, descrA, h_csrValB, h_csrRowPtrB, h_csrColIndB,
|
||||
h_Qb, tol, reorder, h_z, &singularity));
|
||||
} else {
|
||||
fprintf(stderr, "Error: %s is unknown function\n", opts.testFunc);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Q*x = z */
|
||||
for (int row = 0; row < rowsA; row++) {
|
||||
h_x[h_Q[row]] = h_z[row];
|
||||
}
|
||||
|
||||
if (0 <= singularity) {
|
||||
printf("WARNING: the matrix is singular at row %d under tol (%E)\n",
|
||||
singularity, tol);
|
||||
}
|
||||
|
||||
stop = second();
|
||||
time_solve_cpu = stop - start;
|
||||
|
||||
printf("step 6: evaluate residual r = b - A*x (result on CPU)\n");
|
||||
checkCudaErrors(cudaMemcpyAsync(d_r, d_b, sizeof(double) * rowsA,
|
||||
cudaMemcpyDeviceToDevice, stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(d_x, h_x, sizeof(double) * colsA,
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
checkCudaErrors(cusparseDcsrmv(cusparseHandle,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE, rowsA, colsA,
|
||||
nnzA, &minus_one, descrA, d_csrValA,
|
||||
d_csrRowPtrA, d_csrColIndA, d_x, &one, d_r));
|
||||
checkCudaErrors(cudaMemcpyAsync(h_r, d_r, sizeof(double) * rowsA,
|
||||
cudaMemcpyDeviceToHost, stream));
|
||||
/* wait until h_r is ready */
|
||||
checkCudaErrors(cudaDeviceSynchronize());
|
||||
|
||||
b_inf = vec_norminf(rowsA, h_b);
|
||||
x_inf = vec_norminf(colsA, h_x);
|
||||
r_inf = vec_norminf(rowsA, h_r);
|
||||
A_inf = csr_mat_norminf(rowsA, colsA, nnzA, descrA, h_csrValA, h_csrRowPtrA,
|
||||
h_csrColIndA);
|
||||
|
||||
printf("(CPU) |b - A*x| = %E \n", r_inf);
|
||||
printf("(CPU) |A| = %E \n", A_inf);
|
||||
printf("(CPU) |x| = %E \n", x_inf);
|
||||
printf("(CPU) |b| = %E \n", b_inf);
|
||||
printf("(CPU) |b - A*x|/(|A|*|x| + |b|) = %E \n",
|
||||
r_inf / (A_inf * x_inf + b_inf));
|
||||
|
||||
printf("step 7: solve A*x = b on GPU\n");
|
||||
start = second();
|
||||
|
||||
/* solve B*z = Q*b */
|
||||
if (0 == strcmp(opts.testFunc, "chol")) {
|
||||
checkCudaErrors(cusolverSpDcsrlsvchol(
|
||||
handle, rowsA, nnzA, descrA, d_csrValB, d_csrRowPtrB, d_csrColIndB,
|
||||
d_Qb, tol, reorder, d_z, &singularity));
|
||||
|
||||
} else if (0 == strcmp(opts.testFunc, "lu")) {
|
||||
printf("WARNING: no LU available on GPU \n");
|
||||
} else if (0 == strcmp(opts.testFunc, "qr")) {
|
||||
checkCudaErrors(cusolverSpDcsrlsvqr(handle, rowsA, nnzA, descrA, d_csrValB,
|
||||
d_csrRowPtrB, d_csrColIndB, d_Qb, tol,
|
||||
reorder, d_z, &singularity));
|
||||
} else {
|
||||
fprintf(stderr, "Error: %s is unknow function\n", opts.testFunc);
|
||||
return 1;
|
||||
}
|
||||
checkCudaErrors(cudaDeviceSynchronize());
|
||||
if (0 <= singularity) {
|
||||
printf("WARNING: the matrix is singular at row %d under tol (%E)\n",
|
||||
singularity, tol);
|
||||
}
|
||||
/* Q*x = z */
|
||||
checkCudaErrors(cusparseDsctr(cusparseHandle, rowsA, d_z, d_Q, d_x,
|
||||
CUSPARSE_INDEX_BASE_ZERO));
|
||||
checkCudaErrors(cudaDeviceSynchronize());
|
||||
|
||||
stop = second();
|
||||
time_solve_gpu = stop - start;
|
||||
|
||||
printf("step 8: evaluate residual r = b - A*x (result on GPU)\n");
|
||||
checkCudaErrors(cudaMemcpyAsync(d_r, d_b, sizeof(double) * rowsA,
|
||||
cudaMemcpyDeviceToDevice, stream));
|
||||
checkCudaErrors(cusparseDcsrmv(cusparseHandle,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE, rowsA, colsA,
|
||||
nnzA, &minus_one, descrA, d_csrValA,
|
||||
d_csrRowPtrA, d_csrColIndA, d_x, &one, d_r));
|
||||
checkCudaErrors(cudaMemcpyAsync(h_x, d_x, sizeof(double) * colsA,
|
||||
cudaMemcpyDeviceToHost, stream));
|
||||
checkCudaErrors(cudaMemcpyAsync(h_r, d_r, sizeof(double) * rowsA,
|
||||
cudaMemcpyDeviceToHost, stream));
|
||||
/* wait until h_x and h_r are ready */
|
||||
checkCudaErrors(cudaDeviceSynchronize());
|
||||
|
||||
b_inf = vec_norminf(rowsA, h_b);
|
||||
x_inf = vec_norminf(colsA, h_x);
|
||||
r_inf = vec_norminf(rowsA, h_r);
|
||||
|
||||
if (0 != strcmp(opts.testFunc, "lu")) {
|
||||
// only cholesky and qr have GPU version
|
||||
printf("(GPU) |b - A*x| = %E \n", r_inf);
|
||||
printf("(GPU) |A| = %E \n", A_inf);
|
||||
printf("(GPU) |x| = %E \n", x_inf);
|
||||
printf("(GPU) |b| = %E \n", b_inf);
|
||||
printf("(GPU) |b - A*x|/(|A|*|x| + |b|) = %E \n",
|
||||
r_inf / (A_inf * x_inf + b_inf));
|
||||
}
|
||||
|
||||
fprintf(stdout, "timing %s: CPU = %10.6f sec , GPU = %10.6f sec\n",
|
||||
opts.testFunc, time_solve_cpu, time_solve_gpu);
|
||||
|
||||
if (0 != strcmp(opts.testFunc, "lu")) {
|
||||
printf("show last 10 elements of solution vector (GPU) \n");
|
||||
printf("consistent result for different reordering and solver \n");
|
||||
for (int j = rowsA - 10; j < rowsA; j++) {
|
||||
printf("x[%d] = %E\n", j, h_x[j]);
|
||||
}
|
||||
}
|
||||
|
||||
if (handle) {
|
||||
checkCudaErrors(cusolverSpDestroy(handle));
|
||||
}
|
||||
if (cusparseHandle) {
|
||||
checkCudaErrors(cusparseDestroy(cusparseHandle));
|
||||
}
|
||||
if (stream) {
|
||||
checkCudaErrors(cudaStreamDestroy(stream));
|
||||
}
|
||||
if (descrA) {
|
||||
checkCudaErrors(cusparseDestroyMatDescr(descrA));
|
||||
}
|
||||
|
||||
if (h_csrValA) {
|
||||
free(h_csrValA);
|
||||
}
|
||||
if (h_csrRowPtrA) {
|
||||
free(h_csrRowPtrA);
|
||||
}
|
||||
if (h_csrColIndA) {
|
||||
free(h_csrColIndA);
|
||||
}
|
||||
if (h_z) {
|
||||
free(h_z);
|
||||
}
|
||||
if (h_x) {
|
||||
free(h_x);
|
||||
}
|
||||
if (h_b) {
|
||||
free(h_b);
|
||||
}
|
||||
if (h_Qb) {
|
||||
free(h_Qb);
|
||||
}
|
||||
if (h_r) {
|
||||
free(h_r);
|
||||
}
|
||||
|
||||
if (h_Q) {
|
||||
free(h_Q);
|
||||
}
|
||||
|
||||
if (h_csrRowPtrB) {
|
||||
free(h_csrRowPtrB);
|
||||
}
|
||||
if (h_csrColIndB) {
|
||||
free(h_csrColIndB);
|
||||
}
|
||||
if (h_csrValB) {
|
||||
free(h_csrValB);
|
||||
}
|
||||
if (h_mapBfromA) {
|
||||
free(h_mapBfromA);
|
||||
}
|
||||
|
||||
if (buffer_cpu) {
|
||||
free(buffer_cpu);
|
||||
}
|
||||
|
||||
if (d_csrValA) {
|
||||
checkCudaErrors(cudaFree(d_csrValA));
|
||||
}
|
||||
if (d_csrRowPtrA) {
|
||||
checkCudaErrors(cudaFree(d_csrRowPtrA));
|
||||
}
|
||||
if (d_csrColIndA) {
|
||||
checkCudaErrors(cudaFree(d_csrColIndA));
|
||||
}
|
||||
if (d_csrValB) {
|
||||
checkCudaErrors(cudaFree(d_csrValB));
|
||||
}
|
||||
if (d_csrRowPtrB) {
|
||||
checkCudaErrors(cudaFree(d_csrRowPtrB));
|
||||
}
|
||||
if (d_csrColIndB) {
|
||||
checkCudaErrors(cudaFree(d_csrColIndB));
|
||||
}
|
||||
if (d_Q) {
|
||||
checkCudaErrors(cudaFree(d_Q));
|
||||
}
|
||||
if (d_z) {
|
||||
checkCudaErrors(cudaFree(d_z));
|
||||
}
|
||||
if (d_x) {
|
||||
checkCudaErrors(cudaFree(d_x));
|
||||
}
|
||||
if (d_b) {
|
||||
checkCudaErrors(cudaFree(d_b));
|
||||
}
|
||||
if (d_Qb) {
|
||||
checkCudaErrors(cudaFree(d_Qb));
|
||||
}
|
||||
if (d_r) {
|
||||
checkCudaErrors(cudaFree(d_r));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,109 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>cuSolverSp_LinearSolver_vs2012</RootNamespace>
|
||||
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
<!-- This group carries no Condition attribute, so the host-compiler, linker and CUDA-compiler settings below are the defaults for every configuration of this project. -->
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<!-- Host-compiler include path: the project directory, the CUDA toolkit headers, and the samples' shared ../../Common directory. -->
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<!-- cusolver.lib and cusparse.lib are the CUDA libraries this sample links against; cudart_static.lib is the static CUDA runtime. The remaining entries are Windows system import libraries, followed by any inherited dependencies. -->
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<!-- One "compute_XX,sm_XX" pair per targeted GPU architecture, covering compute capability 3.0 through 7.5. -->
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
|
||||
<!-- -Xcompiler forwards "/wd 4819" to the host compiler. NOTE(review): presumably this silences MSVC warning C4819 (character not representable in the current code page); confirm. -->
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<!-- Include path and WIN32 define for sources built by the CUDA compiler; the project directory and ../../Common match the ClCompile include path. -->
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
|
||||
<ClCompile Include="mmio.c" />
|
||||
<ClCompile Include="mmio_wrapper.cpp" />
|
||||
<ClInclude Include="mmio.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,109 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>cuSolverSp_LinearSolver_vs2013</RootNamespace>
|
||||
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
<!-- This group carries no Condition attribute, so the host-compiler, linker and CUDA-compiler settings below are the defaults for every configuration of this project. -->
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<!-- Host-compiler include path: the project directory, the CUDA toolkit headers, and the samples' shared ../../Common directory. -->
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<!-- cusolver.lib and cusparse.lib are the CUDA libraries this sample links against; cudart_static.lib is the static CUDA runtime. The remaining entries are Windows system import libraries, followed by any inherited dependencies. -->
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<!-- One "compute_XX,sm_XX" pair per targeted GPU architecture, covering compute capability 3.0 through 7.5. -->
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
|
||||
<!-- -Xcompiler forwards "/wd 4819" to the host compiler. NOTE(review): presumably this silences MSVC warning C4819 (character not representable in the current code page); confirm. -->
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<!-- Include path and WIN32 define for sources built by the CUDA compiler; the project directory and ../../Common match the ClCompile include path. -->
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
|
||||
<ClCompile Include="mmio.c" />
|
||||
<ClCompile Include="mmio_wrapper.cpp" />
|
||||
<ClInclude Include="mmio.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 14.00
|
||||
# Visual Studio 2015
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2015.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,109 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>cuSolverSp_LinearSolver_vs2015</RootNamespace>
|
||||
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
<!-- This group carries no Condition attribute, so the host-compiler, linker and CUDA-compiler settings below are the defaults for every configuration of this project. -->
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<!-- Host-compiler include path: the project directory, the CUDA toolkit headers, and the samples' shared ../../Common directory. -->
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<!-- cusolver.lib and cusparse.lib are the CUDA libraries this sample links against; cudart_static.lib is the static CUDA runtime. The remaining entries are Windows system import libraries, followed by any inherited dependencies. -->
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<!-- One "compute_XX,sm_XX" pair per targeted GPU architecture, covering compute capability 3.0 through 7.5. -->
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
|
||||
<!-- -Xcompiler forwards "/wd 4819" to the host compiler. NOTE(review): presumably this silences MSVC warning C4819 (character not representable in the current code page); confirm. -->
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<!-- Include path and WIN32 define for sources built by the CUDA compiler; the project directory and ../../Common match the ClCompile include path. -->
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
|
||||
<ClCompile Include="mmio.c" />
|
||||
<ClCompile Include="mmio_wrapper.cpp" />
|
||||
<ClInclude Include="mmio.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2017
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,114 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>cuSolverSp_LinearSolver_vs2017</RootNamespace>
|
||||
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
|
||||
<ClCompile Include="mmio.c" />
|
||||
<ClCompile Include="mmio_wrapper.cpp" />
|
||||
<ClInclude Include="mmio.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -0,0 +1,20 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2019
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -0,0 +1,110 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>cuSolverSp_LinearSolver_vs2019</RootNamespace>
|
||||
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
|
||||
<ClCompile Include="mmio.c" />
|
||||
<ClCompile Include="mmio_wrapper.cpp" />
|
||||
<ClInclude Include="mmio.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
29803
Samples/cuSolverSp_LinearSolver/lap2D_5pt_n100.mtx
Normal file
29803
Samples/cuSolverSp_LinearSolver/lap2D_5pt_n100.mtx
Normal file
File diff suppressed because it is too large
Load Diff
30803
Samples/cuSolverSp_LinearSolver/lap3D_7pt_n20.mtx
Normal file
30803
Samples/cuSolverSp_LinearSolver/lap3D_7pt_n20.mtx
Normal file
File diff suppressed because it is too large
Load Diff
517
Samples/cuSolverSp_LinearSolver/mmio.c
Normal file
517
Samples/cuSolverSp_LinearSolver/mmio.c
Normal file
|
@ -0,0 +1,517 @@
|
|||
/*
|
||||
* Matrix Market I/O library for ANSI C
|
||||
*
|
||||
* See http://math.nist.gov/MatrixMarket for details.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
/* avoid Windows warnings (for example: strcpy, fscanf, etc.) */
|
||||
#if defined(_WIN32)
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "mmio.h"
|
||||
|
||||
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
|
||||
double **val_, int **I_, int **J_)
|
||||
{
|
||||
FILE *f;
|
||||
MM_typecode matcode;
|
||||
int M, N, nz;
|
||||
int i;
|
||||
double *val;
|
||||
int *I, *J;
|
||||
|
||||
if ((f = fopen(fname, "r")) == NULL)
|
||||
return -1;
|
||||
|
||||
|
||||
if (mm_read_banner(f, &matcode) != 0)
|
||||
{
|
||||
printf("mm_read_unsymetric: Could not process Matrix Market banner ");
|
||||
printf(" in file [%s]\n", fname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
|
||||
mm_is_sparse(matcode)))
|
||||
{
|
||||
fprintf(stderr, "Sorry, this application does not support ");
|
||||
fprintf(stderr, "Market Market type: [%s]\n",
|
||||
mm_typecode_to_str(matcode));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* find out size of sparse matrix: M, N, nz .... */
|
||||
|
||||
if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0)
|
||||
{
|
||||
fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
*M_ = M;
|
||||
*N_ = N;
|
||||
*nz_ = nz;
|
||||
|
||||
/* reserve memory for matrices */
|
||||
|
||||
I = (int *) malloc(nz * sizeof(int));
|
||||
J = (int *) malloc(nz * sizeof(int));
|
||||
val = (double *) malloc(nz * sizeof(double));
|
||||
|
||||
*val_ = val;
|
||||
*I_ = I;
|
||||
*J_ = J;
|
||||
|
||||
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
|
||||
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
|
||||
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
|
||||
|
||||
for (i=0; i<nz; i++)
|
||||
{
|
||||
if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]) != 3) {
|
||||
return -1;
|
||||
}
|
||||
I[i]--; /* adjust from 1-based to 0-based */
|
||||
J[i]--;
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Check that the fields of a typecode form a permitted combination.
 * Returns 1 when the typecode is acceptable and 0 otherwise.  Rejected:
 * anything that is not a matrix, dense pattern matrices, real Hermitian
 * matrices, and pattern matrices that are Hermitian or skew-symmetric.
 */
int mm_is_valid(MM_typecode matcode)
{
    const int pattern = mm_is_pattern(matcode);
    const int hermitian = mm_is_hermitian(matcode);

    if (!mm_is_matrix(matcode))
        return 0;
    if (pattern && mm_is_dense(matcode))
        return 0;
    if (hermitian && mm_is_real(matcode))
        return 0;
    if (pattern && (hermitian || mm_is_skew(matcode)))
        return 0;

    return 1;
}
|
||||
|
||||
/* Convert a NUL-terminated string to lower case in place. */
static void mm_lowercase(char *s)
{
    /* <ctype.h> functions need a value representable as unsigned char */
    for (; *s != '\0'; s++)
        *s = (char) tolower((unsigned char) *s);
}

/*
 * Read the first line of a Matrix Market stream and decode it into
 * *matcode.
 *
 * The line must contain five whitespace-separated tokens: the banner,
 * the object ("matrix"), the format, the data type and the storage
 * scheme.  All tokens except the banner are compared case-insensitively.
 *
 * Returns 0 on success, MM_PREMATURE_EOF if no line could be read or it
 * has fewer than five tokens, MM_NO_HEADER if the banner does not match,
 * and MM_UNSUPPORTED_TYPE if any of the remaining tokens is unknown.
 */
int mm_read_banner(FILE *f, MM_typecode *matcode)
{
    char line[MM_MAX_LINE_LENGTH];
    /*
     * Each token buffer is as large as the line buffer, so an unbounded
     * "%s" can never write past its end regardless of the token length.
     */
    char banner[MM_MAX_LINE_LENGTH];
    char mtx[MM_MAX_LINE_LENGTH];
    char crd[MM_MAX_LINE_LENGTH];
    char data_type[MM_MAX_LINE_LENGTH];
    char storage_scheme[MM_MAX_LINE_LENGTH];

    mm_clear_typecode(matcode);

    if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
        return MM_PREMATURE_EOF;

    if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
               storage_scheme) != 5)
        return MM_PREMATURE_EOF;

    /* convert to lower case */
    mm_lowercase(mtx);
    mm_lowercase(crd);
    mm_lowercase(data_type);
    mm_lowercase(storage_scheme);

    /* check for banner */
    if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
        return MM_NO_HEADER;

    /* first field should be "mtx" */
    if (strcmp(mtx, MM_MTX_STR) != 0)
        return MM_UNSUPPORTED_TYPE;
    mm_set_matrix(matcode);

    /* second field describes whether this is a sparse matrix (in coordinate
       storage) or a dense array */
    if (strcmp(crd, MM_SPARSE_STR) == 0)
        mm_set_sparse(matcode);
    else if (strcmp(crd, MM_DENSE_STR) == 0)
        mm_set_dense(matcode);
    else
        return MM_UNSUPPORTED_TYPE;

    /* third field: element data type */
    if (strcmp(data_type, MM_REAL_STR) == 0)
        mm_set_real(matcode);
    else if (strcmp(data_type, MM_COMPLEX_STR) == 0)
        mm_set_complex(matcode);
    else if (strcmp(data_type, MM_PATTERN_STR) == 0)
        mm_set_pattern(matcode);
    else if (strcmp(data_type, MM_INT_STR) == 0)
        mm_set_integer(matcode);
    else
        return MM_UNSUPPORTED_TYPE;

    /* fourth field: storage scheme / symmetry */
    if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
        mm_set_general(matcode);
    else if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
        mm_set_symmetric(matcode);
    else if (strcmp(storage_scheme, MM_HERM_STR) == 0)
        mm_set_hermitian(matcode);
    else if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
        mm_set_skew(matcode);
    else
        return MM_UNSUPPORTED_TYPE;

    return 0;
}
|
||||
|
||||
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz)
|
||||
{
|
||||
if (fprintf(f, "%d %d %d\n", M, N, nz) != 3)
|
||||
return MM_COULD_NOT_WRITE_FILE;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
int num_items_read;
|
||||
|
||||
/* set return null parameter values, in case we exit with errors */
|
||||
*M = *N = *nz = 0;
|
||||
|
||||
/* now continue scanning until you reach the end-of-comments */
|
||||
do
|
||||
{
|
||||
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
}while (line[0] == '%');
|
||||
|
||||
/* line[] is either blank or has M,N, nz */
|
||||
if (sscanf(line, "%d %d %d", M, N, nz) == 3)
|
||||
return 0;
|
||||
|
||||
else
|
||||
do
|
||||
{
|
||||
num_items_read = fscanf(f, "%d %d %d", M, N, nz);
|
||||
if (num_items_read == EOF) return MM_PREMATURE_EOF;
|
||||
}
|
||||
while (num_items_read != 3);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int mm_read_mtx_array_size(FILE *f, int *M, int *N)
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
int num_items_read;
|
||||
/* set return null parameter values, in case we exit with errors */
|
||||
*M = *N = 0;
|
||||
|
||||
/* now continue scanning until you reach the end-of-comments */
|
||||
do
|
||||
{
|
||||
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
}while (line[0] == '%');
|
||||
|
||||
/* line[] is either blank or has M,N, nz */
|
||||
if (sscanf(line, "%d %d", M, N) == 2)
|
||||
return 0;
|
||||
|
||||
else /* we have a blank line */
|
||||
do
|
||||
{
|
||||
num_items_read = fscanf(f, "%d %d", M, N);
|
||||
if (num_items_read == EOF) return MM_PREMATURE_EOF;
|
||||
}
|
||||
while (num_items_read != 2);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mm_write_mtx_array_size(FILE *f, int M, int N)
|
||||
{
|
||||
if (fprintf(f, "%d %d\n", M, N) != 2)
|
||||
return MM_COULD_NOT_WRITE_FILE;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-------------------------------------------------------------------------*/
|
||||
|
||||
/******************************************************************/
|
||||
/* use when I[], J[], and val[] are already allocated             */
|
||||
/******************************************************************/
|
||||
|
||||
/*
 * Read nz coordinate entries from f into caller-allocated arrays.
 *
 * The entry layout follows the data type in matcode:
 *   complex        "row col re im"  -> I[k], J[k], val[2k], val[2k+1]
 *   real/integer   "row col value"  -> I[k], J[k], val[k]
 *   pattern        "row col"        -> I[k], J[k]  (val is not touched)
 *
 * Returns 0 on success, MM_PREMATURE_EOF when an entry cannot be parsed,
 * and MM_UNSUPPORTED_TYPE for any other data type.  M and N are unused.
 */
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
        double val[], MM_typecode matcode)
{
    int k = 0;

    if (mm_is_complex(matcode))
    {
        while (k < nz)
        {
            int got = fscanf(f, "%d %d %lg %lg", I + k, J + k,
                             val + 2*k, val + 2*k + 1);
            if (got != 4)
                return MM_PREMATURE_EOF;
            k++;
        }
        return 0;
    }

    if (mm_is_real(matcode) || mm_is_integer(matcode))
    {
        while (k < nz)
        {
            int got = fscanf(f, "%d %d %lg\n", I + k, J + k, val + k);
            if (got != 3)
                return MM_PREMATURE_EOF;
            k++;
        }
        return 0;
    }

    if (mm_is_pattern(matcode))
    {
        while (k < nz)
        {
            int got = fscanf(f, "%d %d", I + k, J + k);
            if (got != 2)
                return MM_PREMATURE_EOF;
            k++;
        }
        return 0;
    }

    return MM_UNSUPPORTED_TYPE;
}
|
||||
|
||||
/*
 * Read a single coordinate entry from f.
 *
 * Complex entries fill *I, *J, *real and *imag; real and integer entries
 * fill *I, *J and *real; pattern entries fill only *I and *J.
 *
 * Returns 0 on success, MM_PREMATURE_EOF when the entry cannot be
 * parsed, and MM_UNSUPPORTED_TYPE for any other data type.
 */
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
        double *real, double *imag, MM_typecode matcode)
{
    int expected;
    int parsed;

    if (mm_is_complex(matcode))
    {
        expected = 4;
        parsed = fscanf(f, "%d %d %lg %lg", I, J, real, imag);
    }
    else if (mm_is_real(matcode) || mm_is_integer(matcode))
    {
        expected = 3;
        parsed = fscanf(f, "%d %d %lg\n", I, J, real);
    }
    else if (mm_is_pattern(matcode))
    {
        expected = 2;
        parsed = fscanf(f, "%d %d", I, J);
    }
    else
    {
        return MM_UNSUPPORTED_TYPE;
    }

    return (parsed == expected) ? 0 : MM_PREMATURE_EOF;
}
|
||||
|
||||
|
||||
/************************************************************************
|
||||
mm_read_mtx_crd() fills M, N, nz, array of values, and return
|
||||
type code, e.g. 'MCRS'
|
||||
|
||||
if matrix is complex, values[] is of size 2*nz,
|
||||
(nz pairs of real/imaginary values)
|
||||
************************************************************************/
|
||||
|
||||
/*
 * Read a whole coordinate-format Matrix Market file.
 *
 *   fname      file to read; the literal name "stdin" reads standard input
 *   M, N, nz   receive the matrix dimensions and the entry count
 *   I, J       receive newly allocated row / column index arrays (nz each)
 *   val        receives a newly allocated value array: 2*nz doubles for
 *              complex data, nz doubles for real or integer data, and
 *              NULL for pattern data
 *   matcode    receives the decoded typecode
 *
 * Returns 0 on success, otherwise one of the MM_* error codes.
 * MM_COULD_NOT_READ_FILE is also returned when memory cannot be allocated.
 * On success the caller owns the arrays and must free() them.  If a
 * failure happens after the size line was parsed, the arrays are released
 * here and *I, *J and *val are set to NULL.  The file is closed on every
 * path; stdin is never closed.
 */
int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
        double **val, MM_typecode *matcode)
{
    int ret_code;
    FILE *f;
    size_t count;
    size_t vals_per_entry;

    if (strcmp(fname, "stdin") == 0)
        f = stdin;
    else if ((f = fopen(fname, "r")) == NULL)
        return MM_COULD_NOT_READ_FILE;

    if ((ret_code = mm_read_banner(f, matcode)) != 0)
        goto done;

    if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) &&
          mm_is_matrix(*matcode)))
    {
        ret_code = MM_UNSUPPORTED_TYPE;
        goto done;
    }

    if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0)
        goto done;

    /* a non-positive nz allocates nothing */
    count = (*nz > 0) ? (size_t) *nz : 0;

    *I = calloc(count, sizeof **I);
    *J = calloc(count, sizeof **J);
    *val = NULL;
    if (count > 0 && (*I == NULL || *J == NULL))
    {
        ret_code = MM_COULD_NOT_READ_FILE;
        goto fail;
    }

    /* complex entries carry a real/imaginary pair, pattern entries none */
    if (mm_is_complex(*matcode))
        vals_per_entry = 2;
    else if (mm_is_real(*matcode) || mm_is_integer(*matcode))
        vals_per_entry = 1;
    else
        vals_per_entry = 0;

    if (vals_per_entry > 0)
    {
        *val = calloc(count * vals_per_entry, sizeof **val);
        if (count > 0 && *val == NULL)
        {
            ret_code = MM_COULD_NOT_READ_FILE;
            goto fail;
        }
    }

    if (vals_per_entry > 0 || mm_is_pattern(*matcode))
    {
        ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
                                        *matcode);
        if (ret_code != 0)
            goto fail;
    }

    ret_code = 0;
    goto done;

fail:
    free(*I);
    free(*J);
    free(*val);
    *I = NULL;
    *J = NULL;
    *val = NULL;

done:
    if (f != stdin)
        fclose(f);
    return ret_code;
}
|
||||
|
||||
/*
 * Write the Matrix Market banner line for matcode to f.
 *
 * Returns 0 on success, MM_UNSUPPORTED_TYPE when the typecode cannot be
 * converted to text, and MM_COULD_NOT_WRITE_FILE when the write fails.
 */
int mm_write_banner(FILE *f, MM_typecode matcode)
{
    char *str = mm_typecode_to_str(matcode);
    int ret_code;

    /* mm_typecode_to_str() returns NULL for a typecode it cannot name */
    if (str == NULL)
        return MM_UNSUPPORTED_TYPE;

    ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
    free(str);

    /*
     * fprintf() returns the number of characters written, or a negative
     * value on error; it does not return the number of converted items.
     */
    if (ret_code < 0)
        return MM_COULD_NOT_WRITE_FILE;

    return 0;
}
|
||||
|
||||
/*
 * Write a coordinate-format matrix to a Matrix Market file.
 *
 *   fname      output file; the literal name "stdout" writes to standard
 *              output
 *   M, N, nz   matrix dimensions and entry count
 *   I, J       row / column indices, written exactly as given
 *   val        values: unused for pattern data, nz doubles for real or
 *              integer data (integers are truncated with an int cast),
 *              2*nz doubles (real/imaginary pairs) for complex data
 *   matcode    typecode describing the matrix
 *
 * Returns 0 on success, MM_COULD_NOT_WRITE_FILE when the file cannot be
 * opened or a write error is detected, and MM_UNSUPPORTED_TYPE when the
 * typecode cannot be written.  stdout is never closed.
 */
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
        double val[], MM_typecode matcode)
{
    FILE *f;
    char *type_str;
    int i;
    int ret_code = 0;

    if (strcmp(fname, "stdout") == 0)
        f = stdout;
    else if ((f = fopen(fname, "w")) == NULL)
        return MM_COULD_NOT_WRITE_FILE;

    /* heap string owned by this function; NULL if the typecode is unknown */
    type_str = mm_typecode_to_str(matcode);
    if (type_str == NULL)
    {
        if (f != stdout)
            fclose(f);
        return MM_UNSUPPORTED_TYPE;
    }

    /* print banner followed by typecode */
    fprintf(f, "%s ", MatrixMarketBanner);
    fprintf(f, "%s\n", type_str);
    free(type_str);

    /* print matrix sizes and nonzeros */
    fprintf(f, "%d %d %d\n", M, N, nz);

    /* print values */
    if (mm_is_pattern(matcode))
    {
        for (i = 0; i < nz; i++)
            fprintf(f, "%d %d\n", I[i], J[i]);
    }
    else if (mm_is_integer(matcode))
    {
        for (i = 0; i < nz; i++)
            fprintf(f, "%d %d %d\n", I[i], J[i], (int) val[i]);
    }
    else if (mm_is_real(matcode))
    {
        for (i = 0; i < nz; i++)
            fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
    }
    else if (mm_is_complex(matcode))
    {
        for (i = 0; i < nz; i++)
            fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i],
                    val[2*i+1]);
    }
    else
    {
        ret_code = MM_UNSUPPORTED_TYPE;
    }

    /* surface write errors instead of reporting success */
    if (ret_code == 0 && ferror(f))
        ret_code = MM_COULD_NOT_WRITE_FILE;

    /* fclose() flushes buffered data, so its result matters as well */
    if (f != stdout)
    {
        if (fclose(f) != 0 && ret_code == 0)
            ret_code = MM_COULD_NOT_WRITE_FILE;
    }

    return ret_code;
}
|
||||
|
||||
/**
|
||||
* Create a new copy of a string s. mm_strdup() is a common routine, but
|
||||
* not part of ANSI C, so it is included here. Used by mm_typecode_to_str().
|
||||
*
|
||||
*/
|
||||
/*
 * Return a newly allocated copy of the NUL-terminated string s, or NULL
 * if memory cannot be allocated.  The caller must free() the result.
 */
static char *mm_strdup(const char *s)
{
    size_t len = strlen(s);
    char *s2 = malloc(len + 1);

    /* copying into a failed allocation would be undefined behaviour */
    if (s2 == NULL)
        return NULL;

    /* len + 1 also copies the terminating NUL */
    memcpy(s2, s, len + 1);
    return s2;
}
|
||||
|
||||
/*
 * Build the textual form of a typecode: the object, format, data type
 * and symmetry names separated by single spaces.
 *
 * Returns a heap-allocated string produced by mm_strdup(), which the
 * caller must free(), or NULL when any of the four typecode fields is
 * not one of the recognised values.
 */
char *mm_typecode_to_str(MM_typecode matcode)
{
    char text[MM_MAX_LINE_LENGTH];
    const char *object;
    const char *format;
    const char *field;
    const char *symmetry;

    /* object: only matrices are known */
    if (!mm_is_matrix(matcode))
        return NULL;
    object = MM_MTX_STR;

    /* format: coordinate (sparse) or array (dense) */
    if (mm_is_sparse(matcode))
        format = MM_SPARSE_STR;
    else if (mm_is_dense(matcode))
        format = MM_DENSE_STR;
    else
        return NULL;

    /* element data type */
    if (mm_is_real(matcode))
        field = MM_REAL_STR;
    else if (mm_is_complex(matcode))
        field = MM_COMPLEX_STR;
    else if (mm_is_pattern(matcode))
        field = MM_PATTERN_STR;
    else if (mm_is_integer(matcode))
        field = MM_INT_STR;
    else
        return NULL;

    /* symmetry type */
    if (mm_is_general(matcode))
        symmetry = MM_GENERAL_STR;
    else if (mm_is_symmetric(matcode))
        symmetry = MM_SYMM_STR;
    else if (mm_is_hermitian(matcode))
        symmetry = MM_HERM_STR;
    else if (mm_is_skew(matcode))
        symmetry = MM_SKEW_STR;
    else
        return NULL;

    sprintf(text, "%s %s %s %s", object, format, field, symmetry);
    return mm_strdup(text);
}
|
141
Samples/cuSolverSp_LinearSolver/mmio.h
Normal file
141
Samples/cuSolverSp_LinearSolver/mmio.h
Normal file
|
@ -0,0 +1,141 @@
|
|||
/*
|
||||
* Matrix Market I/O library for ANSI C
|
||||
*
|
||||
* See http://math.nist.gov/MatrixMarket for details.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MM_IO_H
|
||||
#define MM_IO_H
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#define MM_MAX_LINE_LENGTH 1025
|
||||
#define MatrixMarketBanner "%%MatrixMarket"
|
||||
#define MM_MAX_TOKEN_LENGTH 64
|
||||
|
||||
typedef char MM_typecode[4];
|
||||
|
||||
char *mm_typecode_to_str(MM_typecode matcode);
|
||||
|
||||
int mm_read_banner(FILE *f, MM_typecode *matcode);
|
||||
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);
|
||||
int mm_read_mtx_array_size(FILE *f, int *M, int *N);
|
||||
|
||||
int mm_write_banner(FILE *f, MM_typecode matcode);
|
||||
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
|
||||
int mm_write_mtx_array_size(FILE *f, int M, int N);
|
||||
|
||||
|
||||
/********************* MM_typecode query functions ***************************/
|
||||
|
||||
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
|
||||
|
||||
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
|
||||
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
|
||||
#define mm_is_dense(typecode) ((typecode)[1]=='A')
|
||||
#define mm_is_array(typecode) ((typecode)[1]=='A')
|
||||
|
||||
#define mm_is_complex(typecode) ((typecode)[2]=='C')
|
||||
#define mm_is_real(typecode) ((typecode)[2]=='R')
|
||||
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
|
||||
#define mm_is_integer(typecode) ((typecode)[2]=='I')
|
||||
|
||||
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
|
||||
#define mm_is_general(typecode) ((typecode)[3]=='G')
|
||||
#define mm_is_skew(typecode) ((typecode)[3]=='K')
|
||||
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
|
||||
|
||||
int mm_is_valid(MM_typecode matcode); /* too complex for a macro */
|
||||
|
||||
|
||||
/********************* MM_typecode modify functions ***************************/
|
||||
|
||||
/*
 * Each macro below takes a POINTER to an MM_typecode and stores one
 * character of the typecode.  The argument is fully parenthesized so
 * that expressions such as `codes + 1` address the intended typecode.
 * mm_clear_typecode() evaluates its argument more than once, so do not
 * pass an expression with side effects.
 */
#define mm_set_matrix(typecode)     ((*(typecode))[0]='M')
#define mm_set_coordinate(typecode) ((*(typecode))[1]='C')
#define mm_set_array(typecode)      ((*(typecode))[1]='A')
#define mm_set_dense(typecode)      mm_set_array(typecode)
#define mm_set_sparse(typecode)     mm_set_coordinate(typecode)

#define mm_set_complex(typecode)    ((*(typecode))[2]='C')
#define mm_set_real(typecode)       ((*(typecode))[2]='R')
#define mm_set_pattern(typecode)    ((*(typecode))[2]='P')
#define mm_set_integer(typecode)    ((*(typecode))[2]='I')

#define mm_set_symmetric(typecode)  ((*(typecode))[3]='S')
#define mm_set_general(typecode)    ((*(typecode))[3]='G')
#define mm_set_skew(typecode)       ((*(typecode))[3]='K')
#define mm_set_hermitian(typecode)  ((*(typecode))[3]='H')

/* reset to blank object/format/type fields with 'G' (general) symmetry */
#define mm_clear_typecode(typecode) ((*(typecode))[0]=(*(typecode))[1]= \
                                    (*(typecode))[2]=' ',(*(typecode))[3]='G')

#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
|
||||
|
||||
|
||||
/********************* Matrix Market error codes ***************************/
|
||||
|
||||
|
||||
#define MM_COULD_NOT_READ_FILE 11
|
||||
#define MM_PREMATURE_EOF 12
|
||||
#define MM_NOT_MTX 13
|
||||
#define MM_NO_HEADER 14
|
||||
#define MM_UNSUPPORTED_TYPE 15
|
||||
#define MM_LINE_TOO_LONG 16
|
||||
#define MM_COULD_NOT_WRITE_FILE 17
|
||||
|
||||
|
||||
/******************** Matrix Market internal definitions ********************
|
||||
|
||||
MM_matrix_typecode: 4-character sequence
|
||||
|
||||
object sparse/ data storage
|
||||
dense type scheme
|
||||
|
||||
string position: [0] [1] [2] [3]
|
||||
|
||||
Matrix typecode: M(atrix) C(oord) R(eal) G(eneral)
|
||||
A(array) C(omplex) H(ermitian)
|
||||
P(attern) S(ymmetric)
|
||||
I(nteger) s(K)ew
|
||||
|
||||
***********************************************************************/
|
||||
|
||||
#define MM_MTX_STR "matrix"
|
||||
#define MM_ARRAY_STR "array"
|
||||
#define MM_DENSE_STR "array"
|
||||
#define MM_COORDINATE_STR "coordinate"
|
||||
#define MM_SPARSE_STR "coordinate"
|
||||
#define MM_COMPLEX_STR "complex"
|
||||
#define MM_REAL_STR "real"
|
||||
#define MM_INT_STR "integer"
|
||||
#define MM_GENERAL_STR "general"
|
||||
#define MM_SYMM_STR "symmetric"
|
||||
#define MM_HERM_STR "hermitian"
|
||||
#define MM_SKEW_STR "skew-symmetric"
|
||||
#define MM_PATTERN_STR "pattern"
|
||||
|
||||
|
||||
/* high level routines */
|
||||
int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
|
||||
double **val, MM_typecode *matcode);
|
||||
|
||||
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
|
||||
double val[], MM_typecode matcode);
|
||||
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
|
||||
double val[], MM_typecode matcode);
|
||||
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
|
||||
MM_typecode matcode);
|
||||
|
||||
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
|
||||
double **val_, int **I_, int **J_);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif
|
529
Samples/cuSolverSp_LinearSolver/mmio_wrapper.cpp
Normal file
529
Samples/cuSolverSp_LinearSolver/mmio_wrapper.cpp
Normal file
|
@ -0,0 +1,529 @@
|
|||
|
||||
/* avoid Windows warnings (for example: strcpy, fscanf, etc.) */
/* The macro is only honoured when it is defined before the first CRT header
 * is included, so it has to precede every #include below. */
#if defined(_WIN32) && !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "mmio.h"

#include <cusolverDn.h>
|
||||
|
||||
/* various __inline__ __device__ function to initialize a T_ELEM */
|
||||
template <typename T_ELEM> __inline__ T_ELEM cuGet (int );
|
||||
template <> __inline__ float cuGet<float >(int x)
|
||||
{
|
||||
return float(x);
|
||||
}
|
||||
|
||||
template <> __inline__ double cuGet<double>(int x)
|
||||
{
|
||||
return double(x);
|
||||
}
|
||||
|
||||
template <> __inline__ cuComplex cuGet<cuComplex>(int x)
|
||||
{
|
||||
return (make_cuComplex( float(x), 0.0f ));
|
||||
}
|
||||
|
||||
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(int x)
|
||||
{
|
||||
return (make_cuDoubleComplex( double(x), 0.0 ));
|
||||
}
|
||||
|
||||
|
||||
template <typename T_ELEM> __inline__ T_ELEM cuGet (int , int );
|
||||
template <> __inline__ float cuGet<float >(int x, int y)
|
||||
{
|
||||
return float(x);
|
||||
}
|
||||
|
||||
template <> __inline__ double cuGet<double>(int x, int y)
|
||||
{
|
||||
return double(x);
|
||||
}
|
||||
|
||||
template <> __inline__ cuComplex cuGet<cuComplex>(int x, int y)
|
||||
{
|
||||
return make_cuComplex( float(x), float(y) );
|
||||
}
|
||||
|
||||
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(int x, int y)
|
||||
{
|
||||
return (make_cuDoubleComplex( double(x), double(y) ));
|
||||
}
|
||||
|
||||
|
||||
template <typename T_ELEM> __inline__ T_ELEM cuGet (float );
|
||||
template <> __inline__ float cuGet<float >(float x)
|
||||
{
|
||||
return float(x);
|
||||
}
|
||||
|
||||
template <> __inline__ double cuGet<double>(float x)
|
||||
{
|
||||
return double(x);
|
||||
}
|
||||
|
||||
template <> __inline__ cuComplex cuGet<cuComplex>(float x)
|
||||
{
|
||||
return (make_cuComplex( float(x), 0.0f ));
|
||||
}
|
||||
|
||||
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(float x)
|
||||
{
|
||||
return (make_cuDoubleComplex( double(x), 0.0 ));
|
||||
}
|
||||
|
||||
|
||||
template <typename T_ELEM> __inline__ T_ELEM cuGet (float, float );
|
||||
template <> __inline__ float cuGet<float >(float x, float y)
|
||||
{
|
||||
return float(x);
|
||||
}
|
||||
|
||||
template <> __inline__ double cuGet<double>(float x, float y)
|
||||
{
|
||||
return double(x);
|
||||
}
|
||||
|
||||
template <> __inline__ cuComplex cuGet<cuComplex>(float x, float y)
|
||||
{
|
||||
return (make_cuComplex( float(x), float(y) ));
|
||||
}
|
||||
|
||||
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(float x, float y)
|
||||
{
|
||||
return (make_cuDoubleComplex( double(x), double(y) ));
|
||||
}
|
||||
|
||||
|
||||
template <typename T_ELEM> __inline__ T_ELEM cuGet (double );
|
||||
template <> __inline__ float cuGet<float >(double x)
|
||||
{
|
||||
return float(x);
|
||||
}
|
||||
|
||||
template <> __inline__ double cuGet<double>(double x)
|
||||
{
|
||||
return double(x);
|
||||
}
|
||||
|
||||
template <> __inline__ cuComplex cuGet<cuComplex>(double x)
|
||||
{
|
||||
return (make_cuComplex( float(x), 0.0f ));
|
||||
}
|
||||
|
||||
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(double x)
|
||||
{
|
||||
return (make_cuDoubleComplex( double(x), 0.0 ));
|
||||
}
|
||||
|
||||
|
||||
template <typename T_ELEM> __inline__ T_ELEM cuGet (double, double );
|
||||
template <> __inline__ float cuGet<float >(double x, double y)
|
||||
{
|
||||
return float(x);
|
||||
}
|
||||
|
||||
template <> __inline__ double cuGet<double>(double x, double y)
|
||||
{
|
||||
return double(x);
|
||||
}
|
||||
|
||||
template <> __inline__ cuComplex cuGet<cuComplex>(double x, double y)
|
||||
{
|
||||
return (make_cuComplex( float(x), float(y) ));
|
||||
}
|
||||
|
||||
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(double x, double y)
|
||||
{
|
||||
return (make_cuDoubleComplex( double(x), double(y) ));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Turn a list of row (or column) indices into a compressed pointer array.
 *
 *   Ind  : nnz indices, each expected to lie in [base, base + m - 1]
 *   nnz  : number of entries in Ind
 *   m    : number of rows (or columns); Ptr must have room for m + 1 ints
 *   Ptr  : output; Ptr[0] = base and Ptr[k+1] - Ptr[k] is the number of
 *          entries whose index equals base + k
 *   base : index base of Ind (0 or 1)
 *
 * Entries whose index falls outside the expected range are not counted;
 * writing their slot would land outside Ptr. Callers can detect the
 * mismatch because Ptr[m] - Ptr[0] is then smaller than nnz.
 */
static void compress_index(
    const int *Ind,
    int nnz,
    int m,
    int *Ptr,
    int base)
{
    int i;

    /* Ptr[0] carries the base; every other slot starts as an empty count */
    Ptr[0] = base;
    for (i = 1; i <= m; i++) {
        Ptr[i] = 0;
    }

    /* count elements in every row, one slot to the right of the row itself */
    for (i = 0; i < nnz; i++) {
        const int k = Ind[i] - base;
        if (k < 0 || k >= m) {
            continue; /* out-of-range index: never write outside Ptr */
        }
        Ptr[k + 1]++;
    }

    /* running sum converts the counts into start offsets */
    for (i = 0; i < m; i++) {
        Ptr[i + 1] += Ptr[i];
    }
}
|
||||
|
||||
|
||||
/* One COO entry used as a sort key. */
struct cooFormat {
    int i ; // row index
    int j ; // column index
    int p ; // permutation: position of the entry before sorting
};

/*
 * Row-major ordering: compare by row, then by column.
 * Returns a negative, zero or positive value like a qsort comparator.
 * The tie-break is a three-way comparison rather than a subtraction, which
 * could overflow (and flip sign) for indices of opposite sign.
 */
int cmp_cooFormat_csr( struct cooFormat *s, struct cooFormat *t)
{
    if ( s->i != t->i ){
        return ( s->i < t->i ) ? -1 : 1 ;
    }
    return ( s->j > t->j ) - ( s->j < t->j ) ;
}

/*
 * Column-major ordering: compare by column, then by row.
 * Same return convention and overflow-free tie-break as cmp_cooFormat_csr.
 */
int cmp_cooFormat_csc( struct cooFormat *s, struct cooFormat *t)
{
    if ( s->j != t->j ){
        return ( s->j < t->j ) ? -1 : 1 ;
    }
    return ( s->i > t->i ) - ( s->i < t->i ) ;
}
|
||||
|
||||
typedef int (*FUNPTR) (const void*, const void*) ;
|
||||
typedef int (*FUNPTR2) ( struct cooFormat *s, struct cooFormat *t) ;
|
||||
|
||||
static FUNPTR2 fptr_array[2] = {
|
||||
cmp_cooFormat_csr,
|
||||
cmp_cooFormat_csc,
|
||||
};
|
||||
|
||||
|
||||
/*
 * Sanity-check a compressed sparse pattern (CSR, or CSC with the roles of
 * rows and columns swapped).
 *
 *   m         : number of compressed rows; csrRowPtr has m + 1 entries
 *   nnz       : expected number of stored entries
 *   csrRowPtr : row pointer array
 *   csrColInd : column indices, nnz entries
 *
 * Checks, in order: csrRowPtr[m] - csrRowPtr[0] == nnz, the base index is
 * 0 or 1, every row range is non-decreasing and lies inside [0, nnz], and
 * within each row the column indices are >= base and strictly increasing.
 * Diagnostics go to stderr. Returns 0 when the pattern is consistent,
 * 1 otherwise; scanning stops after the first row that has a problem.
 */
static int verify_pattern(
    int m,
    int nnz,
    int *csrRowPtr,
    int *csrColInd)
{
    int i, col, start, end, base_index;
    int error_found = 0;

    if (nnz != (csrRowPtr[m] - csrRowPtr[0])){
        /* operands are printed in the same order as the difference that is checked */
        fprintf(stderr, "Error (nnz check failed): (csrRowPtr[%d]=%d - csrRowPtr[%d]=%d) != (nnz=%d)\n", m, csrRowPtr[m], 0, csrRowPtr[0], nnz);
        error_found = 1;
    }

    base_index = csrRowPtr[0];
    if ((0 != base_index) && (1 != base_index)){
        fprintf(stderr, "Error (base index check failed): base index = %d\n", base_index);
        error_found = 1;
    }

    for (i=0; (!error_found) && (i<m); i++){
        start = csrRowPtr[i  ] - base_index;
        end   = csrRowPtr[i+1] - base_index;
        if (start > end){
            fprintf(stderr, "Error (corrupted row): csrRowPtr[%d] (=%d) > csrRowPtr[%d] (=%d)\n", i, start+base_index, i+1, end+base_index);
            error_found = 1;
        }
        else if ((start < 0) || (end > nnz)){
            /* a range outside [0, nnz] would make the loop below read past csrColInd */
            fprintf(stderr, "Error (row range check failed): csrRowPtr[%d] (=%d), csrRowPtr[%d] (=%d) outside of nnz=%d entries\n", i, start+base_index, i+1, end+base_index, nnz);
            error_found = 1;
        }
        if (error_found){
            break;
        }
        for (col=start; col<end; col++){
            if (csrColInd[col] < base_index){
                fprintf(stderr, "Error (column vs. base index check failed): csrColInd[%d] < %d\n", col, base_index);
                error_found = 1;
            }
            if ((col < (end-1)) && (csrColInd[col] >= csrColInd[col+1])){
                fprintf(stderr, "Error (sorting of the column indecis check failed): (csrColInd[%d]=%d) >= (csrColInd[%d]=%d)\n", col, csrColInd[col], col+1, csrColInd[col+1]);
                error_found = 1;
            }
        }
    }
    return error_found ;
}
|
||||
|
||||
|
||||
template <typename T_ELEM>
|
||||
int loadMMSparseMatrix(
|
||||
char *filename,
|
||||
char elem_type,
|
||||
bool csrFormat,
|
||||
int *m,
|
||||
int *n,
|
||||
int *nnz,
|
||||
T_ELEM **aVal,
|
||||
int **aRowInd,
|
||||
int **aColInd,
|
||||
int extendSymMatrix)
|
||||
{
|
||||
MM_typecode matcode;
|
||||
double *tempVal;
|
||||
int *tempRowInd,*tempColInd;
|
||||
double *tval;
|
||||
int *trow,*tcol;
|
||||
int *csrRowPtr, *cscColPtr;
|
||||
int i,j,error,base,count;
|
||||
struct cooFormat *work;
|
||||
|
||||
/* read the matrix */
|
||||
error = mm_read_mtx_crd(filename, m, n, nnz, &trow, &tcol, &tval, &matcode);
|
||||
if (error) {
|
||||
fprintf(stderr, "!!!! can not open file: '%s'\n", filename);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* start error checking */
|
||||
if (mm_is_complex(matcode) && ((elem_type != 'z') && (elem_type != 'c'))) {
|
||||
fprintf(stderr, "!!!! complex matrix requires type 'z' or 'c'\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (mm_is_dense(matcode) || mm_is_array(matcode) || mm_is_pattern(matcode) /*|| mm_is_integer(matcode)*/){
|
||||
fprintf(stderr, "!!!! dense, array, pattern and integer matrices are not supported\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* if necessary symmetrize the pattern (transform from triangular to full) */
|
||||
if ((extendSymMatrix) && (mm_is_symmetric(matcode) || mm_is_hermitian(matcode) || mm_is_skew(matcode))){
|
||||
//count number of non-diagonal elements
|
||||
count=0;
|
||||
for(i=0; i<(*nnz); i++){
|
||||
if (trow[i] != tcol[i]){
|
||||
count++;
|
||||
}
|
||||
}
|
||||
//allocate space for the symmetrized matrix
|
||||
tempRowInd = (int *)malloc((*nnz + count) * sizeof(int));
|
||||
tempColInd = (int *)malloc((*nnz + count) * sizeof(int));
|
||||
if (mm_is_real(matcode) || mm_is_integer(matcode)){
|
||||
tempVal = (double *)malloc((*nnz + count) * sizeof(double));
|
||||
}
|
||||
else{
|
||||
tempVal = (double *)malloc(2 * (*nnz + count) * sizeof(double));
|
||||
}
|
||||
//copy the elements regular and transposed locations
|
||||
for(j=0, i=0; i<(*nnz); i++){
|
||||
tempRowInd[j]=trow[i];
|
||||
tempColInd[j]=tcol[i];
|
||||
if (mm_is_real(matcode) || mm_is_integer(matcode)){
|
||||
tempVal[j]=tval[i];
|
||||
}
|
||||
else{
|
||||
tempVal[2*j] =tval[2*i];
|
||||
tempVal[2*j+1]=tval[2*i+1];
|
||||
}
|
||||
j++;
|
||||
if (trow[i] != tcol[i]){
|
||||
tempRowInd[j]=tcol[i];
|
||||
tempColInd[j]=trow[i];
|
||||
if (mm_is_real(matcode) || mm_is_integer(matcode)){
|
||||
if (mm_is_skew(matcode)){
|
||||
tempVal[j]=-tval[i];
|
||||
}
|
||||
else{
|
||||
tempVal[j]= tval[i];
|
||||
}
|
||||
}
|
||||
else{
|
||||
if(mm_is_hermitian(matcode)){
|
||||
tempVal[2*j] = tval[2*i];
|
||||
tempVal[2*j+1]=-tval[2*i+1];
|
||||
}
|
||||
else{
|
||||
tempVal[2*j] = tval[2*i];
|
||||
tempVal[2*j+1]= tval[2*i+1];
|
||||
}
|
||||
}
|
||||
j++;
|
||||
}
|
||||
}
|
||||
(*nnz)+=count;
|
||||
//free temporary storage
|
||||
free(trow);
|
||||
free(tcol);
|
||||
free(tval);
|
||||
}
|
||||
else{
|
||||
tempRowInd=trow;
|
||||
tempColInd=tcol;
|
||||
tempVal =tval;
|
||||
}
|
||||
// life time of (trow, tcol, tval) is over.
|
||||
// please use COO format (tempRowInd, tempColInd, tempVal)
|
||||
|
||||
// use qsort to sort COO format
|
||||
work = (struct cooFormat *)malloc(sizeof(struct cooFormat)*(*nnz));
|
||||
if (NULL == work){
|
||||
fprintf(stderr, "!!!! allocation error, malloc failed\n");
|
||||
return 1;
|
||||
}
|
||||
for(i=0; i<(*nnz); i++){
|
||||
work[i].i = tempRowInd[i];
|
||||
work[i].j = tempColInd[i];
|
||||
work[i].p = i; // permutation is identity
|
||||
}
|
||||
|
||||
if (csrFormat){
|
||||
/* create row-major ordering of indices (sorted by row and within each row by column) */
|
||||
qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[0] );
|
||||
}else{
|
||||
/* create column-major ordering of indices (sorted by column and within each column by row) */
|
||||
qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[1] );
|
||||
|
||||
}
|
||||
|
||||
// (tempRowInd, tempColInd) is sorted either by row-major or by col-major
|
||||
for(i=0; i<(*nnz); i++){
|
||||
tempRowInd[i] = work[i].i;
|
||||
tempColInd[i] = work[i].j;
|
||||
}
|
||||
|
||||
// setup base
|
||||
// check if there is any row/col 0, if so base-0
|
||||
// check if there is any row/col equal to matrix dimension m/n, if so base-1
|
||||
int base0 = 0;
|
||||
int base1 = 0;
|
||||
for(i=0; i<(*nnz); i++){
|
||||
const int row = tempRowInd[i];
|
||||
const int col = tempColInd[i];
|
||||
if ( (0 == row) || (0 == col) ){
|
||||
base0 = 1;
|
||||
}
|
||||
if ( (*m == row) || (*n == col) ){
|
||||
base1 = 1;
|
||||
}
|
||||
}
|
||||
if ( base0 && base1 ){
|
||||
printf("Error: input matrix is base-0 and base-1 \n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
base = 0;
|
||||
if (base1){
|
||||
base = 1;
|
||||
}
|
||||
|
||||
/* compress the appropriate indices */
|
||||
if (csrFormat){
|
||||
/* CSR format (assuming row-major format) */
|
||||
csrRowPtr = (int *)malloc(((*m)+1) * sizeof(csrRowPtr[0]));
|
||||
if (!csrRowPtr) return 1;
|
||||
compress_index(tempRowInd, *nnz, *m, csrRowPtr, base);
|
||||
|
||||
*aRowInd = csrRowPtr;
|
||||
*aColInd = (int *)malloc((*nnz) * sizeof(int));
|
||||
}
|
||||
else {
|
||||
/* CSC format (assuming column-major format) */
|
||||
cscColPtr = (int *)malloc(((*n)+1) * sizeof(cscColPtr[0]));
|
||||
if (!cscColPtr) return 1;
|
||||
compress_index(tempColInd, *nnz, *n, cscColPtr, base);
|
||||
|
||||
*aColInd = cscColPtr;
|
||||
*aRowInd = (int *)malloc((*nnz) * sizeof(int));
|
||||
}
|
||||
|
||||
/* transfrom the matrix values of type double into one of the cusparse library types */
|
||||
*aVal = (T_ELEM *)malloc((*nnz) * sizeof(T_ELEM));
|
||||
|
||||
for (i=0; i<(*nnz); i++) {
|
||||
if (csrFormat){
|
||||
(*aColInd)[i] = tempColInd[i];
|
||||
}
|
||||
else{
|
||||
(*aRowInd)[i] = tempRowInd[i];
|
||||
}
|
||||
if (mm_is_real(matcode) || mm_is_integer(matcode)){
|
||||
(*aVal)[i] = cuGet<T_ELEM>( tempVal[ work[i].p ] );
|
||||
}
|
||||
else{
|
||||
(*aVal)[i] = cuGet<T_ELEM>(tempVal[2*work[i].p], tempVal[2*work[i].p+1]);
|
||||
}
|
||||
}
|
||||
|
||||
/* check for corruption */
|
||||
int error_found;
|
||||
if (csrFormat){
|
||||
error_found = verify_pattern(*m, *nnz, *aRowInd, *aColInd);
|
||||
}else{
|
||||
error_found = verify_pattern(*n, *nnz, *aColInd, *aRowInd);
|
||||
}
|
||||
if (error_found){
|
||||
fprintf(stderr, "!!!! verify_pattern failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* cleanup and exit */
|
||||
free(work);
|
||||
free(tempVal);
|
||||
free(tempColInd);
|
||||
free(tempRowInd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* specific instantiation */
|
||||
template int loadMMSparseMatrix<float>(
|
||||
char *filename,
|
||||
char elem_type,
|
||||
bool csrFormat,
|
||||
int *m,
|
||||
int *n,
|
||||
int *nnz,
|
||||
float **aVal,
|
||||
int **aRowInd,
|
||||
int **aColInd,
|
||||
int extendSymMatrix);
|
||||
|
||||
template int loadMMSparseMatrix<double>(
|
||||
char *filename,
|
||||
char elem_type,
|
||||
bool csrFormat,
|
||||
int *m,
|
||||
int *n,
|
||||
int *nnz,
|
||||
double **aVal,
|
||||
int **aRowInd,
|
||||
int **aColInd,
|
||||
int extendSymMatrix);
|
||||
|
||||
template int loadMMSparseMatrix<cuComplex>(
|
||||
char *filename,
|
||||
char elem_type,
|
||||
bool csrFormat,
|
||||
int *m,
|
||||
int *n,
|
||||
int *nnz,
|
||||
cuComplex **aVal,
|
||||
int **aRowInd,
|
||||
int **aColInd,
|
||||
int extendSymMatrix);
|
||||
|
||||
template int loadMMSparseMatrix<cuDoubleComplex>(
|
||||
char *filename,
|
||||
char elem_type,
|
||||
bool csrFormat,
|
||||
int *m,
|
||||
int *n,
|
||||
int *nnz,
|
||||
cuDoubleComplex **aVal,
|
||||
int **aRowInd,
|
||||
int **aColInd,
|
||||
int extendSymMatrix);
|
||||
|
||||
|
407
Samples/cudaNvSci/Makefile
Normal file
407
Samples/cudaNvSci/Makefile
Normal file
|
@ -0,0 +1,407 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux platforms
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L $(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L $(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),qnx)
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM
|
||||
LDFLAGS += -lsocket
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
# This sample is not supported on Mac OSX
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
$(info >>> WARNING - cudaNvSci is not supported on Mac OSX - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on ARMv7
|
||||
ifeq ($(TARGET_ARCH),armv7l)
|
||||
$(info >>> WARNING - cudaNvSci is not supported on ARMv7 - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on QNX
|
||||
ifeq ($(TARGET_OS),qnx)
|
||||
$(info >>> WARNING - cudaNvSci is not supported on QNX - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
#Detect if installed version of GCC supports C++11
ifeq ($(TARGET_OS),linux)
    # -dumpversion prints e.g. "4.8.5", "7.5.0" or only the major number ("10").
    GCCVERSIONSTRING := $(shell $(HOST_COMPILER) -dumpversion)
    # Split on "." and compare major/minor numerically. Cropping a
    # concatenated digit string to 3 characters turns 10.2.0 into 102 and
    # wrongly rejects every GCC >= 10.
    GCCVERSIONFIELDS := $(subst ., ,$(GCCVERSIONSTRING))
    GCCMAJOR := $(word 1,$(GCCVERSIONFIELDS))
    GCCMINOR := $(word 2,$(GCCVERSIONFIELDS))
    # Missing fields count as 0 (also covers a compiler that printed nothing)
    ifeq ($(GCCMAJOR),)
        GCCMAJOR := 0
    endif
    ifeq ($(GCCMINOR),)
        GCCMINOR := 0
    endif
    # major * 100 + minor: 4.7 -> 407, 9.3 -> 903, 10.2 -> 1002
    GCCVERSION := $(shell expr $(GCCMAJOR) \* 100 + $(GCCMINOR))
    #$(warning $(GCCVERSION))

    IS_MIN_VERSION := $(shell expr $(GCCVERSION) \>= 407)

    ifeq ($(IS_MIN_VERSION), 1)
        $(info >>> GCC Version is greater or equal to 4.7.0 <<<)
    else
        $(info >>> Waiving build. Minimum GCC version required for C++11 is 4.7.0 <<<)
        SAMPLE_ENABLED := 0
    endif
endif
|
||||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 30 35 37 50 52 60 61 70 72 75
|
||||
else
|
||||
SMS ?= 30 35 37 50 52 60 61 70 75
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
|
||||
else
|
||||
ifeq ($(TARGET_ARCH),x86_64)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
|
||||
endif
|
||||
|
||||
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
|
||||
ifeq ("$(CUDALIB)","")
|
||||
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
else
|
||||
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
|
||||
LIBRARIES += -L$(CUDALIB) -lcuda
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS += --std=c++11
|
||||
|
||||
LIBRARIES += -lnvscibuf -lnvscisync
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build: cudaNvSci
|
||||
|
||||
# Report the dependency status of the sample: print a "waived" message when
# SAMPLE_ENABLED is 0, otherwise print that all dependencies have been met.
check.deps:
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
@echo "Sample will be waived due to the above missing dependencies"
|
||||
else
|
||||
@echo "Sample is ready - all dependencies have been met"
|
||||
endif
|
||||
|
||||
# Compile cudaNvSci.cpp into cudaNvSci.o with NVCC.
# $@ is the target (the object file) and $< is the first prerequisite (the source).
cudaNvSci.o:cudaNvSci.cpp
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
# Compile the CUDA source imageKernels.cu into imageKernels.o with NVCC,
# using the same include, compiler, and gencode flags as the .cpp sources.
imageKernels.o:imageKernels.cu
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
# Compile main.cpp into main.o with NVCC.
# $@ is the target (the object file) and $< is the first prerequisite (the source).
main.o:main.cpp
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
# Link the three object files into the cudaNvSci executable ($+ expands to all
# prerequisites, in order), then create the output directory
# ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) if needed and copy the
# executable into it.
cudaNvSci: cudaNvSci.o imageKernels.o main.o
|
||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
||||
$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
# Make sure the sample is built, then run the executable from the current directory.
run: build
|
||||
$(EXEC) ./cudaNvSci
|
||||
|
||||
# Remove the executable and object files from this directory, plus the copy of
# the executable under ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE).
# These commands are not prefixed with $(EXEC), so they run even when the
# sample is disabled.
clean:
|
||||
rm -f cudaNvSci cudaNvSci.o imageKernels.o main.o
|
||||
rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/cudaNvSci
|
||||
|
||||
# `clobber` only depends on `clean`.
clobber: clean
|
70
Samples/cudaNvSci/NsightEclipse.xml
Normal file
70
Samples/cudaNvSci/NsightEclipse.xml
Normal file
|
@ -0,0 +1,70 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>cudaNvSci</name>
|
||||
<cflags>
|
||||
<flag>--std=c++11</flag>
|
||||
</cflags>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaImportExternalMemory</toolkit>
|
||||
<toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
|
||||
<toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
|
||||
<toolkit>cudaImportExternalSemaphore</toolkit>
|
||||
<toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
|
||||
<toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
|
||||
<toolkit>cudaDestroyExternalSemaphore</toolkit>
|
||||
<toolkit>cudaDestroyExternalMemory</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread & rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../common/inc</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="basic">CUDA NvSci Interop</concept>
|
||||
<concept level="advanced">Data Parallel Algorithms</concept>
|
||||
<concept level="advanced">Image Processing</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>CUDA</keyword>
|
||||
<keyword>CPP11</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
<library os="linux">cuda</library>
|
||||
<library framework="true" os="macosx">CUDA</library>
|
||||
<library>nvscibuf</library>
|
||||
<library>nvscisync</library>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>main.cpp</primary_file>
|
||||
<required_dependencies>
|
||||
<dependency>NVSCI</dependency>
|
||||
</required_dependencies>
|
||||
<scopes>
|
||||
<scope>1:CUDA Advanced Topics</scope>
|
||||
<scope>1:CUDA NvSciBuf/NvSciSync Interop</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<platform>aarch64</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<from>6.0</from>
|
||||
</supported_sm_architectures>
|
||||
<title>CUDA NvSciBuf/NvSciSync Interop</title>
|
||||
<type>exe</type>
|
||||
</entry>
|
64
Samples/cudaNvSci/README.md
Normal file
64
Samples/cudaNvSci/README.md
Normal file
|
@ -0,0 +1,64 @@
|
|||
# cudaNvSci - CUDA NvSciBuf/NvSciSync Interop
|
||||
|
||||
## Description
|
||||
|
||||
This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync objects into CUDA to perform two image-processing algorithms on a PPM image: image rotation in the first thread, and RGBA-to-grayscale conversion of the rotated image in the second thread. Currently supported only on Ubuntu 18.04.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Linux
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64, aarch64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaExternalMemoryGetMappedMipmappedArray, cudaImportExternalSemaphore, cudaSignalExternalSemaphoresAsync, cudaWaitExternalSemaphoresAsync, cudaDestroyExternalSemaphore, cudaDestroyExternalMemory
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[NVSCI](../../README.md#nvsci)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
The samples makefiles can take advantage of certain options:
|
||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, aarch64.
|
||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=aarch64` <br/>
|
||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=g++
|
||||
```
|
||||
|
||||
## References (for more details)
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user