Add and update samples for cuda 10.2 support

This commit is contained in:
Mahesh Doijade 2019-10-23 19:00:39 +05:30
parent 489d9f7b1f
commit 6be514679b
305 changed files with 78239 additions and 518 deletions

160
Common/dynlink_d3d11.h Normal file
View File

@ -0,0 +1,160 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//--------------------------------------------------------------------------------------
// File: dynlink_d3d11.h
//
// Shortcut macros and functions for using DX objects
//
// Copyright (c) Microsoft Corporation. All rights reserved
//--------------------------------------------------------------------------------------
#ifndef _DYNLINK_D3D11_H_
#define _DYNLINK_D3D11_H_
// Standard Windows includes
#include <windows.h>
#include <initguid.h>
#include <assert.h>
#include <wchar.h>
#include <mmsystem.h>
#include <commctrl.h> // for InitCommonControls()
#include <shellapi.h> // for ExtractIcon()
#include <new.h> // for placement new
#include <shlobj.h>
#include <math.h>
#include <limits.h>
#include <stdio.h>
// CRT's memory leak detection
#if defined(DEBUG) || defined(_DEBUG)
#include <crtdbg.h>
#endif
// Direct3D10 includes
#include <dxgi.h>
#include <d3d11.h>
// #include <..\Samples\C++\Effects11\Inc\d3dx11effect.h>
// XInput includes
#include <xinput.h>
// strsafe.h deprecates old unsecure string functions. If you
// really do not want to it to (not recommended), then uncomment the next line
//#define STRSAFE_NO_DEPRECATE
#ifndef STRSAFE_NO_DEPRECATE
#pragma deprecated("strncpy")
#pragma deprecated("wcsncpy")
#pragma deprecated("_tcsncpy")
#pragma deprecated("wcsncat")
#pragma deprecated("strncat")
#pragma deprecated("_tcsncat")
#endif
#pragma warning( disable : 4996 ) // disable deprecated warning
#include <strsafe.h>
#pragma warning( default : 4996 )
typedef HRESULT(WINAPI *LPCREATEDXGIFACTORY)(REFIID, void **);
typedef HRESULT(WINAPI *LPD3D11CREATEDEVICEANDSWAPCHAIN)(__in_opt IDXGIAdapter *pAdapter, D3D_DRIVER_TYPE DriverType, HMODULE Software, UINT Flags, __in_ecount_opt(FeatureLevels) CONST D3D_FEATURE_LEVEL *pFeatureLevels, UINT FeatureLevels, UINT SDKVersion, __in_opt CONST DXGI_SWAP_CHAIN_DESC *pSwapChainDesc, __out_opt IDXGISwapChain **ppSwapChain, __out_opt ID3D11Device **ppDevice, __out_opt D3D_FEATURE_LEVEL *pFeatureLevel, __out_opt ID3D11DeviceContext **ppImmediateContext);
typedef HRESULT(WINAPI *LPD3D11CREATEDEVICE)(IDXGIAdapter *, D3D_DRIVER_TYPE, HMODULE, UINT32, D3D_FEATURE_LEVEL *, UINT, UINT32, ID3D11Device **, D3D_FEATURE_LEVEL *, ID3D11DeviceContext **);
static HMODULE s_hModDXGI = NULL;
static LPCREATEDXGIFACTORY sFnPtr_CreateDXGIFactory = NULL;
static HMODULE s_hModD3D11 = NULL;
static LPD3D11CREATEDEVICE sFnPtr_D3D11CreateDevice = NULL;
static LPD3D11CREATEDEVICEANDSWAPCHAIN sFnPtr_D3D11CreateDeviceAndSwapChain = NULL;
// unload the D3D10 DLLs
static bool dynlinkUnloadD3D11API(void)
{
if (s_hModDXGI)
{
FreeLibrary(s_hModDXGI);
s_hModDXGI = NULL;
}
if (s_hModD3D11)
{
FreeLibrary(s_hModD3D11);
s_hModD3D11 = NULL;
}
return true;
}
// Dynamically load the D3D11 DLLs loaded and map the function pointers
static bool dynlinkLoadD3D11API(void)
{
// If both modules are non-NULL, this function has already been called. Note
// that this doesn't guarantee that all ProcAddresses were found.
if (s_hModD3D11 != NULL && s_hModDXGI != NULL)
{
return true;
}
#if 1
// This may fail if Direct3D 11 isn't installed
s_hModD3D11 = LoadLibrary("d3d11.dll");
if (s_hModD3D11 != NULL)
{
sFnPtr_D3D11CreateDevice = (LPD3D11CREATEDEVICE)GetProcAddress(s_hModD3D11, "D3D11CreateDevice");
sFnPtr_D3D11CreateDeviceAndSwapChain = (LPD3D11CREATEDEVICEANDSWAPCHAIN)GetProcAddress(s_hModD3D11, "D3D11CreateDeviceAndSwapChain");
}
else
{
printf("\nLoad d3d11.dll failed\n");
fflush(0);
}
if (!sFnPtr_CreateDXGIFactory)
{
s_hModDXGI = LoadLibrary("dxgi.dll");
if (s_hModDXGI)
{
sFnPtr_CreateDXGIFactory = (LPCREATEDXGIFACTORY)GetProcAddress(s_hModDXGI, "CreateDXGIFactory1");
}
return (s_hModDXGI != NULL) && (s_hModD3D11 != NULL);
}
return (s_hModD3D11 != NULL);
#else
sFnPtr_D3D11CreateDevice = (LPD3D11CREATEDEVICE)D3D11CreateDeviceAndSwapChain;
sFnPtr_D3D11CreateDeviceAndSwapChain = (LPD3D11CREATEDEVICEANDSWAPCHAIN)D3D11CreateDeviceAndSwapChain;
//sFnPtr_D3DX11CreateEffectFromMemory = ( LPD3DX11CREATEEFFECTFROMMEMORY )D3DX11CreateEffectFromMemory;
sFnPtr_D3DX11CompileFromMemory = (LPD3DX11COMPILEFROMMEMORY)D3DX11CompileFromMemory;
sFnPtr_CreateDXGIFactory = (LPCREATEDXGIFACTORY)CreateDXGIFactory;
return true;
#endif
return true;
}
#endif

View File

@ -579,24 +579,12 @@ static const char *_cudaGetErrorEnum(NppStatus error) {
}
#endif
#ifdef __DRIVER_TYPES_H__
#ifndef DEVICE_RESET
#define DEVICE_RESET cudaDeviceReset();
#endif
#else
#ifndef DEVICE_RESET
#define DEVICE_RESET
#endif
#endif
template <typename T>
void check(T result, char const *const func, const char *const file,
int const line) {
if (result) {
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
DEVICE_RESET
// Make sure we call CUDA Device Reset before exiting
exit(EXIT_FAILURE);
}
}
@ -619,7 +607,6 @@ inline void __getLastCudaError(const char *errorMessage, const char *file,
" %s : (%d) %s.\n",
file, line, errorMessage, static_cast<int>(err),
cudaGetErrorString(err));
DEVICE_RESET
exit(EXIT_FAILURE);
}
}
@ -696,6 +683,50 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
major, minor, nGpuArchCoresPerSM[index - 1].Cores);
return nGpuArchCoresPerSM[index - 1].Cores;
}
inline const char* _ConvertSMVer2ArchName(int major, int minor) {
// Defines for GPU Architecture types (using the SM version to determine
// the GPU Arch name)
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version,
// and m = SM minor version
const char* name;
} sSMtoArchName;
sSMtoArchName nGpuArchNameSM[] = {
{0x30, "Kepler"},
{0x32, "Kepler"},
{0x35, "Kepler"},
{0x37, "Kepler"},
{0x50, "Maxwell"},
{0x52, "Maxwell"},
{0x53, "Maxwell"},
{0x60, "Pascal"},
{0x61, "Pascal"},
{0x62, "Pascal"},
{0x70, "Volta"},
{0x72, "Xavier"},
{0x75, "Turing"},
{-1, "Graphics Device"}};
int index = 0;
while (nGpuArchNameSM[index].SM != -1) {
if (nGpuArchNameSM[index].SM == ((major << 4) + minor)) {
return nGpuArchNameSM[index].name;
}
index++;
}
// If we don't find the values, we default use the previous one
// to run properly
printf(
"MapSMtoArchName for SM %d.%d is undefined."
" Default to use %s\n",
major, minor, nGpuArchNameSM[index - 1].name);
return nGpuArchNameSM[index - 1].name;
}
// end of GPU Architecture definitions
#ifdef __CUDA_RUNTIME_H__
@ -727,23 +758,24 @@ inline int gpuDeviceInit(int devID) {
return -devID;
}
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
if (deviceProp.computeMode == cudaComputeModeProhibited) {
int computeMode = -1, major = 0, minor = 0;
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
if (computeMode == cudaComputeModeProhibited) {
fprintf(stderr,
"Error: device is running in <Compute Mode "
"Prohibited>, no threads can use cudaSetDevice().\n");
return -1;
}
if (deviceProp.major < 1) {
if (major < 1) {
fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
exit(EXIT_FAILURE);
}
checkCudaErrors(cudaSetDevice(devID));
printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, _ConvertSMVer2ArchName(major, minor));
return devID;
}
@ -756,7 +788,6 @@ inline int gpuGetMaxGflopsDeviceId() {
int devices_prohibited = 0;
uint64_t max_compute_perf = 0;
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0) {
@ -770,20 +801,24 @@ inline int gpuGetMaxGflopsDeviceId() {
current_device = 0;
while (current_device < device_count) {
cudaGetDeviceProperties(&deviceProp, current_device);
int computeMode = -1, major = 0, minor = 0;
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
// If this GPU is not running on Compute Mode prohibited,
// then we can add it to the list
if (deviceProp.computeMode != cudaComputeModeProhibited) {
if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
if (computeMode != cudaComputeModeProhibited) {
if (major == 9999 && minor == 9999) {
sm_per_multiproc = 1;
} else {
sm_per_multiproc =
_ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
_ConvertSMVer2Cores(major, minor);
}
uint64_t compute_perf = (uint64_t)deviceProp.multiProcessorCount *
sm_per_multiproc * deviceProp.clockRate;
int multiProcessorCount = 0, clockRate = 0;
checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device));
checkCudaErrors(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device));
uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;
if (compute_perf > max_compute_perf) {
max_compute_perf = compute_perf;
@ -808,7 +843,6 @@ inline int gpuGetMaxGflopsDeviceId() {
// Initialization code to find the best CUDA Device
inline int findCudaDevice(int argc, const char **argv) {
cudaDeviceProp deviceProp;
int devID = 0;
// If the command-line has a device number specified, use it
@ -830,9 +864,12 @@ inline int findCudaDevice(int argc, const char **argv) {
// Otherwise pick the device with highest Gflops/s
devID = gpuGetMaxGflopsDeviceId();
checkCudaErrors(cudaSetDevice(devID));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID,
deviceProp.name, deviceProp.major, deviceProp.minor);
int major = 0, minor = 0;
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
devID, _ConvertSMVer2ArchName(major, minor), major, minor);
}
return devID;
@ -843,7 +880,6 @@ inline int findIntegratedGPU() {
int device_count = 0;
int devices_prohibited = 0;
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0) {
@ -853,17 +889,19 @@ inline int findIntegratedGPU() {
// Find the integrated GPU which is compute capable
while (current_device < device_count) {
cudaGetDeviceProperties(&deviceProp, current_device);
int computeMode = -1, integrated = -1;
checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device));
// If GPU is integrated and is not running on Compute Mode prohibited,
// then cuda can map to GLES resource
if (deviceProp.integrated &&
(deviceProp.computeMode != cudaComputeModeProhibited)) {
if (integrated && (computeMode != cudaComputeModeProhibited)) {
checkCudaErrors(cudaSetDevice(current_device));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));
int major = 0, minor = 0;
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
current_device, deviceProp.name, deviceProp.major,
deviceProp.minor);
current_device, _ConvertSMVer2ArchName(major, minor), major, minor);
return current_device;
} else {
@ -885,19 +923,18 @@ inline int findIntegratedGPU() {
// General check for CUDA GPU SM Capabilities
inline bool checkCudaCapabilities(int major_version, int minor_version) {
cudaDeviceProp deviceProp;
deviceProp.major = 0;
deviceProp.minor = 0;
int dev;
int major = 0, minor = 0;
checkCudaErrors(cudaGetDevice(&dev));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));
if ((deviceProp.major > major_version) ||
(deviceProp.major == major_version &&
deviceProp.minor >= minor_version)) {
if ((major > major_version) ||
(major == major_version &&
minor >= minor_version)) {
printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
deviceProp.name, deviceProp.major, deviceProp.minor);
_ConvertSMVer2ArchName(major, minor), major, minor);
return true;
} else {
printf(

View File

@ -29,16 +29,11 @@
#include <cstdlib>
#include <string>
int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info)
{
int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info) {
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
info->size = sz;
info->shmHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
NULL,
PAGE_READWRITE,
0,
(DWORD)sz,
name);
info->shmHandle = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
PAGE_READWRITE, 0, (DWORD)sz, name);
if (info->shmHandle == 0) {
return GetLastError();
}
@ -73,8 +68,7 @@ int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info)
#endif
}
int sharedMemoryOpen(const char *name, size_t sz, sharedMemoryInfo *info)
{
int sharedMemoryOpen(const char *name, size_t sz, sharedMemoryInfo *info) {
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
info->size = sz;
@ -106,8 +100,7 @@ int sharedMemoryOpen(const char *name, size_t sz, sharedMemoryInfo *info)
#endif
}
void sharedMemoryClose(sharedMemoryInfo *info)
{
void sharedMemoryClose(sharedMemoryInfo *info) {
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
if (info->addr) {
UnmapViewOfFile(info->addr);
@ -125,8 +118,7 @@ void sharedMemoryClose(sharedMemoryInfo *info)
#endif
}
int spawnProcess(Process *process, const char *app, char * const *args)
{
int spawnProcess(Process *process, const char *app, char *const *args) {
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
STARTUPINFO si = {0};
BOOL status;
@ -140,7 +132,8 @@ int spawnProcess(Process *process, const char *app, char * const *args)
args++;
}
status = CreateProcess(app, LPSTR(arg_string.c_str()), NULL, NULL, FALSE, 0, NULL, NULL, &si, process);
status = CreateProcess(app, LPSTR(arg_string.c_str()), NULL, NULL, FALSE, 0,
NULL, NULL, &si, process);
return status ? 0 : GetLastError();
#else
@ -149,16 +142,14 @@ int spawnProcess(Process *process, const char *app, char * const *args)
if (0 > execvp(app, args)) {
return errno;
}
}
else if (*process < 0) {
} else if (*process < 0) {
return errno;
}
return 0;
#endif
}
int waitProcess(Process *process)
{
int waitProcess(Process *process) {
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
DWORD exitCode;
WaitForSingleObject(process->hProcess, INFINITE);
@ -176,3 +167,396 @@ int waitProcess(Process *process)
return WEXITSTATUS(status);
#endif
}
#if defined(__linux__)
int ipcCreateSocket(ipcHandle *&handle, const char *name,
const std::vector<Process> &processes) {
int server_fd;
struct sockaddr_un servaddr;
handle = new ipcHandle;
memset(handle, 0, sizeof(*handle));
handle->socket = -1;
handle->socketName = NULL;
// Creating socket file descriptor
if ((server_fd = socket(AF_UNIX, SOCK_DGRAM, 0)) == 0) {
perror("IPC failure: Socket creation failed");
return -1;
}
unlink(name);
bzero(&servaddr, sizeof(servaddr));
servaddr.sun_family = AF_UNIX;
size_t len = strlen(name);
if (len > (sizeof(servaddr.sun_path) - 1)) {
perror("IPC failure: Cannot bind provided name to socket. Name too large");
return -1;
}
strncpy(servaddr.sun_path, name, len);
if (bind(server_fd, (struct sockaddr *)&servaddr, SUN_LEN(&servaddr)) < 0) {
perror("IPC failure: Binding socket failed");
return -1;
}
handle->socketName = new char[strlen(name) + 1];
strcpy(handle->socketName, name);
handle->socket = server_fd;
return 0;
}
int ipcOpenSocket(ipcHandle *&handle) {
int sock = 0;
struct sockaddr_un cliaddr;
handle = new ipcHandle;
memset(handle, 0, sizeof(*handle));
if ((sock = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
perror("IPC failure:Socket creation error");
return -1;
}
bzero(&cliaddr, sizeof(cliaddr));
cliaddr.sun_family = AF_UNIX;
char temp[10];
// Create unique name for the socket.
sprintf(temp, "%u", getpid());
strcpy(cliaddr.sun_path, temp);
if (bind(sock, (struct sockaddr *)&cliaddr, sizeof(cliaddr)) < 0) {
perror("IPC failure: Binding socket failed");
return -1;
}
handle->socket = sock;
handle->socketName = new char[strlen(temp) + 1];
strcpy(handle->socketName, temp);
return 0;
}
int ipcCloseSocket(ipcHandle *handle) {
if (!handle) {
return -1;
}
if (handle->socketName) {
unlink(handle->socketName);
delete[] handle->socketName;
}
close(handle->socket);
delete handle;
return 0;
}
int ipcRecvShareableHandle(ipcHandle *handle, ShareableHandle *shHandle) {
struct msghdr msg = {0};
struct iovec iov[1];
struct cmsghdr cm;
// Union to guarantee alignment requirements for control array
union {
struct cmsghdr cm;
char control[CMSG_SPACE(sizeof(int))];
} control_un;
struct cmsghdr *cmptr;
ssize_t n;
int receivedfd;
char dummy_buffer[1];
ssize_t sendResult;
msg.msg_control = control_un.control;
msg.msg_controllen = sizeof(control_un.control);
iov[0].iov_base = (void *)dummy_buffer;
iov[0].iov_len = sizeof(dummy_buffer);
msg.msg_iov = iov;
msg.msg_iovlen = 1;
if ((n = recvmsg(handle->socket, &msg, 0)) <= 0) {
perror("IPC failure: Receiving data over socket failed");
return -1;
}
if (((cmptr = CMSG_FIRSTHDR(&msg)) != NULL) &&
(cmptr->cmsg_len == CMSG_LEN(sizeof(int)))) {
if ((cmptr->cmsg_level != SOL_SOCKET) || (cmptr->cmsg_type != SCM_RIGHTS)) {
return -1;
}
memmove(&receivedfd, CMSG_DATA(cmptr), sizeof(receivedfd));
*(int *)shHandle = receivedfd;
} else {
return -1;
}
return 0;
}
int ipcRecvDataFromClient(ipcHandle *serverHandle, void *data, size_t size) {
ssize_t readResult;
struct sockaddr_un cliaddr;
socklen_t len = sizeof(cliaddr);
readResult = recvfrom(serverHandle->socket, data, size, 0,
(struct sockaddr *)&cliaddr, &len);
if (readResult == -1) {
perror("IPC failure: Receiving data over socket failed");
return -1;
}
return 0;
}
int ipcSendDataToServer(ipcHandle *handle, const char *serverName,
const void *data, size_t size) {
ssize_t sendResult;
struct sockaddr_un serveraddr;
bzero(&serveraddr, sizeof(serveraddr));
serveraddr.sun_family = AF_UNIX;
strncpy(serveraddr.sun_path, serverName, sizeof(serveraddr.sun_path) - 1);
sendResult = sendto(handle->socket, data, size, 0,
(struct sockaddr *)&serveraddr, sizeof(serveraddr));
if (sendResult <= 0) {
perror("IPC failure: Sending data over socket failed");
}
return 0;
}
int ipcSendShareableHandle(ipcHandle *handle,
const std::vector<ShareableHandle> &shareableHandles,
Process process, int data) {
struct msghdr msg;
struct iovec iov[1];
union {
struct cmsghdr cm;
char control[CMSG_SPACE(sizeof(int))];
} control_un;
struct cmsghdr *cmptr;
ssize_t readResult;
struct sockaddr_un cliaddr;
socklen_t len = sizeof(cliaddr);
// Construct client address to send this SHareable handle to
bzero(&cliaddr, sizeof(cliaddr));
cliaddr.sun_family = AF_UNIX;
char temp[10];
sprintf(temp, "%u", process);
strcpy(cliaddr.sun_path, temp);
len = sizeof(cliaddr);
// Send corresponding shareable handle to the client
int sendfd = (int)shareableHandles[data];
msg.msg_control = control_un.control;
msg.msg_controllen = sizeof(control_un.control);
cmptr = CMSG_FIRSTHDR(&msg);
cmptr->cmsg_len = CMSG_LEN(sizeof(int));
cmptr->cmsg_level = SOL_SOCKET;
cmptr->cmsg_type = SCM_RIGHTS;
memmove(CMSG_DATA(cmptr), &sendfd, sizeof(sendfd));
msg.msg_name = (void *)&cliaddr;
msg.msg_namelen = sizeof(struct sockaddr_un);
iov[0].iov_base = (void *)"";
iov[0].iov_len = 1;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
ssize_t sendResult = sendmsg(handle->socket, &msg, 0);
if (sendResult <= 0) {
perror("IPC failure: Sending data over socket failed");
return -1;
}
return 0;
}
int ipcSendShareableHandles(
ipcHandle *handle, const std::vector<ShareableHandle> &shareableHandles,
const std::vector<Process> &processes) {
// Send all shareable handles to every single process.
for (int i = 0; i < shareableHandles.size(); i++) {
for (int j = 0; j < processes.size(); j++) {
checkIpcErrors(
ipcSendShareableHandle(handle, shareableHandles, processes[j], i));
}
}
return 0;
}
int ipcRecvShareableHandles(ipcHandle *handle,
std::vector<ShareableHandle> &shareableHandles) {
for (int i = 0; i < shareableHandles.size(); i++) {
checkIpcErrors(ipcRecvShareableHandle(handle, &shareableHandles[i]));
}
return 0;
}
int ipcCloseShareableHandle(ShareableHandle shHandle) {
return close(shHandle);
}
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// Generic name to build individual Mailslot names by appending process ids.
LPTSTR SlotName = (LPTSTR)TEXT("\\\\.\\mailslot\\sample_mailslot_");
int ipcCreateSocket(ipcHandle *&handle, const char *name,
const std::vector<Process> &processes) {
handle = new ipcHandle;
handle->hMailslot.resize(processes.size());
// Open Mailslots of all clients and store respective handles.
for (int i = 0; i < handle->hMailslot.size(); ++i) {
std::basic_string<TCHAR> childSlotName(SlotName);
char tempBuf[20];
_itoa_s(processes[i].dwProcessId, tempBuf, 10);
childSlotName += TEXT(tempBuf);
HANDLE hFile =
CreateFile(TEXT(childSlotName.c_str()), GENERIC_WRITE, FILE_SHARE_READ,
(LPSECURITY_ATTRIBUTES)NULL, OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, (HANDLE)NULL);
if (hFile == INVALID_HANDLE_VALUE) {
printf("IPC failure: Opening Mailslot by CreateFile failed with %d\n",
GetLastError());
return -1;
}
handle->hMailslot[i] = hFile;
}
return 0;
}
int ipcOpenSocket(ipcHandle *&handle) {
handle = new ipcHandle;
HANDLE hSlot;
std::basic_string<TCHAR> clientSlotName(SlotName);
char tempBuf[20];
_itoa_s(GetCurrentProcessId(), tempBuf, 10);
clientSlotName += TEXT(tempBuf);
hSlot = CreateMailslot((LPSTR)clientSlotName.c_str(), 0,
MAILSLOT_WAIT_FOREVER, (LPSECURITY_ATTRIBUTES)NULL);
if (hSlot == INVALID_HANDLE_VALUE) {
printf("IPC failure: CreateMailslot failed for client with %d\n",
GetLastError());
return -1;
}
handle->hMailslot.push_back(hSlot);
return 0;
}
int ipcSendData(HANDLE mailslot, const void *data, size_t sz) {
BOOL result;
DWORD cbWritten;
result = WriteFile(mailslot, data, (DWORD)sz, &cbWritten, (LPOVERLAPPED)NULL);
if (!result) {
printf("IPC failure: WriteFile failed with %d.\n", GetLastError());
return -1;
}
return 0;
}
int ipcRecvData(ipcHandle *handle, void *data, size_t sz) {
DWORD cbMessage, cMessage, cbRead;
BOOL fResult;
cbMessage = cMessage = cbRead = 0;
HANDLE mailslot = handle->hMailslot[0];
pollMailSlot:
fResult = GetMailslotInfo(mailslot, (LPDWORD)NULL, &cbMessage, &cMessage,
(LPDWORD)NULL);
if (!fResult) {
printf("IPC failure: GetMailslotInfo failed with %d.\n", GetLastError());
return -1;
}
if (cbMessage == MAILSLOT_NO_MESSAGE) {
goto pollMailSlot;
}
while (cMessage != 0) {
fResult = ReadFile(mailslot, data, (DWORD)sz, &cbRead, NULL);
if (!fResult) {
printf("IPC failure: ReadFile failed with %d.\n", GetLastError());
return -1;
}
fResult = GetMailslotInfo(mailslot, (LPDWORD)NULL, &cbMessage, &cMessage,
(LPDWORD)NULL);
if (!fResult) {
printf("IPC failure: GetMailslotInfo failed (%d)\n", GetLastError());
return -1;
}
}
return 0;
}
int ipcSendShareableHandles(
ipcHandle *handle, const std::vector<ShareableHandle> &shareableHandles,
const std::vector<Process> &processes) {
// Send all shareable handles to every single process.
for (int i = 0; i < processes.size(); i++) {
HANDLE hProcess =
OpenProcess(PROCESS_DUP_HANDLE, FALSE, processes[i].dwProcessId);
if (hProcess == INVALID_HANDLE_VALUE) {
printf("IPC failure: OpenProcess failed (%d)\n", GetLastError());
return -1;
}
for (int j = 0; j < shareableHandles.size(); j++) {
HANDLE hDup = INVALID_HANDLE_VALUE;
// Duplicate the handle into the target process's space
if (!DuplicateHandle(GetCurrentProcess(), shareableHandles[j], hProcess,
&hDup, 0, FALSE, DUPLICATE_SAME_ACCESS)) {
printf("IPC failure: DuplicateHandle failed (%d)\n", GetLastError());
return -1;
}
checkIpcErrors(ipcSendData(handle->hMailslot[i], &hDup, sizeof(HANDLE)));
}
CloseHandle(hProcess);
}
return 0;
}
int ipcRecvShareableHandles(ipcHandle *handle,
std::vector<ShareableHandle> &shareableHandles) {
for (int i = 0; i < shareableHandles.size(); i++) {
checkIpcErrors(
ipcRecvData(handle, &shareableHandles[i], sizeof(shareableHandles[i])));
}
return 0;
}
int ipcCloseSocket(ipcHandle *handle) {
for (int i = 0; i < handle->hMailslot.size(); i++) {
CloseHandle(handle->hMailslot[i]);
}
delete handle;
return 0;
}
int ipcCloseShareableHandle(ShareableHandle shHandle) {
CloseHandle(shHandle);
return 0;
}
#endif

View File

@ -33,13 +33,26 @@
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <iostream>
#include <stdio.h>
#include <tchar.h>
#include <strsafe.h>
#include <sddl.h>
#include <aclapi.h>
#include <winternl.h>
#else
#include <stdio.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <memory.h>
#include <sys/un.h>
#endif
#include <vector>
typedef struct sharedMemoryInfo_st {
void *addr;
@ -68,4 +81,40 @@ int spawnProcess(Process *process, const char *app, char * const *args);
int waitProcess(Process *process);
#define checkIpcErrors(ipcFuncResult) \
if (ipcFuncResult == -1) { fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); exit(EXIT_FAILURE); }
#if defined(__linux__)
struct ipcHandle_st {
int socket;
char *socketName;
};
typedef int ShareableHandle;
#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
struct ipcHandle_st {
std::vector<HANDLE> hMailslot; // 1 Handle in case of child and `num children` Handles for parent.
};
typedef HANDLE ShareableHandle;
#endif
typedef struct ipcHandle_st ipcHandle;
int
ipcCreateSocket(ipcHandle *&handle, const char *name, const std::vector<Process>& processes);
int
ipcOpenSocket(ipcHandle *&handle);
int
ipcCloseSocket(ipcHandle *handle);
int
ipcRecvShareableHandles(ipcHandle *handle, std::vector<ShareableHandle>& shareableHandles);
int
ipcSendShareableHandles(ipcHandle *handle, const std::vector<ShareableHandle>& shareableHandles, const std::vector<Process>& processes);
int
ipcCloseShareableHandle(ShareableHandle shHandle);
#endif // HELPER_MULTIPROCESS_H

View File

@ -297,6 +297,19 @@ int writeBMPi(const char *filename, const unsigned char *d_RGB, int pitch,
return 0;
}
int inputDirExists(const char *pathname) {
struct stat info;
if (stat(pathname, &info) != 0) {
return 0; // Directory does not exists
} else if (info.st_mode & S_IFDIR) {
// is a directory
return 1;
} else {
// is not a directory
return 0;
}
}
int readInput(const std::string &sInputPath,
std::vector<std::string> &filelist) {
int error_code = 1;
@ -315,15 +328,18 @@ int readInput(const std::string &sInputPath,
if (dir_handle) {
error_code = 0;
while ((dir = readdir(dir_handle)) != NULL) {
if (dir->d_type == DT_REG) {
std::string sFileName = sInputPath + dir->d_name;
filelist.push_back(sFileName);
} else if (dir->d_type == DT_DIR) {
if (inputDirExists(sFileName.c_str()))
{
std::string sname = dir->d_name;
if (sname != "." && sname != "..") {
readInput(sInputPath + sname + "/", filelist);
}
}
else
{
filelist.push_back(sFileName);
}
}
closedir(dir_handle);
} else {
@ -360,18 +376,6 @@ int readInput(const std::string &sInputPath,
return 0;
}
int inputDirExists(const char *pathname) {
struct stat info;
if (stat(pathname, &info) != 0) {
return 0; // Directory does not exists
} else if (info.st_mode & S_IFDIR) {
// is a directory
return 1;
} else {
// is not a directory
return 0;
}
}
int getInputDir(std::string &input_dir, const char *executable_path) {
int found = 0;
@ -399,6 +403,7 @@ int getInputDir(std::string &input_dir, const char *executable_path) {
"../../../Samples/<executable_name>/images",
"../../Samples/<executable_name>/images"};
for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) {
std::string pathname(searchPath[i]);
size_t executable_name_pos = pathname.find("<executable_name>");

View File

@ -0,0 +1,124 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
////////////////////////////////////////////////////////////////////////////////
//
// Utility funcs to wrap up saving a surface or the back buffer as a PPM file
// In addition, wraps up a threshold comparision of two PPMs.
//
// These functions are designed to be used to implement an automated QA testing for SDK samples.
//
// Author: Bryan Dudash
// Email: sdkfeedback@nvidia.com
//
// Copyright (c) NVIDIA Corporation. All rights reserved.
////////////////////////////////////////////////////////////////////////////////
#include <helper_functions.h>
#include <rendercheck_d3d11.h>
HRESULT CheckRenderD3D11::ActiveRenderTargetToPPM(ID3D11Device *pDevice, const char *zFileName)
{
ID3D11DeviceContext *pDeviceCtxt;
pDevice->GetImmediateContext(&pDeviceCtxt);
ID3D11RenderTargetView *pRTV = NULL;
pDeviceCtxt->OMGetRenderTargets(1,&pRTV,NULL);
ID3D11Resource *pSourceResource = NULL;
pRTV->GetResource(&pSourceResource);
return ResourceToPPM(pDevice,pSourceResource,zFileName);
}
HRESULT CheckRenderD3D11::ResourceToPPM(ID3D11Device *pDevice, ID3D11Resource *pResource, const char *zFileName)
{
ID3D11DeviceContext *pDeviceCtxt;
pDevice->GetImmediateContext(&pDeviceCtxt);
D3D11_RESOURCE_DIMENSION rType;
pResource->GetType(&rType);
if (rType != D3D11_RESOURCE_DIMENSION_TEXTURE2D)
{
printf("SurfaceToPPM: pResource is not a 2D texture! Aborting...\n");
return E_FAIL;
}
ID3D11Texture2D *pSourceTexture = (ID3D11Texture2D *)pResource;
ID3D11Texture2D *pTargetTexture = NULL;
D3D11_TEXTURE2D_DESC desc;
pSourceTexture->GetDesc(&desc);
desc.BindFlags = 0;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
desc.Usage = D3D11_USAGE_STAGING;
if (FAILED(pDevice->CreateTexture2D(&desc,NULL,&pTargetTexture)))
{
printf("SurfaceToPPM: Unable to create target Texture resoruce! Aborting... \n");
return E_FAIL;
}
pDeviceCtxt->CopyResource(pTargetTexture,pSourceTexture);
D3D11_MAPPED_SUBRESOURCE mappedTex2D;
pDeviceCtxt->Map(pTargetTexture, 0, D3D11_MAP_READ,0,&mappedTex2D);
// Need to convert from dx pitch to pitch=width
unsigned char *pPPMData = new unsigned char[desc.Width*desc.Height*4];
for (unsigned int iHeight = 0; iHeight<desc.Height; iHeight++)
{
memcpy(&(pPPMData[iHeight*desc.Width*4]),(unsigned char *)(mappedTex2D.pData)+iHeight*mappedTex2D.RowPitch,desc.Width*4);
}
pDeviceCtxt->Unmap(pTargetTexture, 0);
// Prepends the PPM header info and bumps byte data afterwards
sdkSavePPM4ub(zFileName, pPPMData, desc.Width, desc.Height);
delete [] pPPMData;
pTargetTexture->Release();
return S_OK;
}
bool CheckRenderD3D11::PPMvsPPM(const char *src_file, const char *ref_file, const char *exec_path,
const float epsilon, const float threshold)
{
char *ref_file_path = sdkFindFilePath(ref_file, exec_path);
if (ref_file_path == NULL)
{
printf("CheckRenderD3D11::PPMvsPPM unable to find <%s> in <%s> Aborting comparison!\n", ref_file, exec_path);
printf(">>> Check info.xml and [project//data] folder <%s> <<<\n", ref_file);
printf("Aborting comparison!\n");
printf(" FAILURE!\n");
return false;
}
return sdkComparePPM(src_file,ref_file_path,epsilon,threshold,true) == true;
}

View File

@ -0,0 +1,52 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#ifndef _RENDERCHECK_D3D11_H_
#define _RENDERCHECK_D3D11_H_
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <d3d11.h>
class CheckRenderD3D11
{
public:
CheckRenderD3D11() {}
static HRESULT ActiveRenderTargetToPPM(ID3D11Device *pDevice, const char *zFileName);
static HRESULT ResourceToPPM(ID3D11Device *pDevice, ID3D11Resource *pResource, const char *zFileName);
static bool PPMvsPPM(const char *src_file, const char *ref_file, const char *exec_path,
const float epsilon, const float threshold = 0.0f);
};
#endif

View File

@ -1,11 +1,21 @@
# CUDA Samples
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 10.1 Update 2](https://developer.nvidia.com/cuda-downloads).
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads).
## Release Notes
This section describes the release notes for the CUDA Samples on GitHub only.
### CUDA 10.2
* Added `simpleD3D11`. Demonstrates CUDA-D3D11 External Resource Interoperability APIs for updating D3D11 buffers from CUDA and synchronization between D3D11 and CUDA with Keyed Mutexes.
* Added `simpleDrvRuntime`. Demonstrates CUDA Driver and Runtime APIs working together to load fatbinary of a CUDA kernel.
* Added `vectorAddMMAP`. Demonstrates how cuMemMap API allows the user to specify the physical properties of their memory while retaining the contiguous nature of their access.
* Added `memMapIPCDrv`. Demonstrates Inter Process Communication using cuMemMap APIs.
* Added `cudaNvSci`. Demonstrates CUDA-NvSciBuf/NvSciSync Interop.
* Added `jacobiCudaGraphs`. Demonstrates Instantiated CUDA Graph Update with Jacobi Iterative Method using different approaches.
* Added `cuSolverSp_LinearSolver`. Demonstrates cuSolverSP's LU, QR and Cholesky factorization.
* Added `MersenneTwisterGP11213`. Demonstrates the Mersenne Twister random number generator GP11213 in cuRAND.
### CUDA 10.1 Update 2
* Added `vulkanImageCUDA`. Demonstrates how to perform Vulkan image - CUDA Interop.
* Added `nvJPEG_encoder`. Demonstrates encoding of jpeg images using NVJPEG Library.
@ -59,7 +69,7 @@ This is the first release of CUDA Samples on GitHub:
### Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html), and the [Mac Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html).
### Getting the CUDA Samples
@ -85,7 +95,6 @@ Each individual sample has its own set of solution files at:
`<CUDA_SAMPLES_REPO>\Samples\<sample_dir>\`
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check [DirectX Dependencies](#directx) section for details."
### Linux
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
@ -143,31 +152,36 @@ The samples makefiles can take advantage of certain options:
#### Linux
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** |
---|---|---|---|
**[simpleIPC](./Samples/simpleIPC)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[nvJPEG](./Samples/nvJPEG)** |
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** |
**[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** |
**[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** | **[systemWideAtomics](./Samples/systemWideAtomics)** |
**[simpleIPC](./Samples/simpleIPC)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[shfl_scan](./Samples/shfl_scan)** |
**[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[nvJPEG](./Samples/nvJPEG)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** |
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[cudaNvSci](./Samples/cudaNvSci)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** |
**[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** |
**[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** |
**[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** |
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
**[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
**[matrixMul](./Samples/matrixMul)** | **[systemWideAtomics](./Samples/systemWideAtomics)** |
#### Windows
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleIPC](./Samples/simpleIPC)** |
---|---|---|---|
**[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[nvJPEG](./Samples/nvJPEG)** | **[simpleD3D12](./Samples/simpleD3D12)** |
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** |
**[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** |
**[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** |
**[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
**[nvJPEG](./Samples/nvJPEG)** | **[simpleD3D12](./Samples/simpleD3D12)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** |
**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** |
**[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** |
**[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** |
**[simpleD3D11](./Samples/simpleD3D11)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** |
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
**[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
**[matrixMul](./Samples/matrixMul)** |
#### Mac OSX
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
---|---|---|---|
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
**[reduction](./Samples/reduction)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** |
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** |
**[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
**[bandwidthTest](./Samples/bandwidthTest)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[matrixMul](./Samples/matrixMul)** |
## Dependencies
@ -196,7 +210,7 @@ Some samples can only be run on a 64-bit operating system.
#### DirectX
DirectX is a collection of APIs designed to allow development of multimedia applications on Microsoft platforms. For Microsoft platforms, NVIDIA's CUDA Driver supports DirectX. Several CUDA Samples for Windows demonstrates CUDA-DirectX Interoperability, for building such samples one needs to install [Direct X SDK (June 2010 or newer)](http://www.microsoft.com/en-us/download/details.aspx?id=6812) , this is required to be installed on Windows 7, Windows 10 and Windows Server 2008, Other Windows OSes do not need to explicitly install the DirectX SDK.
DirectX is a collection of APIs designed to allow development of multimedia applications on Microsoft platforms. For Microsoft platforms, NVIDIA's CUDA Driver supports DirectX. Several CUDA Samples for Windows demonstrates CUDA-DirectX Interoperability, for building such samples one needs to install Microsoft Visual Studio 2012 or higher which provides Microsoft Windows SDK for Windows 8.
#### DirectX12
@ -238,6 +252,9 @@ EGLOutput is a set of EGL extensions which allow EGL to render directly to the d
EGLSync is a set of EGL extensions which provides sync objects that are synchronization primitive, representing events whose completion can be tested or waited upon.
#### NVSCI
NvSci is a set of communication interface libraries out of which CUDA interops with NvSciBuf and NvSciSync. NvSciBuf allows applications to allocate and exchange buffers in memory. NvSciSync allows applications to manage synchronization objects which coordinate when sequences of operations begin and end.
### CUDA Features
These CUDA features are needed by some CUDA samples. They are provided by either the CUDA Toolkit or CUDA Driver. Some features may not be available on your system.

View File

@ -30,7 +30,7 @@ cuDeviceGet, cuDeviceGetAttribute, cuDeviceComputeCapability, cuDeviceGetCount,
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run

View File

@ -0,0 +1,289 @@
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project only supported on Mac OS X and Linux Platforms)
#
################################################################################
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L $(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L $(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
endif
ifeq ($(TARGET_OS),qnx)
CCFLAGS += -DWIN_INTERFACE_CUSTOM
LDFLAGS += -lsocket
endif
# Install directory of different arch
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../Common
LIBRARIES :=
################################################################################
# Gencode arguments
SMS ?=
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
ifeq ($(SMS),)
# Generate PTX code from SM 30
GENCODE_FLAGS += -gencode arch=compute_30,code=compute_30
endif
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
LIBRARIES += -lcurand_static -lculibos
################################################################################
# Target rules
all: build
build: MersenneTwisterGP11213
MersenneTwister.o:MersenneTwister.cpp
$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
MersenneTwisterGP11213: MersenneTwister.o
$(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
run: build
./MersenneTwisterGP11213
clean:
rm -f MersenneTwisterGP11213 MersenneTwister.o
rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/MersenneTwisterGP11213
clobber: clean

View File

@ -0,0 +1,184 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This sample demonstrates the use of CURAND to generate
* random numbers on GPU and CPU.
*/
// Utilities and system includes
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <curand.h>
// Utilities and system includes
#include <helper_functions.h>
#include <helper_cuda.h>
#include <cuda_runtime.h>
#include <curand.h>
float compareResults(int rand_n, float *h_RandGPU, float *h_RandCPU);
const int DEFAULT_RAND_N = 2400000;
const unsigned int DEFAULT_SEED = 777;
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
// Start logs
printf("%s Starting...\n\n", argv[0]);
// initialize the GPU, either identified by --device
// or by picking the device with highest flop rate.
int devID = findCudaDevice(argc, (const char **)argv);
// parsing the number of random numbers to generate
int rand_n = DEFAULT_RAND_N;
if (checkCmdLineFlag(argc, (const char **) argv, "count"))
{
rand_n = getCmdLineArgumentInt(argc, (const char **) argv, "count");
}
printf("Allocating data for %i samples...\n", rand_n);
// parsing the seed
int seed = DEFAULT_SEED;
if (checkCmdLineFlag(argc, (const char **) argv, "seed"))
{
seed = getCmdLineArgumentInt(argc, (const char **) argv, "seed");
}
printf("Seeding with %i ...\n", seed);
cudaStream_t stream;
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
float *d_Rand;
checkCudaErrors(cudaMalloc((void **)&d_Rand, rand_n * sizeof(float)));
curandGenerator_t prngGPU;
checkCudaErrors(curandCreateGenerator(&prngGPU, CURAND_RNG_PSEUDO_MTGP32));
checkCudaErrors(curandSetStream(prngGPU, stream));
checkCudaErrors(curandSetPseudoRandomGeneratorSeed(prngGPU, seed));
curandGenerator_t prngCPU;
checkCudaErrors(curandCreateGeneratorHost(&prngCPU, CURAND_RNG_PSEUDO_MTGP32));
checkCudaErrors(curandSetPseudoRandomGeneratorSeed(prngCPU, seed));
//
// Example 1: Compare random numbers generated on GPU and CPU
float *h_RandGPU = (float *)malloc(rand_n * sizeof(float));
printf("Generating random numbers on GPU...\n\n");
checkCudaErrors(curandGenerateUniform(prngGPU, (float *) d_Rand, rand_n));
printf("\nReading back the results...\n");
checkCudaErrors(cudaMemcpyAsync(h_RandGPU, d_Rand, rand_n * sizeof(float), cudaMemcpyDeviceToHost, stream));
float *h_RandCPU = (float *)malloc(rand_n * sizeof(float));
printf("Generating random numbers on CPU...\n\n");
checkCudaErrors(curandGenerateUniform(prngCPU, (float *) h_RandCPU, rand_n));
checkCudaErrors(cudaStreamSynchronize(stream));
printf("Comparing CPU/GPU random numbers...\n\n");
float L1norm = compareResults(rand_n, h_RandGPU, h_RandCPU);
//
// Example 2: Timing of random number generation on GPU
const int numIterations = 10;
int i;
StopWatchInterface *hTimer;
sdkCreateTimer(&hTimer);
sdkResetTimer(&hTimer);
sdkStartTimer(&hTimer);
for (i = 0; i < numIterations; i++)
{
checkCudaErrors(curandGenerateUniform(prngGPU, (float *) d_Rand, rand_n));
}
checkCudaErrors(cudaStreamSynchronize(stream));
sdkStopTimer(&hTimer);
double gpuTime = 1.0e-3 * sdkGetTimerValue(&hTimer)/(double)numIterations;
printf("MersenneTwisterGP11213, Throughput = %.4f GNumbers/s, Time = %.5f s, Size = %u Numbers\n",
1.0e-9 * rand_n / gpuTime, gpuTime, rand_n);
printf("Shutting down...\n");
checkCudaErrors(curandDestroyGenerator(prngGPU));
checkCudaErrors(curandDestroyGenerator(prngCPU));
checkCudaErrors(cudaStreamDestroy(stream));
checkCudaErrors(cudaFree(d_Rand));
sdkDeleteTimer(&hTimer);
free(h_RandGPU);
free(h_RandCPU);
exit(L1norm < 1e-6 ? EXIT_SUCCESS : EXIT_FAILURE);
}
float compareResults(int rand_n, float *h_RandGPU, float *h_RandCPU)
{
int i;
float rCPU, rGPU, delta;
float max_delta = 0.;
float sum_delta = 0.;
float sum_ref = 0.;
for (i = 0; i < rand_n; i++)
{
rCPU = h_RandCPU[i];
rGPU = h_RandGPU[i];
delta = fabs(rCPU - rGPU);
sum_delta += delta;
sum_ref += fabs(rCPU);
if (delta >= max_delta)
{
max_delta = delta;
}
}
float L1norm = (float)(sum_delta / sum_ref);
printf("Max absolute error: %E\n", max_delta);
printf("L1 norm: %E\n\n", L1norm);
return L1norm;
}

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,107 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>MersenneTwisterGP11213_vs2012</RootNamespace>
<ProjectName>MersenneTwisterGP11213</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="MersenneTwister.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 13.00
# Visual Studio 2013
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,107 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>MersenneTwisterGP11213_vs2013</RootNamespace>
<ProjectName>MersenneTwisterGP11213</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v120</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="MersenneTwister.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 14.00
# Visual Studio 2015
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2015.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,107 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>MersenneTwisterGP11213_vs2015</RootNamespace>
<ProjectName>MersenneTwisterGP11213</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="MersenneTwister.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2017
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,112 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>MersenneTwisterGP11213_vs2017</RootNamespace>
<ProjectName>MersenneTwisterGP11213</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="MersenneTwister.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2019
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MersenneTwisterGP11213", "MersenneTwisterGP11213_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,108 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>MersenneTwisterGP11213_vs2019</RootNamespace>
<ProjectName>MersenneTwisterGP11213</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,compute_30;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="MersenneTwister.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,73 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
<name>MersenneTwisterGP11213</name>
<description><![CDATA[This sample demonstrates the Mersenne Twister random number generator GP11213 in cuRAND.]]></description>
<devicecompilation>whole</devicecompilation>
<fallback_min_ptx>true</fallback_min_ptx>
<includepaths>
<path>./</path>
<path>../</path>
<path>../../common/inc</path>
</includepaths>
<keyconcepts>
<concept level="basic">CURAND Library</concept>
</keyconcepts>
<keywords>
<keyword>CUDA</keyword>
<keyword>CURAND</keyword>
<keyword>Monte-Carlo</keyword>
<keyword>random number generation</keyword>
<keyword>GSFR</keyword>
<keyword>Mersenne Twister</keyword>
</keywords>
<libraries>
<library>curand_static</library>
<library>culibos</library>
</libraries>
<librarypaths>
</librarypaths>
<nsight_eclipse>true</nsight_eclipse>
<primary_file>MersenneTwister.cpp</primary_file>
<required_dependencies>
<dependency>CURAND</dependency>
</required_dependencies>
<scopes>
<scope>1:CUDA Advanced Topics</scope>
</scopes>
<sm-arch>sm30</sm-arch>
<sm-arch>sm35</sm-arch>
<sm-arch>sm37</sm-arch>
<sm-arch>sm50</sm-arch>
<sm-arch>sm52</sm-arch>
<sm-arch>sm60</sm-arch>
<sm-arch>sm61</sm-arch>
<sm-arch>sm70</sm-arch>
<sm-arch>sm72</sm-arch>
<sm-arch>sm75</sm-arch>
<supported_envs>
<env>
<arch>x86_64</arch>
<platform>linux</platform>
</env>
<env>
<platform>windows7</platform>
</env>
<env>
<arch>x86_64</arch>
<platform>macosx</platform>
</env>
<env>
<arch>arm</arch>
</env>
<env>
<arch>ppc64le</arch>
<platform>linux</platform>
</env>
</supported_envs>
<supported_sm_architectures>
<include>all</include>
</supported_sm_architectures>
<title>MersenneTwisterGP11213</title>
<type>exe</type>
</entry>

View File

@ -0,0 +1,95 @@
# MersenneTwisterGP11213 - MersenneTwisterGP11213
## Description
This sample demonstrates the Mersenne Twister random number generator GP11213 in cuRAND.
## Key Concepts
CURAND Library
## Supported SM Architectures
[SM 3.0 ](https://developer.nvidia.com/cuda-gpus) [SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
Linux, Windows, MacOSX
## Supported CPU Architecture
x86_64, ppc64le, armv7l
## CUDA APIs involved
## Dependencies needed to build/run
[CURAND](../../README.md#curand)
## Prerequisites
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
### Windows
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
```
*_vs<version>.sln - for Visual Studio <version>
```
Each individual sample has its own set of solution files in its directory:
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
### Linux
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
```
$ cd <sample_dir>
$ make
```
The samples makefiles can take advantage of certain options:
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
* **dbg=1** - build with debug symbols
```
$ make dbg=1
```
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
```
$ make SMS="50 60"
```
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
```
$ make HOST_COMPILER=g++
```
### Mac
The Mac samples are built using makefiles. To use the makefiles, change directory into the sample directory you wish to build, and run make:
```
$ cd <sample_dir>
$ make
```
The samples makefiles can take advantage of certain options:
* **dbg=1** - build with debug symbols
```
$ make dbg=1
```
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where "A B ..." is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use SMS="50 60".
```
$ make SMS="A B ..."
```
* **HOST_COMPILER=<host_compiler>** - override the default clang host compiler. See the [Mac Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#system-requirements) for a list of supported host compilers.
```
$ make HOST_COMPILER=clang
```
## References (for more details)

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -107,6 +107,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -107,6 +107,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -107,6 +107,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -108,6 +108,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -27,7 +27,7 @@ cudaMemcpy2D, cudaMallocManaged
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run

View File

@ -30,7 +30,7 @@ cudaMallocManaged, cudaStreamAttachMemAsync, cudaMemcpyAsync, cudaMallocHost, cu
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -105,6 +105,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -105,6 +105,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -105,6 +105,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -110,6 +110,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -106,6 +106,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -27,7 +27,7 @@ cudaSetDevice, cudaHostAlloc, cudaFree, cudaMallocHost, cudaFreeHost, cudaMemcpy
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
## Build and Run

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -102,6 +102,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -102,6 +102,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -102,6 +102,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -107,6 +107,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -117,6 +117,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -113,6 +113,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -112,6 +112,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -117,6 +117,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -113,6 +113,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -30,7 +30,7 @@ cudaStreamBeginCapture, cudaStreamEndCapture, cudaGraphCreate, cudaGraphLaunch,
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -102,6 +102,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -102,6 +102,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -102,6 +102,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -107,6 +107,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -27,7 +27,7 @@ x86_64, ppc64le
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -108,6 +108,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -104,6 +104,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -30,7 +30,7 @@ cudaMemAdvise, cudaMemPrefetchAsync, cudaLaunchCooperativeKernelMultiDevice, cud
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -103,6 +103,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -108,6 +108,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -104,6 +104,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
<name>cuSolverDn_LinearSolver</name>
<description><![CDATA[A CUDA Sample that demonstrates cuSolverDN's LU, QR and Cholesky factorization.]]></description>
<devicecompilation>whole</devicecompilation>
<files>
<file>gr_900_900_crg.mtx</file>
<file>lap3D_7pt_n20.mtx</file>
</files>
<includepaths>
<path>./</path>
<path>../</path>
<path>../../common/inc</path>
</includepaths>
<keyconcepts>
<concept level="basic">Linear Algebra</concept>
<concept level="basic">CUSOLVER Library</concept>
</keyconcepts>
<keywords>
<keyword>CUSOLVER</keyword>
<keyword>Linear Algebra</keyword>
</keywords>
<libraries>
<library>cusolver</library>
<library>cublas</library>
<library>cusparse</library>
</libraries>
<librarypaths>
</librarypaths>
<nsight_eclipse>true</nsight_eclipse>
<primary_file>cuSolverDn_LinearSolver.cpp</primary_file>
<qatests>
<qatest>-R=qr</qatest>
<qatest>-R=chol</qatest>
<qatest>-R=lu</qatest>
</qatests>
<required_dependencies>
<dependency>CUSOLVER</dependency>
<dependency>CUBLAS</dependency>
<dependency>CUSPARSE</dependency>
</required_dependencies>
<scopes>
<scope>1:CUDA Basic Topics</scope>
<scope>3:Linear Algebra</scope>
</scopes>
<sm-arch>sm30</sm-arch>
<sm-arch>sm35</sm-arch>
<sm-arch>sm37</sm-arch>
<sm-arch>sm50</sm-arch>
<sm-arch>sm52</sm-arch>
<sm-arch>sm60</sm-arch>
<sm-arch>sm61</sm-arch>
<sm-arch>sm70</sm-arch>
<sm-arch>sm72</sm-arch>
<sm-arch>sm75</sm-arch>
<supported_envs>
<env>
<arch>x86_64</arch>
<platform>linux</platform>
</env>
<env>
<platform>windows7</platform>
</env>
<env>
<arch>x86_64</arch>
<platform>macosx</platform>
</env>
<env>
<arch>ppc64le</arch>
<platform>linux</platform>
</env>
<env>
<arch>aarch64</arch>
</env>
</supported_envs>
<supported_sm_architectures>
<include>all</include>
</supported_sm_architectures>
<title>cuSolverDn Linear Solver </title>
<type>exe</type>
</entry>

View File

@ -27,7 +27,7 @@ x86_64, ppc64le, aarch64
## Prerequisites
Download and install the [CUDA Toolkit 10.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -104,6 +104,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -104,6 +104,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -33,7 +33,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -104,6 +104,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -38,7 +38,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -109,6 +109,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -34,7 +34,7 @@
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.props" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
@ -105,6 +105,6 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.1.targets" />
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,316 @@
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project only supported on Mac OS X and Linux Platforms)
#
################################################################################
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L $(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L $(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
endif
ifeq ($(TARGET_OS),qnx)
CCFLAGS += -DWIN_INTERFACE_CUSTOM
LDFLAGS += -lsocket
endif
# Install directory of different arch
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
SAMPLE_ENABLED := 1
ifeq ($(TARGET_OS),linux)
ALL_CCFLAGS += -Xcompiler \"-Wl,--no-as-needed\"
endif
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../Common
LIBRARIES :=
################################################################################
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
SMS ?= 30 35 37 50 52 60 61 70 72 75
else
SMS ?= 30 35 37 50 52 60 61 70 75
endif
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
LIBRARIES += -lcusolver -lcusparse
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules
all: build
build: cuSolverSp_LinearSolver
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
@echo "Sample will be waived due to the above missing dependencies"
else
@echo "Sample is ready - all dependencies have been met"
endif
cuSolverSp_LinearSolver.o:cuSolverSp_LinearSolver.cpp
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
mmio.c.o:mmio.c
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
mmio_wrapper.o:mmio_wrapper.cpp
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
cuSolverSp_LinearSolver: cuSolverSp_LinearSolver.o mmio.c.o mmio_wrapper.o
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
run: build
$(EXEC) ./cuSolverSp_LinearSolver
clean:
rm -f cuSolverSp_LinearSolver cuSolverSp_LinearSolver.o mmio.c.o mmio_wrapper.o
rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/cuSolverSp_LinearSolver
clobber: clean

View File

@ -0,0 +1,83 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
<name>cuSolverSp_LinearSolver</name>
<description><![CDATA[A CUDA Sample that demonstrates cuSolverSP's LU, QR and Cholesky factorization.]]></description>
<devicecompilation>whole</devicecompilation>
<files>
<file>lap2D_5pt_n100.mtx</file>
<file>lap3D_7pt_n20.mtx</file>
</files>
<includepaths>
<path>./</path>
<path>../</path>
<path>../../common/inc</path>
</includepaths>
<keyconcepts>
<concept level="basic">Linear Algebra</concept>
<concept level="basic">CUSOLVER Library</concept>
</keyconcepts>
<keywords>
<keyword>CUSOLVER</keyword>
<keyword>Linear Algebra</keyword>
</keywords>
<libraries>
<library>cusolver</library>
<library>cusparse</library>
</libraries>
<librarypaths>
</librarypaths>
<nsight_eclipse>true</nsight_eclipse>
<primary_file>cuSolverSp_LinearSolver.cpp</primary_file>
<qatests>
<qatest>-R=qr</qatest>
<qatest>-R=chol</qatest>
<qatest>-R=lu</qatest>
<qatest>-R=qr -P=symamd</qatest>
<qatest>-R=chol -P=symamd</qatest>
<qatest>-R=lu -P=symamd</qatest>
</qatests>
<required_dependencies>
<dependency>CUSOLVER</dependency>
<dependency>CUSPARSE</dependency>
</required_dependencies>
<scopes>
<scope>1:CUDA Basic Topics</scope>
<scope>3:Linear Algebra</scope>
</scopes>
<sm-arch>sm30</sm-arch>
<sm-arch>sm35</sm-arch>
<sm-arch>sm37</sm-arch>
<sm-arch>sm50</sm-arch>
<sm-arch>sm52</sm-arch>
<sm-arch>sm60</sm-arch>
<sm-arch>sm61</sm-arch>
<sm-arch>sm70</sm-arch>
<sm-arch>sm72</sm-arch>
<sm-arch>sm75</sm-arch>
<supported_envs>
<env>
<arch>x86_64</arch>
<platform>linux</platform>
</env>
<env>
<platform>windows7</platform>
</env>
<env>
<arch>x86_64</arch>
<platform>macosx</platform>
</env>
<env>
<arch>ppc64le</arch>
<platform>linux</platform>
</env>
<env>
<arch>arm</arch>
</env>
</supported_envs>
<supported_sm_architectures>
<include>all</include>
</supported_sm_architectures>
<title>cuSolverSp Linear Solver </title>
<type>exe</type>
</entry>

View File

@ -0,0 +1,95 @@
# cuSolverSp_LinearSolver - cuSolverSp Linear Solver
## Description
A CUDA Sample that demonstrates cuSolverSP's LU, QR and Cholesky factorization.
## Key Concepts
Linear Algebra, CUSOLVER Library
## Supported SM Architectures
[SM 3.0 ](https://developer.nvidia.com/cuda-gpus) [SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
Linux, Windows, MacOSX
## Supported CPU Architecture
x86_64, ppc64le, armv7l
## CUDA APIs involved
## Dependencies needed to build/run
[CUSOLVER](../../README.md#cusolver), [CUSPARSE](../../README.md#cusparse)
## Prerequisites
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
### Windows
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
```
*_vs<version>.sln - for Visual Studio <version>
```
Each individual sample has its own set of solution files in its directory:
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
### Linux
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
```
$ cd <sample_dir>
$ make
```
The samples makefiles can take advantage of certain options:
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
* **dbg=1** - build with debug symbols
```
$ make dbg=1
```
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
```
$ make SMS="50 60"
```
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
```
$ make HOST_COMPILER=g++
```
### Mac
The Mac samples are built using makefiles. To use the makefiles, change directory into the sample directory you wish to build, and run make:
```
$ cd <sample_dir>
$ make
```
The samples makefiles can take advantage of certain options:
* **dbg=1** - build with debug symbols
```
$ make dbg=1
```
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where "A B ..." is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use SMS="50 60".
```
$ make SMS="A B ..."
```
* **HOST_COMPILER=<host_compiler>** - override the default clang host compiler. See the [Mac Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#system-requirements) for a list of supported host compilers.
```
$ make HOST_COMPILER=clang
```
## References (for more details)

View File

@ -0,0 +1,654 @@
/* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Test three linear solvers, including Cholesky, LU and QR.
* The user has to prepare a sparse matrix of "matrix market format" (with extension .mtx).
* For example, the user can download matrices in Florida Sparse Matrix Collection.
* (http://www.cise.ufl.edu/research/sparse/matrices/)
*
* The user needs to choose a solver by the switch -R<solver> and
* to provide the path of the matrix by the switch -F<file>, then
* the program solves
* A*x = b
* and reports relative error
* |b-A*x|/(|A|*|x|+|b|)
*
* How does it work?
* The example solves A*x = b by the following steps
* step 1: B = A(Q,Q)
* Q is the ordering to minimize zero fill-in.
* The user can choose symrcm or symamd.
* step 2: solve B*z = Q*b
* step 3: x = inv(Q)*z
*
* Above three steps can be combined by the formula
* (Q*A*Q')*(Q*x) = (Q*b)
*
* The elapsed time is also reported so the user can compare efficiency of different solvers.
*
* How to use
/cuSolverSp_LinearSolver // Default: Cholesky, symrcm & file=lap2D_5pt_n100.mtx
* ./cuSolverSp_LinearSolver -R=chol -file=<file> // cholesky factorization
* ./cuSolverSp_LinearSolver -R=lu -P=symrcm -file=<file> // symrcm + LU with partial pivoting
* ./cuSolverSp_LinearSolver -R=qr -P=symamd -file=<file> // symamd + QR factorization
*
*
* Remark: the absolute error on solution x is meaningless without knowing condition number of A.
* The relative error on residual should be close to machine zero, i.e. 1.e-15.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <cuda_runtime.h>
#include "cusparse.h"
#include "cusolverSp.h"
#include "helper_cuda.h"
#include "helper_cusolver.h"
template <typename T_ELEM>
int loadMMSparseMatrix(char *filename, char elem_type, bool csrFormat, int *m,
int *n, int *nnz, T_ELEM **aVal, int **aRowInd,
int **aColInd, int extendSymMatrix);
void UsageSP(void) {
printf("<options>\n");
printf("-h : display this help\n");
printf("-R=<name> : choose a linear solver\n");
printf(" chol (cholesky factorization), this is default\n");
printf(" qr (QR factorization)\n");
printf(" lu (LU factorization)\n");
printf("-P=<name> : choose a reordering\n");
printf(" symrcm (Reverse Cuthill-McKee)\n");
printf(" symamd (Approximate Minimum Degree)\n");
printf(" metis (nested dissection)\n");
printf("-file=<filename> : filename containing a matrix in MM format\n");
printf("-device=<device_id> : <device_id> if want to run on specific GPU\n");
exit(0);
}
void parseCommandLineArguments(int argc, char *argv[], struct testOpts &opts) {
memset(&opts, 0, sizeof(opts));
if (checkCmdLineFlag(argc, (const char **)argv, "-h")) {
UsageSP();
}
if (checkCmdLineFlag(argc, (const char **)argv, "R")) {
char *solverType = NULL;
getCmdLineArgumentString(argc, (const char **)argv, "R", &solverType);
if (solverType) {
if ((STRCASECMP(solverType, "chol") != 0) &&
(STRCASECMP(solverType, "lu") != 0) &&
(STRCASECMP(solverType, "qr") != 0)) {
printf("\nIncorrect argument passed to -R option\n");
UsageSP();
} else {
opts.testFunc = solverType;
}
}
}
if (checkCmdLineFlag(argc, (const char **)argv, "P")) {
char *reorderType = NULL;
getCmdLineArgumentString(argc, (const char **)argv, "P", &reorderType);
if (reorderType) {
if ((STRCASECMP(reorderType, "symrcm") != 0) &&
(STRCASECMP(reorderType, "symamd") != 0) &&
(STRCASECMP(reorderType, "metis") != 0)) {
printf("\nIncorrect argument passed to -P option\n");
UsageSP();
} else {
opts.reorder = reorderType;
}
}
}
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
char *fileName = 0;
getCmdLineArgumentString(argc, (const char **)argv, "file", &fileName);
if (fileName) {
opts.sparse_mat_filename = fileName;
} else {
printf("\nIncorrect filename passed to -file \n ");
UsageSP();
}
}
}
int main(int argc, char *argv[]) {
struct testOpts opts;
cusolverSpHandle_t handle = NULL;
cusparseHandle_t cusparseHandle = NULL; /* used in residual evaluation */
cudaStream_t stream = NULL;
cusparseMatDescr_t descrA = NULL;
int rowsA = 0; /* number of rows of A */
int colsA = 0; /* number of columns of A */
int nnzA = 0; /* number of nonzeros of A */
int baseA = 0; /* base index in CSR format */
/* CSR(A) from I/O */
int *h_csrRowPtrA = NULL;
int *h_csrColIndA = NULL;
double *h_csrValA = NULL;
double *h_z = NULL; /* z = B \ (Q*b) */
double *h_x = NULL; /* x = A \ b */
double *h_b = NULL; /* b = ones(n,1) */
double *h_Qb = NULL; /* Q*b */
double *h_r = NULL; /* r = b - A*x */
int *h_Q = NULL; /* <int> n */
/* reorder to reduce zero fill-in */
/* Q = symrcm(A) or Q = symamd(A) */
/* B = Q*A*Q' or B = A(Q,Q) by MATLAB notation */
int *h_csrRowPtrB = NULL; /* <int> n+1 */
int *h_csrColIndB = NULL; /* <int> nnzA */
double *h_csrValB = NULL; /* <double> nnzA */
int *h_mapBfromA = NULL; /* <int> nnzA */
size_t size_perm = 0;
void *buffer_cpu = NULL; /* working space for permutation: B = Q*A*Q^T */
/* device copy of A: used in residual evaluation */
int *d_csrRowPtrA = NULL;
int *d_csrColIndA = NULL;
double *d_csrValA = NULL;
/* device copy of B: used in B*z = Q*b */
int *d_csrRowPtrB = NULL;
int *d_csrColIndB = NULL;
double *d_csrValB = NULL;
int *d_Q = NULL; /* device copy of h_Q */
double *d_z = NULL; /* z = B \ Q*b */
double *d_x = NULL; /* x = A \ b */
double *d_b = NULL; /* a copy of h_b */
double *d_Qb = NULL; /* a copy of h_Qb */
double *d_r = NULL; /* r = b - A*x */
double tol = 1.e-12;
const int reorder = 0; /* no reordering */
int singularity = 0; /* -1 if A is invertible under tol. */
/* the constants are used in residual evaluation, r = b - A*x */
const double minus_one = -1.0;
const double one = 1.0;
double b_inf = 0.0;
double x_inf = 0.0;
double r_inf = 0.0;
double A_inf = 0.0;
int errors = 0;
int issym = 0;
double start, stop;
double time_solve_cpu;
double time_solve_gpu;
parseCommandLineArguments(argc, argv, opts);
if (NULL == opts.testFunc) {
opts.testFunc =
"chol"; /* By default running Cholesky as NO solver selected with -R
option. */
}
findCudaDevice(argc, (const char **)argv);
if (opts.sparse_mat_filename == NULL) {
opts.sparse_mat_filename = sdkFindFilePath("lap2D_5pt_n100.mtx", argv[0]);
if (opts.sparse_mat_filename != NULL)
printf("Using default input file [%s]\n", opts.sparse_mat_filename);
else
printf("Could not find lap2D_5pt_n100.mtx\n");
} else {
printf("Using input file [%s]\n", opts.sparse_mat_filename);
}
printf("step 1: read matrix market format\n");
if (opts.sparse_mat_filename == NULL) {
fprintf(stderr, "Error: input matrix is not provided\n");
return EXIT_FAILURE;
}
if (loadMMSparseMatrix<double>(opts.sparse_mat_filename, 'd', true, &rowsA,
&colsA, &nnzA, &h_csrValA, &h_csrRowPtrA,
&h_csrColIndA, true)) {
exit(EXIT_FAILURE);
}
baseA = h_csrRowPtrA[0]; // baseA = {0,1}
printf("sparse matrix A is %d x %d with %d nonzeros, base=%d\n", rowsA, colsA,
nnzA, baseA);
if (rowsA != colsA) {
fprintf(stderr, "Error: only support square matrix\n");
return 1;
}
checkCudaErrors(cusolverSpCreate(&handle));
checkCudaErrors(cusparseCreate(&cusparseHandle));
checkCudaErrors(cudaStreamCreate(&stream));
/* bind stream to cusparse and cusolver*/
checkCudaErrors(cusolverSpSetStream(handle, stream));
checkCudaErrors(cusparseSetStream(cusparseHandle, stream));
/* configure matrix descriptor*/
checkCudaErrors(cusparseCreateMatDescr(&descrA));
checkCudaErrors(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
if (baseA) {
checkCudaErrors(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE));
} else {
checkCudaErrors(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
}
h_z = (double *)malloc(sizeof(double) * colsA);
h_x = (double *)malloc(sizeof(double) * colsA);
h_b = (double *)malloc(sizeof(double) * rowsA);
h_Qb = (double *)malloc(sizeof(double) * rowsA);
h_r = (double *)malloc(sizeof(double) * rowsA);
h_Q = (int *)malloc(sizeof(int) * colsA);
h_csrRowPtrB = (int *)malloc(sizeof(int) * (rowsA + 1));
h_csrColIndB = (int *)malloc(sizeof(int) * nnzA);
h_csrValB = (double *)malloc(sizeof(double) * nnzA);
h_mapBfromA = (int *)malloc(sizeof(int) * nnzA);
assert(NULL != h_z);
assert(NULL != h_x);
assert(NULL != h_b);
assert(NULL != h_Qb);
assert(NULL != h_r);
assert(NULL != h_Q);
assert(NULL != h_csrRowPtrB);
assert(NULL != h_csrColIndB);
assert(NULL != h_csrValB);
assert(NULL != h_mapBfromA);
checkCudaErrors(
cudaMalloc((void **)&d_csrRowPtrA, sizeof(int) * (rowsA + 1)));
checkCudaErrors(cudaMalloc((void **)&d_csrColIndA, sizeof(int) * nnzA));
checkCudaErrors(cudaMalloc((void **)&d_csrValA, sizeof(double) * nnzA));
checkCudaErrors(
cudaMalloc((void **)&d_csrRowPtrB, sizeof(int) * (rowsA + 1)));
checkCudaErrors(cudaMalloc((void **)&d_csrColIndB, sizeof(int) * nnzA));
checkCudaErrors(cudaMalloc((void **)&d_csrValB, sizeof(double) * nnzA));
checkCudaErrors(cudaMalloc((void **)&d_Q, sizeof(int) * colsA));
checkCudaErrors(cudaMalloc((void **)&d_z, sizeof(double) * colsA));
checkCudaErrors(cudaMalloc((void **)&d_x, sizeof(double) * colsA));
checkCudaErrors(cudaMalloc((void **)&d_b, sizeof(double) * rowsA));
checkCudaErrors(cudaMalloc((void **)&d_Qb, sizeof(double) * rowsA));
checkCudaErrors(cudaMalloc((void **)&d_r, sizeof(double) * rowsA));
/* verify if A has symmetric pattern or not */
checkCudaErrors(cusolverSpXcsrissymHost(handle, rowsA, nnzA, descrA,
h_csrRowPtrA, h_csrRowPtrA + 1,
h_csrColIndA, &issym));
if (0 == strcmp(opts.testFunc, "chol")) {
if (!issym) {
printf("Error: A has no symmetric pattern, please use LU or QR \n");
exit(EXIT_FAILURE);
}
}
printf("step 2: reorder the matrix A to minimize zero fill-in\n");
printf(
" if the user choose a reordering by -P=symrcm, -P=symamd or "
"-P=metis\n");
if (NULL != opts.reorder) {
if (0 == strcmp(opts.reorder, "symrcm")) {
printf("step 2.1: Q = symrcm(A) \n");
checkCudaErrors(cusolverSpXcsrsymrcmHost(
handle, rowsA, nnzA, descrA, h_csrRowPtrA, h_csrColIndA, h_Q));
} else if (0 == strcmp(opts.reorder, "symamd")) {
printf("step 2.1: Q = symamd(A) \n");
checkCudaErrors(cusolverSpXcsrsymamdHost(
handle, rowsA, nnzA, descrA, h_csrRowPtrA, h_csrColIndA, h_Q));
} else if (0 == strcmp(opts.reorder, "metis")) {
printf("step 2.1: Q = metis(A) \n");
checkCudaErrors(cusolverSpXcsrmetisndHost(handle, rowsA, nnzA, descrA,
h_csrRowPtrA, h_csrColIndA,
NULL, /* default setting. */
h_Q));
} else {
fprintf(stderr, "Error: %s is unknown reordering\n", opts.reorder);
return 1;
}
} else {
printf("step 2.1: no reordering is chosen, Q = 0:n-1 \n");
for (int j = 0; j < rowsA; j++) {
h_Q[j] = j;
}
}
printf("step 2.2: B = A(Q,Q) \n");
memcpy(h_csrRowPtrB, h_csrRowPtrA, sizeof(int) * (rowsA + 1));
memcpy(h_csrColIndB, h_csrColIndA, sizeof(int) * nnzA);
checkCudaErrors(cusolverSpXcsrperm_bufferSizeHost(
handle, rowsA, colsA, nnzA, descrA, h_csrRowPtrB, h_csrColIndB, h_Q, h_Q,
&size_perm));
if (buffer_cpu) {
free(buffer_cpu);
}
buffer_cpu = (void *)malloc(sizeof(char) * size_perm);
assert(NULL != buffer_cpu);
/* h_mapBfromA = Identity */
for (int j = 0; j < nnzA; j++) {
h_mapBfromA[j] = j;
}
checkCudaErrors(cusolverSpXcsrpermHost(handle, rowsA, colsA, nnzA, descrA,
h_csrRowPtrB, h_csrColIndB, h_Q, h_Q,
h_mapBfromA, buffer_cpu));
/* B = A( mapBfromA ) */
for (int j = 0; j < nnzA; j++) {
h_csrValB[j] = h_csrValA[h_mapBfromA[j]];
}
printf("step 3: b(j) = 1 + j/n \n");
for (int row = 0; row < rowsA; row++) {
h_b[row] = 1.0 + ((double)row) / ((double)rowsA);
}
/* h_Qb = b(Q) */
for (int row = 0; row < rowsA; row++) {
h_Qb[row] = h_b[h_Q[row]];
}
printf("step 4: prepare data on device\n");
checkCudaErrors(cudaMemcpyAsync(d_csrRowPtrA, h_csrRowPtrA,
sizeof(int) * (rowsA + 1),
cudaMemcpyHostToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_csrColIndA, h_csrColIndA,
sizeof(int) * nnzA, cudaMemcpyHostToDevice,
stream));
checkCudaErrors(cudaMemcpyAsync(d_csrValA, h_csrValA, sizeof(double) * nnzA,
cudaMemcpyHostToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_csrRowPtrB, h_csrRowPtrB,
sizeof(int) * (rowsA + 1),
cudaMemcpyHostToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_csrColIndB, h_csrColIndB,
sizeof(int) * nnzA, cudaMemcpyHostToDevice,
stream));
checkCudaErrors(cudaMemcpyAsync(d_csrValB, h_csrValB, sizeof(double) * nnzA,
cudaMemcpyHostToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_b, h_b, sizeof(double) * rowsA,
cudaMemcpyHostToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_Qb, h_Qb, sizeof(double) * rowsA,
cudaMemcpyHostToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_Q, h_Q, sizeof(int) * rowsA,
cudaMemcpyHostToDevice, stream));
printf("step 5: solve A*x = b on CPU \n");
start = second();
/* solve B*z = Q*b */
if (0 == strcmp(opts.testFunc, "chol")) {
checkCudaErrors(cusolverSpDcsrlsvcholHost(
handle, rowsA, nnzA, descrA, h_csrValB, h_csrRowPtrB, h_csrColIndB,
h_Qb, tol, reorder, h_z, &singularity));
} else if (0 == strcmp(opts.testFunc, "lu")) {
checkCudaErrors(cusolverSpDcsrlsvluHost(
handle, rowsA, nnzA, descrA, h_csrValB, h_csrRowPtrB, h_csrColIndB,
h_Qb, tol, reorder, h_z, &singularity));
} else if (0 == strcmp(opts.testFunc, "qr")) {
checkCudaErrors(cusolverSpDcsrlsvqrHost(
handle, rowsA, nnzA, descrA, h_csrValB, h_csrRowPtrB, h_csrColIndB,
h_Qb, tol, reorder, h_z, &singularity));
} else {
fprintf(stderr, "Error: %s is unknown function\n", opts.testFunc);
return 1;
}
/* Q*x = z */
for (int row = 0; row < rowsA; row++) {
h_x[h_Q[row]] = h_z[row];
}
if (0 <= singularity) {
printf("WARNING: the matrix is singular at row %d under tol (%E)\n",
singularity, tol);
}
stop = second();
time_solve_cpu = stop - start;
printf("step 6: evaluate residual r = b - A*x (result on CPU)\n");
checkCudaErrors(cudaMemcpyAsync(d_r, d_b, sizeof(double) * rowsA,
cudaMemcpyDeviceToDevice, stream));
checkCudaErrors(cudaMemcpyAsync(d_x, h_x, sizeof(double) * colsA,
cudaMemcpyHostToDevice, stream));
checkCudaErrors(cusparseDcsrmv(cusparseHandle,
CUSPARSE_OPERATION_NON_TRANSPOSE, rowsA, colsA,
nnzA, &minus_one, descrA, d_csrValA,
d_csrRowPtrA, d_csrColIndA, d_x, &one, d_r));
checkCudaErrors(cudaMemcpyAsync(h_r, d_r, sizeof(double) * rowsA,
cudaMemcpyDeviceToHost, stream));
/* wait until h_r is ready */
checkCudaErrors(cudaDeviceSynchronize());
b_inf = vec_norminf(rowsA, h_b);
x_inf = vec_norminf(colsA, h_x);
r_inf = vec_norminf(rowsA, h_r);
A_inf = csr_mat_norminf(rowsA, colsA, nnzA, descrA, h_csrValA, h_csrRowPtrA,
h_csrColIndA);
printf("(CPU) |b - A*x| = %E \n", r_inf);
printf("(CPU) |A| = %E \n", A_inf);
printf("(CPU) |x| = %E \n", x_inf);
printf("(CPU) |b| = %E \n", b_inf);
printf("(CPU) |b - A*x|/(|A|*|x| + |b|) = %E \n",
r_inf / (A_inf * x_inf + b_inf));
printf("step 7: solve A*x = b on GPU\n");
start = second();
/* solve B*z = Q*b */
if (0 == strcmp(opts.testFunc, "chol")) {
checkCudaErrors(cusolverSpDcsrlsvchol(
handle, rowsA, nnzA, descrA, d_csrValB, d_csrRowPtrB, d_csrColIndB,
d_Qb, tol, reorder, d_z, &singularity));
} else if (0 == strcmp(opts.testFunc, "lu")) {
printf("WARNING: no LU available on GPU \n");
} else if (0 == strcmp(opts.testFunc, "qr")) {
checkCudaErrors(cusolverSpDcsrlsvqr(handle, rowsA, nnzA, descrA, d_csrValB,
d_csrRowPtrB, d_csrColIndB, d_Qb, tol,
reorder, d_z, &singularity));
} else {
fprintf(stderr, "Error: %s is unknow function\n", opts.testFunc);
return 1;
}
checkCudaErrors(cudaDeviceSynchronize());
if (0 <= singularity) {
printf("WARNING: the matrix is singular at row %d under tol (%E)\n",
singularity, tol);
}
/* Q*x = z */
checkCudaErrors(cusparseDsctr(cusparseHandle, rowsA, d_z, d_Q, d_x,
CUSPARSE_INDEX_BASE_ZERO));
checkCudaErrors(cudaDeviceSynchronize());
stop = second();
time_solve_gpu = stop - start;
printf("step 8: evaluate residual r = b - A*x (result on GPU)\n");
checkCudaErrors(cudaMemcpyAsync(d_r, d_b, sizeof(double) * rowsA,
cudaMemcpyDeviceToDevice, stream));
checkCudaErrors(cusparseDcsrmv(cusparseHandle,
CUSPARSE_OPERATION_NON_TRANSPOSE, rowsA, colsA,
nnzA, &minus_one, descrA, d_csrValA,
d_csrRowPtrA, d_csrColIndA, d_x, &one, d_r));
checkCudaErrors(cudaMemcpyAsync(h_x, d_x, sizeof(double) * colsA,
cudaMemcpyDeviceToHost, stream));
checkCudaErrors(cudaMemcpyAsync(h_r, d_r, sizeof(double) * rowsA,
cudaMemcpyDeviceToHost, stream));
/* wait until h_x and h_r are ready */
checkCudaErrors(cudaDeviceSynchronize());
b_inf = vec_norminf(rowsA, h_b);
x_inf = vec_norminf(colsA, h_x);
r_inf = vec_norminf(rowsA, h_r);
if (0 != strcmp(opts.testFunc, "lu")) {
// only cholesky and qr have GPU version
printf("(GPU) |b - A*x| = %E \n", r_inf);
printf("(GPU) |A| = %E \n", A_inf);
printf("(GPU) |x| = %E \n", x_inf);
printf("(GPU) |b| = %E \n", b_inf);
printf("(GPU) |b - A*x|/(|A|*|x| + |b|) = %E \n",
r_inf / (A_inf * x_inf + b_inf));
}
fprintf(stdout, "timing %s: CPU = %10.6f sec , GPU = %10.6f sec\n",
opts.testFunc, time_solve_cpu, time_solve_gpu);
if (0 != strcmp(opts.testFunc, "lu")) {
printf("show last 10 elements of solution vector (GPU) \n");
printf("consistent result for different reordering and solver \n");
for (int j = rowsA - 10; j < rowsA; j++) {
printf("x[%d] = %E\n", j, h_x[j]);
}
}
if (handle) {
checkCudaErrors(cusolverSpDestroy(handle));
}
if (cusparseHandle) {
checkCudaErrors(cusparseDestroy(cusparseHandle));
}
if (stream) {
checkCudaErrors(cudaStreamDestroy(stream));
}
if (descrA) {
checkCudaErrors(cusparseDestroyMatDescr(descrA));
}
if (h_csrValA) {
free(h_csrValA);
}
if (h_csrRowPtrA) {
free(h_csrRowPtrA);
}
if (h_csrColIndA) {
free(h_csrColIndA);
}
if (h_z) {
free(h_z);
}
if (h_x) {
free(h_x);
}
if (h_b) {
free(h_b);
}
if (h_Qb) {
free(h_Qb);
}
if (h_r) {
free(h_r);
}
if (h_Q) {
free(h_Q);
}
if (h_csrRowPtrB) {
free(h_csrRowPtrB);
}
if (h_csrColIndB) {
free(h_csrColIndB);
}
if (h_csrValB) {
free(h_csrValB);
}
if (h_mapBfromA) {
free(h_mapBfromA);
}
if (buffer_cpu) {
free(buffer_cpu);
}
if (d_csrValA) {
checkCudaErrors(cudaFree(d_csrValA));
}
if (d_csrRowPtrA) {
checkCudaErrors(cudaFree(d_csrRowPtrA));
}
if (d_csrColIndA) {
checkCudaErrors(cudaFree(d_csrColIndA));
}
if (d_csrValB) {
checkCudaErrors(cudaFree(d_csrValB));
}
if (d_csrRowPtrB) {
checkCudaErrors(cudaFree(d_csrRowPtrB));
}
if (d_csrColIndB) {
checkCudaErrors(cudaFree(d_csrColIndB));
}
if (d_Q) {
checkCudaErrors(cudaFree(d_Q));
}
if (d_z) {
checkCudaErrors(cudaFree(d_z));
}
if (d_x) {
checkCudaErrors(cudaFree(d_x));
}
if (d_b) {
checkCudaErrors(cudaFree(d_b));
}
if (d_Qb) {
checkCudaErrors(cudaFree(d_Qb));
}
if (d_r) {
checkCudaErrors(cudaFree(d_r));
}
return 0;
}

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,109 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>cuSolverSp_LinearSolver_vs2012</RootNamespace>
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v110</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
<ClCompile Include="mmio.c" />
<ClCompile Include="mmio_wrapper.cpp" />
<ClInclude Include="mmio.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 13.00
# Visual Studio 2013
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,109 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>cuSolverSp_LinearSolver_vs2013</RootNamespace>
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v120</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
<ClCompile Include="mmio.c" />
<ClCompile Include="mmio_wrapper.cpp" />
<ClInclude Include="mmio.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 14.00
# Visual Studio 2015
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2015.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,109 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>cuSolverSp_LinearSolver_vs2015</RootNamespace>
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v140</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
<ClCompile Include="mmio.c" />
<ClCompile Include="mmio_wrapper.cpp" />
<ClInclude Include="mmio.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2017
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,114 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>cuSolverSp_LinearSolver_vs2017</RootNamespace>
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v141</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
<ClCompile Include="mmio.c" />
<ClCompile Include="mmio_wrapper.cpp" />
<ClInclude Include="mmio.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

View File

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2019
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cuSolverSp_LinearSolver", "cuSolverSp_LinearSolver_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,110 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
</PropertyGroup>
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
<RootNamespace>cuSolverSp_LinearSolver_vs2019</RootNamespace>
<ProjectName>cuSolverSp_LinearSolver</ProjectName>
<CudaToolkitCustomDir />
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup>
<ConfigurationType>Application</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)'=='Release'">
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets">
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<IntDir>$(Platform)/$(Configuration)/</IntDir>
<IncludePath>$(IncludePath)</IncludePath>
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
<CodeAnalysisRules />
<CodeAnalysisRuleAssemblies />
</PropertyGroup>
<PropertyGroup Condition="'$(Platform)'=='x64'">
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>cusolver.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
<OutputFile>$(OutDir)/cuSolverSp_LinearSolver.exe</OutputFile>
</Link>
<CudaCompile>
<CodeGeneration>compute_30,sm_30;compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;</CodeGeneration>
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
<Include>./;../../Common</Include>
<Defines>WIN32</Defines>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<Optimization>Disabled</Optimization>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MTd</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>
<GenerateDebugInformation>false</GenerateDebugInformation>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
</Link>
<CudaCompile>
<Runtime>MT</Runtime>
<TargetMachinePlatform>64</TargetMachinePlatform>
</CudaCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="cuSolverSp_LinearSolver.cpp" />
<ClCompile Include="mmio.c" />
<ClCompile Include="mmio_wrapper.cpp" />
<ClInclude Include="mmio.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
<Import Project="$(CUDAPropsPath)\CUDA 10.2.targets" />
</ImportGroup>
</Project>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,517 @@
/*
* Matrix Market I/O library for ANSI C
*
* See http://math.nist.gov/MatrixMarket for details.
*
*
*/
/* avoid Windows warnings (for example: strcpy, fscanf, etc.) */
#if defined(_WIN32)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "mmio.h"
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
double **val_, int **I_, int **J_)
{
FILE *f;
MM_typecode matcode;
int M, N, nz;
int i;
double *val;
int *I, *J;
if ((f = fopen(fname, "r")) == NULL)
return -1;
if (mm_read_banner(f, &matcode) != 0)
{
printf("mm_read_unsymetric: Could not process Matrix Market banner ");
printf(" in file [%s]\n", fname);
return -1;
}
if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) &&
mm_is_sparse(matcode)))
{
fprintf(stderr, "Sorry, this application does not support ");
fprintf(stderr, "Market Market type: [%s]\n",
mm_typecode_to_str(matcode));
return -1;
}
/* find out size of sparse matrix: M, N, nz .... */
if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0)
{
fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n");
return -1;
}
*M_ = M;
*N_ = N;
*nz_ = nz;
/* reserve memory for matrices */
I = (int *) malloc(nz * sizeof(int));
J = (int *) malloc(nz * sizeof(int));
val = (double *) malloc(nz * sizeof(double));
*val_ = val;
*I_ = I;
*J_ = J;
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
for (i=0; i<nz; i++)
{
if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i]) != 3) {
return -1;
}
I[i]--; /* adjust from 1-based to 0-based */
J[i]--;
}
fclose(f);
return 0;
}
int mm_is_valid(MM_typecode matcode)
{
if (!mm_is_matrix(matcode)) return 0;
if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0;
if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0;
if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) ||
mm_is_skew(matcode))) return 0;
return 1;
}
int mm_read_banner(FILE *f, MM_typecode *matcode)
{
char line[MM_MAX_LINE_LENGTH];
char banner[MM_MAX_TOKEN_LENGTH];
char mtx[MM_MAX_TOKEN_LENGTH];
char crd[MM_MAX_TOKEN_LENGTH];
char data_type[MM_MAX_TOKEN_LENGTH];
char storage_scheme[MM_MAX_TOKEN_LENGTH];
char *p;
mm_clear_typecode(matcode);
if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
return MM_PREMATURE_EOF;
if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
storage_scheme) != 5)
return MM_PREMATURE_EOF;
for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */
for (p=crd; *p!='\0'; *p=tolower(*p),p++);
for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
/* check for banner */
if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
return MM_NO_HEADER;
/* first field should be "mtx" */
if (strcmp(mtx, MM_MTX_STR) != 0)
return MM_UNSUPPORTED_TYPE;
mm_set_matrix(matcode);
/* second field describes whether this is a sparse matrix (in coordinate
storage) or a dense array */
if (strcmp(crd, MM_SPARSE_STR) == 0)
mm_set_sparse(matcode);
else
if (strcmp(crd, MM_DENSE_STR) == 0)
mm_set_dense(matcode);
else
return MM_UNSUPPORTED_TYPE;
/* third field */
if (strcmp(data_type, MM_REAL_STR) == 0)
mm_set_real(matcode);
else
if (strcmp(data_type, MM_COMPLEX_STR) == 0)
mm_set_complex(matcode);
else
if (strcmp(data_type, MM_PATTERN_STR) == 0)
mm_set_pattern(matcode);
else
if (strcmp(data_type, MM_INT_STR) == 0)
mm_set_integer(matcode);
else
return MM_UNSUPPORTED_TYPE;
/* fourth field */
if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
mm_set_general(matcode);
else
if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
mm_set_symmetric(matcode);
else
if (strcmp(storage_scheme, MM_HERM_STR) == 0)
mm_set_hermitian(matcode);
else
if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
mm_set_skew(matcode);
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz)
{
if (fprintf(f, "%d %d %d\n", M, N, nz) != 3)
return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
{
char line[MM_MAX_LINE_LENGTH];
int num_items_read;
/* set return null parameter values, in case we exit with errors */
*M = *N = *nz = 0;
/* now continue scanning until you reach the end-of-comments */
do
{
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
return MM_PREMATURE_EOF;
}while (line[0] == '%');
/* line[] is either blank or has M,N, nz */
if (sscanf(line, "%d %d %d", M, N, nz) == 3)
return 0;
else
do
{
num_items_read = fscanf(f, "%d %d %d", M, N, nz);
if (num_items_read == EOF) return MM_PREMATURE_EOF;
}
while (num_items_read != 3);
return 0;
}
int mm_read_mtx_array_size(FILE *f, int *M, int *N)
{
char line[MM_MAX_LINE_LENGTH];
int num_items_read;
/* set return null parameter values, in case we exit with errors */
*M = *N = 0;
/* now continue scanning until you reach the end-of-comments */
do
{
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
return MM_PREMATURE_EOF;
}while (line[0] == '%');
/* line[] is either blank or has M,N, nz */
if (sscanf(line, "%d %d", M, N) == 2)
return 0;
else /* we have a blank line */
do
{
num_items_read = fscanf(f, "%d %d", M, N);
if (num_items_read == EOF) return MM_PREMATURE_EOF;
}
while (num_items_read != 2);
return 0;
}
int mm_write_mtx_array_size(FILE *f, int M, int N)
{
if (fprintf(f, "%d %d\n", M, N) != 2)
return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
/*-------------------------------------------------------------------------*/
/******************************************************************/
/* use when I[], J[], and val[]J, and val[] are already allocated */
/******************************************************************/
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode)
{
int i;
if (mm_is_complex(matcode))
{
for (i=0; i<nz; i++)
if (fscanf(f, "%d %d %lg %lg", &I[i], &J[i], &val[2*i], &val[2*i+1])
!= 4) return MM_PREMATURE_EOF;
}
else if (mm_is_real(matcode) || mm_is_integer(matcode))
{
for (i=0; i<nz; i++)
{
if (fscanf(f, "%d %d %lg\n", &I[i], &J[i], &val[i])
!= 3) return MM_PREMATURE_EOF;
}
}
else if (mm_is_pattern(matcode))
{
for (i=0; i<nz; i++)
if (fscanf(f, "%d %d", &I[i], &J[i])
!= 2) return MM_PREMATURE_EOF;
}
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J,
double *real, double *imag, MM_typecode matcode)
{
if (mm_is_complex(matcode))
{
if (fscanf(f, "%d %d %lg %lg", I, J, real, imag)
!= 4) return MM_PREMATURE_EOF;
}
else if (mm_is_real(matcode) || mm_is_integer(matcode))
{
if (fscanf(f, "%d %d %lg\n", I, J, real)
!= 3) return MM_PREMATURE_EOF;
}
else if (mm_is_pattern(matcode))
{
if (fscanf(f, "%d %d", I, J) != 2) return MM_PREMATURE_EOF;
}
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
/************************************************************************
mm_read_mtx_crd() fills M, N, nz, array of values, and return
type code, e.g. 'MCRS'
if matrix is complex, values[] is of size 2*nz,
(nz pairs of real/imaginary values)
************************************************************************/
int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
double **val, MM_typecode *matcode)
{
int ret_code;
FILE *f;
if (strcmp(fname, "stdin") == 0) f=stdin;
else
if ((f = fopen(fname, "r")) == NULL)
return MM_COULD_NOT_READ_FILE;
if ((ret_code = mm_read_banner(f, matcode)) != 0)
return ret_code;
if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) &&
mm_is_matrix(*matcode)))
return MM_UNSUPPORTED_TYPE;
if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0)
return ret_code;
*I = (int *) malloc(*nz * sizeof(int));
*J = (int *) malloc(*nz * sizeof(int));
*val = NULL;
if (mm_is_complex(*matcode))
{
*val = (double *) malloc(*nz * 2 * sizeof(double));
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
else if (mm_is_real(*matcode) || mm_is_integer(*matcode))
{
*val = (double *) malloc(*nz * sizeof(double));
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
else if (mm_is_pattern(*matcode))
{
ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val,
*matcode);
if (ret_code != 0) return ret_code;
}
if (f != stdin) fclose(f);
return 0;
}
int mm_write_banner(FILE *f, MM_typecode matcode)
{
char *str = mm_typecode_to_str(matcode);
int ret_code;
ret_code = fprintf(f, "%s %s\n", MatrixMarketBanner, str);
free(str);
if (ret_code !=2 )
return MM_COULD_NOT_WRITE_FILE;
else
return 0;
}
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode)
{
FILE *f;
int i;
if (strcmp(fname, "stdout") == 0)
f = stdout;
else
if ((f = fopen(fname, "w")) == NULL)
return MM_COULD_NOT_WRITE_FILE;
/* print banner followed by typecode */
fprintf(f, "%s ", MatrixMarketBanner);
fprintf(f, "%s\n", mm_typecode_to_str(matcode));
/* print matrix sizes and nonzeros */
fprintf(f, "%d %d %d\n", M, N, nz);
/* print values */
if (mm_is_pattern(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d\n", I[i], J[i]);
else
if (mm_is_integer(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d %d\n", I[i], J[i], (int)val[i]);
else
if (mm_is_real(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d %20.16g\n", I[i], J[i], val[i]);
else
if (mm_is_complex(matcode))
for (i=0; i<nz; i++)
fprintf(f, "%d %d %20.16g %20.16g\n", I[i], J[i], val[2*i],
val[2*i+1]);
else
{
if (f != stdout) fclose(f);
return MM_UNSUPPORTED_TYPE;
}
if (f !=stdout) fclose(f);
return 0;
}
/**
* Create a new copy of a string s. mm_strdup() is a common routine, but
* not part of ANSI C, so it is included here. Used by mm_typecode_to_str().
*
*/
static char *mm_strdup(const char *s)
{
size_t len = strlen(s);
char *s2 = (char *) malloc((len+1)*sizeof(char));
return strcpy(s2, s);
}
char *mm_typecode_to_str(MM_typecode matcode)
{
char buffer[MM_MAX_LINE_LENGTH];
char *types[4];
//char *mm_strdup(const char *);
//int error =0;
/* check for MTX type */
if (mm_is_matrix(matcode))
types[0] = MM_MTX_STR;
else
return NULL; // error=1;
/* check for CRD or ARR matrix */
if (mm_is_sparse(matcode))
types[1] = MM_SPARSE_STR;
else
if (mm_is_dense(matcode))
types[1] = MM_DENSE_STR;
else
return NULL;
/* check for element data type */
if (mm_is_real(matcode))
types[2] = MM_REAL_STR;
else
if (mm_is_complex(matcode))
types[2] = MM_COMPLEX_STR;
else
if (mm_is_pattern(matcode))
types[2] = MM_PATTERN_STR;
else
if (mm_is_integer(matcode))
types[2] = MM_INT_STR;
else
return NULL;
/* check for symmetry type */
if (mm_is_general(matcode))
types[3] = MM_GENERAL_STR;
else
if (mm_is_symmetric(matcode))
types[3] = MM_SYMM_STR;
else
if (mm_is_hermitian(matcode))
types[3] = MM_HERM_STR;
else
if (mm_is_skew(matcode))
types[3] = MM_SKEW_STR;
else
return NULL;
sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]);
return mm_strdup(buffer);
}

View File

@ -0,0 +1,141 @@
/*
* Matrix Market I/O library for ANSI C
*
* See http://math.nist.gov/MatrixMarket for details.
*
*
*/
#ifndef MM_IO_H
#define MM_IO_H
#if defined(__cplusplus)
extern "C" {
#endif /* __cplusplus */
#define MM_MAX_LINE_LENGTH 1025
#define MatrixMarketBanner "%%MatrixMarket"
#define MM_MAX_TOKEN_LENGTH 64
typedef char MM_typecode[4];
char *mm_typecode_to_str(MM_typecode matcode);
int mm_read_banner(FILE *f, MM_typecode *matcode);
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);
int mm_read_mtx_array_size(FILE *f, int *M, int *N);
int mm_write_banner(FILE *f, MM_typecode matcode);
int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
int mm_write_mtx_array_size(FILE *f, int M, int N);
/********************* MM_typecode query fucntions ***************************/
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
#define mm_is_dense(typecode) ((typecode)[1]=='A')
#define mm_is_array(typecode) ((typecode)[1]=='A')
#define mm_is_complex(typecode) ((typecode)[2]=='C')
#define mm_is_real(typecode) ((typecode)[2]=='R')
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
#define mm_is_integer(typecode) ((typecode)[2]=='I')
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
#define mm_is_general(typecode) ((typecode)[3]=='G')
#define mm_is_skew(typecode) ((typecode)[3]=='K')
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
int mm_is_valid(MM_typecode matcode); /* too complex for a macro */
/********************* MM_typecode modify fucntions ***************************/
#define mm_set_matrix(typecode) ((*typecode)[0]='M')
#define mm_set_coordinate(typecode) ((*typecode)[1]='C')
#define mm_set_array(typecode) ((*typecode)[1]='A')
#define mm_set_dense(typecode) mm_set_array(typecode)
#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
#define mm_set_complex(typecode)((*typecode)[2]='C')
#define mm_set_real(typecode) ((*typecode)[2]='R')
#define mm_set_pattern(typecode)((*typecode)[2]='P')
#define mm_set_integer(typecode)((*typecode)[2]='I')
#define mm_set_symmetric(typecode)((*typecode)[3]='S')
#define mm_set_general(typecode)((*typecode)[3]='G')
#define mm_set_skew(typecode) ((*typecode)[3]='K')
#define mm_set_hermitian(typecode)((*typecode)[3]='H')
#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \
(*typecode)[2]=' ',(*typecode)[3]='G')
#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
/********************* Matrix Market error codes ***************************/
#define MM_COULD_NOT_READ_FILE 11
#define MM_PREMATURE_EOF 12
#define MM_NOT_MTX 13
#define MM_NO_HEADER 14
#define MM_UNSUPPORTED_TYPE 15
#define MM_LINE_TOO_LONG 16
#define MM_COULD_NOT_WRITE_FILE 17
/******************** Matrix Market internal definitions ********************
MM_matrix_typecode: 4-character sequence
ojbect sparse/ data storage
dense type scheme
string position: [0] [1] [2] [3]
Matrix typecode: M(atrix) C(oord) R(eal) G(eneral)
A(array) C(omplex) H(ermitian)
P(attern) S(ymmetric)
I(nteger) K(kew)
***********************************************************************/
#define MM_MTX_STR "matrix"
#define MM_ARRAY_STR "array"
#define MM_DENSE_STR "array"
#define MM_COORDINATE_STR "coordinate"
#define MM_SPARSE_STR "coordinate"
#define MM_COMPLEX_STR "complex"
#define MM_REAL_STR "real"
#define MM_INT_STR "integer"
#define MM_GENERAL_STR "general"
#define MM_SYMM_STR "symmetric"
#define MM_HERM_STR "hermitian"
#define MM_SKEW_STR "skew-symmetric"
#define MM_PATTERN_STR "pattern"
/* high level routines */
int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J,
double **val, MM_typecode *matcode);
int mm_write_mtx_crd(char fname[], int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode);
int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[],
double val[], MM_typecode matcode);
int mm_read_mtx_crd_entry(FILE *f, int *I, int *J, double *real, double *img,
MM_typecode matcode);
int mm_read_unsymmetric_sparse(const char *fname, int *M_, int *N_, int *nz_,
double **val_, int **I_, int **J_);
#if defined(__cplusplus)
}
#endif /* __cplusplus */
#endif

View File

@ -0,0 +1,529 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mmio.h"
#include <cusolverDn.h>
/* avoid Windows warnings (for example: strcpy, fscanf, etc.) */
#if defined(_WIN32)
#define _CRT_SECURE_NO_WARNINGS
#endif
/* various __inline__ __device__ function to initialize a T_ELEM */
template <typename T_ELEM> __inline__ T_ELEM cuGet (int );
template <> __inline__ float cuGet<float >(int x)
{
return float(x);
}
template <> __inline__ double cuGet<double>(int x)
{
return double(x);
}
template <> __inline__ cuComplex cuGet<cuComplex>(int x)
{
return (make_cuComplex( float(x), 0.0f ));
}
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(int x)
{
return (make_cuDoubleComplex( double(x), 0.0 ));
}
template <typename T_ELEM> __inline__ T_ELEM cuGet (int , int );
template <> __inline__ float cuGet<float >(int x, int y)
{
return float(x);
}
template <> __inline__ double cuGet<double>(int x, int y)
{
return double(x);
}
template <> __inline__ cuComplex cuGet<cuComplex>(int x, int y)
{
return make_cuComplex( float(x), float(y) );
}
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(int x, int y)
{
return (make_cuDoubleComplex( double(x), double(y) ));
}
template <typename T_ELEM> __inline__ T_ELEM cuGet (float );
template <> __inline__ float cuGet<float >(float x)
{
return float(x);
}
template <> __inline__ double cuGet<double>(float x)
{
return double(x);
}
template <> __inline__ cuComplex cuGet<cuComplex>(float x)
{
return (make_cuComplex( float(x), 0.0f ));
}
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(float x)
{
return (make_cuDoubleComplex( double(x), 0.0 ));
}
template <typename T_ELEM> __inline__ T_ELEM cuGet (float, float );
template <> __inline__ float cuGet<float >(float x, float y)
{
return float(x);
}
template <> __inline__ double cuGet<double>(float x, float y)
{
return double(x);
}
template <> __inline__ cuComplex cuGet<cuComplex>(float x, float y)
{
return (make_cuComplex( float(x), float(y) ));
}
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(float x, float y)
{
return (make_cuDoubleComplex( double(x), double(y) ));
}
template <typename T_ELEM> __inline__ T_ELEM cuGet (double );
template <> __inline__ float cuGet<float >(double x)
{
return float(x);
}
template <> __inline__ double cuGet<double>(double x)
{
return double(x);
}
template <> __inline__ cuComplex cuGet<cuComplex>(double x)
{
return (make_cuComplex( float(x), 0.0f ));
}
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(double x)
{
return (make_cuDoubleComplex( double(x), 0.0 ));
}
template <typename T_ELEM> __inline__ T_ELEM cuGet (double, double );
template <> __inline__ float cuGet<float >(double x, double y)
{
return float(x);
}
template <> __inline__ double cuGet<double>(double x, double y)
{
return double(x);
}
template <> __inline__ cuComplex cuGet<cuComplex>(double x, double y)
{
return (make_cuComplex( float(x), float(y) ));
}
template <> __inline__ cuDoubleComplex cuGet<cuDoubleComplex>(double x, double y)
{
return (make_cuDoubleComplex( double(x), double(y) ));
}
static void compress_index(
const int *Ind,
int nnz,
int m,
int *Ptr,
int base)
{
int i;
/* initialize everything to zero */
for(i=0; i<m+1; i++){
Ptr[i]=0;
}
/* count elements in every row */
Ptr[0]=base;
for(i=0; i<nnz; i++){
Ptr[Ind[i]+(1-base)]++;
}
/* add all the values */
for(i=0; i<m; i++){
Ptr[i+1]+=Ptr[i];
}
}
struct cooFormat {
int i ;
int j ;
int p ; // permutation
};
int cmp_cooFormat_csr( struct cooFormat *s, struct cooFormat *t)
{
if ( s->i < t->i ){
return -1 ;
}
else if ( s->i > t->i ){
return 1 ;
}
else{
return s->j - t->j ;
}
}
int cmp_cooFormat_csc( struct cooFormat *s, struct cooFormat *t)
{
if ( s->j < t->j ){
return -1 ;
}
else if ( s->j > t->j ){
return 1 ;
}
else{
return s->i - t->i ;
}
}
typedef int (*FUNPTR) (const void*, const void*) ;
typedef int (*FUNPTR2) ( struct cooFormat *s, struct cooFormat *t) ;
static FUNPTR2 fptr_array[2] = {
cmp_cooFormat_csr,
cmp_cooFormat_csc,
};
static int verify_pattern(
int m,
int nnz,
int *csrRowPtr,
int *csrColInd)
{
int i, col, start, end, base_index;
int error_found = 0;
if (nnz != (csrRowPtr[m] - csrRowPtr[0])){
fprintf(stderr, "Error (nnz check failed): (csrRowPtr[%d]=%d - csrRowPtr[%d]=%d) != (nnz=%d)\n", 0, csrRowPtr[0], m, csrRowPtr[m], nnz);
error_found = 1;
}
base_index = csrRowPtr[0];
if ((0 != base_index) && (1 != base_index)){
fprintf(stderr, "Error (base index check failed): base index = %d\n", base_index);
error_found = 1;
}
for (i=0; (!error_found) && (i<m); i++){
start = csrRowPtr[i ] - base_index;
end = csrRowPtr[i+1] - base_index;
if (start > end){
fprintf(stderr, "Error (corrupted row): csrRowPtr[%d] (=%d) > csrRowPtr[%d] (=%d)\n", i, start+base_index, i+1, end+base_index);
error_found = 1;
}
for (col=start; col<end; col++){
if (csrColInd[col] < base_index){
fprintf(stderr, "Error (column vs. base index check failed): csrColInd[%d] < %d\n", col, base_index);
error_found = 1;
}
if ((col < (end-1)) && (csrColInd[col] >= csrColInd[col+1])){
fprintf(stderr, "Error (sorting of the column indecis check failed): (csrColInd[%d]=%d) >= (csrColInd[%d]=%d)\n", col, csrColInd[col], col+1, csrColInd[col+1]);
error_found = 1;
}
}
}
return error_found ;
}
template <typename T_ELEM>
int loadMMSparseMatrix(
char *filename,
char elem_type,
bool csrFormat,
int *m,
int *n,
int *nnz,
T_ELEM **aVal,
int **aRowInd,
int **aColInd,
int extendSymMatrix)
{
MM_typecode matcode;
double *tempVal;
int *tempRowInd,*tempColInd;
double *tval;
int *trow,*tcol;
int *csrRowPtr, *cscColPtr;
int i,j,error,base,count;
struct cooFormat *work;
/* read the matrix */
error = mm_read_mtx_crd(filename, m, n, nnz, &trow, &tcol, &tval, &matcode);
if (error) {
fprintf(stderr, "!!!! can not open file: '%s'\n", filename);
return 1;
}
/* start error checking */
if (mm_is_complex(matcode) && ((elem_type != 'z') && (elem_type != 'c'))) {
fprintf(stderr, "!!!! complex matrix requires type 'z' or 'c'\n");
return 1;
}
if (mm_is_dense(matcode) || mm_is_array(matcode) || mm_is_pattern(matcode) /*|| mm_is_integer(matcode)*/){
fprintf(stderr, "!!!! dense, array, pattern and integer matrices are not supported\n");
return 1;
}
/* if necessary symmetrize the pattern (transform from triangular to full) */
if ((extendSymMatrix) && (mm_is_symmetric(matcode) || mm_is_hermitian(matcode) || mm_is_skew(matcode))){
//count number of non-diagonal elements
count=0;
for(i=0; i<(*nnz); i++){
if (trow[i] != tcol[i]){
count++;
}
}
//allocate space for the symmetrized matrix
tempRowInd = (int *)malloc((*nnz + count) * sizeof(int));
tempColInd = (int *)malloc((*nnz + count) * sizeof(int));
if (mm_is_real(matcode) || mm_is_integer(matcode)){
tempVal = (double *)malloc((*nnz + count) * sizeof(double));
}
else{
tempVal = (double *)malloc(2 * (*nnz + count) * sizeof(double));
}
//copy the elements regular and transposed locations
for(j=0, i=0; i<(*nnz); i++){
tempRowInd[j]=trow[i];
tempColInd[j]=tcol[i];
if (mm_is_real(matcode) || mm_is_integer(matcode)){
tempVal[j]=tval[i];
}
else{
tempVal[2*j] =tval[2*i];
tempVal[2*j+1]=tval[2*i+1];
}
j++;
if (trow[i] != tcol[i]){
tempRowInd[j]=tcol[i];
tempColInd[j]=trow[i];
if (mm_is_real(matcode) || mm_is_integer(matcode)){
if (mm_is_skew(matcode)){
tempVal[j]=-tval[i];
}
else{
tempVal[j]= tval[i];
}
}
else{
if(mm_is_hermitian(matcode)){
tempVal[2*j] = tval[2*i];
tempVal[2*j+1]=-tval[2*i+1];
}
else{
tempVal[2*j] = tval[2*i];
tempVal[2*j+1]= tval[2*i+1];
}
}
j++;
}
}
(*nnz)+=count;
//free temporary storage
free(trow);
free(tcol);
free(tval);
}
else{
tempRowInd=trow;
tempColInd=tcol;
tempVal =tval;
}
// life time of (trow, tcol, tval) is over.
// please use COO format (tempRowInd, tempColInd, tempVal)
// use qsort to sort COO format
work = (struct cooFormat *)malloc(sizeof(struct cooFormat)*(*nnz));
if (NULL == work){
fprintf(stderr, "!!!! allocation error, malloc failed\n");
return 1;
}
for(i=0; i<(*nnz); i++){
work[i].i = tempRowInd[i];
work[i].j = tempColInd[i];
work[i].p = i; // permutation is identity
}
if (csrFormat){
/* create row-major ordering of indices (sorted by row and within each row by column) */
qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[0] );
}else{
/* create column-major ordering of indices (sorted by column and within each column by row) */
qsort(work, *nnz, sizeof(struct cooFormat), (FUNPTR)fptr_array[1] );
}
// (tempRowInd, tempColInd) is sorted either by row-major or by col-major
for(i=0; i<(*nnz); i++){
tempRowInd[i] = work[i].i;
tempColInd[i] = work[i].j;
}
// setup base
// check if there is any row/col 0, if so base-0
// check if there is any row/col equal to matrix dimension m/n, if so base-1
int base0 = 0;
int base1 = 0;
for(i=0; i<(*nnz); i++){
const int row = tempRowInd[i];
const int col = tempColInd[i];
if ( (0 == row) || (0 == col) ){
base0 = 1;
}
if ( (*m == row) || (*n == col) ){
base1 = 1;
}
}
if ( base0 && base1 ){
printf("Error: input matrix is base-0 and base-1 \n");
return 1;
}
base = 0;
if (base1){
base = 1;
}
/* compress the appropriate indices */
if (csrFormat){
/* CSR format (assuming row-major format) */
csrRowPtr = (int *)malloc(((*m)+1) * sizeof(csrRowPtr[0]));
if (!csrRowPtr) return 1;
compress_index(tempRowInd, *nnz, *m, csrRowPtr, base);
*aRowInd = csrRowPtr;
*aColInd = (int *)malloc((*nnz) * sizeof(int));
}
else {
/* CSC format (assuming column-major format) */
cscColPtr = (int *)malloc(((*n)+1) * sizeof(cscColPtr[0]));
if (!cscColPtr) return 1;
compress_index(tempColInd, *nnz, *n, cscColPtr, base);
*aColInd = cscColPtr;
*aRowInd = (int *)malloc((*nnz) * sizeof(int));
}
/* transfrom the matrix values of type double into one of the cusparse library types */
*aVal = (T_ELEM *)malloc((*nnz) * sizeof(T_ELEM));
for (i=0; i<(*nnz); i++) {
if (csrFormat){
(*aColInd)[i] = tempColInd[i];
}
else{
(*aRowInd)[i] = tempRowInd[i];
}
if (mm_is_real(matcode) || mm_is_integer(matcode)){
(*aVal)[i] = cuGet<T_ELEM>( tempVal[ work[i].p ] );
}
else{
(*aVal)[i] = cuGet<T_ELEM>(tempVal[2*work[i].p], tempVal[2*work[i].p+1]);
}
}
/* check for corruption */
int error_found;
if (csrFormat){
error_found = verify_pattern(*m, *nnz, *aRowInd, *aColInd);
}else{
error_found = verify_pattern(*n, *nnz, *aColInd, *aRowInd);
}
if (error_found){
fprintf(stderr, "!!!! verify_pattern failed\n");
return 1;
}
/* cleanup and exit */
free(work);
free(tempVal);
free(tempColInd);
free(tempRowInd);
return 0;
}
/* specific instantiation */
template int loadMMSparseMatrix<float>(
char *filename,
char elem_type,
bool csrFormat,
int *m,
int *n,
int *nnz,
float **aVal,
int **aRowInd,
int **aColInd,
int extendSymMatrix);
template int loadMMSparseMatrix<double>(
char *filename,
char elem_type,
bool csrFormat,
int *m,
int *n,
int *nnz,
double **aVal,
int **aRowInd,
int **aColInd,
int extendSymMatrix);
template int loadMMSparseMatrix<cuComplex>(
char *filename,
char elem_type,
bool csrFormat,
int *m,
int *n,
int *nnz,
cuComplex **aVal,
int **aRowInd,
int **aColInd,
int extendSymMatrix);
template int loadMMSparseMatrix<cuDoubleComplex>(
char *filename,
char elem_type,
bool csrFormat,
int *m,
int *n,
int *nnz,
cuDoubleComplex **aVal,
int **aRowInd,
int **aColInd,
int extendSymMatrix);

407
Samples/cudaNvSci/Makefile Normal file
View File

@ -0,0 +1,407 @@
################################################################################
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project only supported on Mac OS X and Linux Platforms)
#
################################################################################
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L $(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L $(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
endif
ifeq ($(TARGET_OS),qnx)
CCFLAGS += -DWIN_INTERFACE_CUSTOM
LDFLAGS += -lsocket
endif
# Install directory of different arch
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
SAMPLE_ENABLED := 1
# This sample is not supported on Mac OSX
ifeq ($(TARGET_OS),darwin)
$(info >>> WARNING - cudaNvSci is not supported on Mac OSX - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on ARMv7
ifeq ($(TARGET_ARCH),armv7l)
$(info >>> WARNING - cudaNvSci is not supported on ARMv7 - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on QNX
ifeq ($(TARGET_OS),qnx)
$(info >>> WARNING - cudaNvSci is not supported on QNX - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../Common
LIBRARIES :=
################################################################################
#Detect if installed version of GCC supports C++11
ifeq ($(TARGET_OS),linux)
empty :=
space := $(empty) $(empty)
GCCVERSIONSTRING := $(shell expr `$(HOST_COMPILER) -dumpversion`)
#Create version number without "."
GCCVERSION := $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f1 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f2 -d.)
GCCVERSION += $(shell expr `echo $(GCCVERSIONSTRING)` | cut -f3 -d.)
# Make sure the version number has at least 3 decimals
GCCVERSION += 00
# Remove spaces from the version number
GCCVERSION := $(subst $(space),$(empty),$(GCCVERSION))
# Crop the version number to 3 decimals.
GCCVERSION := $(shell expr `echo $(GCCVERSION)` | cut -b1-3)
#$(warning $(GCCVERSION))
IS_MIN_VERSION := $(shell expr `echo $(GCCVERSION)` \>= 470)
ifeq ($(IS_MIN_VERSION), 1)
$(info >>> GCC Version is greater or equal to 4.7.0 <<<)
else
$(info >>> Waiving build. Minimum GCC version required for C++11 is 4.7.0 <<<)
SAMPLE_ENABLED := 0
endif
endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
SMS ?= 30 35 37 50 52 60 61 70 72 75
else
SMS ?= 30 35 37 50 52 60 61 70 75
endif
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
ifeq ($(TARGET_OS),darwin)
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
else
ifeq ($(TARGET_ARCH),x86_64)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
endif
ifeq ($(TARGET_ARCH),ppc64le)
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
endif
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
ifeq ("$(CUDALIB)","")
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
SAMPLE_ENABLED := 0
else
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
LIBRARIES += -L$(CUDALIB) -lcuda
endif
endif
ALL_CCFLAGS += --std=c++11
LIBRARIES += -lnvscibuf -lnvscisync
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules
all: build
build: cudaNvSci
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
@echo "Sample will be waived due to the above missing dependencies"
else
@echo "Sample is ready - all dependencies have been met"
endif
cudaNvSci.o:cudaNvSci.cpp
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
imageKernels.o:imageKernels.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
main.o:main.cpp
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
cudaNvSci: cudaNvSci.o imageKernels.o main.o
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
run: build
$(EXEC) ./cudaNvSci
clean:
rm -f cudaNvSci cudaNvSci.o imageKernels.o main.o
rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/cudaNvSci
clobber: clean

View File

@ -0,0 +1,70 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
<name>cudaNvSci</name>
<cflags>
<flag>--std=c++11</flag>
</cflags>
<cuda_api_list>
<toolkit>cudaImportExternalMemory</toolkit>
<toolkit>cudaExternalMemoryGetMappedBuffer</toolkit>
<toolkit>cudaExternalMemoryGetMappedMipmappedArray</toolkit>
<toolkit>cudaImportExternalSemaphore</toolkit>
<toolkit>cudaSignalExternalSemaphoresAsync</toolkit>
<toolkit>cudaWaitExternalSemaphoresAsync</toolkit>
<toolkit>cudaDestroyExternalSemaphore</toolkit>
<toolkit>cudaDestroyExternalMemory</toolkit>
</cuda_api_list>
<description><![CDATA[This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread & rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04]]></description>
<devicecompilation>whole</devicecompilation>
<includepaths>
<path>./</path>
<path>../</path>
<path>../../common/inc</path>
</includepaths>
<keyconcepts>
<concept level="basic">CUDA NvSci Interop</concept>
<concept level="advanced">Data Parallel Algorithms</concept>
<concept level="advanced">Image Processing</concept>
</keyconcepts>
<keywords>
<keyword>CUDA</keyword>
<keyword>CPP11</keyword>
</keywords>
<libraries>
<library os="linux">cuda</library>
<library framework="true" os="macosx">CUDA</library>
<library>nvscibuf</library>
<library>nvscisync</library>
</libraries>
<librarypaths>
</librarypaths>
<nsight_eclipse>true</nsight_eclipse>
<primary_file>main.cpp</primary_file>
<required_dependencies>
<dependency>NVSCI</dependency>
</required_dependencies>
<scopes>
<scope>1:CUDA Advanced Topics</scope>
<scope>1:CUDA NvSciBuf/NvSciSync Interop</scope>
</scopes>
<sm-arch>sm60</sm-arch>
<sm-arch>sm61</sm-arch>
<sm-arch>sm70</sm-arch>
<sm-arch>sm72</sm-arch>
<sm-arch>sm75</sm-arch>
<supported_envs>
<env>
<platform>aarch64</platform>
</env>
<env>
<arch>x86_64</arch>
<platform>linux</platform>
</env>
</supported_envs>
<supported_sm_architectures>
<from>6.0</from>
</supported_sm_architectures>
<title>CUDA NvSciBuf/NvSciSync Interop</title>
<type>exe</type>
</entry>

View File

@ -0,0 +1,64 @@
# cudaNvSci - CUDA NvSciBuf/NvSciSync Interop
## Description
This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread & rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04
## Key Concepts
CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
## Supported SM Architectures
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
Linux
## Supported CPU Architecture
x86_64, aarch64
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
cudaImportExternalMemory, cudaExternalMemoryGetMappedBuffer, cudaExternalMemoryGetMappedMipmappedArray, cudaImportExternalSemaphore, cudaSignalExternalSemaphoresAsync, cudaWaitExternalSemaphoresAsync, cudaDestroyExternalSemaphore, cudaDestroyExternalMemory
## Dependencies needed to build/run
[NVSCI](../../README.md#nvsci)
## Prerequisites
Download and install the [CUDA Toolkit 10.2](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## Build and Run
### Linux
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
```
$ cd <sample_dir>
$ make
```
The samples makefiles can take advantage of certain options:
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, aarch64.
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=aarch64` <br/>
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
* **dbg=1** - build with debug symbols
```
$ make dbg=1
```
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
```
$ make SMS="50 60"
```
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
```
$ make HOST_COMPILER=g++
```
## References (for more details)

Some files were not shown because too many files have changed in this diff Show More