cuda-samples/Common/helper_cuda_drvapi.h
2023-02-27 22:33:19 +00:00

409 lines
12 KiB
C++

/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Helper functions for CUDA Driver API error handling (make sure that CUDA_H is
// included in your projects)
#ifndef COMMON_HELPER_CUDA_DRVAPI_H_
#define COMMON_HELPER_CUDA_DRVAPI_H_
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <cstring>
#include <sstream>
#include <helper_string.h>
#ifndef MAX
#define MAX(a, b) (a > b ? a : b)
#endif
#ifndef COMMON_HELPER_CUDA_H_
inline int ftoi(float value) {
return (value >= 0 ? static_cast<int>(value + 0.5)
: static_cast<int>(value - 0.5));
}
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions
// add a level of protection to the CUDA SDK samples, let's force samples to
// explicitly include CUDA.H
#ifdef __cuda_cuda_h__
// This will output the proper CUDA error strings in the event that a CUDA host
// call returns an error
#ifndef checkCudaErrors
#define checkCudaErrors(err) __checkCudaErrors(err, __FILE__, __LINE__)
// These are the inline versions for all of the SDK helper functions
inline void __checkCudaErrors(CUresult err, const char *file, const int line) {
if (CUDA_SUCCESS != err) {
const char *errorStr = NULL;
cuGetErrorString(err, &errorStr);
fprintf(stderr,
"checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, "
"line %i.\n",
err, errorStr, file, line);
exit(EXIT_FAILURE);
}
}
#endif
// This function wraps the CUDA Driver API into a template function
template <class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute,
int device) {
checkCudaErrors(cuDeviceGetAttribute(attribute, device_attribute, device));
}
#endif
// Beginning of GPU Architecture definitions
inline int _ConvertSMVer2CoresDRV(int major, int minor) {
// Defines for GPU Architecture types (using the SM version to determine the #
// of cores per SM
typedef struct {
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM
// minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] = {
{0x30, 192},
{0x32, 192},
{0x35, 192},
{0x37, 192},
{0x50, 128},
{0x52, 128},
{0x53, 128},
{0x60, 64},
{0x61, 128},
{0x62, 128},
{0x70, 64},
{0x72, 64},
{0x75, 64},
{0x80, 64},
{0x86, 128},
{0x87, 128},
{0x89, 128},
{0x90, 128},
{-1, -1}};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1) {
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
// If we don't find the values, we default use the previous one to run
// properly
printf(
"MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n",
major, minor, nGpuArchCoresPerSM[index - 1].Cores);
return nGpuArchCoresPerSM[index - 1].Cores;
}
// end of GPU Architecture definitions
#ifdef __cuda_cuda_h__
// General GPU Device CUDA Initialization
inline int gpuDeviceInitDRV(int ARGC, const char **ARGV) {
int cuDevice = 0;
int deviceCount = 0;
checkCudaErrors(cuInit(0));
checkCudaErrors(cuDeviceGetCount(&deviceCount));
if (deviceCount == 0) {
fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
exit(EXIT_FAILURE);
}
int dev = 0;
dev = getCmdLineArgumentInt(ARGC, (const char **)ARGV, "device=");
if (dev < 0) {
dev = 0;
}
if (dev > deviceCount - 1) {
fprintf(stderr, "\n");
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n",
deviceCount);
fprintf(stderr,
">> cudaDeviceInit (-device=%d) is not a valid GPU device. <<\n",
dev);
fprintf(stderr, "\n");
return -dev;
}
checkCudaErrors(cuDeviceGet(&cuDevice, dev));
char name[100];
checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
if (computeMode == CU_COMPUTEMODE_PROHIBITED) {
fprintf(stderr,
"Error: device is running in <CU_COMPUTEMODE_PROHIBITED>, no "
"threads can use this CUDA Device.\n");
return -1;
}
if (checkCmdLineFlag(ARGC, (const char **)ARGV, "quiet") == false) {
printf("gpuDeviceInitDRV() Using CUDA Device [%d]: %s\n", dev, name);
}
return dev;
}
// This function returns the best GPU based on performance
inline int gpuGetMaxGflopsDeviceIdDRV() {
CUdevice current_device = 0;
CUdevice max_perf_device = 0;
int device_count = 0;
int sm_per_multiproc = 0;
unsigned long long max_compute_perf = 0;
int major = 0;
int minor = 0;
int multiProcessorCount;
int clockRate;
int devices_prohibited = 0;
cuInit(0);
checkCudaErrors(cuDeviceGetCount(&device_count));
if (device_count == 0) {
fprintf(stderr,
"gpuGetMaxGflopsDeviceIdDRV error: no devices supporting CUDA\n");
exit(EXIT_FAILURE);
}
// Find the best CUDA capable GPU device
current_device = 0;
while (current_device < device_count) {
checkCudaErrors(cuDeviceGetAttribute(
&multiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
current_device));
checkCudaErrors(cuDeviceGetAttribute(
&clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, current_device));
checkCudaErrors(cuDeviceGetAttribute(
&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));
checkCudaErrors(cuDeviceGetAttribute(
&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device));
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,
current_device);
if (computeMode != CU_COMPUTEMODE_PROHIBITED) {
if (major == 9999 && minor == 9999) {
sm_per_multiproc = 1;
} else {
sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
}
unsigned long long compute_perf =
(unsigned long long)(multiProcessorCount * sm_per_multiproc *
clockRate);
if (compute_perf > max_compute_perf) {
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
} else {
devices_prohibited++;
}
++current_device;
}
if (devices_prohibited == device_count) {
fprintf(stderr,
"gpuGetMaxGflopsDeviceIdDRV error: all devices have compute mode "
"prohibited.\n");
exit(EXIT_FAILURE);
}
return max_perf_device;
}
// General initialization call to pick the best CUDA Device
inline CUdevice findCudaDeviceDRV(int argc, const char **argv) {
CUdevice cuDevice;
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, (const char **)argv, "device")) {
devID = gpuDeviceInitDRV(argc, argv);
if (devID < 0) {
printf("exiting...\n");
exit(EXIT_SUCCESS);
}
} else {
// Otherwise pick the device with highest Gflops/s
char name[100];
devID = gpuGetMaxGflopsDeviceIdDRV();
checkCudaErrors(cuDeviceGet(&cuDevice, devID));
cuDeviceGetName(name, 100, cuDevice);
printf("> Using CUDA Device [%d]: %s\n", devID, name);
}
cuDeviceGet(&cuDevice, devID);
return cuDevice;
}
inline CUdevice findIntegratedGPUDrv() {
CUdevice current_device = 0;
int device_count = 0;
int devices_prohibited = 0;
int isIntegrated;
cuInit(0);
checkCudaErrors(cuDeviceGetCount(&device_count));
if (device_count == 0) {
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
// Find the integrated GPU which is compute capable
while (current_device < device_count) {
int computeMode = -1;
checkCudaErrors(cuDeviceGetAttribute(
&isIntegrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, current_device));
checkCudaErrors(cuDeviceGetAttribute(
&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device));
// If GPU is integrated and is not running on Compute Mode prohibited use
// that
if (isIntegrated && (computeMode != CU_COMPUTEMODE_PROHIBITED)) {
int major = 0, minor = 0;
char deviceName[256];
checkCudaErrors(cuDeviceGetAttribute(
&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
current_device));
checkCudaErrors(cuDeviceGetAttribute(
&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
current_device));
checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
current_device, deviceName, major, minor);
return current_device;
} else {
devices_prohibited++;
}
current_device++;
}
if (devices_prohibited == device_count) {
fprintf(stderr, "CUDA error: No Integrated CUDA capable GPU found.\n");
exit(EXIT_FAILURE);
}
return -1;
}
// General check for CUDA GPU SM Capabilities
inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version,
int devID) {
CUdevice cuDevice;
char name[256];
int major = 0, minor = 0;
checkCudaErrors(cuDeviceGet(&cuDevice, devID));
checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
checkCudaErrors(cuDeviceGetAttribute(
&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
checkCudaErrors(cuDeviceGetAttribute(
&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
if ((major > major_version) ||
(major == major_version && minor >= minor_version)) {
printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", devID, name,
major, minor);
return true;
} else {
printf(
"No GPU device was found that can support CUDA compute capability "
"%d.%d.\n",
major_version, minor_version);
return false;
}
}
#endif
bool inline findFatbinPath(const char *module_file, std::string &module_path, char **argv, std::ostringstream &ostrm)
{
char *actual_path = sdkFindFilePath(module_file, argv[0]);
if (actual_path)
{
module_path = actual_path;
}
else
{
printf("> findModulePath file not found: <%s> \n", module_file);
return false;
}
if (module_path.empty())
{
printf("> findModulePath could not find file: <%s> \n", module_file);
return false;
}
else
{
printf("> findModulePath found file at <%s>\n", module_path.c_str());
if (module_path.rfind("fatbin") != std::string::npos)
{
std::ifstream fileIn(module_path.c_str(), std::ios::binary);
ostrm << fileIn.rdbuf();
fileIn.close();
}
return true;
}
}
// end of CUDA Helper Functions
#endif // COMMON_HELPER_CUDA_DRVAPI_H_