mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 19:29:14 +08:00
Add and update samples for cuda 11.1 support
This commit is contained in:
parent
e6ce58fef4
commit
cd76533c3f
BIN
Common/data/CT_skull_512x512_8u.raw
Normal file
BIN
Common/data/CT_skull_512x512_8u.raw
Normal file
Binary file not shown.
BIN
Common/data/CT_skull_512x512_8u_Gray.raw
Normal file
BIN
Common/data/CT_skull_512x512_8u_Gray.raw
Normal file
Binary file not shown.
1
Common/data/Lena_512x512_8u_Gray.raw
Normal file
1
Common/data/Lena_512x512_8u_Gray.raw
Normal file
File diff suppressed because one or more lines are too long
BIN
Common/data/PCB2_1024x683_8u.raw
Normal file
BIN
Common/data/PCB2_1024x683_8u.raw
Normal file
Binary file not shown.
BIN
Common/data/PCB_1280x720_8u.raw
Normal file
BIN
Common/data/PCB_1280x720_8u.raw
Normal file
Binary file not shown.
BIN
Common/data/PCB_METAL_509x335_8u.raw
Normal file
BIN
Common/data/PCB_METAL_509x335_8u.raw
Normal file
Binary file not shown.
285
Common/data/Rocks_512x512_8u_Gray.raw
Normal file
285
Common/data/Rocks_512x512_8u_Gray.raw
Normal file
File diff suppressed because one or more lines are too long
BIN
Common/data/lena_512x512_8u.raw
Normal file
BIN
Common/data/lena_512x512_8u.raw
Normal file
Binary file not shown.
|
@ -664,6 +664,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
|
||||||
{0x72, 64},
|
{0x72, 64},
|
||||||
{0x75, 64},
|
{0x75, 64},
|
||||||
{0x80, 64},
|
{0x80, 64},
|
||||||
|
{0x86, 128},
|
||||||
{-1, -1}};
|
{-1, -1}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
@ -709,6 +710,7 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
|
||||||
{0x72, "Xavier"},
|
{0x72, "Xavier"},
|
||||||
{0x75, "Turing"},
|
{0x75, "Turing"},
|
||||||
{0x80, "Ampere"},
|
{0x80, "Ampere"},
|
||||||
|
{0x86, "Ampere"},
|
||||||
{-1, "Graphics Device"}};
|
{-1, "Graphics Device"}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
|
@ -33,8 +33,10 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <cstring>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
#include <drvapi_error_string.h>
|
|
||||||
#include <helper_string.h>
|
#include <helper_string.h>
|
||||||
|
|
||||||
#ifndef MAX
|
#ifndef MAX
|
||||||
|
@ -66,48 +68,22 @@ inline int ftoi(float value) {
|
||||||
// These are the inline versions for all of the SDK helper functions
|
// These are the inline versions for all of the SDK helper functions
|
||||||
inline void __checkCudaErrors(CUresult err, const char *file, const int line) {
|
inline void __checkCudaErrors(CUresult err, const char *file, const int line) {
|
||||||
if (CUDA_SUCCESS != err) {
|
if (CUDA_SUCCESS != err) {
|
||||||
|
const char *errorStr = NULL;
|
||||||
|
cuGetErrorString(err, &errorStr);
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, "
|
"checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, "
|
||||||
"line %i.\n",
|
"line %i.\n",
|
||||||
err, getCudaDrvErrorString(err), file, line);
|
err, errorStr, file, line);
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef getLastCudaDrvErrorMsg
|
|
||||||
#undef getLastCudaDrvErrorMsg
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define getLastCudaDrvErrorMsg(msg) \
|
|
||||||
__getLastCudaDrvErrorMsg(msg, __FILE__, __LINE__)
|
|
||||||
|
|
||||||
inline void __getLastCudaDrvErrorMsg(const char *msg, const char *file,
|
|
||||||
const int line) {
|
|
||||||
CUresult err = cuCtxSynchronize();
|
|
||||||
|
|
||||||
if (CUDA_SUCCESS != err) {
|
|
||||||
fprintf(stderr, "getLastCudaDrvErrorMsg -> %s", msg);
|
|
||||||
fprintf(stderr,
|
|
||||||
"getLastCudaDrvErrorMsg -> cuCtxSynchronize API error = %04d "
|
|
||||||
"\"%s\" in file <%s>, line %i.\n",
|
|
||||||
err, getCudaDrvErrorString(err), file, line);
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This function wraps the CUDA Driver API into a template function
|
// This function wraps the CUDA Driver API into a template function
|
||||||
template <class T>
|
template <class T>
|
||||||
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute,
|
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute,
|
||||||
int device) {
|
int device) {
|
||||||
CUresult error_result =
|
checkCudaErrors(cuDeviceGetAttribute(attribute, device_attribute, device));
|
||||||
cuDeviceGetAttribute(attribute, device_attribute, device);
|
|
||||||
|
|
||||||
if (error_result != CUDA_SUCCESS) {
|
|
||||||
printf("cuDeviceGetAttribute returned %d\n-> %s\n",
|
|
||||||
static_cast<int>(error_result), getCudaDrvErrorString(error_result));
|
|
||||||
exit(EXIT_SUCCESS);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -135,6 +111,8 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
||||||
{0x70, 64},
|
{0x70, 64},
|
||||||
{0x72, 64},
|
{0x72, 64},
|
||||||
{0x75, 64},
|
{0x75, 64},
|
||||||
|
{0x80, 64},
|
||||||
|
{0x86, 128},
|
||||||
{-1, -1}};
|
{-1, -1}};
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
@ -161,11 +139,9 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
||||||
inline int gpuDeviceInitDRV(int ARGC, const char **ARGV) {
|
inline int gpuDeviceInitDRV(int ARGC, const char **ARGV) {
|
||||||
int cuDevice = 0;
|
int cuDevice = 0;
|
||||||
int deviceCount = 0;
|
int deviceCount = 0;
|
||||||
CUresult err = cuInit(0);
|
checkCudaErrors(cuInit(0));
|
||||||
|
|
||||||
if (CUDA_SUCCESS == err) {
|
|
||||||
checkCudaErrors(cuDeviceGetCount(&deviceCount));
|
checkCudaErrors(cuDeviceGetCount(&deviceCount));
|
||||||
}
|
|
||||||
|
|
||||||
if (deviceCount == 0) {
|
if (deviceCount == 0) {
|
||||||
fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
|
fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
|
||||||
|
@ -192,7 +168,7 @@ inline int gpuDeviceInitDRV(int ARGC, const char **ARGV) {
|
||||||
|
|
||||||
checkCudaErrors(cuDeviceGet(&cuDevice, dev));
|
checkCudaErrors(cuDeviceGet(&cuDevice, dev));
|
||||||
char name[100];
|
char name[100];
|
||||||
cuDeviceGetName(name, 100, cuDevice);
|
checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
|
||||||
|
|
||||||
int computeMode;
|
int computeMode;
|
||||||
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
|
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
|
||||||
|
@ -218,7 +194,6 @@ inline int gpuGetMaxGflopsDeviceIdDRV() {
|
||||||
int device_count = 0;
|
int device_count = 0;
|
||||||
int sm_per_multiproc = 0;
|
int sm_per_multiproc = 0;
|
||||||
unsigned long long max_compute_perf = 0;
|
unsigned long long max_compute_perf = 0;
|
||||||
int best_SM_arch = 0;
|
|
||||||
int major = 0;
|
int major = 0;
|
||||||
int minor = 0;
|
int minor = 0;
|
||||||
int multiProcessorCount;
|
int multiProcessorCount;
|
||||||
|
@ -234,19 +209,6 @@ inline int gpuGetMaxGflopsDeviceIdDRV() {
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find the best major SM Architecture GPU device
|
|
||||||
while (current_device < device_count) {
|
|
||||||
checkCudaErrors(cuDeviceGetAttribute(
|
|
||||||
&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));
|
|
||||||
checkCudaErrors(cuDeviceGetAttribute(
|
|
||||||
&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device));
|
|
||||||
if (major > 0 && major < 9999) {
|
|
||||||
best_SM_arch = MAX(best_SM_arch, major);
|
|
||||||
}
|
|
||||||
|
|
||||||
current_device++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the best CUDA capable GPU device
|
// Find the best CUDA capable GPU device
|
||||||
current_device = 0;
|
current_device = 0;
|
||||||
|
|
||||||
|
@ -277,18 +239,9 @@ inline int gpuGetMaxGflopsDeviceIdDRV() {
|
||||||
clockRate);
|
clockRate);
|
||||||
|
|
||||||
if (compute_perf > max_compute_perf) {
|
if (compute_perf > max_compute_perf) {
|
||||||
// If we find GPU with SM major > 2, search only these
|
|
||||||
if (best_SM_arch > 2) {
|
|
||||||
// If our device==dest_SM_arch, choose this, or else pass
|
|
||||||
if (major == best_SM_arch) {
|
|
||||||
max_compute_perf = compute_perf;
|
max_compute_perf = compute_perf;
|
||||||
max_perf_device = current_device;
|
max_perf_device = current_device;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
max_compute_perf = compute_perf;
|
|
||||||
max_perf_device = current_device;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
devices_prohibited++;
|
devices_prohibited++;
|
||||||
}
|
}
|
||||||
|
@ -414,7 +367,39 @@ inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
bool inline findFatbinPath(const char *module_file, std::string &module_path, char **argv, std::ostringstream &ostrm)
|
||||||
|
{
|
||||||
|
char *actual_path = sdkFindFilePath(module_file, argv[0]);
|
||||||
|
|
||||||
|
if (actual_path)
|
||||||
|
{
|
||||||
|
module_path = actual_path;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf("> findModulePath file not found: <%s> \n", module_file);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (module_path.empty())
|
||||||
|
{
|
||||||
|
printf("> findModulePath could not find file: <%s> \n", module_file);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf("> findModulePath found file at <%s>\n", module_path.c_str());
|
||||||
|
if (module_path.rfind("fatbin") != std::string::npos)
|
||||||
|
{
|
||||||
|
std::ifstream fileIn(module_path.c_str(), std::ios::binary);
|
||||||
|
ostrm << fileIn.rdbuf();
|
||||||
|
fileIn.close();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// end of CUDA Helper Functions
|
// end of CUDA Helper Functions
|
||||||
|
|
||||||
#endif // COMMON_HELPER_CUDA_DRVAPI_H_
|
#endif // COMMON_HELPER_CUDA_DRVAPI_H_
|
||||||
|
|
||||||
|
|
|
@ -293,6 +293,9 @@ inline char *sdkFindFilePath(const char *filename,
|
||||||
"../../../../Samples/<executable_name>/data/", // up 4 in tree
|
"../../../../Samples/<executable_name>/data/", // up 4 in tree
|
||||||
"../../../Samples/<executable_name>/data/", // up 3 in tree
|
"../../../Samples/<executable_name>/data/", // up 3 in tree
|
||||||
"../../Samples/<executable_name>/data/", // up 2 in tree
|
"../../Samples/<executable_name>/data/", // up 2 in tree
|
||||||
|
"../../../../Common/data/", // up 4 in tree
|
||||||
|
"../../../Common/data", // up 3 in tree
|
||||||
|
"../../Common/data/" // up 2 in tree
|
||||||
};
|
};
|
||||||
|
|
||||||
// Extract the executable name
|
// Extract the executable name
|
||||||
|
|
|
@ -47,8 +47,8 @@
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
void compileFileToPTX(char *filename, int argc, char **argv, char **ptxResult,
|
void compileFileToCUBIN(char *filename, int argc, char **argv, char **cubinResult,
|
||||||
size_t *ptxResultSize, int requiresCGheaders) {
|
size_t *cubinResultSize, int requiresCGheaders) {
|
||||||
std::ifstream inputFile(filename,
|
std::ifstream inputFile(filename,
|
||||||
std::ios::in | std::ios::binary | std::ios::ate);
|
std::ios::in | std::ios::binary | std::ios::ate);
|
||||||
|
|
||||||
|
@ -83,9 +83,9 @@ void compileFileToPTX(char *filename, int argc, char **argv, char **ptxResult,
|
||||||
&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
||||||
|
|
||||||
{
|
{
|
||||||
// Compile for the GPU arch on which are going to run cuda kernel.
|
// Compile cubin for the GPU arch on which are going to run cuda kernel.
|
||||||
std::string compileOptions;
|
std::string compileOptions;
|
||||||
compileOptions = "--gpu-architecture=compute_";
|
compileOptions = "--gpu-architecture=sm_";
|
||||||
|
|
||||||
compileParams[numCompileOptions] = reinterpret_cast<char *>(
|
compileParams[numCompileOptions] = reinterpret_cast<char *>(
|
||||||
malloc(sizeof(char) * (compileOptions.length() + 10)));
|
malloc(sizeof(char) * (compileOptions.length() + 10)));
|
||||||
|
@ -158,21 +158,20 @@ void compileFileToPTX(char *filename, int argc, char **argv, char **ptxResult,
|
||||||
free(log);
|
free(log);
|
||||||
|
|
||||||
NVRTC_SAFE_CALL("nvrtcCompileProgram", res);
|
NVRTC_SAFE_CALL("nvrtcCompileProgram", res);
|
||||||
// fetch PTX
|
|
||||||
size_t ptxSize;
|
size_t codeSize;
|
||||||
NVRTC_SAFE_CALL("nvrtcGetPTXSize", nvrtcGetPTXSize(prog, &ptxSize));
|
NVRTC_SAFE_CALL("nvrtcGetCUBINSize", nvrtcGetCUBINSize(prog, &codeSize));
|
||||||
char *ptx = reinterpret_cast<char *>(malloc(sizeof(char) * ptxSize));
|
char *code = new char[codeSize];
|
||||||
NVRTC_SAFE_CALL("nvrtcGetPTX", nvrtcGetPTX(prog, ptx));
|
NVRTC_SAFE_CALL("nvrtcGetCUBIN", nvrtcGetCUBIN(prog, code));
|
||||||
NVRTC_SAFE_CALL("nvrtcDestroyProgram", nvrtcDestroyProgram(&prog));
|
*cubinResult = code;
|
||||||
*ptxResult = ptx;
|
*cubinResultSize = codeSize;
|
||||||
*ptxResultSize = ptxSize;
|
|
||||||
|
|
||||||
for (int i = 0; i < numCompileOptions; i++) {
|
for (int i = 0; i < numCompileOptions; i++) {
|
||||||
free(compileParams[i]);
|
free(compileParams[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CUmodule loadPTX(char *ptx, int argc, char **argv) {
|
CUmodule loadCUBIN(char *cubin, int argc, char **argv) {
|
||||||
CUmodule module;
|
CUmodule module;
|
||||||
CUcontext context;
|
CUcontext context;
|
||||||
int major = 0, minor = 0;
|
int major = 0, minor = 0;
|
||||||
|
@ -190,11 +189,10 @@ CUmodule loadPTX(char *ptx, int argc, char **argv) {
|
||||||
printf("> GPU Device has SM %d.%d compute capability\n", major, minor);
|
printf("> GPU Device has SM %d.%d compute capability\n", major, minor);
|
||||||
|
|
||||||
checkCudaErrors(cuInit(0));
|
checkCudaErrors(cuInit(0));
|
||||||
checkCudaErrors(cuDeviceGet(&cuDevice, 0));
|
|
||||||
checkCudaErrors(cuCtxCreate(&context, 0, cuDevice));
|
checkCudaErrors(cuCtxCreate(&context, 0, cuDevice));
|
||||||
|
|
||||||
checkCudaErrors(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
|
checkCudaErrors(cuModuleLoadData(&module, cubin));
|
||||||
free(ptx);
|
free(cubin);
|
||||||
|
|
||||||
return module;
|
return module;
|
||||||
}
|
}
|
||||||
|
|
55
README.md
55
README.md
|
@ -1,11 +1,20 @@
|
||||||
# CUDA Samples
|
# CUDA Samples
|
||||||
|
|
||||||
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads).
|
Samples for CUDA developers that demonstrate features in the CUDA Toolkit. This version supports [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads).
|
||||||
|
|
||||||
## Release Notes
|
## Release Notes
|
||||||
|
|
||||||
This section describes the release notes for the CUDA Samples on GitHub only.
|
This section describes the release notes for the CUDA Samples on GitHub only.
|
||||||
|
|
||||||
|
### CUDA 11.1
|
||||||
|
* Added `watershedSegmentationNPP`. Demonstrates how to use the NPP watershed segmentation function.
|
||||||
|
* Added `batchedLabelMarkersAndLabelCompressionNPP`. Demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both single image and batched image versions.
|
||||||
|
* Dropped Visual Studio 2012 and 2013 support from all samples supported on Windows.
|
||||||
|
* Added a kernel performing warp-aggregated atomic max in multiple buckets using `cg::labeled_partition` and `cg::reduce` in `warpAggregatedAtomicsCG`.
|
||||||
|
* Added extended CG shuffle mechanics to `shfl_scan` sample.
|
||||||
|
* Added `cudaOpenMP`. Demonstrates how to use the OpenMP API to write an application for multiple GPUs.
|
||||||
|
* Added `simpleZeroCopy`. Demonstrates how to use zero copy, where kernels read and write directly to pinned system memory.
|
||||||
|
|
||||||
### CUDA 11.0
|
### CUDA 11.0
|
||||||
* Added `dmmaTensorCoreGemm`. Demonstrates double precision GEMM computation using the Double precision Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores.
|
* Added `dmmaTensorCoreGemm`. Demonstrates double precision GEMM computation using the Double precision Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores.
|
||||||
* Added `bf16TensorCoreGemm`. Demonstrates __nv_bfloat16 (e8m7) GEMM computation using the __nv_bfloat16 WMMA API introduced with CUDA 11 in Ampere chip family tensor cores.
|
* Added `bf16TensorCoreGemm`. Demonstrates __nv_bfloat16 (e8m7) GEMM computation using the __nv_bfloat16 WMMA API introduced with CUDA 11 in Ampere chip family tensor cores.
|
||||||
|
@ -84,7 +93,7 @@ This is the first release of CUDA Samples on GitHub:
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
|
For system requirements and installation instructions of cuda toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
|
||||||
|
|
||||||
### Getting the CUDA Samples
|
### Getting the CUDA Samples
|
||||||
|
@ -144,31 +153,33 @@ The samples makefiles can take advantage of certain options:
|
||||||
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** |
|
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** |
|
||||||
---|---|---|---|
|
---|---|---|---|
|
||||||
**[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** |
|
**[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** |
|
||||||
**[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** |
|
**[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[shfl_scan](./Samples/shfl_scan)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
|
||||||
**[nvJPEG](./Samples/nvJPEG)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** |
|
**[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[nvJPEG](./Samples/nvJPEG)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** |
|
||||||
**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[cudaNvSci](./Samples/cudaNvSci)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** |
|
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
|
||||||
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** |
|
**[simpleAttributes](./Samples/simpleAttributes)** | **[cudaNvSci](./Samples/cudaNvSci)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** |
|
||||||
**[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** |
|
**[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** |
|
||||||
**[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** |
|
**[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
|
||||||
**[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
**[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
|
||||||
**[concurrentKernels](./Samples/concurrentKernels)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** |
|
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[concurrentKernels](./Samples/concurrentKernels)** |
|
||||||
**[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||||
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[matrixMul](./Samples/matrixMul)** | **[systemWideAtomics](./Samples/systemWideAtomics)** |
|
**[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
|
||||||
|
**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[matrixMul](./Samples/matrixMul)** | **[systemWideAtomics](./Samples/systemWideAtomics)** |
|
||||||
|
|
||||||
#### Windows
|
#### Windows
|
||||||
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** |
|
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** |
|
||||||
---|---|---|---|
|
---|---|---|---|
|
||||||
**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** |
|
**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** |
|
||||||
**[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[nvJPEG](./Samples/nvJPEG)** |
|
**[shfl_scan](./Samples/shfl_scan)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** |
|
||||||
**[simpleD3D12](./Samples/simpleD3D12)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** |
|
**[nvJPEG](./Samples/nvJPEG)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[simpleD3D12](./Samples/simpleD3D12)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** |
|
||||||
**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** |
|
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
|
||||||
**[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** |
|
**[simpleAttributes](./Samples/simpleAttributes)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** |
|
||||||
**[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
|
**[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** |
|
||||||
**[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[simpleD3D11](./Samples/simpleD3D11)** |
|
**[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** |
|
||||||
**[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
**[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[simpleD3D11](./Samples/simpleD3D11)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
|
||||||
**[concurrentKernels](./Samples/concurrentKernels)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** |
|
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[concurrentKernels](./Samples/concurrentKernels)** |
|
||||||
**[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||||
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[matrixMul](./Samples/matrixMul)** |
|
**[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
|
||||||
|
**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[matrixMul](./Samples/matrixMul)** |
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
|
|
|
@ -59,6 +59,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ EGLStreams Interop
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ cuDeviceGet, cuDeviceGetAttribute, cuDeviceComputeCapability, cuDeviceGetCount,
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -102,6 +102,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -107,6 +107,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -103,6 +103,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -45,6 +45,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ CURAND Library
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
|
@ -274,9 +274,9 @@ LIBRARIES :=
|
||||||
|
|
||||||
# Gencode arguments
|
# Gencode arguments
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||||
else
|
else
|
||||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
ifeq ($(SMS),)
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
|
||||||
# Visual Studio 2013
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NV12toBGRandResize", "NV12toBGRandResize_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,112 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>NV12toBGRandResize_vs2013</RootNamespace>
|
|
||||||
<ProjectName>NV12toBGRandResize</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v120</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
<Include>./;../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="bgr_resize.cu" />
|
|
||||||
<CudaCompile Include="nv12_resize.cu" />
|
|
||||||
<CudaCompile Include="nv12_to_bgr_planar.cu" />
|
|
||||||
<ClCompile Include="resize_convert_main.cpp" />
|
|
||||||
<CudaCompile Include="utils.cu" />
|
|
||||||
<ClInclude Include="resize_convert.h" />
|
|
||||||
<ClInclude Include="utils.h" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -62,7 +62,7 @@
|
||||||
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -107,6 +107,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -67,7 +67,7 @@
|
||||||
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -112,6 +112,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -63,7 +63,7 @@
|
||||||
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -108,6 +108,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Video Processing
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ cudaMemcpy2D, cudaMallocManaged
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
||||||
|
|
|
@ -274,9 +274,9 @@ LIBRARIES :=
|
||||||
|
|
||||||
# Gencode arguments
|
# Gencode arguments
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||||
else
|
else
|
||||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
ifeq ($(SMS),)
|
||||||
|
|
|
@ -28,7 +28,7 @@ cudaMallocManaged, cudaStreamAttachMemAsync, cudaMemcpyAsync, cudaMallocHost, cu
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2012
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnifiedMemoryPerf", "UnifiedMemoryPerf_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
|
||||||
# Visual Studio 2013
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnifiedMemoryPerf", "UnifiedMemoryPerf_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,110 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>UnifiedMemoryPerf_vs2013</RootNamespace>
|
|
||||||
<ProjectName>UnifiedMemoryPerf</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v120</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
<Include>./;../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="commonKernels.cu" />
|
|
||||||
<ClCompile Include="helperFunctions.cpp" />
|
|
||||||
<CudaCompile Include="matrixMultiplyPerf.cu" />
|
|
||||||
<ClInclude Include="commonDefs.hpp" />
|
|
||||||
<ClInclude Include="commonKernels.hpp" />
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -62,7 +62,7 @@
|
||||||
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -105,6 +105,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -67,7 +67,7 @@
|
||||||
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -110,6 +110,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -63,7 +63,7 @@
|
||||||
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -106,6 +106,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
#define VERIFY_GPU_CORRECTNESS 0
|
#define VERIFY_GPU_CORRECTNESS 0
|
||||||
|
|
||||||
size_t maxSampleSizeInMb = 64;
|
size_t maxSampleSizeInMb = 64;
|
||||||
int numKernelRuns = 100;
|
int numKernelRuns = 20;
|
||||||
int verboseResults = 0;
|
int verboseResults = 0;
|
||||||
|
|
||||||
const char *memAllocTypeStr[MEMALLOC_TYPE_COUNT] = {
|
const char *memAllocTypeStr[MEMALLOC_TYPE_COUNT] = {
|
||||||
|
|
|
@ -268,9 +268,9 @@ LIBRARIES :=
|
||||||
|
|
||||||
# Gencode arguments
|
# Gencode arguments
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||||
else
|
else
|
||||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
ifeq ($(SMS),)
|
||||||
|
|
|
@ -51,6 +51,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ CUDA Streams and Events, Performance Strategies
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ cudaSetDevice, cudaHostAlloc, cudaFree, cudaMallocHost, cudaFreeHost, cudaMemcpy
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2012
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bandwidthTest", "bandwidthTest_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
|
||||||
# Visual Studio 2013
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bandwidthTest", "bandwidthTest_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -62,7 +62,7 @@
|
||||||
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -102,6 +102,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -67,7 +67,7 @@
|
||||||
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -107,6 +107,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -63,7 +63,7 @@
|
||||||
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -103,6 +103,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
329
Samples/batchedLabelMarkersAndLabelCompressionNPP/Makefile
Normal file
329
Samples/batchedLabelMarkersAndLabelCompressionNPP/Makefile
Normal file
|
@ -0,0 +1,329 @@
|
||||||
|
################################################################################
|
||||||
|
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions
|
||||||
|
# are met:
|
||||||
|
# * Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in the
|
||||||
|
# documentation and/or other materials provided with the distribution.
|
||||||
|
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived
|
||||||
|
# from this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||||
|
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
#
|
||||||
|
# Makefile project only supported on Mac OS X and Linux platforms
|
||||||
|
#
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
# Location of the CUDA Toolkit
|
||||||
|
CUDA_PATH ?= /usr/local/cuda
|
||||||
|
|
||||||
|
##############################
|
||||||
|
# start deprecated interface #
|
||||||
|
##############################
|
||||||
|
ifeq ($(x86_64),1)
|
||||||
|
$(info WARNING - x86_64 variable has been deprecated)
|
||||||
|
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||||
|
TARGET_ARCH ?= x86_64
|
||||||
|
endif
|
||||||
|
ifeq ($(ARMv7),1)
|
||||||
|
$(info WARNING - ARMv7 variable has been deprecated)
|
||||||
|
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||||
|
TARGET_ARCH ?= armv7l
|
||||||
|
endif
|
||||||
|
ifeq ($(aarch64),1)
|
||||||
|
$(info WARNING - aarch64 variable has been deprecated)
|
||||||
|
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||||
|
TARGET_ARCH ?= aarch64
|
||||||
|
endif
|
||||||
|
ifeq ($(ppc64le),1)
|
||||||
|
$(info WARNING - ppc64le variable has been deprecated)
|
||||||
|
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||||
|
TARGET_ARCH ?= ppc64le
|
||||||
|
endif
|
||||||
|
ifneq ($(GCC),)
|
||||||
|
$(info WARNING - GCC variable has been deprecated)
|
||||||
|
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||||
|
HOST_COMPILER ?= $(GCC)
|
||||||
|
endif
|
||||||
|
ifneq ($(abi),)
|
||||||
|
$(error ERROR - abi variable has been removed)
|
||||||
|
endif
|
||||||
|
############################
|
||||||
|
# end deprecated interface #
|
||||||
|
############################
|
||||||
|
|
||||||
|
# architecture
|
||||||
|
HOST_ARCH := $(shell uname -m)
|
||||||
|
TARGET_ARCH ?= $(HOST_ARCH)
|
||||||
|
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||||
|
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||||
|
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||||
|
TARGET_SIZE := 64
|
||||||
|
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||||
|
TARGET_SIZE := 32
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||||
|
endif
|
||||||
|
|
||||||
|
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||||
|
ifeq ($(HOST_ARCH),aarch64)
|
||||||
|
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux))
|
||||||
|
HOST_ARCH := sbsa
|
||||||
|
TARGET_ARCH := sbsa
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||||
|
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||||
|
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
# On a native aarch64 system with a 32-bit userspace, change TARGET_ARCH to armv7l
|
||||||
|
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||||
|
TARGET_ARCH = armv7l
|
||||||
|
endif
|
||||||
|
|
||||||
|
# operating system
|
||||||
|
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||||
|
TARGET_OS ?= $(HOST_OS)
|
||||||
|
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||||
|
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||||
|
endif
|
||||||
|
|
||||||
|
# host compiler
|
||||||
|
ifeq ($(TARGET_OS),darwin)
|
||||||
|
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||||
|
HOST_COMPILER ?= clang++
|
||||||
|
endif
|
||||||
|
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||||
|
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||||
|
ifeq ($(TARGET_OS),linux)
|
||||||
|
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||||
|
else ifeq ($(TARGET_OS),qnx)
|
||||||
|
ifeq ($(QNX_HOST),)
|
||||||
|
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||||
|
endif
|
||||||
|
ifeq ($(QNX_TARGET),)
|
||||||
|
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||||
|
endif
|
||||||
|
export QNX_HOST
|
||||||
|
export QNX_TARGET
|
||||||
|
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||||
|
else ifeq ($(TARGET_OS),android)
|
||||||
|
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||||
|
endif
|
||||||
|
else ifeq ($(TARGET_ARCH),aarch64)
|
||||||
|
ifeq ($(TARGET_OS), linux)
|
||||||
|
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||||
|
else ifeq ($(TARGET_OS),qnx)
|
||||||
|
ifeq ($(QNX_HOST),)
|
||||||
|
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||||
|
endif
|
||||||
|
ifeq ($(QNX_TARGET),)
|
||||||
|
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||||
|
endif
|
||||||
|
export QNX_HOST
|
||||||
|
export QNX_TARGET
|
||||||
|
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
|
||||||
|
else ifeq ($(TARGET_OS), android)
|
||||||
|
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||||
|
endif
|
||||||
|
else ifeq ($(TARGET_ARCH),sbsa)
|
||||||
|
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||||
|
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||||
|
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
HOST_COMPILER ?= g++
|
||||||
|
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||||
|
|
||||||
|
# internal flags
|
||||||
|
NVCCFLAGS := -m${TARGET_SIZE}
|
||||||
|
CCFLAGS :=
|
||||||
|
LDFLAGS :=
|
||||||
|
|
||||||
|
# build flags
|
||||||
|
ifeq ($(TARGET_OS),darwin)
|
||||||
|
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||||
|
CCFLAGS += -arch $(HOST_ARCH)
|
||||||
|
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||||
|
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||||
|
CCFLAGS += -mfloat-abi=hard
|
||||||
|
else ifeq ($(TARGET_OS),android)
|
||||||
|
LDFLAGS += -pie
|
||||||
|
CCFLAGS += -fpie -fpic -fexceptions
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||||
|
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||||
|
ifneq ($(TARGET_FS),)
|
||||||
|
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||||
|
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||||
|
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||||
|
endif
|
||||||
|
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||||
|
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||||
|
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||||
|
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||||
|
ifneq ($(TARGET_FS),)
|
||||||
|
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||||
|
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||||
|
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||||
|
endif
|
||||||
|
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||||
|
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||||
|
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||||
|
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||||
|
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||||
|
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||||
|
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include
|
||||||
|
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||||
|
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||||
|
LDFLAGS += -lsocket
|
||||||
|
LDFLAGS += -rpath=/usr/lib/aarch64-qnx-gnu -L/usr/lib/aarch64-qnx-gnu
|
||||||
|
ifneq ($(TARGET_FS),)
|
||||||
|
LDFLAGS += -rpath=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib
|
||||||
|
LDFLAGS += -rpath=$(TARGET_FS)/usr/libnvidia -L $(TARGET_FS)/usr/libnvidia
|
||||||
|
endif
|
||||||
|
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||||
|
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Install directory of different arch
|
||||||
|
CUDA_INSTALL_TARGET_DIR :=
|
||||||
|
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||||
|
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||||
|
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||||
|
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||||
|
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||||
|
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||||
|
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||||
|
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||||
|
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||||
|
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||||
|
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||||
|
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||||
|
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||||
|
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||||
|
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||||
|
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Debug build flags
|
||||||
|
ifeq ($(dbg),1)
|
||||||
|
NVCCFLAGS += -g -G
|
||||||
|
BUILD_TYPE := debug
|
||||||
|
else
|
||||||
|
BUILD_TYPE := release
|
||||||
|
endif
|
||||||
|
|
||||||
|
ALL_CCFLAGS :=
|
||||||
|
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||||
|
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||||
|
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||||
|
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||||
|
|
||||||
|
SAMPLE_ENABLED := 1
|
||||||
|
|
||||||
|
# This sample is not supported on Mac OSX
|
||||||
|
ifeq ($(TARGET_OS),darwin)
|
||||||
|
$(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on Mac OSX - waiving sample <<<)
|
||||||
|
SAMPLE_ENABLED := 0
|
||||||
|
endif
|
||||||
|
|
||||||
|
ALL_LDFLAGS :=
|
||||||
|
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||||
|
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||||
|
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||||
|
|
||||||
|
# Common includes and paths for CUDA
|
||||||
|
INCLUDES := -I../../Common
|
||||||
|
LIBRARIES :=
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
# Gencode arguments
|
||||||
|
SMS ?=
|
||||||
|
|
||||||
|
ifeq ($(GENCODE_FLAGS),)
|
||||||
|
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||||
|
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||||
|
|
||||||
|
ifeq ($(SMS),)
|
||||||
|
# Generate PTX code from SM 35
|
||||||
|
GENCODE_FLAGS += -gencode arch=compute_35,code=compute_35
|
||||||
|
endif
|
||||||
|
|
||||||
|
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||||
|
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||||
|
ifneq ($(HIGHEST_SM),)
|
||||||
|
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
LIBRARIES += -lnppisu_static -lnppif_static -lnppc_static -lculibos
|
||||||
|
|
||||||
|
ifeq ($(SAMPLE_ENABLED),0)
|
||||||
|
EXEC ?= @echo "[@]"
|
||||||
|
endif
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
# Target rules
|
||||||
|
all: build
|
||||||
|
|
||||||
|
build: batchedLabelMarkersAndLabelCompressionNPP
|
||||||
|
|
||||||
|
check.deps:
|
||||||
|
ifeq ($(SAMPLE_ENABLED),0)
|
||||||
|
@echo "Sample will be waived due to the above missing dependencies"
|
||||||
|
else
|
||||||
|
@echo "Sample is ready - all dependencies have been met"
|
||||||
|
endif
|
||||||
|
|
||||||
|
batchedLabelMarkersAndLabelCompressionNPP.o:batchedLabelMarkersAndLabelCompressionNPP.cpp
|
||||||
|
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||||
|
|
||||||
|
batchedLabelMarkersAndLabelCompressionNPP: batchedLabelMarkersAndLabelCompressionNPP.o
|
||||||
|
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
||||||
|
$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||||
|
$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||||
|
|
||||||
|
run: build
|
||||||
|
$(EXEC) ./batchedLabelMarkersAndLabelCompressionNPP
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f batchedLabelMarkersAndLabelCompressionNPP batchedLabelMarkersAndLabelCompressionNPP.o
|
||||||
|
rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/batchedLabelMarkersAndLabelCompressionNPP
|
||||||
|
|
||||||
|
clobber: clean
|
|
@ -0,0 +1,76 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||||
|
<entry>
|
||||||
|
<name>batchedLabelMarkersAndLabelCompressionNPP</name>
|
||||||
|
<description><![CDATA[An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both
|
||||||
|
single image and batched image versions.]]></description>
|
||||||
|
<devicecompilation>whole</devicecompilation>
|
||||||
|
<fallback_min_ptx>true</fallback_min_ptx>
|
||||||
|
<includepaths>
|
||||||
|
<path>./</path>
|
||||||
|
<path>../</path>
|
||||||
|
<path>../../common/inc</path>
|
||||||
|
</includepaths>
|
||||||
|
<keyconcepts>
|
||||||
|
<concept level="basic">Performance Strategies</concept>
|
||||||
|
<concept level="basic">Image Processing</concept>
|
||||||
|
<concept level="basic">NPP Library</concept>
|
||||||
|
<concept level="basic">Using NPP Batch Functions</concept>
|
||||||
|
</keyconcepts>
|
||||||
|
<keywords>
|
||||||
|
<keyword>CUDA</keyword>
|
||||||
|
<keyword>NPP</keyword>
|
||||||
|
<keyword>Image Processing</keyword>
|
||||||
|
</keywords>
|
||||||
|
<libraries>
|
||||||
|
<library>nppisu_static</library>
|
||||||
|
<library>nppif_static</library>
|
||||||
|
<library>nppc_static</library>
|
||||||
|
<library>culibos</library>
|
||||||
|
</libraries>
|
||||||
|
<librarypaths>
|
||||||
|
</librarypaths>
|
||||||
|
<nsight_eclipse>true</nsight_eclipse>
|
||||||
|
<primary_file>batchedLabelMarkersAndLabelCompressionNPP.cpp</primary_file>
|
||||||
|
<required_dependencies>
|
||||||
|
<dependency>NPP</dependency>
|
||||||
|
</required_dependencies>
|
||||||
|
<scopes>
|
||||||
|
<scope>1:CUDA Basic Topics</scope>
|
||||||
|
<scope>1:Performance Strategies</scope>
|
||||||
|
<scope>2:Image Processing</scope>
|
||||||
|
<scope>2:Computer Vision</scope>
|
||||||
|
</scopes>
|
||||||
|
<sm-arch>sm35</sm-arch>
|
||||||
|
<sm-arch>sm37</sm-arch>
|
||||||
|
<sm-arch>sm50</sm-arch>
|
||||||
|
<sm-arch>sm52</sm-arch>
|
||||||
|
<sm-arch>sm60</sm-arch>
|
||||||
|
<sm-arch>sm61</sm-arch>
|
||||||
|
<sm-arch>sm70</sm-arch>
|
||||||
|
<sm-arch>sm72</sm-arch>
|
||||||
|
<sm-arch>sm75</sm-arch>
|
||||||
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
|
<supported_envs>
|
||||||
|
<env>
|
||||||
|
<arch>x86_64</arch>
|
||||||
|
<platform>linux</platform>
|
||||||
|
</env>
|
||||||
|
<env>
|
||||||
|
<platform>windows7</platform>
|
||||||
|
</env>
|
||||||
|
<env>
|
||||||
|
<arch>arm</arch>
|
||||||
|
</env>
|
||||||
|
<env>
|
||||||
|
<arch>ppc64le</arch>
|
||||||
|
<platform>linux</platform>
|
||||||
|
</env>
|
||||||
|
</supported_envs>
|
||||||
|
<supported_sm_architectures>
|
||||||
|
<include>all</include>
|
||||||
|
</supported_sm_architectures>
|
||||||
|
<title>Batched Label Markers And Label Compression NPP</title>
|
||||||
|
<type>exe</type>
|
||||||
|
</entry>
|
72
Samples/batchedLabelMarkersAndLabelCompressionNPP/README.md
Normal file
72
Samples/batchedLabelMarkersAndLabelCompressionNPP/README.md
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
# batchedLabelMarkersAndLabelCompressionNPP - Batched Label Markers And Label Compression NPP
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both
|
||||||
|
single image and batched image versions.
|
||||||
|
|
||||||
|
## Key Concepts
|
||||||
|
|
||||||
|
Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions
|
||||||
|
|
||||||
|
## Supported SM Architectures
|
||||||
|
|
||||||
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
|
## Supported OSes
|
||||||
|
|
||||||
|
Linux, Windows
|
||||||
|
|
||||||
|
## Supported CPU Architecture
|
||||||
|
|
||||||
|
x86_64, ppc64le, armv7l
|
||||||
|
|
||||||
|
## CUDA APIs involved
|
||||||
|
|
||||||
|
## Dependencies needed to build/run
|
||||||
|
[NPP](../../README.md#npp)
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
||||||
|
|
||||||
|
## Build and Run
|
||||||
|
|
||||||
|
### Windows
|
||||||
|
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||||
|
```
|
||||||
|
*_vs<version>.sln - for Visual Studio <version>
|
||||||
|
```
|
||||||
|
Each individual sample has its own set of solution files in its directory.
|
||||||
|
|
||||||
|
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||||
|
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check the DirectX Dependencies section for details.
|
||||||
|
|
||||||
|
### Linux
|
||||||
|
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||||
|
```
|
||||||
|
$ cd <sample_dir>
|
||||||
|
$ make
|
||||||
|
```
|
||||||
|
The samples makefiles can take advantage of certain options:
|
||||||
|
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
||||||
|
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||||
|
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
||||||
|
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||||
|
* **dbg=1** - build with debug symbols
|
||||||
|
```
|
||||||
|
$ make dbg=1
|
||||||
|
```
|
||||||
|
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||||
|
```
|
||||||
|
$ make SMS="50 60"
|
||||||
|
```
|
||||||
|
|
||||||
|
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||||
|
```
|
||||||
|
$ make HOST_COMPILER=g++
|
||||||
|
```
|
||||||
|
|
||||||
|
## References (for more details)
|
||||||
|
|
|
@ -0,0 +1,798 @@
|
||||||
|
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||||
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// WIN32_LEAN_AND_MEAN is the macro <windows.h> actually checks in order to
// leave out its rarely used sub-headers (the previous spelling,
// WINDOWS_LEAN_AND_MEAN, is not recognised and therefore had no effect).
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
// Keep <windows.h> from defining min()/max() as macros.
#define NOMINMAX
#include <windows.h>
// Silence MSVC warning C4819 (character not representable in the current
// code page).
#pragma warning(disable : 4819)
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#include <helper_cuda.h>
|
||||||
|
#include <npp.h>
|
||||||
|
|
||||||
|
// Note: If you want to view these images we HIGHLY recommend using imagej
|
||||||
|
// which is free on the internet and works on most platforms
|
||||||
|
// because it is one of the few image viewing apps that can display 32
|
||||||
|
// bit integer image data. While it normalizes the data to floating
|
||||||
|
// point values for viewing it still provides a good representation of
|
||||||
|
// the relative brightness of each label value. Note that label
|
||||||
|
// compression output results in smaller differences between label values
|
||||||
|
// making it visually more difficult to detect differences in labeled
|
||||||
|
// regions. If you have an editor that can display hex values you can
|
||||||
|
// see what the exact value of each label is; every 4 bytes represent 1
|
||||||
|
// 32 bit integer label value.
|
||||||
|
//
|
||||||
|
// The files read and written by this sample app use RAW image format,
|
||||||
|
// that is, only the image data itself exists in the files with no image
|
||||||
|
// format information. When viewing RAW files with imagej just enter
|
||||||
|
// the image size and bit depth values that are part of the file name
|
||||||
|
// when requested by imagej.
|
||||||
|
//
|
||||||
|
// This sample app works in 2 stages, first it processes all of the
|
||||||
|
// images individually then it processes them all again in 1 batch using
|
||||||
|
// the Batch_Advanced versions of the NPP batch functions which allow
|
||||||
|
// each image to have its own ROI. The 2 stages are completely
|
||||||
|
// separable but in this sample the second stage takes advantage of some
|
||||||
|
// of the data that has already been initialized.
|
||||||
|
//
|
||||||
|
// Note that there is a small amount of variability in the number of
|
||||||
|
// unique label markers generated from one run to the next by the UF
|
||||||
|
// algorithm.
|
||||||
|
//
|
||||||
|
// Performance of ALL NPP image batch functions is limited by the maximum
|
||||||
|
// ROI height in the list of images.
|
||||||
|
|
||||||
|
// Batched label compression support is only available on NPP versions > 11.0,
|
||||||
|
// comment out if using NPP 11.0
|
||||||
|
#define USE_BATCHED_LABEL_COMPRESSION 1
|
||||||
|
|
||||||
|
#define NUMBER_OF_IMAGES 5
|
||||||
|
|
||||||
|
Npp8u *pInputImageDev[NUMBER_OF_IMAGES];
|
||||||
|
Npp8u *pInputImageHost[NUMBER_OF_IMAGES];
|
||||||
|
Npp8u *pUFGenerateLabelsScratchBufferDev[NUMBER_OF_IMAGES];
|
||||||
|
Npp8u *pUFCompressedLabelsScratchBufferDev[NUMBER_OF_IMAGES];
|
||||||
|
Npp32u *pUFLabelDev[NUMBER_OF_IMAGES];
|
||||||
|
Npp32u *pUFLabelHost[NUMBER_OF_IMAGES];
|
||||||
|
NppiImageDescriptor *pUFBatchSrcImageListDev = 0;
|
||||||
|
NppiImageDescriptor *pUFBatchSrcDstImageListDev = 0;
|
||||||
|
NppiImageDescriptor *pUFBatchSrcImageListHost = 0;
|
||||||
|
NppiImageDescriptor *pUFBatchSrcDstImageListHost = 0;
|
||||||
|
NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListDev =
|
||||||
|
0; // from nppi_filtering_functions.h
|
||||||
|
NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListHost = 0;
|
||||||
|
Npp32u *pUFBatchPerImageCompressedCountListDev = 0;
|
||||||
|
Npp32u *pUFBatchPerImageCompressedCountListHost = 0;
|
||||||
|
|
||||||
|
void tearDown() // Clean up and tear down
|
||||||
|
{
|
||||||
|
if (pUFBatchPerImageCompressedCountListDev != 0)
|
||||||
|
cudaFree(pUFBatchPerImageCompressedCountListDev);
|
||||||
|
if (pUFBatchSrcDstScratchBufferListDev != 0)
|
||||||
|
cudaFree(pUFBatchSrcDstScratchBufferListDev);
|
||||||
|
if (pUFBatchSrcDstImageListDev != 0) cudaFree(pUFBatchSrcDstImageListDev);
|
||||||
|
if (pUFBatchSrcImageListDev != 0) cudaFree(pUFBatchSrcImageListDev);
|
||||||
|
if (pUFBatchPerImageCompressedCountListHost != 0)
|
||||||
|
free(pUFBatchPerImageCompressedCountListHost);
|
||||||
|
if (pUFBatchSrcDstScratchBufferListHost != 0)
|
||||||
|
free(pUFBatchSrcDstScratchBufferListHost);
|
||||||
|
if (pUFBatchSrcDstImageListHost != 0) free(pUFBatchSrcDstImageListHost);
|
||||||
|
if (pUFBatchSrcImageListHost != 0) free(pUFBatchSrcImageListHost);
|
||||||
|
|
||||||
|
for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
|
||||||
|
if (pUFCompressedLabelsScratchBufferDev[j] != 0)
|
||||||
|
cudaFree(pUFCompressedLabelsScratchBufferDev[j]);
|
||||||
|
if (pUFGenerateLabelsScratchBufferDev[j] != 0)
|
||||||
|
cudaFree(pUFGenerateLabelsScratchBufferDev[j]);
|
||||||
|
if (pUFLabelDev[j] != 0) cudaFree(pUFLabelDev[j]);
|
||||||
|
if (pInputImageDev[j] != 0) cudaFree(pInputImageDev[j]);
|
||||||
|
if (pUFLabelHost[j] != 0) free(pUFLabelHost[j]);
|
||||||
|
if (pInputImageHost[j] != 0) free(pInputImageHost[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output file names. All outputs are headerless RAW files of 32-bit unsigned
// labels; the image dimensions are encoded in each name.
//
// These are plain const objects rather than const references bound to
// temporaries: same names, same contents, same usage (.c_str()), but without
// relying on lifetime extension of a temporary std::string.

// Label markers, one image at a time.
const std::string LabelMarkersOutputFile0 =
    "Lena_LabelMarkersUF_8Way_512x512_32u.raw";
const std::string LabelMarkersOutputFile1 =
    "CT_skull_LabelMarkersUF_8Way_512x512_32u.raw";
const std::string LabelMarkersOutputFile2 =
    "PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw";
const std::string LabelMarkersOutputFile3 =
    "PCB2_LabelMarkersUF_8Way_1024x683_32u.raw";
const std::string LabelMarkersOutputFile4 =
    "PCB_LabelMarkersUF_8Way_1280x720_32u.raw";

// Compressed marker labels, one image at a time.
const std::string CompressedMarkerLabelsOutputFile0 =
    "Lena_CompressedMarkerLabelsUF_8Way_512x512_32u.raw";
const std::string CompressedMarkerLabelsOutputFile1 =
    "CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw";
const std::string CompressedMarkerLabelsOutputFile2 =
    "PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw";
const std::string CompressedMarkerLabelsOutputFile3 =
    "PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw";
const std::string CompressedMarkerLabelsOutputFile4 =
    "PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw";

// Label markers, batched processing.
const std::string LabelMarkersBatchOutputFile0 =
    "Lena_LabelMarkersUFBatch_8Way_512x512_32u.raw";
const std::string LabelMarkersBatchOutputFile1 =
    "CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw";
const std::string LabelMarkersBatchOutputFile2 =
    "PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw";
const std::string LabelMarkersBatchOutputFile3 =
    "PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw";
const std::string LabelMarkersBatchOutputFile4 =
    "PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw";

// Compressed marker labels, batched processing.
const std::string CompressedMarkerLabelsBatchOutputFile0 =
    "Lena_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw";
const std::string CompressedMarkerLabelsBatchOutputFile1 =
    "CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw";
const std::string CompressedMarkerLabelsBatchOutputFile2 =
    "PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw";
const std::string CompressedMarkerLabelsBatchOutputFile3 =
    "PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw";
const std::string CompressedMarkerLabelsBatchOutputFile4 =
    "PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw";
|
||||||
|
|
||||||
|
// Loads one of the sample's headerless 8-bit RAW input images into pImage.
//
// Parameters:
//   pImage  - destination buffer; must hold at least nWidth * nHeight bytes.
//   nWidth  - expected image width in pixels (one byte per pixel).
//   nHeight - expected image height in pixels.
//   nImage  - index of the input image to load (0..4, see the table below).
//
// Returns 0 on success and -1 when the index is unknown, the destination is
// NULL, the requested dimensions do not match the selected image, the file
// cannot be opened, or fewer than nWidth * nHeight bytes could be read.
// If the file cannot be located at all, the sample is waived via
// exit(EXIT_WAIVED).
int loadRaw8BitImage(Npp8u *pImage, int nWidth, int nHeight, int nImage) {
  // File name and dimensions of each known input image, indexed by nImage.
  struct RawImageInfo {
    const char *fileName;
    int width;
    int height;
  };
  static const RawImageInfo kImages[] = {
      {"lena_512x512_8u.raw", 512, 512},
      {"CT_skull_512x512_8u.raw", 512, 512},
      {"PCB_METAL_509x335_8u.raw", 509, 335},
      {"PCB2_1024x683_8u.raw", 1024, 683},
      {"PCB_1280x720_8u.raw", 1280, 720}};
  const int nKnownImages =
      static_cast<int>(sizeof(kImages) / sizeof(kImages[0]));

  if (pImage == NULL || nImage < 0 || nImage >= nKnownImages) {
    printf("Input file load failed.\n");
    return -1;
  }

  const RawImageInfo &info = kImages[nImage];
  if (nWidth != info.width || nHeight != info.height) return -1;

  char *inputPath = sdkFindFilePath(info.fileName, ".");
  if (inputPath == NULL) {
    printf("%s file not found.. exiting\n", info.fileName);
    exit(EXIT_WAIVED);
  }

  FILE *rawFile = fopen(inputPath, "rb");
  // The path string is heap-allocated (malloc) by sdkFindFilePath() in
  // helper_string.h; it is no longer needed once the open was attempted.
  free(inputPath);
  if (rawFile == NULL) return -1;

  // Dimensions were validated against the table above, so both are positive;
  // do the size arithmetic in size_t to match fread()'s return type.
  const size_t nExpected =
      static_cast<size_t>(nWidth) * static_cast<size_t>(nHeight);
  const size_t nRead = fread(pImage, 1, nExpected, rawFile);
  fclose(rawFile);

  if (nRead < nExpected) return -1;

  printf("Input file load succeeded.\n");

  return 0;
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
int aGenerateLabelsScratchBufferSize[NUMBER_OF_IMAGES];
|
||||||
|
int aCompressLabelsScratchBufferSize[NUMBER_OF_IMAGES];
|
||||||
|
|
||||||
|
int nCompressedLabelCount = 0;
|
||||||
|
cudaError_t cudaError;
|
||||||
|
NppStatus nppStatus;
|
||||||
|
NppStreamContext nppStreamCtx;
|
||||||
|
FILE *bmpFile;
|
||||||
|
|
||||||
|
for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
|
||||||
|
pInputImageDev[j] = 0;
|
||||||
|
pInputImageHost[j] = 0;
|
||||||
|
pUFGenerateLabelsScratchBufferDev[j] = 0;
|
||||||
|
pUFCompressedLabelsScratchBufferDev[j] = 0;
|
||||||
|
pUFLabelDev[j] = 0;
|
||||||
|
pUFLabelHost[j] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever
|
||||||
|
// your stream ID is if not the NULL stream.
|
||||||
|
|
||||||
|
cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess) {
|
||||||
|
printf("CUDA error: no devices supporting CUDA.\n");
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||||
|
|
||||||
|
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||||
|
libVer->build);
|
||||||
|
|
||||||
|
int driverVersion, runtimeVersion;
|
||||||
|
cudaDriverGetVersion(&driverVersion);
|
||||||
|
cudaRuntimeGetVersion(&runtimeVersion);
|
||||||
|
|
||||||
|
printf("CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||||
|
(driverVersion % 100) / 10);
|
||||||
|
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion / 1000,
|
||||||
|
(runtimeVersion % 100) / 10);
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(
|
||||||
|
&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||||
|
cudaDevAttrComputeCapabilityMajor, nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(
|
||||||
|
&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||||
|
cudaDevAttrComputeCapabilityMinor, nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError =
|
||||||
|
cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||||
|
|
||||||
|
cudaDeviceProp oDeviceProperties;
|
||||||
|
|
||||||
|
cudaError =
|
||||||
|
cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||||
|
|
||||||
|
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||||
|
nppStreamCtx.nMaxThreadsPerMultiProcessor =
|
||||||
|
oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||||
|
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||||
|
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||||
|
|
||||||
|
NppiSize oSizeROI[NUMBER_OF_IMAGES];
|
||||||
|
|
||||||
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||||
|
if (nImage == 0) {
|
||||||
|
oSizeROI[nImage].width = 512;
|
||||||
|
oSizeROI[nImage].height = 512;
|
||||||
|
} else if (nImage == 1) {
|
||||||
|
oSizeROI[nImage].width = 512;
|
||||||
|
oSizeROI[nImage].height = 512;
|
||||||
|
} else if (nImage == 2) {
|
||||||
|
oSizeROI[nImage].width = 509;
|
||||||
|
oSizeROI[nImage].height = 335;
|
||||||
|
} else if (nImage == 3) {
|
||||||
|
oSizeROI[nImage].width = 1024;
|
||||||
|
oSizeROI[nImage].height = 683;
|
||||||
|
} else if (nImage == 4) {
|
||||||
|
oSizeROI[nImage].width = 1280;
|
||||||
|
oSizeROI[nImage].height = 720;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: While using cudaMallocPitch() to allocate device memory for NPP can
|
||||||
|
// significantly improve the performance of many NPP functions, for UF
|
||||||
|
// function label markers generation or compression DO NOT USE
|
||||||
|
// cudaMallocPitch(). Doing so could result in incorrect output.
|
||||||
|
|
||||||
|
cudaError = cudaMalloc(
|
||||||
|
(void **)&pInputImageDev[nImage],
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
// For images processed with UF label markers functions ROI width and height
|
||||||
|
// for label markers generation output AND marker compression functions MUST
|
||||||
|
// be the same AND line pitch MUST be equal to ROI.width * sizeof(Npp32u).
|
||||||
|
// Also the image pointer used for label markers generation output must
|
||||||
|
// start at the same position in the image as it does in the marker
|
||||||
|
// compression function. Also note that actual input image size and ROI do
|
||||||
|
// not necessarily need to be related other than ROI being less than or
|
||||||
|
// equal to image size and image starting position does not necessarily have
|
||||||
|
// to be at pixel 0 in the input image.
|
||||||
|
|
||||||
|
cudaError = cudaMalloc(
|
||||||
|
(void **)&pUFLabelDev[nImage],
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
pInputImageHost[nImage] = reinterpret_cast<Npp8u *>(malloc(
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height));
|
||||||
|
pUFLabelHost[nImage] = reinterpret_cast<Npp32u *>(malloc(
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height));
|
||||||
|
|
||||||
|
// Use UF functions throughout this sample.
|
||||||
|
|
||||||
|
nppStatus = nppiLabelMarkersUFGetBufferSize_32u_C1R(
|
||||||
|
oSizeROI[nImage], &aGenerateLabelsScratchBufferSize[nImage]);
|
||||||
|
|
||||||
|
// One at a time image processing
|
||||||
|
|
||||||
|
cudaError = cudaMalloc((void **)&pUFGenerateLabelsScratchBufferDev[nImage],
|
||||||
|
aGenerateLabelsScratchBufferSize[nImage]);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
if (loadRaw8BitImage(pInputImageHost[nImage],
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
||||||
|
oSizeROI[nImage].height, nImage) == 0) {
|
||||||
|
cudaError = cudaMemcpy2DAsync(
|
||||||
|
pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
|
||||||
|
pInputImageHost[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp8u), oSizeROI[nImage].height,
|
||||||
|
cudaMemcpyHostToDevice, nppStreamCtx.hStream);
|
||||||
|
|
||||||
|
nppStatus = nppiLabelMarkersUF_8u32u_C1R_Ctx(
|
||||||
|
pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
|
||||||
|
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
oSizeROI[nImage], nppiNormInf,
|
||||||
|
pUFGenerateLabelsScratchBufferDev[nImage], nppStreamCtx);
|
||||||
|
|
||||||
|
if (nppStatus != NPP_SUCCESS) {
|
||||||
|
if (nImage == 0)
|
||||||
|
printf("Lena_LabelMarkersUF_8Way_512x512_32u failed.\n");
|
||||||
|
else if (nImage == 1)
|
||||||
|
printf("CT_skull_LabelMarkersUF_8Way_512x512_32u failed.\n");
|
||||||
|
else if (nImage == 2)
|
||||||
|
printf("PCB_METAL_LabelMarkersUF_8Way_509x335_32u failed.\n");
|
||||||
|
else if (nImage == 3)
|
||||||
|
printf("PCB2_LabelMarkersUF_8Way_1024x683_32u failed.\n");
|
||||||
|
else if (nImage == 4)
|
||||||
|
printf("PCB_LabelMarkersUF_8Way_1280x720_32u failed.\n");
|
||||||
|
tearDown();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
cudaError = cudaMemcpy2DAsync(
|
||||||
|
pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
|
||||||
|
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||||
|
|
||||||
|
// Wait for host image read backs to complete, not necessary if no need to
|
||||||
|
// synchronize
|
||||||
|
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
|
||||||
|
cudaSuccess) {
|
||||||
|
printf("Post label generation cudaStreamSynchronize failed\n");
|
||||||
|
tearDown();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nImage == 0)
|
||||||
|
bmpFile = fopen(LabelMarkersOutputFile0.c_str(), "wb");
|
||||||
|
else if (nImage == 1)
|
||||||
|
bmpFile = fopen(LabelMarkersOutputFile1.c_str(), "wb");
|
||||||
|
else if (nImage == 2)
|
||||||
|
bmpFile = fopen(LabelMarkersOutputFile2.c_str(), "wb");
|
||||||
|
else if (nImage == 3)
|
||||||
|
bmpFile = fopen(LabelMarkersOutputFile3.c_str(), "wb");
|
||||||
|
else if (nImage == 4)
|
||||||
|
bmpFile = fopen(LabelMarkersOutputFile4.c_str(), "wb");
|
||||||
|
|
||||||
|
if (bmpFile == NULL) return -1;
|
||||||
|
size_t nSize = 0;
|
||||||
|
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
||||||
|
nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
|
||||||
|
sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
|
||||||
|
}
|
||||||
|
fclose(bmpFile);
|
||||||
|
|
||||||
|
nppStatus = nppiCompressMarkerLabelsGetBufferSize_32u_C1R(
|
||||||
|
oSizeROI[nImage].width * oSizeROI[nImage].height,
|
||||||
|
&aCompressLabelsScratchBufferSize[nImage]);
|
||||||
|
if (nppStatus != NPP_NO_ERROR) return nppStatus;
|
||||||
|
|
||||||
|
cudaError =
|
||||||
|
cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[nImage],
|
||||||
|
aCompressLabelsScratchBufferSize[nImage]);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
nCompressedLabelCount = 0;
|
||||||
|
|
||||||
|
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(
|
||||||
|
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
oSizeROI[nImage], oSizeROI[nImage].width * oSizeROI[nImage].height,
|
||||||
|
&nCompressedLabelCount, pUFCompressedLabelsScratchBufferDev[nImage]);
|
||||||
|
|
||||||
|
if (nppStatus != NPP_SUCCESS) {
|
||||||
|
if (nImage == 0)
|
||||||
|
printf("Lena_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
|
||||||
|
else if (nImage == 1)
|
||||||
|
printf(
|
||||||
|
"CT_Skull_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
|
||||||
|
else if (nImage == 2)
|
||||||
|
printf(
|
||||||
|
"PCB_METAL_CompressedLabelMarkersUF_8Way_509x335_32u failed.\n");
|
||||||
|
else if (nImage == 3)
|
||||||
|
printf("PCB2_CompressedLabelMarkersUF_8Way_1024x683_32u failed.\n");
|
||||||
|
else if (nImage == 4)
|
||||||
|
printf("PCB_CompressedLabelMarkersUF_8Way_1280x720_32u failed.\n");
|
||||||
|
tearDown();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
cudaError = cudaMemcpy2DAsync(
|
||||||
|
pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
|
||||||
|
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||||
|
|
||||||
|
// Wait for host image read backs to finish, not necessary if no need to
|
||||||
|
// synchronize
|
||||||
|
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
|
||||||
|
cudaSuccess ||
|
||||||
|
nCompressedLabelCount == 0) {
|
||||||
|
printf("Post label compression cudaStreamSynchronize failed\n");
|
||||||
|
tearDown();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nImage == 0)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsOutputFile0.c_str(), "wb");
|
||||||
|
else if (nImage == 1)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsOutputFile1.c_str(), "wb");
|
||||||
|
else if (nImage == 2)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsOutputFile2.c_str(), "wb");
|
||||||
|
else if (nImage == 3)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsOutputFile3.c_str(), "wb");
|
||||||
|
else if (nImage == 4)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsOutputFile4.c_str(), "wb");
|
||||||
|
|
||||||
|
if (bmpFile == NULL) return -1;
|
||||||
|
nSize = 0;
|
||||||
|
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
||||||
|
nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
|
||||||
|
sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
|
||||||
|
}
|
||||||
|
fclose(bmpFile);
|
||||||
|
|
||||||
|
if (nImage == 0)
|
||||||
|
printf(
|
||||||
|
"Lena_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
nCompressedLabelCount);
|
||||||
|
else if (nImage == 1)
|
||||||
|
printf(
|
||||||
|
"CT_Skull_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
nCompressedLabelCount);
|
||||||
|
else if (nImage == 2)
|
||||||
|
printf(
|
||||||
|
"PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
nCompressedLabelCount);
|
||||||
|
else if (nImage == 3)
|
||||||
|
printf(
|
||||||
|
"PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
nCompressedLabelCount);
|
||||||
|
else if (nImage == 4)
|
||||||
|
printf(
|
||||||
|
"PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
nCompressedLabelCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Batch image processing
|
||||||
|
|
||||||
|
// We want to allocate scratch buffers more efficiently for batch processing
|
||||||
|
// so first we free up the scratch buffers for image 0 and reallocate them.
|
||||||
|
// This is not required but helps cudaMalloc to work more efficiently.
|
||||||
|
|
||||||
|
cudaFree(pUFCompressedLabelsScratchBufferDev[0]);
|
||||||
|
|
||||||
|
int nTotalBatchedUFCompressLabelsScratchBufferDevSize = 0;
|
||||||
|
|
||||||
|
for (int k = 0; k < NUMBER_OF_IMAGES; k++)
|
||||||
|
nTotalBatchedUFCompressLabelsScratchBufferDevSize +=
|
||||||
|
aCompressLabelsScratchBufferSize[k];
|
||||||
|
|
||||||
|
cudaError = cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[0],
|
||||||
|
nTotalBatchedUFCompressLabelsScratchBufferDevSize);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
// Now allocate batch lists
|
||||||
|
|
||||||
|
int nBatchImageListBytes = NUMBER_OF_IMAGES * sizeof(NppiImageDescriptor);
|
||||||
|
|
||||||
|
cudaError =
|
||||||
|
cudaMalloc((void **)&pUFBatchSrcImageListDev, nBatchImageListBytes);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
cudaError =
|
||||||
|
cudaMalloc((void **)&pUFBatchSrcDstImageListDev, nBatchImageListBytes);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
pUFBatchSrcImageListHost =
|
||||||
|
reinterpret_cast<NppiImageDescriptor *>(malloc(nBatchImageListBytes));
|
||||||
|
pUFBatchSrcDstImageListHost =
|
||||||
|
reinterpret_cast<NppiImageDescriptor *>(malloc(nBatchImageListBytes));
|
||||||
|
|
||||||
|
NppiSize oMaxROISize = {0, 0};
|
||||||
|
|
||||||
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||||
|
pUFBatchSrcImageListHost[nImage].pData = pInputImageDev[nImage];
|
||||||
|
pUFBatchSrcImageListHost[nImage].nStep =
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp8u);
|
||||||
|
// src image oSize parameter is ignored in these NPP functions
|
||||||
|
pUFBatchSrcDstImageListHost[nImage].pData = pUFLabelDev[nImage];
|
||||||
|
pUFBatchSrcDstImageListHost[nImage].nStep =
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp32u);
|
||||||
|
pUFBatchSrcDstImageListHost[nImage].oSize = oSizeROI[nImage];
|
||||||
|
if (oSizeROI[nImage].width > oMaxROISize.width)
|
||||||
|
oMaxROISize.width = oSizeROI[nImage].width;
|
||||||
|
if (oSizeROI[nImage].height > oMaxROISize.height)
|
||||||
|
oMaxROISize.height = oSizeROI[nImage].height;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy label generation batch lists from CPU to GPU
|
||||||
|
cudaError = cudaMemcpyAsync(pUFBatchSrcImageListDev, pUFBatchSrcImageListHost,
|
||||||
|
nBatchImageListBytes, cudaMemcpyHostToDevice,
|
||||||
|
nppStreamCtx.hStream);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
|
||||||
|
|
||||||
|
cudaError = cudaMemcpyAsync(pUFBatchSrcDstImageListDev,
|
||||||
|
pUFBatchSrcDstImageListHost, nBatchImageListBytes,
|
||||||
|
cudaMemcpyHostToDevice, nppStreamCtx.hStream);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
|
||||||
|
|
||||||
|
// We use 8-way neighbor search throughout this example
|
||||||
|
nppStatus = nppiLabelMarkersUFBatch_8u32u_C1R_Advanced_Ctx(
|
||||||
|
pUFBatchSrcImageListDev, pUFBatchSrcDstImageListDev, NUMBER_OF_IMAGES,
|
||||||
|
oMaxROISize, nppiNormInf, nppStreamCtx);
|
||||||
|
|
||||||
|
if (nppStatus != NPP_SUCCESS) {
|
||||||
|
printf("LabelMarkersUFBatch_8Way_8u32u failed.\n");
|
||||||
|
tearDown();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now read back generated device images to the host
|
||||||
|
|
||||||
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||||
|
cudaError = cudaMemcpy2DAsync(
|
||||||
|
pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
|
||||||
|
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for host image read backs to complete, not necessary if no need to
|
||||||
|
// synchronize
|
||||||
|
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
|
||||||
|
cudaSuccess) {
|
||||||
|
printf("Post label generation cudaStreamSynchronize failed\n");
|
||||||
|
tearDown();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save output to files
|
||||||
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||||
|
if (nImage == 0)
|
||||||
|
bmpFile = fopen(LabelMarkersBatchOutputFile0.c_str(), "wb");
|
||||||
|
else if (nImage == 1)
|
||||||
|
bmpFile = fopen(LabelMarkersBatchOutputFile1.c_str(), "wb");
|
||||||
|
else if (nImage == 2)
|
||||||
|
bmpFile = fopen(LabelMarkersBatchOutputFile2.c_str(), "wb");
|
||||||
|
else if (nImage == 3)
|
||||||
|
bmpFile = fopen(LabelMarkersBatchOutputFile3.c_str(), "wb");
|
||||||
|
else if (nImage == 4)
|
||||||
|
bmpFile = fopen(LabelMarkersBatchOutputFile4.c_str(), "wb");
|
||||||
|
|
||||||
|
if (bmpFile == NULL) return -1;
|
||||||
|
size_t nSize = 0;
|
||||||
|
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
||||||
|
nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
|
||||||
|
sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
|
||||||
|
}
|
||||||
|
fclose(bmpFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef USE_BATCHED_LABEL_COMPRESSION
|
||||||
|
|
||||||
|
// Now allocate scratch buffer memory for batched label compression
|
||||||
|
cudaError = cudaMalloc((void **)&pUFBatchSrcDstScratchBufferListDev,
|
||||||
|
NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor));
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
cudaError = cudaMalloc((void **)&pUFBatchPerImageCompressedCountListDev,
|
||||||
|
NUMBER_OF_IMAGES * sizeof(Npp32u));
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||||
|
|
||||||
|
// Allocate host side scratch buffer pointer and size list and initialize with
|
||||||
|
// device scratch buffer pointers
|
||||||
|
pUFBatchSrcDstScratchBufferListHost =
|
||||||
|
reinterpret_cast<NppiBufferDescriptor *>(
|
||||||
|
malloc(NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor)));
|
||||||
|
|
||||||
|
pUFBatchPerImageCompressedCountListHost =
|
||||||
|
reinterpret_cast<Npp32u *>(malloc(NUMBER_OF_IMAGES * sizeof(Npp32u)));
|
||||||
|
|
||||||
|
// Start buffer pointer at beginning of full per image buffer list sized
|
||||||
|
// pUFCompressedLabelsScratchBufferDev[0]
|
||||||
|
Npp32u *pCurUFCompressedLabelsScratchBufferDev =
|
||||||
|
reinterpret_cast<Npp32u *>(pUFCompressedLabelsScratchBufferDev[0]);
|
||||||
|
|
||||||
|
int nMaxUFCompressedLabelsScratchBufferSize = 0;
|
||||||
|
|
||||||
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||||
|
// This particular function works on in-place data and SrcDst image batch
|
||||||
|
// list has already been initialized in batched label generation function
|
||||||
|
// setup
|
||||||
|
|
||||||
|
// Initialize each per image buffer descriptor
|
||||||
|
pUFBatchSrcDstScratchBufferListHost[nImage].pData =
|
||||||
|
reinterpret_cast<void *>(pCurUFCompressedLabelsScratchBufferDev);
|
||||||
|
pUFBatchSrcDstScratchBufferListHost[nImage].nBufferSize =
|
||||||
|
aCompressLabelsScratchBufferSize[nImage];
|
||||||
|
|
||||||
|
if (aCompressLabelsScratchBufferSize[nImage] >
|
||||||
|
nMaxUFCompressedLabelsScratchBufferSize)
|
||||||
|
nMaxUFCompressedLabelsScratchBufferSize =
|
||||||
|
aCompressLabelsScratchBufferSize[nImage];
|
||||||
|
|
||||||
|
// Offset buffer pointer to next per image buffer
|
||||||
|
Npp8u *pTempBuffer =
|
||||||
|
reinterpret_cast<Npp8u *>(pCurUFCompressedLabelsScratchBufferDev);
|
||||||
|
pTempBuffer += aCompressLabelsScratchBufferSize[nImage];
|
||||||
|
pCurUFCompressedLabelsScratchBufferDev =
|
||||||
|
reinterpret_cast<Npp32u *>((void *)(pTempBuffer));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy compression batch scratch buffer list from CPU to GPU
|
||||||
|
cudaError = cudaMemcpyAsync(pUFBatchSrcDstScratchBufferListDev,
|
||||||
|
pUFBatchSrcDstScratchBufferListHost,
|
||||||
|
NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor),
|
||||||
|
cudaMemcpyHostToDevice, nppStreamCtx.hStream);
|
||||||
|
if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
|
||||||
|
|
||||||
|
nppStatus = nppiCompressMarkerLabelsUFBatch_32u_C1IR_Advanced_Ctx(
|
||||||
|
pUFBatchSrcDstImageListDev, pUFBatchSrcDstScratchBufferListDev,
|
||||||
|
pUFBatchPerImageCompressedCountListDev, NUMBER_OF_IMAGES, oMaxROISize,
|
||||||
|
nMaxUFCompressedLabelsScratchBufferSize, nppStreamCtx);
|
||||||
|
if (nppStatus != NPP_SUCCESS) {
|
||||||
|
printf("BatchCompressedLabelMarkersUF_8Way_32u failed.\n");
|
||||||
|
tearDown();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy output compressed label images back to host
|
||||||
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||||
|
cudaError = cudaMemcpy2DAsync(
|
||||||
|
pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||||
|
oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
|
||||||
|
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for host image read backs to complete, not necessary if no need to
|
||||||
|
// synchronize
|
||||||
|
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
|
||||||
|
cudaSuccess) {
|
||||||
|
printf("Post label compression cudaStreamSynchronize failed\n");
|
||||||
|
tearDown();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save compressed label images into files
|
||||||
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||||
|
if (nImage == 0)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile0.c_str(), "wb");
|
||||||
|
else if (nImage == 1)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile1.c_str(), "wb");
|
||||||
|
else if (nImage == 2)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile2.c_str(), "wb");
|
||||||
|
else if (nImage == 3)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile3.c_str(), "wb");
|
||||||
|
else if (nImage == 4)
|
||||||
|
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile4.c_str(), "wb");
|
||||||
|
|
||||||
|
if (bmpFile == NULL) return -1;
|
||||||
|
size_t nSize = 0;
|
||||||
|
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
||||||
|
nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
|
||||||
|
sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
|
||||||
|
}
|
||||||
|
fclose(bmpFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read back per image compressed label count.
|
||||||
|
cudaError = cudaMemcpyAsync(pUFBatchPerImageCompressedCountListHost,
|
||||||
|
pUFBatchPerImageCompressedCountListDev,
|
||||||
|
NUMBER_OF_IMAGES * sizeof(Npp32u),
|
||||||
|
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||||
|
if (cudaError != cudaSuccess) {
|
||||||
|
tearDown();
|
||||||
|
return NPP_MEMCPY_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for host read back to complete
|
||||||
|
cudaError = cudaStreamSynchronize(nppStreamCtx.hStream);
|
||||||
|
|
||||||
|
printf("\n\n");
|
||||||
|
|
||||||
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||||
|
if (nImage == 0)
|
||||||
|
printf(
|
||||||
|
"Lena_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||||
|
else if (nImage == 1)
|
||||||
|
printf(
|
||||||
|
"CT_Skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||||
|
else if (nImage == 2)
|
||||||
|
printf(
|
||||||
|
"PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||||
|
else if (nImage == 3)
|
||||||
|
printf(
|
||||||
|
"PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||||
|
else if (nImage == 4)
|
||||||
|
printf(
|
||||||
|
"PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u succeeded, "
|
||||||
|
"compressed label count is %d.\n",
|
||||||
|
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // USE_BATCHED_LABEL_COMPRESSION
|
||||||
|
|
||||||
|
tearDown();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -1,7 +1,7 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
Microsoft Visual Studio Solution File, Format Version 14.00
|
||||||
# Visual Studio 2013
|
# Visual Studio 2015
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrixMul", "matrixMul_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "batchedLabelMarkersAndLabelCompressionNPP", "batchedLabelMarkersAndLabelCompressionNPP_vs2015.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
@ -15,15 +15,15 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||||
<RootNamespace>MersenneTwisterGP11213_vs2012</RootNamespace>
|
<RootNamespace>batchedLabelMarkersAndLabelCompressionNPP_vs2015</RootNamespace>
|
||||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
<ProjectName>batchedLabelMarkersAndLabelCompressionNPP</ProjectName>
|
||||||
<CudaToolkitCustomDir />
|
<CudaToolkitCustomDir />
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
<PlatformToolset>v110</PlatformToolset>
|
<PlatformToolset>v140</PlatformToolset>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -57,9 +57,9 @@
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<SubSystem>Console</SubSystem>
|
<SubSystem>Console</SubSystem>
|
||||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
<OutputFile>$(OutDir)/batchedLabelMarkersAndLabelCompressionNPP.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
||||||
|
@ -97,11 +97,11 @@
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="MersenneTwister.cpp" />
|
<ClCompile Include="batchedLabelMarkersAndLabelCompressionNPP.cpp" />
|
||||||
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
|
@ -1,7 +1,7 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
# Visual Studio 2012
|
# Visual Studio 2017
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrixMul", "matrixMul_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "batchedLabelMarkersAndLabelCompressionNPP", "batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
@ -15,15 +15,20 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||||
<RootNamespace>cannyEdgeDetectorNPP_vs2012</RootNamespace>
|
<RootNamespace>batchedLabelMarkersAndLabelCompressionNPP_vs2017</RootNamespace>
|
||||||
<ProjectName>cannyEdgeDetectorNPP</ProjectName>
|
<ProjectName>batchedLabelMarkersAndLabelCompressionNPP</ProjectName>
|
||||||
<CudaToolkitCustomDir />
|
<CudaToolkitCustomDir />
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
||||||
|
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
||||||
|
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
||||||
|
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
||||||
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
<PlatformToolset>v110</PlatformToolset>
|
<PlatformToolset>v141</PlatformToolset>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
@ -33,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -53,13 +58,13 @@
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
<WarningLevel>Level3</WarningLevel>
|
<WarningLevel>Level3</WarningLevel>
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);../../Common/UtilNPP;../../Common/FreeImage/include;</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<SubSystem>Console</SubSystem>
|
<SubSystem>Console</SubSystem>
|
||||||
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);../../Common/FreeImage/lib/x64;../../Common/FreeImage/lib/$(TARGET_OS);../../Common/FreeImage/lib/$(TARGET_OS)/$(TARGET_ARCH);</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||||
<OutputFile>$(OutDir)/cannyEdgeDetectorNPP.exe</OutputFile>
|
<OutputFile>$(OutDir)/batchedLabelMarkersAndLabelCompressionNPP.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
||||||
|
@ -96,22 +101,12 @@
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
|
|
||||||
<Link>
|
|
||||||
|
|
||||||
</Link>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<Link>
|
|
||||||
<AdditionalDependencies>freeimage.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
</Link>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="cannyEdgeDetectorNPP.cpp" />
|
<ClCompile Include="batchedLabelMarkersAndLabelCompressionNPP.cpp" />
|
||||||
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
|
@ -1,7 +1,7 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
# Visual Studio 2012
|
# Visual Studio 2019
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reduction", "reduction_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "batchedLabelMarkersAndLabelCompressionNPP", "batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
@ -15,15 +15,16 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||||
<RootNamespace>MersenneTwisterGP11213_vs2013</RootNamespace>
|
<RootNamespace>batchedLabelMarkersAndLabelCompressionNPP_vs2019</RootNamespace>
|
||||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
<ProjectName>batchedLabelMarkersAndLabelCompressionNPP</ProjectName>
|
||||||
<CudaToolkitCustomDir />
|
<CudaToolkitCustomDir />
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
<PlatformToolset>v120</PlatformToolset>
|
<PlatformToolset>v142</PlatformToolset>
|
||||||
|
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
@ -33,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -57,9 +58,9 @@
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<Link>
|
<Link>
|
||||||
<SubSystem>Console</SubSystem>
|
<SubSystem>Console</SubSystem>
|
||||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
<OutputFile>$(OutDir)/batchedLabelMarkersAndLabelCompressionNPP.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
||||||
|
@ -97,11 +98,11 @@
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="MersenneTwister.cpp" />
|
<ClCompile Include="batchedLabelMarkersAndLabelCompressionNPP.cpp" />
|
||||||
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
|
@ -306,7 +306,7 @@ ifeq ($(TARGET_OS),linux)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Gencode arguments
|
# Gencode arguments
|
||||||
SMS ?= 80
|
SMS ?= 80 86
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
ifeq ($(SMS),)
|
||||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
||||||
|
|
|
@ -43,6 +43,7 @@
|
||||||
<scope>1:CUDA Basic Topics</scope>
|
<scope>1:CUDA Basic Topics</scope>
|
||||||
</scopes>
|
</scopes>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ cudaMallocManaged, cudaDeviceSynchronize, cudaFuncSetAttribute, cudaEventCreate,
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -62,7 +62,7 @@
|
||||||
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -102,6 +102,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -67,7 +67,7 @@
|
||||||
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -107,6 +107,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -63,7 +63,7 @@
|
||||||
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -103,6 +103,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -301,9 +301,9 @@ endif
|
||||||
|
|
||||||
# Gencode arguments
|
# Gencode arguments
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||||
else
|
else
|
||||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
ifeq ($(SMS),)
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ Cooperative Groups
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ x86_64, ppc64le, armv7l
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -62,7 +62,7 @@
|
||||||
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -102,6 +102,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -67,7 +67,7 @@
|
||||||
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -107,6 +107,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -63,7 +63,7 @@
|
||||||
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -103,6 +103,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -59,6 +59,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2012
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "boxFilterNPP", "boxFilterNPP_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
|
||||||
# Visual Studio 2013
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "boxFilterNPP", "boxFilterNPP_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,117 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>boxFilterNPP_vs2013</RootNamespace>
|
|
||||||
<ProjectName>boxFilterNPP</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v120</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);../../Common/UtilNPP;../../Common/FreeImage/include;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);../../Common/FreeImage/lib/x64;../../Common/FreeImage/lib/$(TARGET_OS);../../Common/FreeImage/lib/$(TARGET_OS)/$(TARGET_ARCH);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/boxFilterNPP.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
<Include>./;../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
|
|
||||||
<Link>
|
|
||||||
|
|
||||||
</Link>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<Link>
|
|
||||||
<AdditionalDependencies>freeimage.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
</Link>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<ClCompile Include="boxFilterNPP.cpp" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -112,6 +112,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -117,6 +117,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -113,6 +113,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -55,6 +55,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2012
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cannyEdgeDetectorNPP", "cannyEdgeDetectorNPP_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
|
||||||
# Visual Studio 2013
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cannyEdgeDetectorNPP", "cannyEdgeDetectorNPP_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,117 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>cannyEdgeDetectorNPP_vs2013</RootNamespace>
|
|
||||||
<ProjectName>cannyEdgeDetectorNPP</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v120</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);../../Common/UtilNPP;../../Common/FreeImage/include;</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);../../Common/FreeImage/lib/x64;../../Common/FreeImage/lib/$(TARGET_OS);../../Common/FreeImage/lib/$(TARGET_OS)/$(TARGET_ARCH);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/cannyEdgeDetectorNPP.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
<Include>./;../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
|
|
||||||
<Link>
|
|
||||||
|
|
||||||
</Link>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<Link>
|
|
||||||
<AdditionalDependencies>freeimage.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
</Link>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<ClCompile Include="cannyEdgeDetectorNPP.cpp" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -112,6 +112,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -117,6 +117,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -113,6 +113,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -268,9 +268,9 @@ LIBRARIES :=
|
||||||
|
|
||||||
# Gencode arguments
|
# Gencode arguments
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||||
else
|
else
|
||||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
ifeq ($(SMS),)
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ x86_64, ppc64le, armv7l
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
||||||
|
|
|
@ -163,7 +163,7 @@ int main(int argc, char **argv) {
|
||||||
#if defined(__arm__) || defined(__aarch64__)
|
#if defined(__arm__) || defined(__aarch64__)
|
||||||
// the kernel takes more time than the channel reset time on arm archs, so to
|
// the kernel takes more time than the channel reset time on arm archs, so to
|
||||||
// prevent hangs reduce time_clocks.
|
// prevent hangs reduce time_clocks.
|
||||||
clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 1000));
|
clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 100));
|
||||||
#else
|
#else
|
||||||
clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate);
|
clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2012
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "concurrentKernels", "concurrentKernels_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
|
||||||
# Visual Studio 2013
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "concurrentKernels", "concurrentKernels_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -62,7 +62,7 @@
|
||||||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -102,6 +102,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -38,7 +38,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -67,7 +67,7 @@
|
||||||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -107,6 +107,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -63,7 +63,7 @@
|
||||||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -103,6 +103,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -268,9 +268,9 @@ LIBRARIES :=
|
||||||
|
|
||||||
# Gencode arguments
|
# Gencode arguments
|
||||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||||
else
|
else
|
||||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(SMS),)
|
ifeq ($(SMS),)
|
||||||
|
|
|
@ -58,6 +58,7 @@
|
||||||
<sm-arch>sm72</sm-arch>
|
<sm-arch>sm72</sm-arch>
|
||||||
<sm-arch>sm75</sm-arch>
|
<sm-arch>sm75</sm-arch>
|
||||||
<sm-arch>sm80</sm-arch>
|
<sm-arch>sm80</sm-arch>
|
||||||
|
<sm-arch>sm86</sm-arch>
|
||||||
<supported_envs>
|
<supported_envs>
|
||||||
<env>
|
<env>
|
||||||
<arch>x86_64</arch>
|
<arch>x86_64</arch>
|
||||||
|
|
|
@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library
|
||||||
|
|
||||||
## Supported SM Architectures
|
## Supported SM Architectures
|
||||||
|
|
||||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
## Supported OSes
|
## Supported OSes
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ cudaStreamBeginCapture, cudaStreamEndCapture, cudaGraphCreate, cudaGraphLaunch,
|
||||||
|
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
## Build and Run
|
## Build and Run
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
|
||||||
# Visual Studio 2012
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "conjugateGradientCudaGraphs", "conjugateGradientCudaGraphs_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,107 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>conjugateGradientCudaGraphs_vs2012</RootNamespace>
|
|
||||||
<ProjectName>conjugateGradientCudaGraphs</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v110</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cublas.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
<Include>./;../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="conjugateGradientCudaGraphs.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
|
@ -1,20 +0,0 @@
|
||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
|
||||||
# Visual Studio 2013
|
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "conjugateGradientCudaGraphs", "conjugateGradientCudaGraphs_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
|
||||||
EndProject
|
|
||||||
Global
|
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
||||||
Debug|x64 = Debug|x64
|
|
||||||
Release|x64 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
|
||||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
|
||||||
EndGlobalSection
|
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
|
||||||
HideSolutionNode = FALSE
|
|
||||||
EndGlobalSection
|
|
||||||
EndGlobal
|
|
|
@ -1,107 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup>
|
|
||||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
|
||||||
<ProjectConfiguration Include="Debug|x64">
|
|
||||||
<Configuration>Debug</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
<ProjectConfiguration Include="Release|x64">
|
|
||||||
<Configuration>Release</Configuration>
|
|
||||||
<Platform>x64</Platform>
|
|
||||||
</ProjectConfiguration>
|
|
||||||
</ItemGroup>
|
|
||||||
<PropertyGroup Label="Globals">
|
|
||||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
|
||||||
<RootNamespace>conjugateGradientCudaGraphs_vs2013</RootNamespace>
|
|
||||||
<ProjectName>conjugateGradientCudaGraphs</ProjectName>
|
|
||||||
<CudaToolkitCustomDir />
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<ConfigurationType>Application</ConfigurationType>
|
|
||||||
<CharacterSet>MultiByte</CharacterSet>
|
|
||||||
<PlatformToolset>v120</PlatformToolset>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<UseDebugLibraries>true</UseDebugLibraries>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
|
||||||
<ImportGroup Label="ExtensionSettings">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<ImportGroup Label="PropertySheets">
|
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
|
||||||
</ImportGroup>
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
|
||||||
<IncludePath>$(IncludePath)</IncludePath>
|
|
||||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
|
||||||
<CodeAnalysisRules />
|
|
||||||
<CodeAnalysisRuleAssemblies />
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
|
||||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<WarningLevel>Level3</WarningLevel>
|
|
||||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<AdditionalDependencies>cublas.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
|
||||||
<OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
<Include>./;../../Common</Include>
|
|
||||||
<Defines>WIN32</Defines>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>Disabled</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MTd</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
|
||||||
<ClCompile>
|
|
||||||
<Optimization>MaxSpeed</Optimization>
|
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
<CudaCompile>
|
|
||||||
<Runtime>MT</Runtime>
|
|
||||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
|
||||||
</CudaCompile>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CudaCompile Include="conjugateGradientCudaGraphs.cu" />
|
|
||||||
|
|
||||||
</ItemGroup>
|
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
|
||||||
<ImportGroup Label="ExtensionTargets">
|
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
|
||||||
</ImportGroup>
|
|
||||||
</Project>
|
|
|
@ -33,7 +33,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
<ImportGroup Label="ExtensionSettings">
|
<ImportGroup Label="ExtensionSettings">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
<ImportGroup Label="PropertySheets">
|
<ImportGroup Label="PropertySheets">
|
||||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||||
|
@ -62,7 +62,7 @@
|
||||||
<OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
|
<OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
|
||||||
</Link>
|
</Link>
|
||||||
<CudaCompile>
|
<CudaCompile>
|
||||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<Include>./;../../Common</Include>
|
<Include>./;../../Common</Include>
|
||||||
<Defines>WIN32</Defines>
|
<Defines>WIN32</Defines>
|
||||||
|
@ -102,6 +102,6 @@
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user