mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 17:39:16 +08:00
Add and update samples for cuda 11.1 support
This commit is contained in:
parent
e6ce58fef4
commit
cd76533c3f
BIN
Common/data/CT_skull_512x512_8u.raw
Normal file
BIN
Common/data/CT_skull_512x512_8u.raw
Normal file
Binary file not shown.
BIN
Common/data/CT_skull_512x512_8u_Gray.raw
Normal file
BIN
Common/data/CT_skull_512x512_8u_Gray.raw
Normal file
Binary file not shown.
1
Common/data/Lena_512x512_8u_Gray.raw
Normal file
1
Common/data/Lena_512x512_8u_Gray.raw
Normal file
File diff suppressed because one or more lines are too long
BIN
Common/data/PCB2_1024x683_8u.raw
Normal file
BIN
Common/data/PCB2_1024x683_8u.raw
Normal file
Binary file not shown.
BIN
Common/data/PCB_1280x720_8u.raw
Normal file
BIN
Common/data/PCB_1280x720_8u.raw
Normal file
Binary file not shown.
BIN
Common/data/PCB_METAL_509x335_8u.raw
Normal file
BIN
Common/data/PCB_METAL_509x335_8u.raw
Normal file
Binary file not shown.
285
Common/data/Rocks_512x512_8u_Gray.raw
Normal file
285
Common/data/Rocks_512x512_8u_Gray.raw
Normal file
File diff suppressed because one or more lines are too long
BIN
Common/data/lena_512x512_8u.raw
Normal file
BIN
Common/data/lena_512x512_8u.raw
Normal file
Binary file not shown.
|
@ -664,6 +664,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) {
|
|||
{0x72, 64},
|
||||
{0x75, 64},
|
||||
{0x80, 64},
|
||||
{0x86, 128},
|
||||
{-1, -1}};
|
||||
|
||||
int index = 0;
|
||||
|
@ -709,6 +710,7 @@ inline const char* _ConvertSMVer2ArchName(int major, int minor) {
|
|||
{0x72, "Xavier"},
|
||||
{0x75, "Turing"},
|
||||
{0x80, "Ampere"},
|
||||
{0x86, "Ampere"},
|
||||
{-1, "Graphics Device"}};
|
||||
|
||||
int index = 0;
|
||||
|
|
|
@ -33,8 +33,10 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
|
||||
#include <drvapi_error_string.h>
|
||||
#include <helper_string.h>
|
||||
|
||||
#ifndef MAX
|
||||
|
@ -66,48 +68,22 @@ inline int ftoi(float value) {
|
|||
// These are the inline versions for all of the SDK helper functions
|
||||
inline void __checkCudaErrors(CUresult err, const char *file, const int line) {
|
||||
if (CUDA_SUCCESS != err) {
|
||||
const char *errorStr = NULL;
|
||||
cuGetErrorString(err, &errorStr);
|
||||
fprintf(stderr,
|
||||
"checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, "
|
||||
"line %i.\n",
|
||||
err, getCudaDrvErrorString(err), file, line);
|
||||
err, errorStr, file, line);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef getLastCudaDrvErrorMsg
|
||||
#undef getLastCudaDrvErrorMsg
|
||||
#endif
|
||||
|
||||
#define getLastCudaDrvErrorMsg(msg) \
|
||||
__getLastCudaDrvErrorMsg(msg, __FILE__, __LINE__)
|
||||
|
||||
inline void __getLastCudaDrvErrorMsg(const char *msg, const char *file,
|
||||
const int line) {
|
||||
CUresult err = cuCtxSynchronize();
|
||||
|
||||
if (CUDA_SUCCESS != err) {
|
||||
fprintf(stderr, "getLastCudaDrvErrorMsg -> %s", msg);
|
||||
fprintf(stderr,
|
||||
"getLastCudaDrvErrorMsg -> cuCtxSynchronize API error = %04d "
|
||||
"\"%s\" in file <%s>, line %i.\n",
|
||||
err, getCudaDrvErrorString(err), file, line);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
// This function wraps the CUDA Driver API into a template function
|
||||
template <class T>
|
||||
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute,
|
||||
int device) {
|
||||
CUresult error_result =
|
||||
cuDeviceGetAttribute(attribute, device_attribute, device);
|
||||
|
||||
if (error_result != CUDA_SUCCESS) {
|
||||
printf("cuDeviceGetAttribute returned %d\n-> %s\n",
|
||||
static_cast<int>(error_result), getCudaDrvErrorString(error_result));
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
checkCudaErrors(cuDeviceGetAttribute(attribute, device_attribute, device));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -135,6 +111,8 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
|||
{0x70, 64},
|
||||
{0x72, 64},
|
||||
{0x75, 64},
|
||||
{0x80, 64},
|
||||
{0x86, 128},
|
||||
{-1, -1}};
|
||||
|
||||
int index = 0;
|
||||
|
@ -161,11 +139,9 @@ inline int _ConvertSMVer2CoresDRV(int major, int minor) {
|
|||
inline int gpuDeviceInitDRV(int ARGC, const char **ARGV) {
|
||||
int cuDevice = 0;
|
||||
int deviceCount = 0;
|
||||
CUresult err = cuInit(0);
|
||||
checkCudaErrors(cuInit(0));
|
||||
|
||||
if (CUDA_SUCCESS == err) {
|
||||
checkCudaErrors(cuDeviceGetCount(&deviceCount));
|
||||
}
|
||||
checkCudaErrors(cuDeviceGetCount(&deviceCount));
|
||||
|
||||
if (deviceCount == 0) {
|
||||
fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
|
||||
|
@ -192,7 +168,7 @@ inline int gpuDeviceInitDRV(int ARGC, const char **ARGV) {
|
|||
|
||||
checkCudaErrors(cuDeviceGet(&cuDevice, dev));
|
||||
char name[100];
|
||||
cuDeviceGetName(name, 100, cuDevice);
|
||||
checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
|
||||
|
||||
int computeMode;
|
||||
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
|
||||
|
@ -218,7 +194,6 @@ inline int gpuGetMaxGflopsDeviceIdDRV() {
|
|||
int device_count = 0;
|
||||
int sm_per_multiproc = 0;
|
||||
unsigned long long max_compute_perf = 0;
|
||||
int best_SM_arch = 0;
|
||||
int major = 0;
|
||||
int minor = 0;
|
||||
int multiProcessorCount;
|
||||
|
@ -234,19 +209,6 @@ inline int gpuGetMaxGflopsDeviceIdDRV() {
|
|||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
// Find the best major SM Architecture GPU device
|
||||
while (current_device < device_count) {
|
||||
checkCudaErrors(cuDeviceGetAttribute(
|
||||
&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, current_device));
|
||||
checkCudaErrors(cuDeviceGetAttribute(
|
||||
&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, current_device));
|
||||
if (major > 0 && major < 9999) {
|
||||
best_SM_arch = MAX(best_SM_arch, major);
|
||||
}
|
||||
|
||||
current_device++;
|
||||
}
|
||||
|
||||
// Find the best CUDA capable GPU device
|
||||
current_device = 0;
|
||||
|
||||
|
@ -277,17 +239,8 @@ inline int gpuGetMaxGflopsDeviceIdDRV() {
|
|||
clockRate);
|
||||
|
||||
if (compute_perf > max_compute_perf) {
|
||||
// If we find GPU with SM major > 2, search only these
|
||||
if (best_SM_arch > 2) {
|
||||
// If our device==dest_SM_arch, choose this, or else pass
|
||||
if (major == best_SM_arch) {
|
||||
max_compute_perf = compute_perf;
|
||||
max_perf_device = current_device;
|
||||
}
|
||||
} else {
|
||||
max_compute_perf = compute_perf;
|
||||
max_perf_device = current_device;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
devices_prohibited++;
|
||||
|
@ -414,7 +367,39 @@ inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version,
|
|||
}
|
||||
}
|
||||
#endif
|
||||
bool inline findFatbinPath(const char *module_file, std::string &module_path, char **argv, std::ostringstream &ostrm)
|
||||
{
|
||||
char *actual_path = sdkFindFilePath(module_file, argv[0]);
|
||||
|
||||
// end of CUDA Helper Functions
|
||||
if (actual_path)
|
||||
{
|
||||
module_path = actual_path;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("> findModulePath file not found: <%s> \n", module_file);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (module_path.empty())
|
||||
{
|
||||
printf("> findModulePath could not find file: <%s> \n", module_file);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("> findModulePath found file at <%s>\n", module_path.c_str());
|
||||
if (module_path.rfind("fatbin") != std::string::npos)
|
||||
{
|
||||
std::ifstream fileIn(module_path.c_str(), std::ios::binary);
|
||||
ostrm << fileIn.rdbuf();
|
||||
fileIn.close();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// end of CUDA Helper Functions
|
||||
|
||||
#endif // COMMON_HELPER_CUDA_DRVAPI_H_
|
||||
|
||||
|
|
|
@ -293,6 +293,9 @@ inline char *sdkFindFilePath(const char *filename,
|
|||
"../../../../Samples/<executable_name>/data/", // up 4 in tree
|
||||
"../../../Samples/<executable_name>/data/", // up 3 in tree
|
||||
"../../Samples/<executable_name>/data/", // up 2 in tree
|
||||
"../../../../Common/data/", // up 4 in tree
|
||||
"../../../Common/data", // up 3 in tree
|
||||
"../../Common/data/" // up 2 in tree
|
||||
};
|
||||
|
||||
// Extract the executable name
|
||||
|
|
|
@ -47,8 +47,8 @@
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
void compileFileToPTX(char *filename, int argc, char **argv, char **ptxResult,
|
||||
size_t *ptxResultSize, int requiresCGheaders) {
|
||||
void compileFileToCUBIN(char *filename, int argc, char **argv, char **cubinResult,
|
||||
size_t *cubinResultSize, int requiresCGheaders) {
|
||||
std::ifstream inputFile(filename,
|
||||
std::ios::in | std::ios::binary | std::ios::ate);
|
||||
|
||||
|
@ -83,9 +83,9 @@ void compileFileToPTX(char *filename, int argc, char **argv, char **ptxResult,
|
|||
&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
|
||||
|
||||
{
|
||||
// Compile for the GPU arch on which are going to run cuda kernel.
|
||||
// Compile a cubin for the GPU architecture on which the CUDA kernel is going to run.
|
||||
std::string compileOptions;
|
||||
compileOptions = "--gpu-architecture=compute_";
|
||||
compileOptions = "--gpu-architecture=sm_";
|
||||
|
||||
compileParams[numCompileOptions] = reinterpret_cast<char *>(
|
||||
malloc(sizeof(char) * (compileOptions.length() + 10)));
|
||||
|
@ -158,21 +158,20 @@ void compileFileToPTX(char *filename, int argc, char **argv, char **ptxResult,
|
|||
free(log);
|
||||
|
||||
NVRTC_SAFE_CALL("nvrtcCompileProgram", res);
|
||||
// fetch PTX
|
||||
size_t ptxSize;
|
||||
NVRTC_SAFE_CALL("nvrtcGetPTXSize", nvrtcGetPTXSize(prog, &ptxSize));
|
||||
char *ptx = reinterpret_cast<char *>(malloc(sizeof(char) * ptxSize));
|
||||
NVRTC_SAFE_CALL("nvrtcGetPTX", nvrtcGetPTX(prog, ptx));
|
||||
NVRTC_SAFE_CALL("nvrtcDestroyProgram", nvrtcDestroyProgram(&prog));
|
||||
*ptxResult = ptx;
|
||||
*ptxResultSize = ptxSize;
|
||||
|
||||
size_t codeSize;
|
||||
NVRTC_SAFE_CALL("nvrtcGetCUBINSize", nvrtcGetCUBINSize(prog, &codeSize));
|
||||
char *code = new char[codeSize];
|
||||
NVRTC_SAFE_CALL("nvrtcGetCUBIN", nvrtcGetCUBIN(prog, code));
|
||||
*cubinResult = code;
|
||||
*cubinResultSize = codeSize;
|
||||
|
||||
for (int i = 0; i < numCompileOptions; i++) {
|
||||
free(compileParams[i]);
|
||||
}
|
||||
}
|
||||
|
||||
CUmodule loadPTX(char *ptx, int argc, char **argv) {
|
||||
CUmodule loadCUBIN(char *cubin, int argc, char **argv) {
|
||||
CUmodule module;
|
||||
CUcontext context;
|
||||
int major = 0, minor = 0;
|
||||
|
@ -190,11 +189,10 @@ CUmodule loadPTX(char *ptx, int argc, char **argv) {
|
|||
printf("> GPU Device has SM %d.%d compute capability\n", major, minor);
|
||||
|
||||
checkCudaErrors(cuInit(0));
|
||||
checkCudaErrors(cuDeviceGet(&cuDevice, 0));
|
||||
checkCudaErrors(cuCtxCreate(&context, 0, cuDevice));
|
||||
|
||||
checkCudaErrors(cuModuleLoadDataEx(&module, ptx, 0, 0, 0));
|
||||
free(ptx);
|
||||
checkCudaErrors(cuModuleLoadData(&module, cubin));
|
||||
free(cubin);
|
||||
|
||||
return module;
|
||||
}
|
||||
|
|
55
README.md
55
README.md
|
@ -1,11 +1,20 @@
|
|||
# CUDA Samples
|
||||
|
||||
Samples for CUDA Developers which demonstrates features in CUDA Toolkit. This version supports [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads).
|
||||
Samples for CUDA developers that demonstrate features in the CUDA Toolkit. This version supports [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads).
|
||||
|
||||
## Release Notes
|
||||
|
||||
This section describes the release notes for the CUDA Samples on GitHub only.
|
||||
|
||||
### CUDA 11.1
|
||||
* Added `watershedSegmentationNPP`. Demonstrates how to use the NPP watershed segmentation function.
|
||||
* Added `batchedLabelMarkersAndLabelCompressionNPP`. Demonstrates how to use the NPP label-marker generation and label compression functions based on a Union Find (UF) algorithm, including both single-image and batched-image versions.
|
||||
* Dropped Visual Studio 2012 and 2013 support from all samples supported on Windows.
|
||||
* Added a kernel performing warp-aggregated atomic max across multiple buckets using `cg::labeled_partition` and `cg::reduce` in `warpAggregatedAtomicsCG`.
|
||||
* Added extended CG shuffle mechanics to `shfl_scan` sample.
|
||||
* Added `cudaOpenMP`. Demonstrates how to use the OpenMP API to write an application for multiple GPUs.
|
||||
* Added `simpleZeroCopy`. Demonstrates how to use zero copy, where kernels can read and write directly to pinned system memory.
|
||||
|
||||
### CUDA 11.0
|
||||
* Added `dmmaTensorCoreGemm`. Demonstrates double precision GEMM computation using the Double precision Warp Matrix Multiply and Accumulate (WMMA) API introduced with CUDA 11 in Ampere chip family tensor cores.
|
||||
* Added `bf16TensorCoreGemm`. Demonstrates __nv_bfloat16 (e8m7) GEMM computation using the __nv_bfloat16 WMMA API introduced with CUDA 11 in Ampere chip family tensor cores.
|
||||
|
@ -84,7 +93,7 @@ This is the first release of CUDA Samples on GitHub:
|
|||
|
||||
### Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
For system requirements and installation instructions for the CUDA Toolkit, please refer to the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/), and the [Windows Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html).
|
||||
|
||||
### Getting the CUDA Samples
|
||||
|
@ -144,31 +153,33 @@ The samples makefiles can take advantage of certain options:
|
|||
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** |
|
||||
---|---|---|---|
|
||||
**[EGLStream_CUDA_Interop](./Samples/EGLStream_CUDA_Interop)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** |
|
||||
**[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** |
|
||||
**[nvJPEG](./Samples/nvJPEG)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** |
|
||||
**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[cudaNvSci](./Samples/cudaNvSci)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** |
|
||||
**[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** |
|
||||
**[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** |
|
||||
**[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** |
|
||||
**[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
||||
**[concurrentKernels](./Samples/concurrentKernels)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** |
|
||||
**[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
||||
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[matrixMul](./Samples/matrixMul)** | **[systemWideAtomics](./Samples/systemWideAtomics)** |
|
||||
**[vectorAddMMAP](./Samples/vectorAddMMAP)** | **[shfl_scan](./Samples/shfl_scan)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** |
|
||||
**[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[nvJPEG](./Samples/nvJPEG)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** |
|
||||
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
|
||||
**[simpleAttributes](./Samples/simpleAttributes)** | **[cudaNvSci](./Samples/cudaNvSci)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** |
|
||||
**[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** |
|
||||
**[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
|
||||
**[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
|
||||
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[concurrentKernels](./Samples/concurrentKernels)** |
|
||||
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||
**[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
|
||||
**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[matrixMul](./Samples/matrixMul)** | **[systemWideAtomics](./Samples/systemWideAtomics)** |
|
||||
|
||||
#### Windows
|
||||
**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[boxFilterNPP](./Samples/boxFilterNPP)** | **[binaryPartitionCG](./Samples/binaryPartitionCG)** | **[dmmaTensorCoreGemm](./Samples/dmmaTensorCoreGemm)** |
|
||||
---|---|---|---|
|
||||
**[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[simpleIPC](./Samples/simpleIPC)** | **[memMapIPCDrv](./Samples/memMapIPCDrv)** | **[vectorAddMMAP](./Samples/vectorAddMMAP)** |
|
||||
**[shfl_scan](./Samples/shfl_scan)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** | **[nvJPEG](./Samples/nvJPEG)** |
|
||||
**[simpleD3D12](./Samples/simpleD3D12)** | **[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** |
|
||||
**[simpleCUBLASXT](./Samples/simpleCUBLASXT)** | **[simpleAttributes](./Samples/simpleAttributes)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** |
|
||||
**[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** | **[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** |
|
||||
**[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
|
||||
**[reduction](./Samples/reduction)** | **[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[simpleD3D11](./Samples/simpleD3D11)** |
|
||||
**[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** | **[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** |
|
||||
**[concurrentKernels](./Samples/concurrentKernels)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** |
|
||||
**[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** | **[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** |
|
||||
**[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** | **[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[matrixMul](./Samples/matrixMul)** |
|
||||
**[shfl_scan](./Samples/shfl_scan)** | **[simpleZeroCopy](./Samples/simpleZeroCopy)** | **[conjugateGradientCudaGraphs](./Samples/conjugateGradientCudaGraphs)** | **[globalToShmemAsyncCopy](./Samples/globalToShmemAsyncCopy)** |
|
||||
**[nvJPEG](./Samples/nvJPEG)** | **[batchedLabelMarkersAndLabelCompressionNPP](./Samples/batchedLabelMarkersAndLabelCompressionNPP)** | **[simpleD3D12](./Samples/simpleD3D12)** | **[watershedSegmentationNPP](./Samples/watershedSegmentationNPP)** |
|
||||
**[simpleCudaGraphs](./Samples/simpleCudaGraphs)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[simpleCUBLASXT](./Samples/simpleCUBLASXT)** |
|
||||
**[simpleAttributes](./Samples/simpleAttributes)** | **[tf32TensorCoreGemm](./Samples/tf32TensorCoreGemm)** | **[UnifiedMemoryPerf](./Samples/UnifiedMemoryPerf)** | **[cudaCompressibleMemory](./Samples/cudaCompressibleMemory)** |
|
||||
**[bf16TensorCoreGemm](./Samples/bf16TensorCoreGemm)** | **[cuSolverDn_LinearSolver](./Samples/cuSolverDn_LinearSolver)** | **[vulkanImageCUDA](./Samples/vulkanImageCUDA)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** |
|
||||
**[matrixMulDrv](./Samples/matrixMulDrv)** | **[cuSolverSp_LinearSolver](./Samples/cuSolverSp_LinearSolver)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[reduction](./Samples/reduction)** |
|
||||
**[nvJPEG_encoder](./Samples/nvJPEG_encoder)** | **[simpleDrvRuntime](./Samples/simpleDrvRuntime)** | **[simpleD3D11](./Samples/simpleD3D11)** | **[MersenneTwisterGP11213](./Samples/MersenneTwisterGP11213)** |
|
||||
**[simpleAWBarrier](./Samples/simpleAWBarrier)** | **[immaTensorCoreGemm](./Samples/immaTensorCoreGemm)** | **[bandwidthTest](./Samples/bandwidthTest)** | **[concurrentKernels](./Samples/concurrentKernels)** |
|
||||
**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[NV12toBGRandResize](./Samples/NV12toBGRandResize)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[jacobiCudaGraphs](./Samples/jacobiCudaGraphs)** |
|
||||
**[simpleVulkan](./Samples/simpleVulkan)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[cannyEdgeDetectorNPP](./Samples/cannyEdgeDetectorNPP)** | **[p2pBandwidthLatencyTest](./Samples/p2pBandwidthLatencyTest)** |
|
||||
**[simpleVulkanMMAP](./Samples/simpleVulkanMMAP)** | **[cudaOpenMP](./Samples/cudaOpenMP)** | **[matrixMul](./Samples/matrixMul)** |
|
||||
|
||||
## Dependencies
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ EGLStreams Interop
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -30,7 +30,7 @@ cuDeviceGet, cuDeviceGetAttribute, cuDeviceComputeCapability, cuDeviceGetCount,
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -45,6 +45,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ CURAND Library
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -274,9 +274,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NV12toBGRandResize", "NV12toBGRandResize_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,112 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>NV12toBGRandResize_vs2013</RootNamespace>
|
||||
<ProjectName>NV12toBGRandResize</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="bgr_resize.cu" />
|
||||
<CudaCompile Include="nv12_resize.cu" />
|
||||
<CudaCompile Include="nv12_to_bgr_planar.cu" />
|
||||
<ClCompile Include="resize_convert_main.cpp" />
|
||||
<CudaCompile Include="utils.cu" />
|
||||
<ClInclude Include="resize_convert.h" />
|
||||
<ClInclude Include="utils.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -62,7 +62,7 @@
|
|||
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/NV12toBGRandResize.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -108,6 +108,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Graphics Interop, Image Processing, Video Processing
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaMemcpy2D, cudaMallocManaged
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -274,9 +274,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -28,7 +28,7 @@ cudaMallocManaged, cudaStreamAttachMemAsync, cudaMemcpyAsync, cudaMallocHost, cu
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnifiedMemoryPerf", "UnifiedMemoryPerf_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UnifiedMemoryPerf", "UnifiedMemoryPerf_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,110 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>UnifiedMemoryPerf_vs2013</RootNamespace>
|
||||
<ProjectName>UnifiedMemoryPerf</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="commonKernels.cu" />
|
||||
<ClCompile Include="helperFunctions.cpp" />
|
||||
<CudaCompile Include="matrixMultiplyPerf.cu" />
|
||||
<ClInclude Include="commonDefs.hpp" />
|
||||
<ClInclude Include="commonKernels.hpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -62,7 +62,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -105,6 +105,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -110,6 +110,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/UnifiedMemoryPerf.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -106,6 +106,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
#define VERIFY_GPU_CORRECTNESS 0
|
||||
|
||||
size_t maxSampleSizeInMb = 64;
|
||||
int numKernelRuns = 100;
|
||||
int numKernelRuns = 20;
|
||||
int verboseResults = 0;
|
||||
|
||||
const char *memAllocTypeStr[MEMALLOC_TYPE_COUNT] = {
|
||||
|
|
|
@ -268,9 +268,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ CUDA Streams and Events, Performance Strategies
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaSetDevice, cudaHostAlloc, cudaFree, cudaMallocHost, cudaFreeHost, cudaMemcpy
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bandwidthTest", "bandwidthTest_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bandwidthTest", "bandwidthTest_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -62,7 +62,7 @@
|
|||
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/bandwidthTest.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
329
Samples/batchedLabelMarkersAndLabelCompressionNPP/Makefile
Normal file
329
Samples/batchedLabelMarkersAndLabelCompressionNPP/Makefile
Normal file
|
@ -0,0 +1,329 @@
|
|||
################################################################################
|
||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
|
||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||
ifeq ($(HOST_ARCH),aarch64)
|
||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux))
|
||||
HOST_ARCH := sbsa
|
||||
TARGET_ARCH := sbsa
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),sbsa)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||
LDFLAGS += -lsocket
|
||||
LDFLAGS += -rpath=/usr/lib/aarch64-qnx-gnu -L/usr/lib/aarch64-qnx-gnu
|
||||
ifneq ($(TARGET_FS),)
|
||||
LDFLAGS += -rpath=$(TARGET_FS)/usr/lib -L $(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath=$(TARGET_FS)/usr/libnvidia -L $(TARGET_FS)/usr/libnvidia
|
||||
endif
|
||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
# This sample is not supported on Mac OSX
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
$(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on Mac OSX - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
# Gencode arguments
|
||||
SMS ?=
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
ifeq ($(SMS),)
|
||||
# Generate PTX code from SM 35
|
||||
GENCODE_FLAGS += -gencode arch=compute_35,code=compute_35
|
||||
endif
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
LIBRARIES += -lnppisu_static -lnppif_static -lnppc_static -lculibos
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build: batchedLabelMarkersAndLabelCompressionNPP
|
||||
|
||||
check.deps:
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
@echo "Sample will be waived due to the above missing dependencies"
|
||||
else
|
||||
@echo "Sample is ready - all dependencies have been met"
|
||||
endif
|
||||
|
||||
batchedLabelMarkersAndLabelCompressionNPP.o:batchedLabelMarkersAndLabelCompressionNPP.cpp
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
batchedLabelMarkersAndLabelCompressionNPP: batchedLabelMarkersAndLabelCompressionNPP.o
|
||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
||||
$(EXEC) mkdir -p ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
$(EXEC) cp $@ ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
run: build
|
||||
$(EXEC) ./batchedLabelMarkersAndLabelCompressionNPP
|
||||
|
||||
clean:
|
||||
rm -f batchedLabelMarkersAndLabelCompressionNPP batchedLabelMarkersAndLabelCompressionNPP.o
|
||||
rm -rf ../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/batchedLabelMarkersAndLabelCompressionNPP
|
||||
|
||||
clobber: clean
|
|
@ -0,0 +1,76 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>batchedLabelMarkersAndLabelCompressionNPP</name>
|
||||
<description><![CDATA[An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both
|
||||
single image and batched image versions.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<fallback_min_ptx>true</fallback_min_ptx>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../common/inc</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="basic">Performance Strategies</concept>
|
||||
<concept level="basic">Image Processing</concept>
|
||||
<concept level="basic">NPP Library</concept>
|
||||
<concept level="basic">Using NPP Batch Functions</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>CUDA</keyword>
|
||||
<keyword>NPP</keyword>
|
||||
<keyword>Image Processing</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
<library>nppisu_static</library>
|
||||
<library>nppif_static</library>
|
||||
<library>nppc_static</library>
|
||||
<library>culibos</library>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>batchedLabelMarkersAndLabelCompressionNPP.cpp</primary_file>
|
||||
<required_dependencies>
|
||||
<dependency>NPP</dependency>
|
||||
</required_dependencies>
|
||||
<scopes>
|
||||
<scope>1:CUDA Basic Topics</scope>
|
||||
<scope>1:Performance Strategies</scope>
|
||||
<scope>2:Image Processing</scope>
|
||||
<scope>2:Computer Vision</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm35</sm-arch>
|
||||
<sm-arch>sm37</sm-arch>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<platform>windows7</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>arm</arch>
|
||||
</env>
|
||||
<env>
|
||||
<arch>ppc64le</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>Batched Label Markers And Label Compression NPP</title>
|
||||
<type>exe</type>
|
||||
</entry>
|
72
Samples/batchedLabelMarkersAndLabelCompressionNPP/README.md
Normal file
72
Samples/batchedLabelMarkersAndLabelCompressionNPP/README.md
Normal file
|
@ -0,0 +1,72 @@
|
|||
# batchedLabelMarkersAndLabelCompressionNPP - Batched Label Markers And Label Compression NPP
|
||||
|
||||
## Description
|
||||
|
||||
An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm, including both single-image and batched-image versions.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Linux, Windows
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
x86_64, ppc64le, armv7l
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[NPP](../../README.md#npp)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Windows
|
||||
The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
|
||||
```
|
||||
*_vs<version>.sln - for Visual Studio <version>
|
||||
```
|
||||
Each individual sample has its own set of solution files in its directory.
|
||||
|
||||
To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
|
||||
> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details.
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
The samples makefiles can take advantage of certain options:
|
||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
|
||||
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||
`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
|
||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=g++
|
||||
```
|
||||
|
||||
## References (for more details)
|
||||
|
|
@ -0,0 +1,798 @@
|
|||
/* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
||||
#define WINDOWS_LEAN_AND_MEAN
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#pragma warning(disable : 4819)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <fstream>
|
||||
|
||||
#include <helper_cuda.h>
|
||||
#include <npp.h>
|
||||
|
||||
// Note: If you want to view these images we HIGHLY recommend using imagej
|
||||
// which is free on the internet and works on most platforms
|
||||
// because it is one of the few image viewing apps that can display 32
|
||||
// bit integer image data. While it normalizes the data to floating
|
||||
// point values for viewing it still provides a good representation of
|
||||
// the relative brightness of each label value. Note that label
|
||||
// compression output results in smaller differences between label values
|
||||
// making it visually more difficult to detect differences in labeled
|
||||
// regions. If you have an editor that can display hex values you can
|
||||
// see what the exact value of each label is; every 4 bytes represent 1
|
||||
// 32 bit integer label value.
|
||||
//
|
||||
// The files read and written by this sample app use RAW image format,
|
||||
// that is, only the image data itself exists in the files with no image
|
||||
// format information. When viewing RAW files with imagej just enter
|
||||
// the image size and bit depth values that are part of the file name
|
||||
// when requested by imagej.
|
||||
//
|
||||
// This sample app works in 2 stages, first it processes all of the
|
||||
// images individually then it processes them all again in 1 batch using
|
||||
// the Batch_Advanced versions of the NPP batch functions which allow
|
||||
// each image to have its own ROI.  The 2 stages are completely
|
||||
// separable but in this sample the second stage takes advantage of some
|
||||
// of the data that has already been initialized.
|
||||
//
|
||||
// Note that there is a small amount of variability in the number of
|
||||
// unique label markers generated from one run to the next by the UF
|
||||
// algorithm.
|
||||
//
|
||||
// Performance of ALL NPP image batch functions is limited by the maximum
|
||||
// ROI height in the list of images.
|
||||
|
||||
// Batched label compression support is only available on NPP versions > 11.0,
|
||||
// comment out if using NPP 11.0
|
||||
#define USE_BATCHED_LABEL_COMPRESSION 1
|
||||
|
||||
#define NUMBER_OF_IMAGES 5
|
||||
|
||||
Npp8u *pInputImageDev[NUMBER_OF_IMAGES];
|
||||
Npp8u *pInputImageHost[NUMBER_OF_IMAGES];
|
||||
Npp8u *pUFGenerateLabelsScratchBufferDev[NUMBER_OF_IMAGES];
|
||||
Npp8u *pUFCompressedLabelsScratchBufferDev[NUMBER_OF_IMAGES];
|
||||
Npp32u *pUFLabelDev[NUMBER_OF_IMAGES];
|
||||
Npp32u *pUFLabelHost[NUMBER_OF_IMAGES];
|
||||
NppiImageDescriptor *pUFBatchSrcImageListDev = 0;
|
||||
NppiImageDescriptor *pUFBatchSrcDstImageListDev = 0;
|
||||
NppiImageDescriptor *pUFBatchSrcImageListHost = 0;
|
||||
NppiImageDescriptor *pUFBatchSrcDstImageListHost = 0;
|
||||
NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListDev =
|
||||
0; // from nppi_filtering_functions.h
|
||||
NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListHost = 0;
|
||||
Npp32u *pUFBatchPerImageCompressedCountListDev = 0;
|
||||
Npp32u *pUFBatchPerImageCompressedCountListHost = 0;
|
||||
|
||||
void tearDown() // Clean up and tear down
|
||||
{
|
||||
if (pUFBatchPerImageCompressedCountListDev != 0)
|
||||
cudaFree(pUFBatchPerImageCompressedCountListDev);
|
||||
if (pUFBatchSrcDstScratchBufferListDev != 0)
|
||||
cudaFree(pUFBatchSrcDstScratchBufferListDev);
|
||||
if (pUFBatchSrcDstImageListDev != 0) cudaFree(pUFBatchSrcDstImageListDev);
|
||||
if (pUFBatchSrcImageListDev != 0) cudaFree(pUFBatchSrcImageListDev);
|
||||
if (pUFBatchPerImageCompressedCountListHost != 0)
|
||||
free(pUFBatchPerImageCompressedCountListHost);
|
||||
if (pUFBatchSrcDstScratchBufferListHost != 0)
|
||||
free(pUFBatchSrcDstScratchBufferListHost);
|
||||
if (pUFBatchSrcDstImageListHost != 0) free(pUFBatchSrcDstImageListHost);
|
||||
if (pUFBatchSrcImageListHost != 0) free(pUFBatchSrcImageListHost);
|
||||
|
||||
for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
|
||||
if (pUFCompressedLabelsScratchBufferDev[j] != 0)
|
||||
cudaFree(pUFCompressedLabelsScratchBufferDev[j]);
|
||||
if (pUFGenerateLabelsScratchBufferDev[j] != 0)
|
||||
cudaFree(pUFGenerateLabelsScratchBufferDev[j]);
|
||||
if (pUFLabelDev[j] != 0) cudaFree(pUFLabelDev[j]);
|
||||
if (pInputImageDev[j] != 0) cudaFree(pInputImageDev[j]);
|
||||
if (pUFLabelHost[j] != 0) free(pUFLabelHost[j]);
|
||||
if (pInputImageHost[j] != 0) free(pInputImageHost[j]);
|
||||
}
|
||||
}
|
||||
|
||||
// Names of the raw 32-bit label images associated with this sample, one per
// input image (suffix 0 .. 4).  The dimensions embedded in each name match the
// corresponding input image.
//
// These are plain constant objects rather than const references bound to
// temporaries: same usage for callers, but no reliance on lifetime extension
// and no externally visible symbols.

// Label marker results, single-image path (as the identifiers indicate).
const std::string LabelMarkersOutputFile0 =
    "Lena_LabelMarkersUF_8Way_512x512_32u.raw";
const std::string LabelMarkersOutputFile1 =
    "CT_skull_LabelMarkersUF_8Way_512x512_32u.raw";
const std::string LabelMarkersOutputFile2 =
    "PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw";
const std::string LabelMarkersOutputFile3 =
    "PCB2_LabelMarkersUF_8Way_1024x683_32u.raw";
const std::string LabelMarkersOutputFile4 =
    "PCB_LabelMarkersUF_8Way_1280x720_32u.raw";

// Compressed label results, single-image path.
const std::string CompressedMarkerLabelsOutputFile0 =
    "Lena_CompressedMarkerLabelsUF_8Way_512x512_32u.raw";
const std::string CompressedMarkerLabelsOutputFile1 =
    "CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw";
const std::string CompressedMarkerLabelsOutputFile2 =
    "PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw";
const std::string CompressedMarkerLabelsOutputFile3 =
    "PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw";
const std::string CompressedMarkerLabelsOutputFile4 =
    "PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw";

// Label marker results, batched path.
const std::string LabelMarkersBatchOutputFile0 =
    "Lena_LabelMarkersUFBatch_8Way_512x512_32u.raw";
const std::string LabelMarkersBatchOutputFile1 =
    "CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw";
const std::string LabelMarkersBatchOutputFile2 =
    "PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw";
const std::string LabelMarkersBatchOutputFile3 =
    "PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw";
const std::string LabelMarkersBatchOutputFile4 =
    "PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw";

// Compressed label results, batched path.
const std::string CompressedMarkerLabelsBatchOutputFile0 =
    "Lena_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw";
const std::string CompressedMarkerLabelsBatchOutputFile1 =
    "CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw";
const std::string CompressedMarkerLabelsBatchOutputFile2 =
    "PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw";
const std::string CompressedMarkerLabelsBatchOutputFile3 =
    "PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw";
const std::string CompressedMarkerLabelsBatchOutputFile4 =
    "PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw";
|
||||
|
||||
// Loads one of the sample's raw 8-bit input images into a caller-provided
// buffer.
//
// Parameters:
//   pImage  - destination buffer; must be able to hold nWidth * nHeight bytes.
//   nWidth  - width the caller expects; must match the selected image.
//   nHeight - height the caller expects; must match the selected image.
//   nImage  - index (0 .. 4) selecting which input image to load.
//
// Returns 0 on success.  Returns -1 when nImage is not a known index (after
// printing "Input file load failed."), when the requested dimensions do not
// match the selected image, when pImage is NULL, when the file cannot be
// opened, or when fewer than nWidth * nHeight bytes could be read.
//
// If sdkFindFilePath() cannot locate the file, a message is printed and the
// process exits with EXIT_WAIVED.
int loadRaw8BitImage(Npp8u *pImage, int nWidth, int nHeight, int nImage) {
  // One entry per supported input image, in nImage order.
  struct RawImageInfo {
    const char *fileName;
    int width;
    int height;
  };
  static const RawImageInfo kInputImages[] = {
      {"lena_512x512_8u.raw", 512, 512},
      {"CT_skull_512x512_8u.raw", 512, 512},
      {"PCB_METAL_509x335_8u.raw", 509, 335},
      {"PCB2_1024x683_8u.raw", 1024, 683},
      {"PCB_1280x720_8u.raw", 1280, 720},
  };
  const int nKnownImages =
      static_cast<int>(sizeof(kInputImages) / sizeof(kInputImages[0]));

  if (nImage < 0 || nImage >= nKnownImages) {
    printf("Input file load failed.\n");
    return -1;
  }

  const RawImageInfo &info = kInputImages[nImage];
  if (nWidth != info.width || nHeight != info.height) return -1;

  // Reading into a null buffer would be undefined behaviour.
  if (pImage == NULL) return -1;

  // NOTE(review): sdkFindFilePath() may hand back a heap-allocated string that
  // the caller is expected to free - confirm against helper_string.h before
  // adding a free() here.
  const char *InputFile = sdkFindFilePath(info.fileName, ".");
  if (InputFile == NULL) {
    printf("%s file not found.. exiting\n", info.fileName);
    exit(EXIT_WAIVED);
  }

  FILE *bmpFile = fopen(InputFile, "rb");
  if (bmpFile == NULL) return -1;

  // The dimensions were validated against the table above, so both factors are
  // positive; doing the arithmetic in size_t avoids a signed/unsigned compare
  // with the fread() result.
  const size_t nExpected =
      static_cast<size_t>(nWidth) * static_cast<size_t>(nHeight);
  const size_t nRead = fread(pImage, 1, nExpected, bmpFile);
  fclose(bmpFile);

  if (nRead < nExpected) return -1;

  printf("Input file load succeeded.\n");

  return 0;
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int aGenerateLabelsScratchBufferSize[NUMBER_OF_IMAGES];
|
||||
int aCompressLabelsScratchBufferSize[NUMBER_OF_IMAGES];
|
||||
|
||||
int nCompressedLabelCount = 0;
|
||||
cudaError_t cudaError;
|
||||
NppStatus nppStatus;
|
||||
NppStreamContext nppStreamCtx;
|
||||
FILE *bmpFile;
|
||||
|
||||
for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
|
||||
pInputImageDev[j] = 0;
|
||||
pInputImageHost[j] = 0;
|
||||
pUFGenerateLabelsScratchBufferDev[j] = 0;
|
||||
pUFCompressedLabelsScratchBufferDev[j] = 0;
|
||||
pUFLabelDev[j] = 0;
|
||||
pUFLabelHost[j] = 0;
|
||||
}
|
||||
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever
|
||||
// your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess) {
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(
|
||||
&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor, nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(
|
||||
&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor, nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError =
|
||||
cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError =
|
||||
cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor =
|
||||
oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
NppiSize oSizeROI[NUMBER_OF_IMAGES];
|
||||
|
||||
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||
if (nImage == 0) {
|
||||
oSizeROI[nImage].width = 512;
|
||||
oSizeROI[nImage].height = 512;
|
||||
} else if (nImage == 1) {
|
||||
oSizeROI[nImage].width = 512;
|
||||
oSizeROI[nImage].height = 512;
|
||||
} else if (nImage == 2) {
|
||||
oSizeROI[nImage].width = 509;
|
||||
oSizeROI[nImage].height = 335;
|
||||
} else if (nImage == 3) {
|
||||
oSizeROI[nImage].width = 1024;
|
||||
oSizeROI[nImage].height = 683;
|
||||
} else if (nImage == 4) {
|
||||
oSizeROI[nImage].width = 1280;
|
||||
oSizeROI[nImage].height = 720;
|
||||
}
|
||||
|
||||
// NOTE: While using cudaMallocPitch() to allocate device memory for NPP can
|
||||
// significantly improve the performance of many NPP functions, for UF
|
||||
// function label markers generation or compression DO NOT USE
|
||||
// cudaMallocPitch(). Doing so could result in incorrect output.
|
||||
|
||||
cudaError = cudaMalloc(
|
||||
(void **)&pInputImageDev[nImage],
|
||||
oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
// For images processed with UF label markers functions ROI width and height
|
||||
// for label markers generation output AND marker compression functions MUST
|
||||
// be the same AND line pitch MUST be equal to ROI.width * sizeof(Npp32u).
|
||||
// Also the image pointer used for label markers generation output must
|
||||
// start at the same position in the image as it does in the marker
|
||||
// compression function. Also note that actual input image size and ROI do
|
||||
// not necessarily need to be related other than ROI being less than or
|
||||
// equal to image size and image starting position does not necessarily have
|
||||
// to be at pixel 0 in the input image.
|
||||
|
||||
cudaError = cudaMalloc(
|
||||
(void **)&pUFLabelDev[nImage],
|
||||
oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
pInputImageHost[nImage] = reinterpret_cast<Npp8u *>(malloc(
|
||||
oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height));
|
||||
pUFLabelHost[nImage] = reinterpret_cast<Npp32u *>(malloc(
|
||||
oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height));
|
||||
|
||||
// Use UF functions throughout this sample.
|
||||
|
||||
nppStatus = nppiLabelMarkersUFGetBufferSize_32u_C1R(
|
||||
oSizeROI[nImage], &aGenerateLabelsScratchBufferSize[nImage]);
|
||||
|
||||
// One at a time image processing
|
||||
|
||||
cudaError = cudaMalloc((void **)&pUFGenerateLabelsScratchBufferDev[nImage],
|
||||
aGenerateLabelsScratchBufferSize[nImage]);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
if (loadRaw8BitImage(pInputImageHost[nImage],
|
||||
oSizeROI[nImage].width * sizeof(Npp8u),
|
||||
oSizeROI[nImage].height, nImage) == 0) {
|
||||
cudaError = cudaMemcpy2DAsync(
|
||||
pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
|
||||
pInputImageHost[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
|
||||
oSizeROI[nImage].width * sizeof(Npp8u), oSizeROI[nImage].height,
|
||||
cudaMemcpyHostToDevice, nppStreamCtx.hStream);
|
||||
|
||||
nppStatus = nppiLabelMarkersUF_8u32u_C1R_Ctx(
|
||||
pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
|
||||
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
oSizeROI[nImage], nppiNormInf,
|
||||
pUFGenerateLabelsScratchBufferDev[nImage], nppStreamCtx);
|
||||
|
||||
if (nppStatus != NPP_SUCCESS) {
|
||||
if (nImage == 0)
|
||||
printf("Lena_LabelMarkersUF_8Way_512x512_32u failed.\n");
|
||||
else if (nImage == 1)
|
||||
printf("CT_skull_LabelMarkersUF_8Way_512x512_32u failed.\n");
|
||||
else if (nImage == 2)
|
||||
printf("PCB_METAL_LabelMarkersUF_8Way_509x335_32u failed.\n");
|
||||
else if (nImage == 3)
|
||||
printf("PCB2_LabelMarkersUF_8Way_1024x683_32u failed.\n");
|
||||
else if (nImage == 4)
|
||||
printf("PCB_LabelMarkersUF_8Way_1280x720_32u failed.\n");
|
||||
tearDown();
|
||||
return -1;
|
||||
}
|
||||
|
||||
cudaError = cudaMemcpy2DAsync(
|
||||
pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
|
||||
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||
|
||||
// Wait host image read backs to complete, not necessary if no need to
|
||||
// synchronize
|
||||
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
|
||||
cudaSuccess) {
|
||||
printf("Post label generation cudaStreamSynchronize failed\n");
|
||||
tearDown();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nImage == 0)
|
||||
bmpFile = fopen(LabelMarkersOutputFile0.c_str(), "wb");
|
||||
else if (nImage == 1)
|
||||
bmpFile = fopen(LabelMarkersOutputFile1.c_str(), "wb");
|
||||
else if (nImage == 2)
|
||||
bmpFile = fopen(LabelMarkersOutputFile2.c_str(), "wb");
|
||||
else if (nImage == 3)
|
||||
bmpFile = fopen(LabelMarkersOutputFile3.c_str(), "wb");
|
||||
else if (nImage == 4)
|
||||
bmpFile = fopen(LabelMarkersOutputFile4.c_str(), "wb");
|
||||
|
||||
if (bmpFile == NULL) return -1;
|
||||
size_t nSize = 0;
|
||||
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
||||
nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
|
||||
sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
|
||||
}
|
||||
fclose(bmpFile);
|
||||
|
||||
nppStatus = nppiCompressMarkerLabelsGetBufferSize_32u_C1R(
|
||||
oSizeROI[nImage].width * oSizeROI[nImage].height,
|
||||
&aCompressLabelsScratchBufferSize[nImage]);
|
||||
if (nppStatus != NPP_NO_ERROR) return nppStatus;
|
||||
|
||||
cudaError =
|
||||
cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[nImage],
|
||||
aCompressLabelsScratchBufferSize[nImage]);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
nCompressedLabelCount = 0;
|
||||
|
||||
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(
|
||||
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
oSizeROI[nImage], oSizeROI[nImage].width * oSizeROI[nImage].height,
|
||||
&nCompressedLabelCount, pUFCompressedLabelsScratchBufferDev[nImage]);
|
||||
|
||||
if (nppStatus != NPP_SUCCESS) {
|
||||
if (nImage == 0)
|
||||
printf("Lena_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
|
||||
else if (nImage == 1)
|
||||
printf(
|
||||
"CT_Skull_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
|
||||
else if (nImage == 2)
|
||||
printf(
|
||||
"PCB_METAL_CompressedLabelMarkersUF_8Way_509x335_32u failed.\n");
|
||||
else if (nImage == 3)
|
||||
printf("PCB2_CompressedLabelMarkersUF_8Way_1024x683_32u failed.\n");
|
||||
else if (nImage == 4)
|
||||
printf("PCB_CompressedLabelMarkersUF_8Way_1280x720_32u failed.\n");
|
||||
tearDown();
|
||||
return -1;
|
||||
}
|
||||
|
||||
cudaError = cudaMemcpy2DAsync(
|
||||
pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
|
||||
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||
|
||||
// Wait for host image read backs to finish, not necessary if no need to
|
||||
// synchronize
|
||||
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
|
||||
cudaSuccess ||
|
||||
nCompressedLabelCount == 0) {
|
||||
printf("Post label compression cudaStreamSynchronize failed\n");
|
||||
tearDown();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nImage == 0)
|
||||
bmpFile = fopen(CompressedMarkerLabelsOutputFile0.c_str(), "wb");
|
||||
else if (nImage == 1)
|
||||
bmpFile = fopen(CompressedMarkerLabelsOutputFile1.c_str(), "wb");
|
||||
else if (nImage == 2)
|
||||
bmpFile = fopen(CompressedMarkerLabelsOutputFile2.c_str(), "wb");
|
||||
else if (nImage == 3)
|
||||
bmpFile = fopen(CompressedMarkerLabelsOutputFile3.c_str(), "wb");
|
||||
else if (nImage == 4)
|
||||
bmpFile = fopen(CompressedMarkerLabelsOutputFile4.c_str(), "wb");
|
||||
|
||||
if (bmpFile == NULL) return -1;
|
||||
nSize = 0;
|
||||
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
||||
nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
|
||||
sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
|
||||
}
|
||||
fclose(bmpFile);
|
||||
|
||||
if (nImage == 0)
|
||||
printf(
|
||||
"Lena_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
nCompressedLabelCount);
|
||||
else if (nImage == 1)
|
||||
printf(
|
||||
"CT_Skull_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
nCompressedLabelCount);
|
||||
else if (nImage == 2)
|
||||
printf(
|
||||
"PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
nCompressedLabelCount);
|
||||
else if (nImage == 3)
|
||||
printf(
|
||||
"PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
nCompressedLabelCount);
|
||||
else if (nImage == 4)
|
||||
printf(
|
||||
"PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
nCompressedLabelCount);
|
||||
}
|
||||
}
|
||||
|
||||
// Batch image processing
|
||||
|
||||
// We want to allocate scratch buffers more efficiently for batch processing
|
||||
// so first we free up the scratch buffers for image 0 and reallocate them.
|
||||
// This is not required but helps cudaMalloc to work more efficiently.
|
||||
|
||||
cudaFree(pUFCompressedLabelsScratchBufferDev[0]);
|
||||
|
||||
int nTotalBatchedUFCompressLabelsScratchBufferDevSize = 0;
|
||||
|
||||
for (int k = 0; k < NUMBER_OF_IMAGES; k++)
|
||||
nTotalBatchedUFCompressLabelsScratchBufferDevSize +=
|
||||
aCompressLabelsScratchBufferSize[k];
|
||||
|
||||
cudaError = cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[0],
|
||||
nTotalBatchedUFCompressLabelsScratchBufferDevSize);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
// Now allocate batch lists
|
||||
|
||||
int nBatchImageListBytes = NUMBER_OF_IMAGES * sizeof(NppiImageDescriptor);
|
||||
|
||||
cudaError =
|
||||
cudaMalloc((void **)&pUFBatchSrcImageListDev, nBatchImageListBytes);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
cudaError =
|
||||
cudaMalloc((void **)&pUFBatchSrcDstImageListDev, nBatchImageListBytes);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
pUFBatchSrcImageListHost =
|
||||
reinterpret_cast<NppiImageDescriptor *>(malloc(nBatchImageListBytes));
|
||||
pUFBatchSrcDstImageListHost =
|
||||
reinterpret_cast<NppiImageDescriptor *>(malloc(nBatchImageListBytes));
|
||||
|
||||
NppiSize oMaxROISize = {0, 0};
|
||||
|
||||
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||
pUFBatchSrcImageListHost[nImage].pData = pInputImageDev[nImage];
|
||||
pUFBatchSrcImageListHost[nImage].nStep =
|
||||
oSizeROI[nImage].width * sizeof(Npp8u);
|
||||
// src image oSize parameter is ignored in these NPP functions
|
||||
pUFBatchSrcDstImageListHost[nImage].pData = pUFLabelDev[nImage];
|
||||
pUFBatchSrcDstImageListHost[nImage].nStep =
|
||||
oSizeROI[nImage].width * sizeof(Npp32u);
|
||||
pUFBatchSrcDstImageListHost[nImage].oSize = oSizeROI[nImage];
|
||||
if (oSizeROI[nImage].width > oMaxROISize.width)
|
||||
oMaxROISize.width = oSizeROI[nImage].width;
|
||||
if (oSizeROI[nImage].height > oMaxROISize.height)
|
||||
oMaxROISize.height = oSizeROI[nImage].height;
|
||||
}
|
||||
|
||||
// Copy label generation batch lists from CPU to GPU
|
||||
cudaError = cudaMemcpyAsync(pUFBatchSrcImageListDev, pUFBatchSrcImageListHost,
|
||||
nBatchImageListBytes, cudaMemcpyHostToDevice,
|
||||
nppStreamCtx.hStream);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
|
||||
|
||||
cudaError = cudaMemcpyAsync(pUFBatchSrcDstImageListDev,
|
||||
pUFBatchSrcDstImageListHost, nBatchImageListBytes,
|
||||
cudaMemcpyHostToDevice, nppStreamCtx.hStream);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
|
||||
|
||||
// We use 8-way neighbor search throughout this example
|
||||
nppStatus = nppiLabelMarkersUFBatch_8u32u_C1R_Advanced_Ctx(
|
||||
pUFBatchSrcImageListDev, pUFBatchSrcDstImageListDev, NUMBER_OF_IMAGES,
|
||||
oMaxROISize, nppiNormInf, nppStreamCtx);
|
||||
|
||||
if (nppStatus != NPP_SUCCESS) {
|
||||
printf("LabelMarkersUFBatch_8Way_8u32u failed.\n");
|
||||
tearDown();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Now read back generated device images to the host
|
||||
|
||||
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||
cudaError = cudaMemcpy2DAsync(
|
||||
pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
|
||||
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||
}
|
||||
|
||||
// Wait for host image read backs to complete, not necessary if no need to
|
||||
// synchronize
|
||||
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
|
||||
cudaSuccess) {
|
||||
printf("Post label generation cudaStreamSynchronize failed\n");
|
||||
tearDown();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Save output to files
|
||||
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||
if (nImage == 0)
|
||||
bmpFile = fopen(LabelMarkersBatchOutputFile0.c_str(), "wb");
|
||||
else if (nImage == 1)
|
||||
bmpFile = fopen(LabelMarkersBatchOutputFile1.c_str(), "wb");
|
||||
else if (nImage == 2)
|
||||
bmpFile = fopen(LabelMarkersBatchOutputFile2.c_str(), "wb");
|
||||
else if (nImage == 3)
|
||||
bmpFile = fopen(LabelMarkersBatchOutputFile3.c_str(), "wb");
|
||||
else if (nImage == 4)
|
||||
bmpFile = fopen(LabelMarkersBatchOutputFile4.c_str(), "wb");
|
||||
|
||||
if (bmpFile == NULL) return -1;
|
||||
size_t nSize = 0;
|
||||
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
||||
nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
|
||||
sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
|
||||
}
|
||||
fclose(bmpFile);
|
||||
}
|
||||
|
||||
#ifdef USE_BATCHED_LABEL_COMPRESSION
|
||||
|
||||
// Now allocate scratch buffer memory for batched label compression
|
||||
cudaError = cudaMalloc((void **)&pUFBatchSrcDstScratchBufferListDev,
|
||||
NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor));
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
cudaError = cudaMalloc((void **)&pUFBatchPerImageCompressedCountListDev,
|
||||
NUMBER_OF_IMAGES * sizeof(Npp32u));
|
||||
if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
|
||||
|
||||
// Allocate host side scratch buffer point and size list and initialize with
|
||||
// device scratch buffer pointers
|
||||
pUFBatchSrcDstScratchBufferListHost =
|
||||
reinterpret_cast<NppiBufferDescriptor *>(
|
||||
malloc(NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor)));
|
||||
|
||||
pUFBatchPerImageCompressedCountListHost =
|
||||
reinterpret_cast<Npp32u *>(malloc(NUMBER_OF_IMAGES * sizeof(Npp32u)));
|
||||
|
||||
// Start buffer pointer at beginning of full per image buffer list sized
|
||||
// pUFCompressedLabelsScratchBufferDev[0]
|
||||
Npp32u *pCurUFCompressedLabelsScratchBufferDev =
|
||||
reinterpret_cast<Npp32u *>(pUFCompressedLabelsScratchBufferDev[0]);
|
||||
|
||||
int nMaxUFCompressedLabelsScratchBufferSize = 0;
|
||||
|
||||
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||
// This particular function works on in-place data and SrcDst image batch
|
||||
// list has already been initialized in batched label generation function
|
||||
// setup
|
||||
|
||||
// Initialize each per image buffer descriptor
|
||||
pUFBatchSrcDstScratchBufferListHost[nImage].pData =
|
||||
reinterpret_cast<void *>(pCurUFCompressedLabelsScratchBufferDev);
|
||||
pUFBatchSrcDstScratchBufferListHost[nImage].nBufferSize =
|
||||
aCompressLabelsScratchBufferSize[nImage];
|
||||
|
||||
if (aCompressLabelsScratchBufferSize[nImage] >
|
||||
nMaxUFCompressedLabelsScratchBufferSize)
|
||||
nMaxUFCompressedLabelsScratchBufferSize =
|
||||
aCompressLabelsScratchBufferSize[nImage];
|
||||
|
||||
// Offset buffer pointer to next per image buffer
|
||||
Npp8u *pTempBuffer =
|
||||
reinterpret_cast<Npp8u *>(pCurUFCompressedLabelsScratchBufferDev);
|
||||
pTempBuffer += aCompressLabelsScratchBufferSize[nImage];
|
||||
pCurUFCompressedLabelsScratchBufferDev =
|
||||
reinterpret_cast<Npp32u *>((void *)(pTempBuffer));
|
||||
}
|
||||
|
||||
// Copy compression batch scratch buffer list from CPU to GPU
|
||||
cudaError = cudaMemcpyAsync(pUFBatchSrcDstScratchBufferListDev,
|
||||
pUFBatchSrcDstScratchBufferListHost,
|
||||
NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor),
|
||||
cudaMemcpyHostToDevice, nppStreamCtx.hStream);
|
||||
if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
|
||||
|
||||
nppStatus = nppiCompressMarkerLabelsUFBatch_32u_C1IR_Advanced_Ctx(
|
||||
pUFBatchSrcDstImageListDev, pUFBatchSrcDstScratchBufferListDev,
|
||||
pUFBatchPerImageCompressedCountListDev, NUMBER_OF_IMAGES, oMaxROISize,
|
||||
nMaxUFCompressedLabelsScratchBufferSize, nppStreamCtx);
|
||||
if (nppStatus != NPP_SUCCESS) {
|
||||
printf("BatchCompressedLabelMarkersUF_8Way_32u failed.\n");
|
||||
tearDown();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Copy output compressed label images back to host
|
||||
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||
cudaError = cudaMemcpy2DAsync(
|
||||
pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
|
||||
oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
|
||||
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||
}
|
||||
|
||||
// Wait for host image read backs to complete, not necessary if no need to
|
||||
// synchronize
|
||||
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
|
||||
cudaSuccess) {
|
||||
printf("Post label compression cudaStreamSynchronize failed\n");
|
||||
tearDown();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Save compressed label images into files
|
||||
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||
if (nImage == 0)
|
||||
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile0.c_str(), "wb");
|
||||
else if (nImage == 1)
|
||||
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile1.c_str(), "wb");
|
||||
else if (nImage == 2)
|
||||
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile2.c_str(), "wb");
|
||||
else if (nImage == 3)
|
||||
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile3.c_str(), "wb");
|
||||
else if (nImage == 4)
|
||||
bmpFile = fopen(CompressedMarkerLabelsBatchOutputFile4.c_str(), "wb");
|
||||
|
||||
if (bmpFile == NULL) return -1;
|
||||
size_t nSize = 0;
|
||||
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
||||
nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
|
||||
sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
|
||||
}
|
||||
fclose(bmpFile);
|
||||
}
|
||||
|
||||
// Read back per image compressed label count.
|
||||
cudaError = cudaMemcpyAsync(pUFBatchPerImageCompressedCountListHost,
|
||||
pUFBatchPerImageCompressedCountListDev,
|
||||
NUMBER_OF_IMAGES * sizeof(Npp32u),
|
||||
cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
|
||||
if (cudaError != cudaSuccess) {
|
||||
tearDown();
|
||||
return NPP_MEMCPY_ERROR;
|
||||
}
|
||||
|
||||
// Wait for host read back to complete
|
||||
cudaError = cudaStreamSynchronize(nppStreamCtx.hStream);
|
||||
|
||||
printf("\n\n");
|
||||
|
||||
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
||||
if (nImage == 0)
|
||||
printf(
|
||||
"Lena_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||
else if (nImage == 1)
|
||||
printf(
|
||||
"CT_Skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||
else if (nImage == 2)
|
||||
printf(
|
||||
"PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||
else if (nImage == 3)
|
||||
printf(
|
||||
"PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||
else if (nImage == 4)
|
||||
printf(
|
||||
"PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u succeeded, "
|
||||
"compressed label count is %d.\n",
|
||||
pUFBatchPerImageCompressedCountListHost[nImage]);
|
||||
}
|
||||
|
||||
#endif // USE_BATCHED_LABEL_COMPRESSION
|
||||
|
||||
tearDown();
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrixMul", "matrixMul_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
Microsoft Visual Studio Solution File, Format Version 14.00
|
||||
# Visual Studio 2015
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "batchedLabelMarkersAndLabelCompressionNPP", "batchedLabelMarkersAndLabelCompressionNPP_vs2015.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
@ -15,15 +15,15 @@
|
|||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>MersenneTwisterGP11213_vs2012</RootNamespace>
|
||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
||||
<RootNamespace>batchedLabelMarkersAndLabelCompressionNPP_vs2015</RootNamespace>
|
||||
<ProjectName>batchedLabelMarkersAndLabelCompressionNPP</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
<PlatformToolset>v140</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -57,9 +57,9 @@
|
|||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
||||
<OutputFile>$(OutDir)/batchedLabelMarkersAndLabelCompressionNPP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
||||
|
@ -97,11 +97,11 @@
|
|||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="MersenneTwister.cpp" />
|
||||
<ClCompile Include="batchedLabelMarkersAndLabelCompressionNPP.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "matrixMul", "matrixMul_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
# Visual Studio 2017
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "batchedLabelMarkersAndLabelCompressionNPP", "batchedLabelMarkersAndLabelCompressionNPP_vs2017.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
@ -15,15 +15,20 @@
|
|||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>cannyEdgeDetectorNPP_vs2012</RootNamespace>
|
||||
<ProjectName>cannyEdgeDetectorNPP</ProjectName>
|
||||
<RootNamespace>batchedLabelMarkersAndLabelCompressionNPP_vs2017</RootNamespace>
|
||||
<ProjectName>batchedLabelMarkersAndLabelCompressionNPP</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(WindowsTargetPlatformVersion)'==''">
|
||||
<LatestTargetPlatformVersion>$([Microsoft.Build.Utilities.ToolLocationHelper]::GetLatestSDKTargetPlatformVersion('Windows', '10.0'))</LatestTargetPlatformVersion>
|
||||
<WindowsTargetPlatformVersion Condition="'$(WindowsTargetPlatformVersion)' == ''">$(LatestTargetPlatformVersion)</WindowsTargetPlatformVersion>
|
||||
<TargetPlatformVersion>$(WindowsTargetPlatformVersion)</TargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
<PlatformToolset>v141</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
|
@ -33,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -53,13 +58,13 @@
|
|||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);../../Common/UtilNPP;../../Common/FreeImage/include;</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);../../Common/FreeImage/lib/x64;../../Common/FreeImage/lib/$(TARGET_OS);../../Common/FreeImage/lib/$(TARGET_OS)/$(TARGET_ARCH);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/cannyEdgeDetectorNPP.exe</OutputFile>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/batchedLabelMarkersAndLabelCompressionNPP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
||||
|
@ -96,22 +101,12 @@
|
|||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
|
||||
<Link>
|
||||
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
|
||||
<Link>
|
||||
<AdditionalDependencies>freeimage.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="cannyEdgeDetectorNPP.cpp" />
|
||||
<ClCompile Include="batchedLabelMarkersAndLabelCompressionNPP.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "reduction", "reduction_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
# Visual Studio 2019
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "batchedLabelMarkersAndLabelCompressionNPP", "batchedLabelMarkersAndLabelCompressionNPP_vs2019.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
|
@ -15,15 +15,16 @@
|
|||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>MersenneTwisterGP11213_vs2013</RootNamespace>
|
||||
<ProjectName>MersenneTwisterGP11213</ProjectName>
|
||||
<RootNamespace>batchedLabelMarkersAndLabelCompressionNPP_vs2019</RootNamespace>
|
||||
<ProjectName>batchedLabelMarkersAndLabelCompressionNPP</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
|
@ -33,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -57,9 +58,9 @@
|
|||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>curand.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/MersenneTwisterGP11213.exe</OutputFile>
|
||||
<OutputFile>$(OutDir)/batchedLabelMarkersAndLabelCompressionNPP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
||||
|
@ -97,11 +98,11 @@
|
|||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="MersenneTwister.cpp" />
|
||||
<ClCompile Include="batchedLabelMarkersAndLabelCompressionNPP.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -306,7 +306,7 @@ ifeq ($(TARGET_OS),linux)
|
|||
endif
|
||||
|
||||
# Gencode arguments
|
||||
SMS ?= 80
|
||||
SMS ?= 80 86
|
||||
|
||||
ifeq ($(SMS),)
|
||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
<scope>1:CUDA Basic Topics</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Matrix Multiply, WMMA, Tensor Cores
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ cudaMallocManaged, cudaDeviceSynchronize, cudaFuncSetAttribute, cudaEventCreate,
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -62,7 +62,7 @@
|
|||
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/bf16TensorCoreGemm.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -301,9 +301,9 @@ endif
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -40,6 +40,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Cooperative Groups
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -24,7 +24,7 @@ x86_64, ppc64le, armv7l
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -62,7 +62,7 @@
|
|||
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/binaryPartitionCG.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -59,6 +59,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "boxFilterNPP", "boxFilterNPP_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "boxFilterNPP", "boxFilterNPP_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,117 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>boxFilterNPP_vs2013</RootNamespace>
|
||||
<ProjectName>boxFilterNPP</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);../../Common/UtilNPP;../../Common/FreeImage/include;</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);../../Common/FreeImage/lib/x64;../../Common/FreeImage/lib/$(TARGET_OS);../../Common/FreeImage/lib/$(TARGET_OS)/$(TARGET_ARCH);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/boxFilterNPP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
|
||||
<Link>
|
||||
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
|
||||
<Link>
|
||||
<AdditionalDependencies>freeimage.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="boxFilterNPP.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -117,6 +117,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -113,6 +113,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -55,6 +55,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies, Image Processing, NPP Library
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -27,7 +27,7 @@ x86_64, ppc64le, armv7l
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cannyEdgeDetectorNPP", "cannyEdgeDetectorNPP_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cannyEdgeDetectorNPP", "cannyEdgeDetectorNPP_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,117 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>cannyEdgeDetectorNPP_vs2013</RootNamespace>
|
||||
<ProjectName>cannyEdgeDetectorNPP</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);../../Common/UtilNPP;../../Common/FreeImage/include;</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>nppisu.lib;nppif.lib;nppc.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);../../Common/FreeImage/lib/x64;../../Common/FreeImage/lib/$(TARGET_OS);../../Common/FreeImage/lib/$(TARGET_OS)/$(TARGET_ARCH);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/cannyEdgeDetectorNPP.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,compute_35;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
|
||||
<Link>
|
||||
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
|
||||
<Link>
|
||||
<AdditionalDependencies>freeimage.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="cannyEdgeDetectorNPP.cpp" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -112,6 +112,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -117,6 +117,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -113,6 +113,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -268,9 +268,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Performance Strategies
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -24,7 +24,7 @@ x86_64, ppc64le, armv7l
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
|
||||
## Build and Run
|
||||
|
||||
|
|
|
@ -163,7 +163,7 @@ int main(int argc, char **argv) {
|
|||
#if defined(__arm__) || defined(__aarch64__)
|
||||
// the kernel takes more time than the channel reset time on arm archs, so to
|
||||
// prevent hangs reduce time_clocks.
|
||||
clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 1000));
|
||||
clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 100));
|
||||
#else
|
||||
clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate);
|
||||
#endif
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "concurrentKernels", "concurrentKernels_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "concurrentKernels", "concurrentKernels_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -62,7 +62,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -67,7 +67,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -107,6 +107,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -63,7 +63,7 @@
|
|||
<OutputFile>$(OutDir)/concurrentKernels.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -103,6 +103,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
|
@ -268,9 +268,9 @@ LIBRARIES :=
|
|||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64))
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 72 75 80 86
|
||||
else
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80
|
||||
SMS ?= 35 37 50 52 60 61 70 75 80 86
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
|
|
|
@ -58,6 +58,7 @@
|
|||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
|
|
|
@ -10,7 +10,7 @@ Linear Algebra, CUBLAS Library, CUSPARSE Library
|
|||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
[SM 3.5 ](https://developer.nvidia.com/cuda-gpus) [SM 3.7 ](https://developer.nvidia.com/cuda-gpus) [SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
|
@ -30,7 +30,7 @@ cudaStreamBeginCapture, cudaStreamEndCapture, cudaGraphCreate, cudaGraphLaunch,
|
|||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 11.0](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Download and install the [CUDA Toolkit 11.1](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 2012
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "conjugateGradientCudaGraphs", "conjugateGradientCudaGraphs_vs2012.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,107 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>conjugateGradientCudaGraphs_vs2012</RootNamespace>
|
||||
<ProjectName>conjugateGradientCudaGraphs</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v110</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cublas.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="conjugateGradientCudaGraphs.cu" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -1,20 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 13.00
|
||||
# Visual Studio 2013
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "conjugateGradientCudaGraphs", "conjugateGradientCudaGraphs_vs2013.vcxproj", "{997E0757-EA74-4A4E-A0FC-47D8C8831A15}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Debug|x64.Build.0 = Debug|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.ActiveCfg = Release|x64
|
||||
{997E0757-EA74-4A4E-A0FC-47D8C8831A15}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,107 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<CUDAPropsPath Condition="'$(CUDAPropsPath)'==''">$(VCTargetsPath)\BuildCustomizations</CUDAPropsPath>
|
||||
</PropertyGroup>
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{997E0757-EA74-4A4E-A0FC-47D8C8831A15}</ProjectGuid>
|
||||
<RootNamespace>conjugateGradientCudaGraphs_vs2013</RootNamespace>
|
||||
<ProjectName>conjugateGradientCudaGraphs</ProjectName>
|
||||
<CudaToolkitCustomDir />
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup>
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
<PlatformToolset>v120</PlatformToolset>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)'=='Release'">
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)/$(Configuration)/</IntDir>
|
||||
<IncludePath>$(IncludePath)</IncludePath>
|
||||
<CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
|
||||
<CodeAnalysisRules />
|
||||
<CodeAnalysisRuleAssemblies />
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)'=='x64'">
|
||||
<OutDir>../../bin/win64/$(Configuration)/</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PreprocessorDefinitions>WIN32;_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>./;$(CudaToolkitDir)/include;../../Common;$(CudaToolkitIncludeDir);</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<AdditionalDependencies>cublas.lib;cusparse.lib;cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalLibraryDirectories>$(CudaToolkitLibDir);</AdditionalLibraryDirectories>
|
||||
<OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MTd</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<GenerateDebugInformation>false</GenerateDebugInformation>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<Runtime>MT</Runtime>
|
||||
<TargetMachinePlatform>64</TargetMachinePlatform>
|
||||
</CudaCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="conjugateGradientCudaGraphs.cu" />
|
||||
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
|
@ -33,7 +33,7 @@
|
|||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.props" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.props" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets">
|
||||
<Import Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" />
|
||||
|
@ -62,7 +62,7 @@
|
|||
<OutputFile>$(OutDir)/conjugateGradientCudaGraphs.exe</OutputFile>
|
||||
</Link>
|
||||
<CudaCompile>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;</CodeGeneration>
|
||||
<CodeGeneration>compute_35,sm_35;compute_37,sm_37;compute_50,sm_50;compute_52,sm_52;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75;compute_80,sm_80;compute_86,sm_86;</CodeGeneration>
|
||||
<AdditionalOptions>-Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
|
||||
<Include>./;../../Common</Include>
|
||||
<Defines>WIN32</Defines>
|
||||
|
@ -102,6 +102,6 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.0.targets" />
|
||||
<Import Project="$(CUDAPropsPath)\CUDA 11.1.targets" />
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user