mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-07-01 20:20:29 +08:00
Bug 5034785: Update all non-ctx nppi APIs to ctx APIs as per latest change on NPP
This commit is contained in:
parent
3e8f91d1a1
commit
a9869fd6ea
@ -74,26 +74,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -104,11 +84,50 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
cudaDeviceReset();
|
||||
exit(EXIT_SUCCESS);
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
char *filePath;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
@ -186,11 +205,11 @@ int main(int argc, char *argv[]) {
|
||||
npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height);
|
||||
|
||||
// run Prewitt edge detection gradient vector filter
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
|
||||
// allocate device destination images of appropriatedly size
|
||||
npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height);
|
||||
@ -198,13 +217,13 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), 32, oDeviceDstOutX.data(),
|
||||
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
|
||||
oDeviceDstY.data(), oDeviceDstY.pitch(), 32, oDeviceDstOutY.data(),
|
||||
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
|
||||
|
||||
// create host images for the results
|
||||
npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size());
|
||||
@ -234,10 +253,10 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// copy and enlarge the original device source image and surround it with a
|
||||
// white edge (border)
|
||||
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize,
|
||||
oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize,
|
||||
oMaskSize.width / 2, oMaskSize.height / 2, 255));
|
||||
oMaskSize.width / 2, oMaskSize.height / 2, 255, nppStreamCtx));
|
||||
|
||||
// adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of
|
||||
// the original source image in the enlarged source image
|
||||
@ -261,23 +280,25 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// run Prewitt edge detection gradient vector filter bypassing border
|
||||
// control due to enlarged source image
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXNoBorders.data(),
|
||||
oDeviceDstOutXNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXNoBorders.data(),
|
||||
oDeviceDstOutXNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYNoBorders.data(),
|
||||
oDeviceDstOutYNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYNoBorders.data(),
|
||||
oDeviceDstOutYNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
// create additional output files
|
||||
std::string sResultXNoBordersFilename = sResultBaseFilename;
|
||||
std::string sResultYNoBordersFilename = sResultBaseFilename;
|
||||
@ -305,15 +326,17 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// diff the two 8u_C1 result images one with and one without border control
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
|
||||
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
|
||||
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
|
||||
// create additional output files
|
||||
std::string sResultXDiffFilename = sResultBaseFilename;
|
||||
@ -380,11 +403,11 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// run Prewitt edge detection gradient vector filter to generate the left
|
||||
// side of the output image
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
|
||||
// now move the enlarged source pointer to the horizontal middle of the
|
||||
// enlarged source image and tell the function where it was moved to
|
||||
@ -401,23 +424,26 @@ int main(int argc, char *argv[]) {
|
||||
// run Prewitt edge detection gradient vector filter to generate the right
|
||||
// side of the output image adjusting the destination image pointers
|
||||
// appropriately
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(),
|
||||
oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(), 0, 0, 0, 0,
|
||||
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE,
|
||||
nppStreamCtx));
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXMixedBorders.data(),
|
||||
oDeviceDstOutXMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXMixedBorders.data(),
|
||||
oDeviceDstOutXMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYMixedBorders.data(),
|
||||
oDeviceDstOutYMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYMixedBorders.data(),
|
||||
oDeviceDstOutYMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
// create additional output files
|
||||
std::string sResultXMixedBordersFilename = sResultBaseFilename;
|
||||
std::string sResultYMixedBordersFilename = sResultBaseFilename;
|
||||
@ -439,15 +465,17 @@ int main(int argc, char *argv[]) {
|
||||
// diff the original 8u_C1 result images with border control and the mixed
|
||||
// border control images, they should match (diff image will be all black)
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
|
||||
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
|
||||
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
|
||||
// create additional output files
|
||||
std::string sResultXMixedDiffFilename = sResultBaseFilename;
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -48,39 +48,56 @@
|
||||
#include <helper_string.h>
|
||||
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
try {
|
||||
std::string sFilename;
|
||||
char *filePath;
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
findCudaDevice(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -154,10 +171,10 @@ int main(int argc, char *argv[]) {
|
||||
NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
|
||||
|
||||
// run box filter
|
||||
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor,
|
||||
NPP_BORDER_REPLICATE));
|
||||
NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
|
||||
// declare a host image for the result
|
||||
npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());
|
||||
|
BIN
Samples/4_CUDA_Libraries/boxFilterNPP/teapot512_boxFilter.pgm
Normal file
BIN
Samples/4_CUDA_Libraries/boxFilterNPP/teapot512_boxFilter.pgm
Normal file
Binary file not shown.
@ -68,26 +68,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -97,10 +77,50 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -190,11 +210,11 @@ int main(int argc, char *argv[]) {
|
||||
Npp16s nHighThreshold = 256;
|
||||
|
||||
if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) {
|
||||
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL,
|
||||
NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2,
|
||||
NPP_BORDER_REPLICATE, pScratchBufferNPP));
|
||||
NPP_BORDER_REPLICATE, pScratchBufferNPP, nppStreamCtx));
|
||||
}
|
||||
|
||||
// free scratch buffer memory
|
||||
|
Binary file not shown.
@ -70,25 +70,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[], int cudaVerMajor, int cudaVerMinor) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
// Error handler for FreeImage library.
|
||||
// In case this handler is invoked, it throws an NPP exception.
|
||||
extern "C" void FreeImageErrorHandler(FREE_IMAGE_FORMAT oFif,
|
||||
@ -157,11 +138,50 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
if (printfNPPinfo(argc, argv, 1, 0) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -260,9 +280,9 @@ int main(int argc, char *argv[]) {
|
||||
Npp8u *pDstImageCUDA =
|
||||
nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA);
|
||||
NPP_ASSERT_NOT_NULL(pDstImageCUDA);
|
||||
NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA,
|
||||
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
|
||||
oMaskSize, oMaskAchnor));
|
||||
NPP_CHECK_NPP(nppiFilterBox_8u_C1R_Ctx(pSrcImageCUDA, nSrcPitchCUDA,
|
||||
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
|
||||
oMaskSize, oMaskAchnor, nppStreamCtx));
|
||||
// create the result image storage using FreeImage so we can easily
|
||||
// save
|
||||
FIBITMAP *pResultBitmap = FreeImage_Allocate(
|
||||
|
@ -73,26 +73,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.1 devices
|
||||
bool bVal = checkCudaCapabilities(1, 1);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -102,10 +82,50 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -182,8 +202,9 @@ int main(int argc, char *argv[]) {
|
||||
(int)oDeviceSrc.height()}; // full image
|
||||
// create device scratch buffer for nppiHistogram
|
||||
size_t nDeviceBufferSize;
|
||||
nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount,
|
||||
&nDeviceBufferSize);
|
||||
nppiHistogramEvenGetBufferSize_8u_C1R_Ctx(oSizeROI, levelCount,
|
||||
&nDeviceBufferSize,
|
||||
nppStreamCtx);
|
||||
Npp8u *pDeviceBuffer;
|
||||
NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize));
|
||||
|
||||
@ -191,9 +212,9 @@ int main(int argc, char *argv[]) {
|
||||
Npp32s levelsHost[levelCount];
|
||||
NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount));
|
||||
// compute the histogram
|
||||
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount,
|
||||
0, binCount, pDeviceBuffer));
|
||||
0, binCount, pDeviceBuffer, nppStreamCtx));
|
||||
// copy histogram and levels to host memory
|
||||
Npp32s histHost[binCount];
|
||||
NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s),
|
||||
@ -254,20 +275,22 @@ int main(int argc, char *argv[]) {
|
||||
sizeof(Npp32s) * (levelCount),
|
||||
cudaMemcpyHostToDevice));
|
||||
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
||||
oDeviceDst.pitch(), oSizeROI,
|
||||
lutDevice, // value and level arrays are in GPU device memory
|
||||
lvlsDevice, levelCount));
|
||||
lutDevice, // value and level arrays are in host memory
|
||||
lvlsDevice, levelCount,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_CUDA(cudaFree(lutDevice));
|
||||
NPP_CHECK_CUDA(cudaFree(lvlsDevice));
|
||||
#else
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
||||
oDeviceDst.pitch(), oSizeROI,
|
||||
lutHost, // value and level arrays are in host memory
|
||||
levelsHost, levelCount));
|
||||
levelsHost, levelCount,
|
||||
nppStreamCtx));
|
||||
#endif
|
||||
|
||||
// copy the result image back into the storage that contained the
|
||||
|
@ -328,9 +328,10 @@ main( int argc, char** argv )
|
||||
|
||||
int nCompressedLabelCount = 0;
|
||||
|
||||
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
|
||||
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
|
||||
pCompressedLabelsScratchBufferDev);
|
||||
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR_Ctx(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
|
||||
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
|
||||
pCompressedLabelsScratchBufferDev,
|
||||
nppStreamCtx);
|
||||
|
||||
if (nppStatus != NPP_SUCCESS)
|
||||
{
|
||||
@ -539,6 +540,3 @@ main( int argc, char** argv )
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user