diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp index d57bdd9b..e79eb73b 100644 --- a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp +++ b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/FilterBorderControlNPP.cpp @@ -74,6 +74,26 @@ inline int cudaDeviceInit(int argc, const char **argv) { return dev; } +bool printfNPPinfo(int argc, char *argv[]) { + const NppLibraryVersion *libVer = nppGetLibVersion(); + + printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, + libVer->build); + + int driverVersion, runtimeVersion; + cudaDriverGetVersion(&driverVersion); + cudaRuntimeGetVersion(&runtimeVersion); + + printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, + (driverVersion % 100) / 10); + printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, + (runtimeVersion % 100) / 10); + + // Min spec is SM 1.0 devices + bool bVal = checkCudaCapabilities(1, 0); + return bVal; +} + int main(int argc, char *argv[]) { printf("%s Starting...\n\n", argv[0]); @@ -84,50 +104,11 @@ int main(int argc, char *argv[]) { cudaDeviceInit(argc, (const char **)argv); - NppStreamContext nppStreamCtx; - nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream. - - cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - { - printf("CUDA error: no devices supporting CUDA.\n"); - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; + if (printfNPPinfo(argc, argv) == false) { + cudaDeviceReset(); + exit(EXIT_SUCCESS); } - const NppLibraryVersion *libVer = nppGetLibVersion(); - - printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); - - int driverVersion, runtimeVersion; - cudaDriverGetVersion(&driverVersion); - cudaRuntimeGetVersion(&runtimeVersion); - - printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10); - printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10); - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, - cudaDevAttrComputeCapabilityMajor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, - cudaDevAttrComputeCapabilityMinor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags); - - cudaDeviceProp oDeviceProperties; - - cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId); - - nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount; - nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor; - nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock; - nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock; - char *filePath; if (checkCmdLineFlag(argc, (const char **)argv, "input")) { @@ -205,11 +186,11 @@ int main(int argc, char *argv[]) { npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height); // run Prewitt edge detection gradient vector filter - NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx( + NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R( oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset, oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(), oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3, - nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx)); + nppiNormL1, NPP_BORDER_REPLICATE)); // allocate device destination images of appropriatedly size npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height); @@ -217,13 +198,13 @@ int main(int argc, char *argv[]) { // convert 16s_C1 result images to binary 8u_C1 output images using constant // value to adjust amount of visible detail - NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx( + NPP_CHECK_NPP(nppiCompareC_16s_C1R( oDeviceDstX.data(), oDeviceDstX.pitch(), 32, oDeviceDstOutX.data(), - oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx)); + oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ)); - NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx( + NPP_CHECK_NPP(nppiCompareC_16s_C1R( oDeviceDstY.data(), oDeviceDstY.pitch(), 32, oDeviceDstOutY.data(), - oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx)); + oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ)); // create host images for the results npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size()); @@ -253,10 +234,10 @@ int main(int argc, char *argv[]) { // copy and enlarge the original device source image and surround it with a // white edge (border) - NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R( oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, - oMaskSize.width / 2, oMaskSize.height / 2, 255, nppStreamCtx)); + oMaskSize.width / 2, oMaskSize.height / 2, 255)); // adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of // the original source image in the enlarged source image @@ -280,25 +261,23 @@ int main(int argc, char *argv[]) { // run Prewitt edge detection gradient vector filter bypassing border // control due to enlarged source image - NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx( + NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R( pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset, oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(), oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3, - nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx)); + nppiNormL1, NPP_BORDER_REPLICATE)); // convert 16s_C1 result images to binary 8u_C1 output images using constant // value to adjust amount of visible detail - NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(), - 32, oDeviceDstOutXNoBorders.data(), - oDeviceDstOutXNoBorders.pitch(), - oSizeROI, NPP_CMP_GREATER_EQ, - nppStreamCtx)); + NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(), + 32, oDeviceDstOutXNoBorders.data(), + oDeviceDstOutXNoBorders.pitch(), + oSizeROI, NPP_CMP_GREATER_EQ)); - NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(), - 32, oDeviceDstOutYNoBorders.data(), - oDeviceDstOutYNoBorders.pitch(), - oSizeROI, NPP_CMP_GREATER_EQ, - nppStreamCtx)); + NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(), + 32, oDeviceDstOutYNoBorders.data(), + oDeviceDstOutYNoBorders.pitch(), + oSizeROI, NPP_CMP_GREATER_EQ)); // create additional output files std::string sResultXNoBordersFilename = sResultBaseFilename; std::string sResultYNoBordersFilename = sResultBaseFilename; @@ -326,17 +305,15 @@ int main(int argc, char *argv[]) { // diff the two 8u_C1 result images one with and one without border control - NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiAbsDiff_8u_C1R( oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(), oDeviceDstOutX.data(), oDeviceDstOutX.pitch(), - oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI, - nppStreamCtx)); + oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI)); - NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiAbsDiff_8u_C1R( oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(), oDeviceDstOutY.data(), oDeviceDstOutY.pitch(), - oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI, - nppStreamCtx)); + oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI)); // create additional output files std::string sResultXDiffFilename = sResultBaseFilename; @@ -403,11 +380,11 @@ int main(int argc, char *argv[]) { // run Prewitt edge detection gradient vector filter to generate the left // side of the output image - NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx( + NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R( pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset, oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(), oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3, - nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx)); + nppiNormL1, NPP_BORDER_REPLICATE)); // now move the enlarged source pointer to the horizontal middle of the // enlarged source image and tell the function where it was moved to @@ -424,26 +401,23 @@ int main(int argc, char *argv[]) { // run Prewitt edge detection gradient vector filter to generate the right // side of the output image adjusting the destination image pointers // appropriately - NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx( + NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R( pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset, oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(), oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(), 0, 0, 0, 0, - oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE, - nppStreamCtx)); + oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE)); // convert 16s_C1 result images to binary 8u_C1 output images using constant // value to adjust amount of visible detail - NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(), - 32, oDeviceDstOutXMixedBorders.data(), - oDeviceDstOutXMixedBorders.pitch(), - oSizeROI, NPP_CMP_GREATER_EQ, - nppStreamCtx)); + NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(), + 32, oDeviceDstOutXMixedBorders.data(), + oDeviceDstOutXMixedBorders.pitch(), + oSizeROI, NPP_CMP_GREATER_EQ)); - NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(), - 32, oDeviceDstOutYMixedBorders.data(), - oDeviceDstOutYMixedBorders.pitch(), - oSizeROI, NPP_CMP_GREATER_EQ, - nppStreamCtx)); + NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(), + 32, oDeviceDstOutYMixedBorders.data(), + oDeviceDstOutYMixedBorders.pitch(), + oSizeROI, NPP_CMP_GREATER_EQ)); // create additional output files std::string sResultXMixedBordersFilename = sResultBaseFilename; std::string sResultYMixedBordersFilename = sResultBaseFilename; @@ -465,17 +439,15 @@ int main(int argc, char *argv[]) { // diff the original 8u_C1 result images with border control and the mixed // border control images, they should match (diff image will be all black) - NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiAbsDiff_8u_C1R( oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(), oDeviceDstOutX.data(), oDeviceDstOutX.pitch(), - oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI, - nppStreamCtx)); + oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI)); - NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiAbsDiff_8u_C1R( oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(), oDeviceDstOutY.data(), oDeviceDstOutY.pitch(), - oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI, - nppStreamCtx)); + oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI)); // create additional output files std::string sResultXMixedDiffFilename = sResultBaseFilename; diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512.pgm_gradientVectorPrewittBorderX_Vertical.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512.pgm_gradientVectorPrewittBorderX_Vertical.pgm deleted file mode 100644 index cb16736e..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512.pgm_gradientVectorPrewittBorderX_Vertical.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_BorderDiffs.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_BorderDiffs.pgm deleted file mode 100644 index 693a9af6..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_BorderDiffs.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_MixedBorderDiffs.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_MixedBorderDiffs.pgm deleted file mode 100644 index 1099979c..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_MixedBorderDiffs.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_WithMixedBorders.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_WithMixedBorders.pgm deleted file mode 100644 index d9963cc9..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_WithMixedBorders.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_WithNoSourceBorders.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_WithNoSourceBorders.pgm deleted file mode 100644 index b191e108..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderX_Vertical_WithNoSourceBorders.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal.pgm deleted file mode 100644 index dff35323..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_BorderDiffs.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_BorderDiffs.pgm deleted file mode 100644 index c7ac2fd7..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_BorderDiffs.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_MixedBorderDiffs.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_MixedBorderDiffs.pgm deleted file mode 100644 index 30d47c98..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_MixedBorderDiffs.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_WithMixedBorders.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_WithMixedBorders.pgm deleted file mode 100644 index 34e88fe7..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_WithMixedBorders.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_WithNoSourceBorders.pgm b/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_WithNoSourceBorders.pgm deleted file mode 100644 index 5037464d..00000000 Binary files a/Samples/4_CUDA_Libraries/FilterBorderControlNPP/data/teapot512_gradientVectorPrewittBorderY_Horizontal_WithNoSourceBorders.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp index 10cef4af..3d177ce8 100644 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp +++ b/Samples/4_CUDA_Libraries/boxFilterNPP/boxFilterNPP.cpp @@ -48,56 +48,39 @@ #include +bool printfNPPinfo(int argc, char *argv[]) { + const NppLibraryVersion *libVer = nppGetLibVersion(); + + printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, + libVer->build); + + int driverVersion, runtimeVersion; + cudaDriverGetVersion(&driverVersion); + cudaRuntimeGetVersion(&runtimeVersion); + + printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, + (driverVersion % 100) / 10); + printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, + (runtimeVersion % 100) / 10); + + // Min spec is SM 1.0 devices + bool bVal = checkCudaCapabilities(1, 0); + return bVal; +} + int main(int argc, char *argv[]) { printf("%s Starting...\n\n", argv[0]); try { std::string sFilename; char *filePath; - NppStreamContext nppStreamCtx; - nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream. - cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - { - printf("CUDA error: no devices supporting CUDA.\n"); - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; + findCudaDevice(argc, (const char **)argv); + + if (printfNPPinfo(argc, argv) == false) { + exit(EXIT_SUCCESS); } - const NppLibraryVersion *libVer = nppGetLibVersion(); - - printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); - - int driverVersion, runtimeVersion; - cudaDriverGetVersion(&driverVersion); - cudaRuntimeGetVersion(&runtimeVersion); - - printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10); - printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10); - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, - cudaDevAttrComputeCapabilityMajor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, - cudaDevAttrComputeCapabilityMinor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags); - - cudaDeviceProp oDeviceProperties; - - cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId); - - nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount; - nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor; - nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock; - nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock; - if (checkCmdLineFlag(argc, (const char **)argv, "input")) { getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath); } else { @@ -171,10 +154,10 @@ int main(int argc, char *argv[]) { NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2}; // run box filter - NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R( oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset, oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor, - NPP_BORDER_REPLICATE, nppStreamCtx)); + NPP_BORDER_REPLICATE)); // declare a host image for the result npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size()); diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/teapot512_boxFilter.pgm b/Samples/4_CUDA_Libraries/boxFilterNPP/teapot512_boxFilter.pgm deleted file mode 100644 index a681196d..00000000 Binary files a/Samples/4_CUDA_Libraries/boxFilterNPP/teapot512_boxFilter.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp index 4a8ae7f7..e823e0b6 100644 --- a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp +++ b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/cannyEdgeDetectorNPP.cpp @@ -68,6 +68,26 @@ inline int cudaDeviceInit(int argc, const char **argv) { return dev; } +bool printfNPPinfo(int argc, char *argv[]) { + const NppLibraryVersion *libVer = nppGetLibVersion(); + + printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, + libVer->build); + + int driverVersion, runtimeVersion; + cudaDriverGetVersion(&driverVersion); + cudaRuntimeGetVersion(&runtimeVersion); + + printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, + (driverVersion % 100) / 10); + printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, + (runtimeVersion % 100) / 10); + + // Min spec is SM 1.0 devices + bool bVal = checkCudaCapabilities(1, 0); + return bVal; +} + int main(int argc, char *argv[]) { printf("%s Starting...\n\n", argv[0]); @@ -77,50 +97,10 @@ int main(int argc, char *argv[]) { cudaDeviceInit(argc, (const char **)argv); - NppStreamContext nppStreamCtx; - nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream. - - cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - { - printf("CUDA error: no devices supporting CUDA.\n"); - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; + if (printfNPPinfo(argc, argv) == false) { + exit(EXIT_SUCCESS); } - const NppLibraryVersion *libVer = nppGetLibVersion(); - - printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); - - int driverVersion, runtimeVersion; - cudaDriverGetVersion(&driverVersion); - cudaRuntimeGetVersion(&runtimeVersion); - - printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10); - printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10); - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, - cudaDevAttrComputeCapabilityMajor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, - cudaDevAttrComputeCapabilityMinor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags); - - cudaDeviceProp oDeviceProperties; - - cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId); - - nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount; - nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor; - nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock; - nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock; - if (checkCmdLineFlag(argc, (const char **)argv, "input")) { getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath); } else { @@ -210,11 +190,11 @@ int main(int argc, char *argv[]) { Npp16s nHighThreshold = 256; if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) { - NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R( oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset, oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL, NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2, - NPP_BORDER_REPLICATE, pScratchBufferNPP, nppStreamCtx)); + NPP_BORDER_REPLICATE, pScratchBufferNPP)); } // free scratch buffer memory diff --git a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/teapot512_cannyEdgeDetection.pgm b/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/teapot512_cannyEdgeDetection.pgm deleted file mode 100644 index 84b090d9..00000000 Binary files a/Samples/4_CUDA_Libraries/cannyEdgeDetectorNPP/teapot512_cannyEdgeDetection.pgm and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp index 43aad888..f657f3b7 100644 --- a/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp +++ b/Samples/4_CUDA_Libraries/freeImageInteropNPP/freeImageInteropNPP.cpp @@ -70,6 +70,25 @@ inline int cudaDeviceInit(int argc, const char **argv) { return dev; } +bool printfNPPinfo(int argc, char *argv[], int cudaVerMajor, int cudaVerMinor) { + const NppLibraryVersion *libVer = nppGetLibVersion(); + + printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, + libVer->build); + + int driverVersion, runtimeVersion; + cudaDriverGetVersion(&driverVersion); + cudaRuntimeGetVersion(&runtimeVersion); + + printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, + (driverVersion % 100) / 10); + printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, + (runtimeVersion % 100) / 10); + + bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor); + return bVal; +} + // Error handler for FreeImage library. // In case this handler is invoked, it throws an NPP exception. extern "C" void FreeImageErrorHandler(FREE_IMAGE_FORMAT oFif, @@ -138,50 +157,11 @@ int main(int argc, char *argv[]) { cudaDeviceInit(argc, (const char **)argv); - NppStreamContext nppStreamCtx; - nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream. - - cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - { - printf("CUDA error: no devices supporting CUDA.\n"); - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; + // Min spec is SM 1.0 devices + if (printfNPPinfo(argc, argv, 1, 0) == false) { + exit(EXIT_SUCCESS); } - const NppLibraryVersion *libVer = nppGetLibVersion(); - - printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); - - int driverVersion, runtimeVersion; - cudaDriverGetVersion(&driverVersion); - cudaRuntimeGetVersion(&runtimeVersion); - - printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10); - printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10); - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, - cudaDevAttrComputeCapabilityMajor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, - cudaDevAttrComputeCapabilityMinor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags); - - cudaDeviceProp oDeviceProperties; - - cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId); - - nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount; - nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor; - nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock; - nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock; - if (checkCmdLineFlag(argc, (const char **)argv, "input")) { getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath); } else { @@ -280,9 +260,9 @@ int main(int argc, char *argv[]) { Npp8u *pDstImageCUDA = nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA); NPP_ASSERT_NOT_NULL(pDstImageCUDA); - NPP_CHECK_NPP(nppiFilterBox_8u_C1R_Ctx(pSrcImageCUDA, nSrcPitchCUDA, - pDstImageCUDA, nDstPitchCUDA, oSizeROI, - oMaskSize, oMaskAchnor, nppStreamCtx)); + NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA, + pDstImageCUDA, nDstPitchCUDA, oSizeROI, + oMaskSize, oMaskAchnor)); // create the result image storage using FreeImage so we can easily // save FIBITMAP *pResultBitmap = FreeImage_Allocate( diff --git a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp index 1869feb7..ae897b27 100644 --- a/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp +++ b/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp @@ -73,6 +73,26 @@ inline int cudaDeviceInit(int argc, const char **argv) { return dev; } +bool printfNPPinfo(int argc, char *argv[]) { + const NppLibraryVersion *libVer = nppGetLibVersion(); + + printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, + libVer->build); + + int driverVersion, runtimeVersion; + cudaDriverGetVersion(&driverVersion); + cudaRuntimeGetVersion(&runtimeVersion); + + printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000, + (driverVersion % 100) / 10); + printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000, + (runtimeVersion % 100) / 10); + + // Min spec is SM 1.1 devices + bool bVal = checkCudaCapabilities(1, 1); + return bVal; +} + int main(int argc, char *argv[]) { printf("%s Starting...\n\n", argv[0]); @@ -82,50 +102,10 @@ int main(int argc, char *argv[]) { cudaDeviceInit(argc, (const char **)argv); - NppStreamContext nppStreamCtx; - nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream. - - cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - { - printf("CUDA error: no devices supporting CUDA.\n"); - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; + if (printfNPPinfo(argc, argv) == false) { + exit(EXIT_SUCCESS); } - const NppLibraryVersion *libVer = nppGetLibVersion(); - - printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build); - - int driverVersion, runtimeVersion; - cudaDriverGetVersion(&driverVersion); - cudaRuntimeGetVersion(&runtimeVersion); - - printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10); - printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10); - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, - cudaDevAttrComputeCapabilityMajor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, - cudaDevAttrComputeCapabilityMinor, - nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags); - - cudaDeviceProp oDeviceProperties; - - cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId); - - nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount; - nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor; - nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock; - nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock; - if (checkCmdLineFlag(argc, (const char **)argv, "input")) { getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath); } else { @@ -202,9 +182,8 @@ int main(int argc, char *argv[]) { (int)oDeviceSrc.height()}; // full image // create device scratch buffer for nppiHistogram size_t nDeviceBufferSize; - nppiHistogramEvenGetBufferSize_8u_C1R_Ctx(oSizeROI, levelCount, - &nDeviceBufferSize, - nppStreamCtx); + nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount, + &nDeviceBufferSize); Npp8u *pDeviceBuffer; NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize)); @@ -212,9 +191,9 @@ int main(int argc, char *argv[]) { Npp32s levelsHost[levelCount]; NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount)); // compute the histogram - NPP_CHECK_NPP(nppiHistogramEven_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiHistogramEven_8u_C1R( oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount, - 0, binCount, pDeviceBuffer, nppStreamCtx)); + 0, binCount, pDeviceBuffer)); // copy histogram and levels to host memory Npp32s histHost[binCount]; NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s), @@ -275,22 +254,20 @@ int main(int argc, char *argv[]) { sizeof(Npp32s) * (levelCount), cudaMemcpyHostToDevice)); - NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R( oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, - lutDevice, // value and level arrays are in host memory - lvlsDevice, levelCount, - nppStreamCtx)); + lutDevice, // value and level arrays are in GPU device memory + lvlsDevice, levelCount)); NPP_CHECK_CUDA(cudaFree(lutDevice)); NPP_CHECK_CUDA(cudaFree(lvlsDevice)); #else - NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx( + NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R( oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, lutHost, // value and level arrays are in host memory - levelsHost, levelCount, - nppStreamCtx)); + levelsHost, levelCount)); #endif // copy the result image back into the storage that contained the diff --git a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp index 825cea6f..e6800ab2 100644 --- a/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp +++ b/Samples/4_CUDA_Libraries/watershedSegmentationNPP/watershedSegmentationNPP.cpp @@ -328,10 +328,9 @@ main( int argc, char** argv ) int nCompressedLabelCount = 0; - nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR_Ctx(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage], - oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount, - pCompressedLabelsScratchBufferDev, - nppStreamCtx); + nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage], + oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount, + pCompressedLabelsScratchBufferDev); if (nppStatus != NPP_SUCCESS) { @@ -540,3 +539,6 @@ main( int argc, char** argv ) return 0; } + + +