Revert "Bug 5034785: Update all non-ctx nppi APIs to ctx APIs as per latest change on NPP"

This reverts commit a9869fd6eaeecc748fc5f10f4b331fa41efbdaca
This commit is contained in:
Shawn Zeng 2025-02-27 02:48:03 -08:00
parent a9869fd6ea
commit acd3a015c8
18 changed files with 173 additions and 279 deletions

View File

@ -74,6 +74,26 @@ inline int cudaDeviceInit(int argc, const char **argv) {
return dev;
}
bool printfNPPinfo(int argc, char *argv[]) {
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
(driverVersion % 100) / 10);
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
(runtimeVersion % 100) / 10);
// Min spec is SM 1.0 devices
bool bVal = checkCudaCapabilities(1, 0);
return bVal;
}
int main(int argc, char *argv[]) {
printf("%s Starting...\n\n", argv[0]);
@ -84,50 +104,11 @@ int main(int argc, char *argv[]) {
cudaDeviceInit(argc, (const char **)argv);
NppStreamContext nppStreamCtx;
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
{
printf("CUDA error: no devices supporting CUDA.\n");
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
if (printfNPPinfo(argc, argv) == false) {
cudaDeviceReset();
exit(EXIT_SUCCESS);
}
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
cudaDevAttrComputeCapabilityMajor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
cudaDevAttrComputeCapabilityMinor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
cudaDeviceProp oDeviceProperties;
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
char *filePath;
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
@ -205,11 +186,11 @@ int main(int argc, char *argv[]) {
npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height);
// run Prewitt edge detection gradient vector filter
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
nppiNormL1, NPP_BORDER_REPLICATE));
// allocate device destination images of appropriatedly size
npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height);
@ -217,13 +198,13 @@ int main(int argc, char *argv[]) {
// convert 16s_C1 result images to binary 8u_C1 output images using constant
// value to adjust amount of visible detail
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
oDeviceDstX.data(), oDeviceDstX.pitch(), 32, oDeviceDstOutX.data(),
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
oDeviceDstY.data(), oDeviceDstY.pitch(), 32, oDeviceDstOutY.data(),
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
// create host images for the results
npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size());
@ -253,10 +234,10 @@ int main(int argc, char *argv[]) {
// copy and enlarge the original device source image and surround it with a
// white edge (border)
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R(
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize,
oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize,
oMaskSize.width / 2, oMaskSize.height / 2, 255, nppStreamCtx));
oMaskSize.width / 2, oMaskSize.height / 2, 255));
// adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of
// the original source image in the enlarged source image
@ -280,25 +261,23 @@ int main(int argc, char *argv[]) {
// run Prewitt edge detection gradient vector filter bypassing border
// control due to enlarged source image
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
nppiNormL1, NPP_BORDER_REPLICATE));
// convert 16s_C1 result images to binary 8u_C1 output images using constant
// value to adjust amount of visible detail
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
32, oDeviceDstOutXNoBorders.data(),
oDeviceDstOutXNoBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ,
nppStreamCtx));
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
32, oDeviceDstOutXNoBorders.data(),
oDeviceDstOutXNoBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
32, oDeviceDstOutYNoBorders.data(),
oDeviceDstOutYNoBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ,
nppStreamCtx));
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
32, oDeviceDstOutYNoBorders.data(),
oDeviceDstOutYNoBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
// create additional output files
std::string sResultXNoBordersFilename = sResultBaseFilename;
std::string sResultYNoBordersFilename = sResultBaseFilename;
@ -326,17 +305,15 @@ int main(int argc, char *argv[]) {
// diff the two 8u_C1 result images one with and one without border control
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
nppStreamCtx));
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
nppStreamCtx));
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
// create additional output files
std::string sResultXDiffFilename = sResultBaseFilename;
@ -403,11 +380,11 @@ int main(int argc, char *argv[]) {
// run Prewitt edge detection gradient vector filter to generate the left
// side of the output image
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
nppiNormL1, NPP_BORDER_REPLICATE));
// now move the enlarged source pointer to the horizontal middle of the
// enlarged source image and tell the function where it was moved to
@ -424,26 +401,23 @@ int main(int argc, char *argv[]) {
// run Prewitt edge detection gradient vector filter to generate the right
// side of the output image adjusting the destination image pointers
// appropriately
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(),
oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(), 0, 0, 0, 0,
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE,
nppStreamCtx));
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
// convert 16s_C1 result images to binary 8u_C1 output images using constant
// value to adjust amount of visible detail
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
32, oDeviceDstOutXMixedBorders.data(),
oDeviceDstOutXMixedBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ,
nppStreamCtx));
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
32, oDeviceDstOutXMixedBorders.data(),
oDeviceDstOutXMixedBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
32, oDeviceDstOutYMixedBorders.data(),
oDeviceDstOutYMixedBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ,
nppStreamCtx));
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
32, oDeviceDstOutYMixedBorders.data(),
oDeviceDstOutYMixedBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
// create additional output files
std::string sResultXMixedBordersFilename = sResultBaseFilename;
std::string sResultYMixedBordersFilename = sResultBaseFilename;
@ -465,17 +439,15 @@ int main(int argc, char *argv[]) {
// diff the original 8u_C1 result images with border control and the mixed
// border control images, they should match (diff image will be all black)
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
nppStreamCtx));
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
nppStreamCtx));
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
// create additional output files
std::string sResultXMixedDiffFilename = sResultBaseFilename;

View File

@ -48,56 +48,39 @@
#include <helper_string.h>
bool printfNPPinfo(int argc, char *argv[]) {
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
(driverVersion % 100) / 10);
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
(runtimeVersion % 100) / 10);
// Min spec is SM 1.0 devices
bool bVal = checkCudaCapabilities(1, 0);
return bVal;
}
int main(int argc, char *argv[]) {
printf("%s Starting...\n\n", argv[0]);
try {
std::string sFilename;
char *filePath;
NppStreamContext nppStreamCtx;
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
{
printf("CUDA error: no devices supporting CUDA.\n");
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
findCudaDevice(argc, (const char **)argv);
if (printfNPPinfo(argc, argv) == false) {
exit(EXIT_SUCCESS);
}
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
cudaDevAttrComputeCapabilityMajor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
cudaDevAttrComputeCapabilityMinor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
cudaDeviceProp oDeviceProperties;
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
} else {
@ -171,10 +154,10 @@ int main(int argc, char *argv[]) {
NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
// run box filter
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R(
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor,
NPP_BORDER_REPLICATE, nppStreamCtx));
NPP_BORDER_REPLICATE));
// declare a host image for the result
npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());

View File

@ -68,6 +68,26 @@ inline int cudaDeviceInit(int argc, const char **argv) {
return dev;
}
bool printfNPPinfo(int argc, char *argv[]) {
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
(driverVersion % 100) / 10);
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
(runtimeVersion % 100) / 10);
// Min spec is SM 1.0 devices
bool bVal = checkCudaCapabilities(1, 0);
return bVal;
}
int main(int argc, char *argv[]) {
printf("%s Starting...\n\n", argv[0]);
@ -77,50 +97,10 @@ int main(int argc, char *argv[]) {
cudaDeviceInit(argc, (const char **)argv);
NppStreamContext nppStreamCtx;
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
{
printf("CUDA error: no devices supporting CUDA.\n");
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
if (printfNPPinfo(argc, argv) == false) {
exit(EXIT_SUCCESS);
}
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
cudaDevAttrComputeCapabilityMajor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
cudaDevAttrComputeCapabilityMinor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
cudaDeviceProp oDeviceProperties;
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
} else {
@ -210,11 +190,11 @@ int main(int argc, char *argv[]) {
Npp16s nHighThreshold = 256;
if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) {
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R(
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL,
NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2,
NPP_BORDER_REPLICATE, pScratchBufferNPP, nppStreamCtx));
NPP_BORDER_REPLICATE, pScratchBufferNPP));
}
// free scratch buffer memory

View File

@ -70,6 +70,25 @@ inline int cudaDeviceInit(int argc, const char **argv) {
return dev;
}
bool printfNPPinfo(int argc, char *argv[], int cudaVerMajor, int cudaVerMinor) {
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
(driverVersion % 100) / 10);
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
(runtimeVersion % 100) / 10);
bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
return bVal;
}
// Error handler for FreeImage library.
// In case this handler is invoked, it throws an NPP exception.
extern "C" void FreeImageErrorHandler(FREE_IMAGE_FORMAT oFif,
@ -138,50 +157,11 @@ int main(int argc, char *argv[]) {
cudaDeviceInit(argc, (const char **)argv);
NppStreamContext nppStreamCtx;
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
{
printf("CUDA error: no devices supporting CUDA.\n");
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
// Min spec is SM 1.0 devices
if (printfNPPinfo(argc, argv, 1, 0) == false) {
exit(EXIT_SUCCESS);
}
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
cudaDevAttrComputeCapabilityMajor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
cudaDevAttrComputeCapabilityMinor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
cudaDeviceProp oDeviceProperties;
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
} else {
@ -280,9 +260,9 @@ int main(int argc, char *argv[]) {
Npp8u *pDstImageCUDA =
nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA);
NPP_ASSERT_NOT_NULL(pDstImageCUDA);
NPP_CHECK_NPP(nppiFilterBox_8u_C1R_Ctx(pSrcImageCUDA, nSrcPitchCUDA,
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
oMaskSize, oMaskAchnor, nppStreamCtx));
NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA,
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
oMaskSize, oMaskAchnor));
// create the result image storage using FreeImage so we can easily
// save
FIBITMAP *pResultBitmap = FreeImage_Allocate(

View File

@ -73,6 +73,26 @@ inline int cudaDeviceInit(int argc, const char **argv) {
return dev;
}
bool printfNPPinfo(int argc, char *argv[]) {
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
(driverVersion % 100) / 10);
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
(runtimeVersion % 100) / 10);
// Min spec is SM 1.1 devices
bool bVal = checkCudaCapabilities(1, 1);
return bVal;
}
int main(int argc, char *argv[]) {
printf("%s Starting...\n\n", argv[0]);
@ -82,50 +102,10 @@ int main(int argc, char *argv[]) {
cudaDeviceInit(argc, (const char **)argv);
NppStreamContext nppStreamCtx;
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
{
printf("CUDA error: no devices supporting CUDA.\n");
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
if (printfNPPinfo(argc, argv) == false) {
exit(EXIT_SUCCESS);
}
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
cudaDevAttrComputeCapabilityMajor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
cudaDevAttrComputeCapabilityMinor,
nppStreamCtx.nCudaDeviceId);
if (cudaError != cudaSuccess)
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
cudaDeviceProp oDeviceProperties;
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
} else {
@ -202,9 +182,8 @@ int main(int argc, char *argv[]) {
(int)oDeviceSrc.height()}; // full image
// create device scratch buffer for nppiHistogram
size_t nDeviceBufferSize;
nppiHistogramEvenGetBufferSize_8u_C1R_Ctx(oSizeROI, levelCount,
&nDeviceBufferSize,
nppStreamCtx);
nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount,
&nDeviceBufferSize);
Npp8u *pDeviceBuffer;
NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize));
@ -212,9 +191,9 @@ int main(int argc, char *argv[]) {
Npp32s levelsHost[levelCount];
NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount));
// compute the histogram
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(
oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount,
0, binCount, pDeviceBuffer, nppStreamCtx));
0, binCount, pDeviceBuffer));
// copy histogram and levels to host memory
Npp32s histHost[binCount];
NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s),
@ -275,22 +254,20 @@ int main(int argc, char *argv[]) {
sizeof(Npp32s) * (levelCount),
cudaMemcpyHostToDevice));
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
oDeviceDst.pitch(), oSizeROI,
lutDevice, // value and level arrays are in host memory
lvlsDevice, levelCount,
nppStreamCtx));
lutDevice, // value and level arrays are in GPU device memory
lvlsDevice, levelCount));
NPP_CHECK_CUDA(cudaFree(lutDevice));
NPP_CHECK_CUDA(cudaFree(lvlsDevice));
#else
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
oDeviceDst.pitch(), oSizeROI,
lutHost, // value and level arrays are in host memory
levelsHost, levelCount,
nppStreamCtx));
levelsHost, levelCount));
#endif
// copy the result image back into the storage that contained the

View File

@ -328,10 +328,9 @@ main( int argc, char** argv )
int nCompressedLabelCount = 0;
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR_Ctx(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
pCompressedLabelsScratchBufferDev,
nppStreamCtx);
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
pCompressedLabelsScratchBufferDev);
if (nppStatus != NPP_SUCCESS)
{
@ -540,3 +539,6 @@ main( int argc, char** argv )
return 0;
}