mirror of
				https://github.com/NVIDIA/cuda-samples.git
				synced 2025-11-04 07:27:49 +08:00 
			
		
		
		
	Bug 5034785: Update all non-ctx nppi APIs to ctx APIs as per latest change on NPP
This commit is contained in:
		
							parent
							
								
									acd3a015c8
								
							
						
					
					
						commit
						7f0f63f311
					
				@ -74,26 +74,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
 | 
			
		||||
  return dev;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool printfNPPinfo(int argc, char *argv[]) {
 | 
			
		||||
  const NppLibraryVersion *libVer = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
  printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
 | 
			
		||||
         libVer->build);
 | 
			
		||||
 | 
			
		||||
  int driverVersion, runtimeVersion;
 | 
			
		||||
  cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
  cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
  printf("  CUDA Driver  Version: %d.%d\n", driverVersion / 1000,
 | 
			
		||||
         (driverVersion % 100) / 10);
 | 
			
		||||
  printf("  CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
 | 
			
		||||
         (runtimeVersion % 100) / 10);
 | 
			
		||||
 | 
			
		||||
  // Min spec is SM 1.0 devices
 | 
			
		||||
  bool bVal = checkCudaCapabilities(1, 0);
 | 
			
		||||
  return bVal;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  printf("%s Starting...\n\n", argv[0]);
 | 
			
		||||
 | 
			
		||||
@ -104,11 +84,50 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    cudaDeviceInit(argc, (const char **)argv);
 | 
			
		||||
 | 
			
		||||
    if (printfNPPinfo(argc, argv) == false) {
 | 
			
		||||
      cudaDeviceReset();
 | 
			
		||||
      exit(EXIT_SUCCESS);
 | 
			
		||||
    NppStreamContext nppStreamCtx;
 | 
			
		||||
    nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
 | 
			
		||||
 | 
			
		||||
    cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
    {
 | 
			
		||||
        printf("CUDA error: no devices supporting CUDA.\n");
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const NppLibraryVersion *libVer   = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
    printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
 | 
			
		||||
 | 
			
		||||
    int driverVersion, runtimeVersion;
 | 
			
		||||
    cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
    cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
    printf("CUDA Driver  Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
 | 
			
		||||
    printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
 | 
			
		||||
 | 
			
		||||
    cudaDeviceProp oDeviceProperties;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
 | 
			
		||||
    nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
 | 
			
		||||
    nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
 | 
			
		||||
 | 
			
		||||
    char *filePath;
 | 
			
		||||
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
 | 
			
		||||
@ -186,11 +205,11 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
    npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height);
 | 
			
		||||
 | 
			
		||||
    // run Prewitt edge detection gradient vector filter
 | 
			
		||||
    NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
 | 
			
		||||
        oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
 | 
			
		||||
        oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
 | 
			
		||||
        oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
 | 
			
		||||
        nppiNormL1, NPP_BORDER_REPLICATE));
 | 
			
		||||
        nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // allocate device destination images of appropriatedly size
 | 
			
		||||
    npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height);
 | 
			
		||||
@ -198,13 +217,13 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    // convert 16s_C1 result images to binary 8u_C1 output images using constant
 | 
			
		||||
    // value to adjust amount of visible detail
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
 | 
			
		||||
        oDeviceDstX.data(), oDeviceDstX.pitch(), 32, oDeviceDstOutX.data(),
 | 
			
		||||
        oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
 | 
			
		||||
        oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
 | 
			
		||||
        oDeviceDstY.data(), oDeviceDstY.pitch(), 32, oDeviceDstOutY.data(),
 | 
			
		||||
        oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
 | 
			
		||||
        oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // create host images for the results
 | 
			
		||||
    npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size());
 | 
			
		||||
@ -234,10 +253,10 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    // copy and enlarge the original device source image and surround it with a
 | 
			
		||||
    // white edge (border)
 | 
			
		||||
    NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize,
 | 
			
		||||
        oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize,
 | 
			
		||||
        oMaskSize.width / 2, oMaskSize.height / 2, 255));
 | 
			
		||||
        oMaskSize.width / 2, oMaskSize.height / 2, 255, nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of
 | 
			
		||||
    // the original source image in the enlarged source image
 | 
			
		||||
@ -261,23 +280,25 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    // run Prewitt edge detection gradient vector filter bypassing border
 | 
			
		||||
    // control due to enlarged source image
 | 
			
		||||
    NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
 | 
			
		||||
        pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
 | 
			
		||||
        oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
 | 
			
		||||
        oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
 | 
			
		||||
        nppiNormL1, NPP_BORDER_REPLICATE));
 | 
			
		||||
        nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // convert 16s_C1 result images to binary 8u_C1 output images using constant
 | 
			
		||||
    // value to adjust amount of visible detail
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
 | 
			
		||||
                                       32, oDeviceDstOutXNoBorders.data(),
 | 
			
		||||
                                       oDeviceDstOutXNoBorders.pitch(),
 | 
			
		||||
                                       oSizeROI, NPP_CMP_GREATER_EQ));
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
 | 
			
		||||
                                           32, oDeviceDstOutXNoBorders.data(),
 | 
			
		||||
                                           oDeviceDstOutXNoBorders.pitch(),
 | 
			
		||||
                                           oSizeROI, NPP_CMP_GREATER_EQ,
 | 
			
		||||
                                           nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
 | 
			
		||||
                                       32, oDeviceDstOutYNoBorders.data(),
 | 
			
		||||
                                       oDeviceDstOutYNoBorders.pitch(),
 | 
			
		||||
                                       oSizeROI, NPP_CMP_GREATER_EQ));
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
 | 
			
		||||
                                           32, oDeviceDstOutYNoBorders.data(),
 | 
			
		||||
                                           oDeviceDstOutYNoBorders.pitch(),
 | 
			
		||||
                                           oSizeROI, NPP_CMP_GREATER_EQ,
 | 
			
		||||
                                           nppStreamCtx));
 | 
			
		||||
    // create additional output files
 | 
			
		||||
    std::string sResultXNoBordersFilename = sResultBaseFilename;
 | 
			
		||||
    std::string sResultYNoBordersFilename = sResultBaseFilename;
 | 
			
		||||
@ -305,15 +326,17 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    // diff the two 8u_C1 result images one with and one without border control
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
 | 
			
		||||
        oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
 | 
			
		||||
        oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
 | 
			
		||||
        oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
 | 
			
		||||
        nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
 | 
			
		||||
        oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
 | 
			
		||||
        oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
 | 
			
		||||
        oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
 | 
			
		||||
        nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // create additional output files
 | 
			
		||||
    std::string sResultXDiffFilename = sResultBaseFilename;
 | 
			
		||||
@ -380,11 +403,11 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    // run Prewitt edge detection gradient vector filter to generate the left
 | 
			
		||||
    // side of the output image
 | 
			
		||||
    NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
 | 
			
		||||
        pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
 | 
			
		||||
        oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
 | 
			
		||||
        oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
 | 
			
		||||
        nppiNormL1, NPP_BORDER_REPLICATE));
 | 
			
		||||
        nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // now move the enlarged source pointer to the horizontal middle of the
 | 
			
		||||
    // enlarged source image and tell the function where it was moved to
 | 
			
		||||
@ -401,23 +424,26 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
    // run Prewitt edge detection gradient vector filter to generate the right
 | 
			
		||||
    // side of the output image adjusting the destination image pointers
 | 
			
		||||
    // appropriately
 | 
			
		||||
    NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
 | 
			
		||||
        pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
 | 
			
		||||
        oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(),
 | 
			
		||||
        oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(), 0, 0, 0, 0,
 | 
			
		||||
        oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
 | 
			
		||||
        oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE,
 | 
			
		||||
        nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // convert 16s_C1 result images to binary 8u_C1 output images using constant
 | 
			
		||||
    // value to adjust amount of visible detail
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
 | 
			
		||||
                                       32, oDeviceDstOutXMixedBorders.data(),
 | 
			
		||||
                                       oDeviceDstOutXMixedBorders.pitch(),
 | 
			
		||||
                                       oSizeROI, NPP_CMP_GREATER_EQ));
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
 | 
			
		||||
                                           32, oDeviceDstOutXMixedBorders.data(),
 | 
			
		||||
                                           oDeviceDstOutXMixedBorders.pitch(),
 | 
			
		||||
                                           oSizeROI, NPP_CMP_GREATER_EQ,
 | 
			
		||||
                                           nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
 | 
			
		||||
                                       32, oDeviceDstOutYMixedBorders.data(),
 | 
			
		||||
                                       oDeviceDstOutYMixedBorders.pitch(),
 | 
			
		||||
                                       oSizeROI, NPP_CMP_GREATER_EQ));
 | 
			
		||||
    NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
 | 
			
		||||
                                           32, oDeviceDstOutYMixedBorders.data(),
 | 
			
		||||
                                           oDeviceDstOutYMixedBorders.pitch(),
 | 
			
		||||
                                           oSizeROI, NPP_CMP_GREATER_EQ,
 | 
			
		||||
                                           nppStreamCtx));
 | 
			
		||||
    // create additional output files
 | 
			
		||||
    std::string sResultXMixedBordersFilename = sResultBaseFilename;
 | 
			
		||||
    std::string sResultYMixedBordersFilename = sResultBaseFilename;
 | 
			
		||||
@ -439,15 +465,17 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
    // diff the original 8u_C1 result images with border control and the mixed
 | 
			
		||||
    // border control images, they should match (diff image will be all black)
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
 | 
			
		||||
        oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
 | 
			
		||||
        oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
 | 
			
		||||
        oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
 | 
			
		||||
        nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
 | 
			
		||||
        oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
 | 
			
		||||
        oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
 | 
			
		||||
        oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
 | 
			
		||||
        nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // create additional output files
 | 
			
		||||
    std::string sResultXMixedDiffFilename = sResultBaseFilename;
 | 
			
		||||
 | 
			
		||||
@ -48,39 +48,56 @@
 | 
			
		||||
#include <helper_string.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
bool printfNPPinfo(int argc, char *argv[]) {
 | 
			
		||||
  const NppLibraryVersion *libVer = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
  printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
 | 
			
		||||
         libVer->build);
 | 
			
		||||
 | 
			
		||||
  int driverVersion, runtimeVersion;
 | 
			
		||||
  cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
  cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
  printf("  CUDA Driver  Version: %d.%d\n", driverVersion / 1000,
 | 
			
		||||
         (driverVersion % 100) / 10);
 | 
			
		||||
  printf("  CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
 | 
			
		||||
         (runtimeVersion % 100) / 10);
 | 
			
		||||
 | 
			
		||||
  // Min spec is SM 1.0 devices
 | 
			
		||||
  bool bVal = checkCudaCapabilities(1, 0);
 | 
			
		||||
  return bVal;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  printf("%s Starting...\n\n", argv[0]);
 | 
			
		||||
 | 
			
		||||
  try {
 | 
			
		||||
    std::string sFilename;
 | 
			
		||||
    char *filePath;
 | 
			
		||||
    NppStreamContext nppStreamCtx;
 | 
			
		||||
    nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
 | 
			
		||||
 | 
			
		||||
    findCudaDevice(argc, (const char **)argv);
 | 
			
		||||
 | 
			
		||||
    if (printfNPPinfo(argc, argv) == false) {
 | 
			
		||||
      exit(EXIT_SUCCESS);
 | 
			
		||||
    cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
    {
 | 
			
		||||
        printf("CUDA error: no devices supporting CUDA.\n");
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const NppLibraryVersion *libVer   = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
    printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
 | 
			
		||||
 | 
			
		||||
    int driverVersion, runtimeVersion;
 | 
			
		||||
    cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
    cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
    printf("CUDA Driver  Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
 | 
			
		||||
    printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
 | 
			
		||||
 | 
			
		||||
    cudaDeviceProp oDeviceProperties;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
 | 
			
		||||
    nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
 | 
			
		||||
    nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
 | 
			
		||||
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
 | 
			
		||||
    } else {
 | 
			
		||||
@ -154,10 +171,10 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
    NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
 | 
			
		||||
 | 
			
		||||
    // run box filter
 | 
			
		||||
    NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
 | 
			
		||||
        oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor,
 | 
			
		||||
        NPP_BORDER_REPLICATE));
 | 
			
		||||
        NPP_BORDER_REPLICATE, nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    // declare a host image for the result
 | 
			
		||||
    npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());
 | 
			
		||||
 | 
			
		||||
@ -68,26 +68,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
 | 
			
		||||
  return dev;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool printfNPPinfo(int argc, char *argv[]) {
 | 
			
		||||
  const NppLibraryVersion *libVer = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
  printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
 | 
			
		||||
         libVer->build);
 | 
			
		||||
 | 
			
		||||
  int driverVersion, runtimeVersion;
 | 
			
		||||
  cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
  cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
  printf("  CUDA Driver  Version: %d.%d\n", driverVersion / 1000,
 | 
			
		||||
         (driverVersion % 100) / 10);
 | 
			
		||||
  printf("  CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
 | 
			
		||||
         (runtimeVersion % 100) / 10);
 | 
			
		||||
 | 
			
		||||
  // Min spec is SM 1.0 devices
 | 
			
		||||
  bool bVal = checkCudaCapabilities(1, 0);
 | 
			
		||||
  return bVal;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  printf("%s Starting...\n\n", argv[0]);
 | 
			
		||||
 | 
			
		||||
@ -97,10 +77,50 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    cudaDeviceInit(argc, (const char **)argv);
 | 
			
		||||
 | 
			
		||||
    if (printfNPPinfo(argc, argv) == false) {
 | 
			
		||||
      exit(EXIT_SUCCESS);
 | 
			
		||||
    NppStreamContext nppStreamCtx;
 | 
			
		||||
    nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
 | 
			
		||||
 | 
			
		||||
    cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
    {
 | 
			
		||||
        printf("CUDA error: no devices supporting CUDA.\n");
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const NppLibraryVersion *libVer   = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
    printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
 | 
			
		||||
 | 
			
		||||
    int driverVersion, runtimeVersion;
 | 
			
		||||
    cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
    cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
    printf("CUDA Driver  Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
 | 
			
		||||
    printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
 | 
			
		||||
 | 
			
		||||
    cudaDeviceProp oDeviceProperties;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
 | 
			
		||||
    nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
 | 
			
		||||
    nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
 | 
			
		||||
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
 | 
			
		||||
    } else {
 | 
			
		||||
@ -190,11 +210,11 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
    Npp16s nHighThreshold = 256;
 | 
			
		||||
 | 
			
		||||
    if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) {
 | 
			
		||||
      NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R(
 | 
			
		||||
      NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R_Ctx(
 | 
			
		||||
          oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
 | 
			
		||||
          oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL,
 | 
			
		||||
          NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2,
 | 
			
		||||
          NPP_BORDER_REPLICATE, pScratchBufferNPP));
 | 
			
		||||
          NPP_BORDER_REPLICATE, pScratchBufferNPP, nppStreamCtx));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // free scratch buffer memory
 | 
			
		||||
 | 
			
		||||
@ -70,25 +70,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
 | 
			
		||||
  return dev;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool printfNPPinfo(int argc, char *argv[], int cudaVerMajor, int cudaVerMinor) {
 | 
			
		||||
  const NppLibraryVersion *libVer = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
  printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
 | 
			
		||||
         libVer->build);
 | 
			
		||||
 | 
			
		||||
  int driverVersion, runtimeVersion;
 | 
			
		||||
  cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
  cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
  printf("  CUDA Driver  Version: %d.%d\n", driverVersion / 1000,
 | 
			
		||||
         (driverVersion % 100) / 10);
 | 
			
		||||
  printf("  CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
 | 
			
		||||
         (runtimeVersion % 100) / 10);
 | 
			
		||||
 | 
			
		||||
  bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
 | 
			
		||||
  return bVal;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Error handler for FreeImage library.
 | 
			
		||||
//  In case this handler is invoked, it throws an NPP exception.
 | 
			
		||||
extern "C" void FreeImageErrorHandler(FREE_IMAGE_FORMAT oFif,
 | 
			
		||||
@ -157,11 +138,50 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    cudaDeviceInit(argc, (const char **)argv);
 | 
			
		||||
 | 
			
		||||
    // Min spec is SM 1.0 devices
 | 
			
		||||
    if (printfNPPinfo(argc, argv, 1, 0) == false) {
 | 
			
		||||
      exit(EXIT_SUCCESS);
 | 
			
		||||
    NppStreamContext nppStreamCtx;
 | 
			
		||||
    nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
 | 
			
		||||
 | 
			
		||||
    cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
    {
 | 
			
		||||
        printf("CUDA error: no devices supporting CUDA.\n");
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const NppLibraryVersion *libVer   = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
    printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
 | 
			
		||||
 | 
			
		||||
    int driverVersion, runtimeVersion;
 | 
			
		||||
    cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
    cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
    printf("CUDA Driver  Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
 | 
			
		||||
    printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
 | 
			
		||||
 | 
			
		||||
    cudaDeviceProp oDeviceProperties;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
 | 
			
		||||
    nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
 | 
			
		||||
    nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
 | 
			
		||||
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
 | 
			
		||||
    } else {
 | 
			
		||||
@ -260,9 +280,9 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
    Npp8u *pDstImageCUDA =
 | 
			
		||||
        nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA);
 | 
			
		||||
    NPP_ASSERT_NOT_NULL(pDstImageCUDA);
 | 
			
		||||
    NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA,
 | 
			
		||||
                                       pDstImageCUDA, nDstPitchCUDA, oSizeROI,
 | 
			
		||||
                                       oMaskSize, oMaskAchnor));
 | 
			
		||||
    NPP_CHECK_NPP(nppiFilterBox_8u_C1R_Ctx(pSrcImageCUDA, nSrcPitchCUDA,
 | 
			
		||||
                                           pDstImageCUDA, nDstPitchCUDA, oSizeROI,
 | 
			
		||||
                                           oMaskSize, oMaskAchnor, nppStreamCtx));
 | 
			
		||||
    // create the result image storage using FreeImage so we can easily
 | 
			
		||||
    // save
 | 
			
		||||
    FIBITMAP *pResultBitmap = FreeImage_Allocate(
 | 
			
		||||
 | 
			
		||||
@ -73,26 +73,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
 | 
			
		||||
  return dev;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool printfNPPinfo(int argc, char *argv[]) {
 | 
			
		||||
  const NppLibraryVersion *libVer = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
  printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
 | 
			
		||||
         libVer->build);
 | 
			
		||||
 | 
			
		||||
  int driverVersion, runtimeVersion;
 | 
			
		||||
  cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
  cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
  printf("  CUDA Driver  Version: %d.%d\n", driverVersion / 1000,
 | 
			
		||||
         (driverVersion % 100) / 10);
 | 
			
		||||
  printf("  CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
 | 
			
		||||
         (runtimeVersion % 100) / 10);
 | 
			
		||||
 | 
			
		||||
  // Min spec is SM 1.1 devices
 | 
			
		||||
  bool bVal = checkCudaCapabilities(1, 1);
 | 
			
		||||
  return bVal;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[]) {
 | 
			
		||||
  printf("%s Starting...\n\n", argv[0]);
 | 
			
		||||
 | 
			
		||||
@ -102,10 +82,50 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
 | 
			
		||||
    cudaDeviceInit(argc, (const char **)argv);
 | 
			
		||||
 | 
			
		||||
    if (printfNPPinfo(argc, argv) == false) {
 | 
			
		||||
      exit(EXIT_SUCCESS);
 | 
			
		||||
    NppStreamContext nppStreamCtx;
 | 
			
		||||
    nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
 | 
			
		||||
 | 
			
		||||
    cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
    {
 | 
			
		||||
        printf("CUDA error: no devices supporting CUDA.\n");
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const NppLibraryVersion *libVer   = nppGetLibVersion();
 | 
			
		||||
 | 
			
		||||
    printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
 | 
			
		||||
 | 
			
		||||
    int driverVersion, runtimeVersion;
 | 
			
		||||
    cudaDriverGetVersion(&driverVersion);
 | 
			
		||||
    cudaRuntimeGetVersion(&runtimeVersion);
 | 
			
		||||
 | 
			
		||||
    printf("CUDA Driver  Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
 | 
			
		||||
    printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMajor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      cudaDevAttrComputeCapabilityMinor, 
 | 
			
		||||
                                      nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
    if (cudaError != cudaSuccess)
 | 
			
		||||
        return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
 | 
			
		||||
 | 
			
		||||
    cudaDeviceProp oDeviceProperties;
 | 
			
		||||
 | 
			
		||||
    cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
 | 
			
		||||
 | 
			
		||||
    nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
 | 
			
		||||
    nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
 | 
			
		||||
    nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
 | 
			
		||||
 | 
			
		||||
    if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
 | 
			
		||||
      getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
 | 
			
		||||
    } else {
 | 
			
		||||
@ -182,8 +202,9 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
                         (int)oDeviceSrc.height()};  // full image
 | 
			
		||||
    // create device scratch buffer for nppiHistogram
 | 
			
		||||
    size_t nDeviceBufferSize;
 | 
			
		||||
    nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount,
 | 
			
		||||
                                          &nDeviceBufferSize);
 | 
			
		||||
    nppiHistogramEvenGetBufferSize_8u_C1R_Ctx(oSizeROI, levelCount,
 | 
			
		||||
                                              &nDeviceBufferSize,
 | 
			
		||||
                                              nppStreamCtx);
 | 
			
		||||
    Npp8u *pDeviceBuffer;
 | 
			
		||||
    NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize));
 | 
			
		||||
 | 
			
		||||
@ -191,9 +212,9 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
    Npp32s levelsHost[levelCount];
 | 
			
		||||
    NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount));
 | 
			
		||||
    // compute the histogram
 | 
			
		||||
    NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiHistogramEven_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount,
 | 
			
		||||
        0, binCount, pDeviceBuffer));
 | 
			
		||||
        0, binCount, pDeviceBuffer, nppStreamCtx));
 | 
			
		||||
    // copy histogram and levels to host memory
 | 
			
		||||
    Npp32s histHost[binCount];
 | 
			
		||||
    NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s),
 | 
			
		||||
@ -254,20 +275,22 @@ int main(int argc, char *argv[]) {
 | 
			
		||||
                              sizeof(Npp32s) * (levelCount),
 | 
			
		||||
                              cudaMemcpyHostToDevice));
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
 | 
			
		||||
        oDeviceDst.pitch(), oSizeROI,
 | 
			
		||||
        lutDevice,  // value and level arrays are in GPU device memory
 | 
			
		||||
        lvlsDevice, levelCount));
 | 
			
		||||
        lutDevice,  // value and level arrays are in host memory
 | 
			
		||||
        lvlsDevice, levelCount,
 | 
			
		||||
        nppStreamCtx));
 | 
			
		||||
 | 
			
		||||
    NPP_CHECK_CUDA(cudaFree(lutDevice));
 | 
			
		||||
    NPP_CHECK_CUDA(cudaFree(lvlsDevice));
 | 
			
		||||
#else
 | 
			
		||||
    NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
 | 
			
		||||
    NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
 | 
			
		||||
        oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
 | 
			
		||||
        oDeviceDst.pitch(), oSizeROI,
 | 
			
		||||
        lutHost,  // value and level arrays are in host memory
 | 
			
		||||
        levelsHost, levelCount));
 | 
			
		||||
        levelsHost, levelCount,
 | 
			
		||||
        nppStreamCtx));
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    // copy the result image back into the storage that contained the
 | 
			
		||||
 | 
			
		||||
@ -328,9 +328,10 @@ main( int argc, char** argv )
 | 
			
		||||
 | 
			
		||||
            int nCompressedLabelCount = 0;
 | 
			
		||||
 | 
			
		||||
            nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage], 
 | 
			
		||||
                                                            oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount, 
 | 
			
		||||
                                                            pCompressedLabelsScratchBufferDev);
 | 
			
		||||
            nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR_Ctx(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage], 
 | 
			
		||||
                                                                oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount, 
 | 
			
		||||
                                                                pCompressedLabelsScratchBufferDev,
 | 
			
		||||
                                                                nppStreamCtx);
 | 
			
		||||
 | 
			
		||||
            if (nppStatus != NPP_SUCCESS)  
 | 
			
		||||
            {
 | 
			
		||||
@ -539,6 +540,3 @@ main( int argc, char** argv )
 | 
			
		||||
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user