mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-07-02 05:10:31 +08:00
Bug 5034785: Update all non-ctx nppi APIs to ctx APIs as per latest change on NPP
This commit is contained in:
parent
acd3a015c8
commit
7f0f63f311
@ -74,26 +74,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
|||||||
return dev;
|
return dev;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool printfNPPinfo(int argc, char *argv[]) {
|
|
||||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
|
||||||
|
|
||||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
|
||||||
libVer->build);
|
|
||||||
|
|
||||||
int driverVersion, runtimeVersion;
|
|
||||||
cudaDriverGetVersion(&driverVersion);
|
|
||||||
cudaRuntimeGetVersion(&runtimeVersion);
|
|
||||||
|
|
||||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
|
||||||
(driverVersion % 100) / 10);
|
|
||||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
|
||||||
(runtimeVersion % 100) / 10);
|
|
||||||
|
|
||||||
// Min spec is SM 1.0 devices
|
|
||||||
bool bVal = checkCudaCapabilities(1, 0);
|
|
||||||
return bVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
printf("%s Starting...\n\n", argv[0]);
|
printf("%s Starting...\n\n", argv[0]);
|
||||||
|
|
||||||
@ -104,11 +84,50 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
cudaDeviceInit(argc, (const char **)argv);
|
cudaDeviceInit(argc, (const char **)argv);
|
||||||
|
|
||||||
if (printfNPPinfo(argc, argv) == false) {
|
NppStreamContext nppStreamCtx;
|
||||||
cudaDeviceReset();
|
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||||
exit(EXIT_SUCCESS);
|
|
||||||
|
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
{
|
||||||
|
printf("CUDA error: no devices supporting CUDA.\n");
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||||
|
|
||||||
|
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||||
|
|
||||||
|
int driverVersion, runtimeVersion;
|
||||||
|
cudaDriverGetVersion(&driverVersion);
|
||||||
|
cudaRuntimeGetVersion(&runtimeVersion);
|
||||||
|
|
||||||
|
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||||
|
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||||
|
cudaDevAttrComputeCapabilityMajor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||||
|
cudaDevAttrComputeCapabilityMinor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||||
|
|
||||||
|
cudaDeviceProp oDeviceProperties;
|
||||||
|
|
||||||
|
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||||
|
|
||||||
|
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||||
|
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||||
|
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||||
|
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||||
|
|
||||||
char *filePath;
|
char *filePath;
|
||||||
|
|
||||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||||
@ -186,11 +205,11 @@ int main(int argc, char *argv[]) {
|
|||||||
npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height);
|
npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height);
|
||||||
|
|
||||||
// run Prewitt edge detection gradient vector filter
|
// run Prewitt edge detection gradient vector filter
|
||||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||||
|
|
||||||
// allocate device destination images of appropriatedly size
|
// allocate device destination images of appropriatedly size
|
||||||
npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height);
|
npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height);
|
||||||
@ -198,13 +217,13 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||||
// value to adjust amount of visible detail
|
// value to adjust amount of visible detail
|
||||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
|
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
|
||||||
oDeviceDstX.data(), oDeviceDstX.pitch(), 32, oDeviceDstOutX.data(),
|
oDeviceDstX.data(), oDeviceDstX.pitch(), 32, oDeviceDstOutX.data(),
|
||||||
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
|
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
|
||||||
|
|
||||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
|
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
|
||||||
oDeviceDstY.data(), oDeviceDstY.pitch(), 32, oDeviceDstOutY.data(),
|
oDeviceDstY.data(), oDeviceDstY.pitch(), 32, oDeviceDstOutY.data(),
|
||||||
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
|
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
|
||||||
|
|
||||||
// create host images for the results
|
// create host images for the results
|
||||||
npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size());
|
npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size());
|
||||||
@ -234,10 +253,10 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
// copy and enlarge the original device source image and surround it with a
|
// copy and enlarge the original device source image and surround it with a
|
||||||
// white edge (border)
|
// white edge (border)
|
||||||
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R(
|
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R_Ctx(
|
||||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize,
|
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize,
|
||||||
oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize,
|
oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize,
|
||||||
oMaskSize.width / 2, oMaskSize.height / 2, 255));
|
oMaskSize.width / 2, oMaskSize.height / 2, 255, nppStreamCtx));
|
||||||
|
|
||||||
// adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of
|
// adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of
|
||||||
// the original source image in the enlarged source image
|
// the original source image in the enlarged source image
|
||||||
@ -261,23 +280,25 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
// run Prewitt edge detection gradient vector filter bypassing border
|
// run Prewitt edge detection gradient vector filter bypassing border
|
||||||
// control due to enlarged source image
|
// control due to enlarged source image
|
||||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||||
|
|
||||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||||
// value to adjust amount of visible detail
|
// value to adjust amount of visible detail
|
||||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||||
32, oDeviceDstOutXNoBorders.data(),
|
32, oDeviceDstOutXNoBorders.data(),
|
||||||
oDeviceDstOutXNoBorders.pitch(),
|
oDeviceDstOutXNoBorders.pitch(),
|
||||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||||
|
nppStreamCtx));
|
||||||
|
|
||||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||||
32, oDeviceDstOutYNoBorders.data(),
|
32, oDeviceDstOutYNoBorders.data(),
|
||||||
oDeviceDstOutYNoBorders.pitch(),
|
oDeviceDstOutYNoBorders.pitch(),
|
||||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||||
|
nppStreamCtx));
|
||||||
// create additional output files
|
// create additional output files
|
||||||
std::string sResultXNoBordersFilename = sResultBaseFilename;
|
std::string sResultXNoBordersFilename = sResultBaseFilename;
|
||||||
std::string sResultYNoBordersFilename = sResultBaseFilename;
|
std::string sResultYNoBordersFilename = sResultBaseFilename;
|
||||||
@ -305,15 +326,17 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
// diff the two 8u_C1 result images one with and one without border control
|
// diff the two 8u_C1 result images one with and one without border control
|
||||||
|
|
||||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||||
oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
|
oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
|
||||||
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
||||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
|
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
|
||||||
|
nppStreamCtx));
|
||||||
|
|
||||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||||
oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
|
oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
|
||||||
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
||||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
|
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
|
||||||
|
nppStreamCtx));
|
||||||
|
|
||||||
// create additional output files
|
// create additional output files
|
||||||
std::string sResultXDiffFilename = sResultBaseFilename;
|
std::string sResultXDiffFilename = sResultBaseFilename;
|
||||||
@ -380,11 +403,11 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
// run Prewitt edge detection gradient vector filter to generate the left
|
// run Prewitt edge detection gradient vector filter to generate the left
|
||||||
// side of the output image
|
// side of the output image
|
||||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||||
|
|
||||||
// now move the enlarged source pointer to the horizontal middle of the
|
// now move the enlarged source pointer to the horizontal middle of the
|
||||||
// enlarged source image and tell the function where it was moved to
|
// enlarged source image and tell the function where it was moved to
|
||||||
@ -401,23 +424,26 @@ int main(int argc, char *argv[]) {
|
|||||||
// run Prewitt edge detection gradient vector filter to generate the right
|
// run Prewitt edge detection gradient vector filter to generate the right
|
||||||
// side of the output image adjusting the destination image pointers
|
// side of the output image adjusting the destination image pointers
|
||||||
// appropriately
|
// appropriately
|
||||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||||
oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(),
|
oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(),
|
||||||
oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(), 0, 0, 0, 0,
|
oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(), 0, 0, 0, 0,
|
||||||
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
|
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE,
|
||||||
|
nppStreamCtx));
|
||||||
|
|
||||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||||
// value to adjust amount of visible detail
|
// value to adjust amount of visible detail
|
||||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||||
32, oDeviceDstOutXMixedBorders.data(),
|
32, oDeviceDstOutXMixedBorders.data(),
|
||||||
oDeviceDstOutXMixedBorders.pitch(),
|
oDeviceDstOutXMixedBorders.pitch(),
|
||||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||||
|
nppStreamCtx));
|
||||||
|
|
||||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||||
32, oDeviceDstOutYMixedBorders.data(),
|
32, oDeviceDstOutYMixedBorders.data(),
|
||||||
oDeviceDstOutYMixedBorders.pitch(),
|
oDeviceDstOutYMixedBorders.pitch(),
|
||||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||||
|
nppStreamCtx));
|
||||||
// create additional output files
|
// create additional output files
|
||||||
std::string sResultXMixedBordersFilename = sResultBaseFilename;
|
std::string sResultXMixedBordersFilename = sResultBaseFilename;
|
||||||
std::string sResultYMixedBordersFilename = sResultBaseFilename;
|
std::string sResultYMixedBordersFilename = sResultBaseFilename;
|
||||||
@ -439,15 +465,17 @@ int main(int argc, char *argv[]) {
|
|||||||
// diff the original 8u_C1 result images with border control and the mixed
|
// diff the original 8u_C1 result images with border control and the mixed
|
||||||
// border control images, they should match (diff image will be all black)
|
// border control images, they should match (diff image will be all black)
|
||||||
|
|
||||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||||
oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
|
oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
|
||||||
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
||||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
|
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
|
||||||
|
nppStreamCtx));
|
||||||
|
|
||||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||||
oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
|
oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
|
||||||
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
||||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
|
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
|
||||||
|
nppStreamCtx));
|
||||||
|
|
||||||
// create additional output files
|
// create additional output files
|
||||||
std::string sResultXMixedDiffFilename = sResultBaseFilename;
|
std::string sResultXMixedDiffFilename = sResultBaseFilename;
|
||||||
|
@ -48,39 +48,56 @@
|
|||||||
#include <helper_string.h>
|
#include <helper_string.h>
|
||||||
|
|
||||||
|
|
||||||
bool printfNPPinfo(int argc, char *argv[]) {
|
|
||||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
|
||||||
|
|
||||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
|
||||||
libVer->build);
|
|
||||||
|
|
||||||
int driverVersion, runtimeVersion;
|
|
||||||
cudaDriverGetVersion(&driverVersion);
|
|
||||||
cudaRuntimeGetVersion(&runtimeVersion);
|
|
||||||
|
|
||||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
|
||||||
(driverVersion % 100) / 10);
|
|
||||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
|
||||||
(runtimeVersion % 100) / 10);
|
|
||||||
|
|
||||||
// Min spec is SM 1.0 devices
|
|
||||||
bool bVal = checkCudaCapabilities(1, 0);
|
|
||||||
return bVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
printf("%s Starting...\n\n", argv[0]);
|
printf("%s Starting...\n\n", argv[0]);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
std::string sFilename;
|
std::string sFilename;
|
||||||
char *filePath;
|
char *filePath;
|
||||||
|
NppStreamContext nppStreamCtx;
|
||||||
|
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||||
|
|
||||||
findCudaDevice(argc, (const char **)argv);
|
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
if (printfNPPinfo(argc, argv) == false) {
|
{
|
||||||
exit(EXIT_SUCCESS);
|
printf("CUDA error: no devices supporting CUDA.\n");
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||||
|
|
||||||
|
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||||
|
|
||||||
|
int driverVersion, runtimeVersion;
|
||||||
|
cudaDriverGetVersion(&driverVersion);
|
||||||
|
cudaRuntimeGetVersion(&runtimeVersion);
|
||||||
|
|
||||||
|
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||||
|
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||||
|
cudaDevAttrComputeCapabilityMajor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||||
|
cudaDevAttrComputeCapabilityMinor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||||
|
|
||||||
|
cudaDeviceProp oDeviceProperties;
|
||||||
|
|
||||||
|
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||||
|
|
||||||
|
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||||
|
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||||
|
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||||
|
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||||
|
|
||||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||||
} else {
|
} else {
|
||||||
@ -154,10 +171,10 @@ int main(int argc, char *argv[]) {
|
|||||||
NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
|
NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
|
||||||
|
|
||||||
// run box filter
|
// run box filter
|
||||||
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R(
|
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R_Ctx(
|
||||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||||
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor,
|
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor,
|
||||||
NPP_BORDER_REPLICATE));
|
NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||||
|
|
||||||
// declare a host image for the result
|
// declare a host image for the result
|
||||||
npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());
|
npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());
|
||||||
|
@ -68,26 +68,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
|||||||
return dev;
|
return dev;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool printfNPPinfo(int argc, char *argv[]) {
|
|
||||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
|
||||||
|
|
||||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
|
||||||
libVer->build);
|
|
||||||
|
|
||||||
int driverVersion, runtimeVersion;
|
|
||||||
cudaDriverGetVersion(&driverVersion);
|
|
||||||
cudaRuntimeGetVersion(&runtimeVersion);
|
|
||||||
|
|
||||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
|
||||||
(driverVersion % 100) / 10);
|
|
||||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
|
||||||
(runtimeVersion % 100) / 10);
|
|
||||||
|
|
||||||
// Min spec is SM 1.0 devices
|
|
||||||
bool bVal = checkCudaCapabilities(1, 0);
|
|
||||||
return bVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
printf("%s Starting...\n\n", argv[0]);
|
printf("%s Starting...\n\n", argv[0]);
|
||||||
|
|
||||||
@ -97,10 +77,50 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
cudaDeviceInit(argc, (const char **)argv);
|
cudaDeviceInit(argc, (const char **)argv);
|
||||||
|
|
||||||
if (printfNPPinfo(argc, argv) == false) {
|
NppStreamContext nppStreamCtx;
|
||||||
exit(EXIT_SUCCESS);
|
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||||
|
|
||||||
|
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
{
|
||||||
|
printf("CUDA error: no devices supporting CUDA.\n");
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||||
|
|
||||||
|
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||||
|
|
||||||
|
int driverVersion, runtimeVersion;
|
||||||
|
cudaDriverGetVersion(&driverVersion);
|
||||||
|
cudaRuntimeGetVersion(&runtimeVersion);
|
||||||
|
|
||||||
|
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||||
|
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||||
|
cudaDevAttrComputeCapabilityMajor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||||
|
cudaDevAttrComputeCapabilityMinor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||||
|
|
||||||
|
cudaDeviceProp oDeviceProperties;
|
||||||
|
|
||||||
|
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||||
|
|
||||||
|
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||||
|
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||||
|
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||||
|
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||||
|
|
||||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||||
} else {
|
} else {
|
||||||
@ -190,11 +210,11 @@ int main(int argc, char *argv[]) {
|
|||||||
Npp16s nHighThreshold = 256;
|
Npp16s nHighThreshold = 256;
|
||||||
|
|
||||||
if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) {
|
if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) {
|
||||||
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R(
|
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R_Ctx(
|
||||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||||
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL,
|
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL,
|
||||||
NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2,
|
NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2,
|
||||||
NPP_BORDER_REPLICATE, pScratchBufferNPP));
|
NPP_BORDER_REPLICATE, pScratchBufferNPP, nppStreamCtx));
|
||||||
}
|
}
|
||||||
|
|
||||||
// free scratch buffer memory
|
// free scratch buffer memory
|
||||||
|
@ -70,25 +70,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
|||||||
return dev;
|
return dev;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool printfNPPinfo(int argc, char *argv[], int cudaVerMajor, int cudaVerMinor) {
|
|
||||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
|
||||||
|
|
||||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
|
||||||
libVer->build);
|
|
||||||
|
|
||||||
int driverVersion, runtimeVersion;
|
|
||||||
cudaDriverGetVersion(&driverVersion);
|
|
||||||
cudaRuntimeGetVersion(&runtimeVersion);
|
|
||||||
|
|
||||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
|
||||||
(driverVersion % 100) / 10);
|
|
||||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
|
||||||
(runtimeVersion % 100) / 10);
|
|
||||||
|
|
||||||
bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
|
|
||||||
return bVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Error handler for FreeImage library.
|
// Error handler for FreeImage library.
|
||||||
// In case this handler is invoked, it throws an NPP exception.
|
// In case this handler is invoked, it throws an NPP exception.
|
||||||
extern "C" void FreeImageErrorHandler(FREE_IMAGE_FORMAT oFif,
|
extern "C" void FreeImageErrorHandler(FREE_IMAGE_FORMAT oFif,
|
||||||
@ -157,11 +138,50 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
cudaDeviceInit(argc, (const char **)argv);
|
cudaDeviceInit(argc, (const char **)argv);
|
||||||
|
|
||||||
// Min spec is SM 1.0 devices
|
NppStreamContext nppStreamCtx;
|
||||||
if (printfNPPinfo(argc, argv, 1, 0) == false) {
|
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||||
exit(EXIT_SUCCESS);
|
|
||||||
|
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
{
|
||||||
|
printf("CUDA error: no devices supporting CUDA.\n");
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||||
|
|
||||||
|
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||||
|
|
||||||
|
int driverVersion, runtimeVersion;
|
||||||
|
cudaDriverGetVersion(&driverVersion);
|
||||||
|
cudaRuntimeGetVersion(&runtimeVersion);
|
||||||
|
|
||||||
|
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||||
|
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||||
|
cudaDevAttrComputeCapabilityMajor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||||
|
cudaDevAttrComputeCapabilityMinor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||||
|
|
||||||
|
cudaDeviceProp oDeviceProperties;
|
||||||
|
|
||||||
|
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||||
|
|
||||||
|
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||||
|
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||||
|
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||||
|
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||||
|
|
||||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||||
} else {
|
} else {
|
||||||
@ -260,9 +280,9 @@ int main(int argc, char *argv[]) {
|
|||||||
Npp8u *pDstImageCUDA =
|
Npp8u *pDstImageCUDA =
|
||||||
nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA);
|
nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA);
|
||||||
NPP_ASSERT_NOT_NULL(pDstImageCUDA);
|
NPP_ASSERT_NOT_NULL(pDstImageCUDA);
|
||||||
NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA,
|
NPP_CHECK_NPP(nppiFilterBox_8u_C1R_Ctx(pSrcImageCUDA, nSrcPitchCUDA,
|
||||||
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
|
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
|
||||||
oMaskSize, oMaskAchnor));
|
oMaskSize, oMaskAchnor, nppStreamCtx));
|
||||||
// create the result image storage using FreeImage so we can easily
|
// create the result image storage using FreeImage so we can easily
|
||||||
// save
|
// save
|
||||||
FIBITMAP *pResultBitmap = FreeImage_Allocate(
|
FIBITMAP *pResultBitmap = FreeImage_Allocate(
|
||||||
|
@ -73,26 +73,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
|||||||
return dev;
|
return dev;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool printfNPPinfo(int argc, char *argv[]) {
|
|
||||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
|
||||||
|
|
||||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
|
||||||
libVer->build);
|
|
||||||
|
|
||||||
int driverVersion, runtimeVersion;
|
|
||||||
cudaDriverGetVersion(&driverVersion);
|
|
||||||
cudaRuntimeGetVersion(&runtimeVersion);
|
|
||||||
|
|
||||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
|
||||||
(driverVersion % 100) / 10);
|
|
||||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
|
||||||
(runtimeVersion % 100) / 10);
|
|
||||||
|
|
||||||
// Min spec is SM 1.1 devices
|
|
||||||
bool bVal = checkCudaCapabilities(1, 1);
|
|
||||||
return bVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
printf("%s Starting...\n\n", argv[0]);
|
printf("%s Starting...\n\n", argv[0]);
|
||||||
|
|
||||||
@ -102,10 +82,50 @@ int main(int argc, char *argv[]) {
|
|||||||
|
|
||||||
cudaDeviceInit(argc, (const char **)argv);
|
cudaDeviceInit(argc, (const char **)argv);
|
||||||
|
|
||||||
if (printfNPPinfo(argc, argv) == false) {
|
NppStreamContext nppStreamCtx;
|
||||||
exit(EXIT_SUCCESS);
|
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||||
|
|
||||||
|
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
{
|
||||||
|
printf("CUDA error: no devices supporting CUDA.\n");
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||||
|
|
||||||
|
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||||
|
|
||||||
|
int driverVersion, runtimeVersion;
|
||||||
|
cudaDriverGetVersion(&driverVersion);
|
||||||
|
cudaRuntimeGetVersion(&runtimeVersion);
|
||||||
|
|
||||||
|
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||||
|
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||||
|
cudaDevAttrComputeCapabilityMajor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||||
|
cudaDevAttrComputeCapabilityMinor,
|
||||||
|
nppStreamCtx.nCudaDeviceId);
|
||||||
|
if (cudaError != cudaSuccess)
|
||||||
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||||
|
|
||||||
|
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||||
|
|
||||||
|
cudaDeviceProp oDeviceProperties;
|
||||||
|
|
||||||
|
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||||
|
|
||||||
|
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||||
|
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||||
|
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||||
|
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||||
|
|
||||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||||
} else {
|
} else {
|
||||||
@ -182,8 +202,9 @@ int main(int argc, char *argv[]) {
|
|||||||
(int)oDeviceSrc.height()}; // full image
|
(int)oDeviceSrc.height()}; // full image
|
||||||
// create device scratch buffer for nppiHistogram
|
// create device scratch buffer for nppiHistogram
|
||||||
size_t nDeviceBufferSize;
|
size_t nDeviceBufferSize;
|
||||||
nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount,
|
nppiHistogramEvenGetBufferSize_8u_C1R_Ctx(oSizeROI, levelCount,
|
||||||
&nDeviceBufferSize);
|
&nDeviceBufferSize,
|
||||||
|
nppStreamCtx);
|
||||||
Npp8u *pDeviceBuffer;
|
Npp8u *pDeviceBuffer;
|
||||||
NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize));
|
NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize));
|
||||||
|
|
||||||
@ -191,9 +212,9 @@ int main(int argc, char *argv[]) {
|
|||||||
Npp32s levelsHost[levelCount];
|
Npp32s levelsHost[levelCount];
|
||||||
NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount));
|
NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount));
|
||||||
// compute the histogram
|
// compute the histogram
|
||||||
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(
|
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R_Ctx(
|
||||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount,
|
oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount,
|
||||||
0, binCount, pDeviceBuffer));
|
0, binCount, pDeviceBuffer, nppStreamCtx));
|
||||||
// copy histogram and levels to host memory
|
// copy histogram and levels to host memory
|
||||||
Npp32s histHost[binCount];
|
Npp32s histHost[binCount];
|
||||||
NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s),
|
NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s),
|
||||||
@ -254,20 +275,22 @@ int main(int argc, char *argv[]) {
|
|||||||
sizeof(Npp32s) * (levelCount),
|
sizeof(Npp32s) * (levelCount),
|
||||||
cudaMemcpyHostToDevice));
|
cudaMemcpyHostToDevice));
|
||||||
|
|
||||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
|
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
|
||||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
||||||
oDeviceDst.pitch(), oSizeROI,
|
oDeviceDst.pitch(), oSizeROI,
|
||||||
lutDevice, // value and level arrays are in GPU device memory
|
lutDevice, // value and level arrays are in host memory
|
||||||
lvlsDevice, levelCount));
|
lvlsDevice, levelCount,
|
||||||
|
nppStreamCtx));
|
||||||
|
|
||||||
NPP_CHECK_CUDA(cudaFree(lutDevice));
|
NPP_CHECK_CUDA(cudaFree(lutDevice));
|
||||||
NPP_CHECK_CUDA(cudaFree(lvlsDevice));
|
NPP_CHECK_CUDA(cudaFree(lvlsDevice));
|
||||||
#else
|
#else
|
||||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
|
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
|
||||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
||||||
oDeviceDst.pitch(), oSizeROI,
|
oDeviceDst.pitch(), oSizeROI,
|
||||||
lutHost, // value and level arrays are in host memory
|
lutHost, // value and level arrays are in host memory
|
||||||
levelsHost, levelCount));
|
levelsHost, levelCount,
|
||||||
|
nppStreamCtx));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// copy the result image back into the storage that contained the
|
// copy the result image back into the storage that contained the
|
||||||
|
@ -328,9 +328,10 @@ main( int argc, char** argv )
|
|||||||
|
|
||||||
int nCompressedLabelCount = 0;
|
int nCompressedLabelCount = 0;
|
||||||
|
|
||||||
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
|
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR_Ctx(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
|
||||||
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
|
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
|
||||||
pCompressedLabelsScratchBufferDev);
|
pCompressedLabelsScratchBufferDev,
|
||||||
|
nppStreamCtx);
|
||||||
|
|
||||||
if (nppStatus != NPP_SUCCESS)
|
if (nppStatus != NPP_SUCCESS)
|
||||||
{
|
{
|
||||||
@ -539,6 +540,3 @@ main( int argc, char** argv )
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user