mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-07-01 20:20:29 +08:00
Revert "Bug 5034785: Update all non-ctx nppi APIs to ctx APIs as per latest change on NPP"
This reverts commit a9869fd6eaeecc748fc5f10f4b331fa41efbdaca
This commit is contained in:
parent
a9869fd6ea
commit
acd3a015c8
@ -74,6 +74,26 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -84,50 +104,11 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
cudaDeviceReset();
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
char *filePath;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
@ -205,11 +186,11 @@ int main(int argc, char *argv[]) {
|
||||
npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height);
|
||||
|
||||
// run Prewitt edge detection gradient vector filter
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
|
||||
// allocate device destination images of appropriatedly size
|
||||
npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height);
|
||||
@ -217,13 +198,13 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), 32, oDeviceDstOutX.data(),
|
||||
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
|
||||
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
|
||||
oDeviceDstY.data(), oDeviceDstY.pitch(), 32, oDeviceDstOutY.data(),
|
||||
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
|
||||
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
|
||||
// create host images for the results
|
||||
npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size());
|
||||
@ -253,10 +234,10 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// copy and enlarge the original device source image and surround it with a
|
||||
// white edge (border)
|
||||
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize,
|
||||
oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize,
|
||||
oMaskSize.width / 2, oMaskSize.height / 2, 255, nppStreamCtx));
|
||||
oMaskSize.width / 2, oMaskSize.height / 2, 255));
|
||||
|
||||
// adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of
|
||||
// the original source image in the enlarged source image
|
||||
@ -280,25 +261,23 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// run Prewitt edge detection gradient vector filter bypassing border
|
||||
// control due to enlarged source image
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXNoBorders.data(),
|
||||
oDeviceDstOutXNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYNoBorders.data(),
|
||||
oDeviceDstOutYNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
// create additional output files
|
||||
std::string sResultXNoBordersFilename = sResultBaseFilename;
|
||||
std::string sResultYNoBordersFilename = sResultBaseFilename;
|
||||
@ -326,17 +305,15 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// diff the two 8u_C1 result images one with and one without border control
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
|
||||
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
|
||||
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
|
||||
|
||||
// create additional output files
|
||||
std::string sResultXDiffFilename = sResultBaseFilename;
|
||||
@ -403,11 +380,11 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// run Prewitt edge detection gradient vector filter to generate the left
|
||||
// side of the output image
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
|
||||
// now move the enlarged source pointer to the horizontal middle of the
|
||||
// enlarged source image and tell the function where it was moved to
|
||||
@ -424,26 +401,23 @@ int main(int argc, char *argv[]) {
|
||||
// run Prewitt edge detection gradient vector filter to generate the right
|
||||
// side of the output image adjusting the destination image pointers
|
||||
// appropriately
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(),
|
||||
oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(), 0, 0, 0, 0,
|
||||
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE,
|
||||
nppStreamCtx));
|
||||
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXMixedBorders.data(),
|
||||
oDeviceDstOutXMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYMixedBorders.data(),
|
||||
oDeviceDstOutYMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
// create additional output files
|
||||
std::string sResultXMixedBordersFilename = sResultBaseFilename;
|
||||
std::string sResultYMixedBordersFilename = sResultBaseFilename;
|
||||
@ -465,17 +439,15 @@ int main(int argc, char *argv[]) {
|
||||
// diff the original 8u_C1 result images with border control and the mixed
|
||||
// border control images, they should match (diff image will be all black)
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
|
||||
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
|
||||
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
|
||||
|
||||
// create additional output files
|
||||
std::string sResultXMixedDiffFilename = sResultBaseFilename;
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -48,56 +48,39 @@
|
||||
#include <helper_string.h>
|
||||
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
try {
|
||||
std::string sFilename;
|
||||
char *filePath;
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
findCudaDevice(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -171,10 +154,10 @@ int main(int argc, char *argv[]) {
|
||||
NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
|
||||
|
||||
// run box filter
|
||||
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor,
|
||||
NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
NPP_BORDER_REPLICATE));
|
||||
|
||||
// declare a host image for the result
|
||||
npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());
|
||||
|
Binary file not shown.
@ -68,6 +68,26 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -77,50 +97,10 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -210,11 +190,11 @@ int main(int argc, char *argv[]) {
|
||||
Npp16s nHighThreshold = 256;
|
||||
|
||||
if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) {
|
||||
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL,
|
||||
NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2,
|
||||
NPP_BORDER_REPLICATE, pScratchBufferNPP, nppStreamCtx));
|
||||
NPP_BORDER_REPLICATE, pScratchBufferNPP));
|
||||
}
|
||||
|
||||
// free scratch buffer memory
|
||||
|
Binary file not shown.
@ -70,6 +70,25 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[], int cudaVerMajor, int cudaVerMinor) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
// Error handler for FreeImage library.
|
||||
// In case this handler is invoked, it throws an NPP exception.
|
||||
extern "C" void FreeImageErrorHandler(FREE_IMAGE_FORMAT oFif,
|
||||
@ -138,50 +157,11 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
// Min spec is SM 1.0 devices
|
||||
if (printfNPPinfo(argc, argv, 1, 0) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -280,9 +260,9 @@ int main(int argc, char *argv[]) {
|
||||
Npp8u *pDstImageCUDA =
|
||||
nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA);
|
||||
NPP_ASSERT_NOT_NULL(pDstImageCUDA);
|
||||
NPP_CHECK_NPP(nppiFilterBox_8u_C1R_Ctx(pSrcImageCUDA, nSrcPitchCUDA,
|
||||
NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA,
|
||||
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
|
||||
oMaskSize, oMaskAchnor, nppStreamCtx));
|
||||
oMaskSize, oMaskAchnor));
|
||||
// create the result image storage using FreeImage so we can easily
|
||||
// save
|
||||
FIBITMAP *pResultBitmap = FreeImage_Allocate(
|
||||
|
@ -73,6 +73,26 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.1 devices
|
||||
bool bVal = checkCudaCapabilities(1, 1);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -82,50 +102,10 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -202,9 +182,8 @@ int main(int argc, char *argv[]) {
|
||||
(int)oDeviceSrc.height()}; // full image
|
||||
// create device scratch buffer for nppiHistogram
|
||||
size_t nDeviceBufferSize;
|
||||
nppiHistogramEvenGetBufferSize_8u_C1R_Ctx(oSizeROI, levelCount,
|
||||
&nDeviceBufferSize,
|
||||
nppStreamCtx);
|
||||
nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount,
|
||||
&nDeviceBufferSize);
|
||||
Npp8u *pDeviceBuffer;
|
||||
NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize));
|
||||
|
||||
@ -212,9 +191,9 @@ int main(int argc, char *argv[]) {
|
||||
Npp32s levelsHost[levelCount];
|
||||
NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount));
|
||||
// compute the histogram
|
||||
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount,
|
||||
0, binCount, pDeviceBuffer, nppStreamCtx));
|
||||
0, binCount, pDeviceBuffer));
|
||||
// copy histogram and levels to host memory
|
||||
Npp32s histHost[binCount];
|
||||
NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s),
|
||||
@ -275,22 +254,20 @@ int main(int argc, char *argv[]) {
|
||||
sizeof(Npp32s) * (levelCount),
|
||||
cudaMemcpyHostToDevice));
|
||||
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
||||
oDeviceDst.pitch(), oSizeROI,
|
||||
lutDevice, // value and level arrays are in host memory
|
||||
lvlsDevice, levelCount,
|
||||
nppStreamCtx));
|
||||
lutDevice, // value and level arrays are in GPU device memory
|
||||
lvlsDevice, levelCount));
|
||||
|
||||
NPP_CHECK_CUDA(cudaFree(lutDevice));
|
||||
NPP_CHECK_CUDA(cudaFree(lvlsDevice));
|
||||
#else
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
||||
oDeviceDst.pitch(), oSizeROI,
|
||||
lutHost, // value and level arrays are in host memory
|
||||
levelsHost, levelCount,
|
||||
nppStreamCtx));
|
||||
levelsHost, levelCount));
|
||||
#endif
|
||||
|
||||
// copy the result image back into the storage that contained the
|
||||
|
@ -328,10 +328,9 @@ main( int argc, char** argv )
|
||||
|
||||
int nCompressedLabelCount = 0;
|
||||
|
||||
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR_Ctx(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
|
||||
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
|
||||
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
|
||||
pCompressedLabelsScratchBufferDev,
|
||||
nppStreamCtx);
|
||||
pCompressedLabelsScratchBufferDev);
|
||||
|
||||
if (nppStatus != NPP_SUCCESS)
|
||||
{
|
||||
@ -540,3 +539,6 @@ main( int argc, char** argv )
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user