mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-07-01 20:20:29 +08:00
Bug 5034785: Update all non-ctx nppi APIs to ctx APIs as per latest change on NPP
This commit is contained in:
parent
acd3a015c8
commit
7f0f63f311
@ -74,26 +74,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -104,11 +84,50 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
cudaDeviceReset();
|
||||
exit(EXIT_SUCCESS);
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
char *filePath;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
@ -186,11 +205,11 @@ int main(int argc, char *argv[]) {
|
||||
npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height);
|
||||
|
||||
// run Prewitt edge detection gradient vector filter
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
|
||||
// allocate device destination images of appropriatedly size
|
||||
npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height);
|
||||
@ -198,13 +217,13 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), 32, oDeviceDstOutX.data(),
|
||||
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
oDeviceDstOutX.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(
|
||||
oDeviceDstY.data(), oDeviceDstY.pitch(), 32, oDeviceDstOutY.data(),
|
||||
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
oDeviceDstOutY.pitch(), oSizeROI, NPP_CMP_GREATER_EQ, nppStreamCtx));
|
||||
|
||||
// create host images for the results
|
||||
npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size());
|
||||
@ -234,10 +253,10 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// copy and enlarge the original device source image and surround it with a
|
||||
// white edge (border)
|
||||
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiCopyConstBorder_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize,
|
||||
oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize,
|
||||
oMaskSize.width / 2, oMaskSize.height / 2, 255));
|
||||
oMaskSize.width / 2, oMaskSize.height / 2, 255, nppStreamCtx));
|
||||
|
||||
// adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of
|
||||
// the original source image in the enlarged source image
|
||||
@ -261,23 +280,25 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// run Prewitt edge detection gradient vector filter bypassing border
|
||||
// control due to enlarged source image
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXNoBorders.data(),
|
||||
oDeviceDstOutXNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXNoBorders.data(),
|
||||
oDeviceDstOutXNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYNoBorders.data(),
|
||||
oDeviceDstOutYNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYNoBorders.data(),
|
||||
oDeviceDstOutYNoBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
// create additional output files
|
||||
std::string sResultXNoBordersFilename = sResultBaseFilename;
|
||||
std::string sResultYNoBordersFilename = sResultBaseFilename;
|
||||
@ -305,15 +326,17 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// diff the two 8u_C1 result images one with and one without border control
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
|
||||
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
|
||||
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
|
||||
// create additional output files
|
||||
std::string sResultXDiffFilename = sResultBaseFilename;
|
||||
@ -380,11 +403,11 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// run Prewitt edge detection gradient vector filter to generate the left
|
||||
// side of the output image
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data(), oDeviceDstX.pitch(), oDeviceDstY.data(),
|
||||
oDeviceDstY.pitch(), 0, 0, 0, 0, oSizeROI, NPP_MASK_SIZE_3_X_3,
|
||||
nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
nppiNormL1, NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
|
||||
// now move the enlarged source pointer to the horizontal middle of the
|
||||
// enlarged source image and tell the function where it was moved to
|
||||
@ -401,23 +424,26 @@ int main(int argc, char *argv[]) {
|
||||
// run Prewitt edge detection gradient vector filter to generate the right
|
||||
// side of the output image adjusting the destination image pointers
|
||||
// appropriately
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R(
|
||||
NPP_CHECK_NPP(nppiGradientVectorPrewittBorder_8u16s_C1R_Ctx(
|
||||
pAdjustedSrc, oEnlargedDeviceSrc.pitch(), oEnlargedSrcSize, oSrcOffset,
|
||||
oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(),
|
||||
oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(), 0, 0, 0, 0,
|
||||
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
|
||||
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE,
|
||||
nppStreamCtx));
|
||||
|
||||
// convert 16s_C1 result images to binary 8u_C1 output images using constant
|
||||
// value to adjust amount of visible detail
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXMixedBorders.data(),
|
||||
oDeviceDstOutXMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstX.data(), oDeviceDstX.pitch(),
|
||||
32, oDeviceDstOutXMixedBorders.data(),
|
||||
oDeviceDstOutXMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYMixedBorders.data(),
|
||||
oDeviceDstOutYMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ));
|
||||
NPP_CHECK_NPP(nppiCompareC_16s_C1R_Ctx(oDeviceDstY.data(), oDeviceDstY.pitch(),
|
||||
32, oDeviceDstOutYMixedBorders.data(),
|
||||
oDeviceDstOutYMixedBorders.pitch(),
|
||||
oSizeROI, NPP_CMP_GREATER_EQ,
|
||||
nppStreamCtx));
|
||||
// create additional output files
|
||||
std::string sResultXMixedBordersFilename = sResultBaseFilename;
|
||||
std::string sResultYMixedBordersFilename = sResultBaseFilename;
|
||||
@ -439,15 +465,17 @@ int main(int argc, char *argv[]) {
|
||||
// diff the original 8u_C1 result images with border control and the mixed
|
||||
// border control images, they should match (diff image will be all black)
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
|
||||
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI));
|
||||
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiAbsDiff_8u_C1R_Ctx(
|
||||
oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
|
||||
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI));
|
||||
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(), oSizeROI,
|
||||
nppStreamCtx));
|
||||
|
||||
// create additional output files
|
||||
std::string sResultXMixedDiffFilename = sResultBaseFilename;
|
||||
|
@ -48,39 +48,56 @@
|
||||
#include <helper_string.h>
|
||||
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
try {
|
||||
std::string sFilename;
|
||||
char *filePath;
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
findCudaDevice(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -154,10 +171,10 @@ int main(int argc, char *argv[]) {
|
||||
NppiPoint oAnchor = {oMaskSize.width / 2, oMaskSize.height / 2};
|
||||
|
||||
// run box filter
|
||||
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiFilterBoxBorder_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor,
|
||||
NPP_BORDER_REPLICATE));
|
||||
NPP_BORDER_REPLICATE, nppStreamCtx));
|
||||
|
||||
// declare a host image for the result
|
||||
npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());
|
||||
|
@ -68,26 +68,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
bool bVal = checkCudaCapabilities(1, 0);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -97,10 +77,50 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -190,11 +210,11 @@ int main(int argc, char *argv[]) {
|
||||
Npp16s nHighThreshold = 256;
|
||||
|
||||
if ((nBufferSize > 0) && (pScratchBufferNPP != 0)) {
|
||||
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiFilterCannyBorder_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize, oSrcOffset,
|
||||
oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, NPP_FILTER_SOBEL,
|
||||
NPP_MASK_SIZE_3_X_3, nLowThreshold, nHighThreshold, nppiNormL2,
|
||||
NPP_BORDER_REPLICATE, pScratchBufferNPP));
|
||||
NPP_BORDER_REPLICATE, pScratchBufferNPP, nppStreamCtx));
|
||||
}
|
||||
|
||||
// free scratch buffer memory
|
||||
|
@ -70,25 +70,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[], int cudaVerMajor, int cudaVerMinor) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
bool bVal = checkCudaCapabilities(cudaVerMajor, cudaVerMinor);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
// Error handler for FreeImage library.
|
||||
// In case this handler is invoked, it throws an NPP exception.
|
||||
extern "C" void FreeImageErrorHandler(FREE_IMAGE_FORMAT oFif,
|
||||
@ -157,11 +138,50 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
// Min spec is SM 1.0 devices
|
||||
if (printfNPPinfo(argc, argv, 1, 0) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -260,9 +280,9 @@ int main(int argc, char *argv[]) {
|
||||
Npp8u *pDstImageCUDA =
|
||||
nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA);
|
||||
NPP_ASSERT_NOT_NULL(pDstImageCUDA);
|
||||
NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA,
|
||||
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
|
||||
oMaskSize, oMaskAchnor));
|
||||
NPP_CHECK_NPP(nppiFilterBox_8u_C1R_Ctx(pSrcImageCUDA, nSrcPitchCUDA,
|
||||
pDstImageCUDA, nDstPitchCUDA, oSizeROI,
|
||||
oMaskSize, oMaskAchnor, nppStreamCtx));
|
||||
// create the result image storage using FreeImage so we can easily
|
||||
// save
|
||||
FIBITMAP *pResultBitmap = FreeImage_Allocate(
|
||||
|
@ -73,26 +73,6 @@ inline int cudaDeviceInit(int argc, const char **argv) {
|
||||
return dev;
|
||||
}
|
||||
|
||||
bool printfNPPinfo(int argc, char *argv[]) {
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
|
||||
libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
|
||||
(driverVersion % 100) / 10);
|
||||
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
|
||||
(runtimeVersion % 100) / 10);
|
||||
|
||||
// Min spec is SM 1.1 devices
|
||||
bool bVal = checkCudaCapabilities(1, 1);
|
||||
return bVal;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
printf("%s Starting...\n\n", argv[0]);
|
||||
|
||||
@ -102,10 +82,50 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
cudaDeviceInit(argc, (const char **)argv);
|
||||
|
||||
if (printfNPPinfo(argc, argv) == false) {
|
||||
exit(EXIT_SUCCESS);
|
||||
NppStreamContext nppStreamCtx;
|
||||
nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
||||
|
||||
cudaError_t cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
{
|
||||
printf("CUDA error: no devices supporting CUDA.\n");
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
}
|
||||
|
||||
const NppLibraryVersion *libVer = nppGetLibVersion();
|
||||
|
||||
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
||||
|
||||
int driverVersion, runtimeVersion;
|
||||
cudaDriverGetVersion(&driverVersion);
|
||||
cudaRuntimeGetVersion(&runtimeVersion);
|
||||
|
||||
printf("CUDA Driver Version: %d.%d\n", driverVersion/1000, (driverVersion%100)/10);
|
||||
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion/1000, (runtimeVersion%100)/10);
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
||||
cudaDevAttrComputeCapabilityMajor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
||||
cudaDevAttrComputeCapabilityMinor,
|
||||
nppStreamCtx.nCudaDeviceId);
|
||||
if (cudaError != cudaSuccess)
|
||||
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
||||
|
||||
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
||||
|
||||
cudaDeviceProp oDeviceProperties;
|
||||
|
||||
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
||||
|
||||
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
||||
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
||||
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
||||
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
||||
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
|
||||
} else {
|
||||
@ -182,8 +202,9 @@ int main(int argc, char *argv[]) {
|
||||
(int)oDeviceSrc.height()}; // full image
|
||||
// create device scratch buffer for nppiHistogram
|
||||
size_t nDeviceBufferSize;
|
||||
nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount,
|
||||
&nDeviceBufferSize);
|
||||
nppiHistogramEvenGetBufferSize_8u_C1R_Ctx(oSizeROI, levelCount,
|
||||
&nDeviceBufferSize,
|
||||
nppStreamCtx);
|
||||
Npp8u *pDeviceBuffer;
|
||||
NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize));
|
||||
|
||||
@ -191,9 +212,9 @@ int main(int argc, char *argv[]) {
|
||||
Npp32s levelsHost[levelCount];
|
||||
NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount));
|
||||
// compute the histogram
|
||||
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiHistogramEven_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount,
|
||||
0, binCount, pDeviceBuffer));
|
||||
0, binCount, pDeviceBuffer, nppStreamCtx));
|
||||
// copy histogram and levels to host memory
|
||||
Npp32s histHost[binCount];
|
||||
NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s),
|
||||
@ -254,20 +275,22 @@ int main(int argc, char *argv[]) {
|
||||
sizeof(Npp32s) * (levelCount),
|
||||
cudaMemcpyHostToDevice));
|
||||
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
||||
oDeviceDst.pitch(), oSizeROI,
|
||||
lutDevice, // value and level arrays are in GPU device memory
|
||||
lvlsDevice, levelCount));
|
||||
lutDevice, // value and level arrays are in host memory
|
||||
lvlsDevice, levelCount,
|
||||
nppStreamCtx));
|
||||
|
||||
NPP_CHECK_CUDA(cudaFree(lutDevice));
|
||||
NPP_CHECK_CUDA(cudaFree(lvlsDevice));
|
||||
#else
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
|
||||
NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R_Ctx(
|
||||
oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
|
||||
oDeviceDst.pitch(), oSizeROI,
|
||||
lutHost, // value and level arrays are in host memory
|
||||
levelsHost, levelCount));
|
||||
levelsHost, levelCount,
|
||||
nppStreamCtx));
|
||||
#endif
|
||||
|
||||
// copy the result image back into the storage that contained the
|
||||
|
@ -328,9 +328,10 @@ main( int argc, char** argv )
|
||||
|
||||
int nCompressedLabelCount = 0;
|
||||
|
||||
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
|
||||
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
|
||||
pCompressedLabelsScratchBufferDev);
|
||||
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR_Ctx(pSegmentLabelsOutputBufferDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage],
|
||||
oSizeROI[nImage].width * oSizeROI[nImage].height, &nCompressedLabelCount,
|
||||
pCompressedLabelsScratchBufferDev,
|
||||
nppStreamCtx);
|
||||
|
||||
if (nppStatus != NPP_SUCCESS)
|
||||
{
|
||||
@ -539,6 +540,3 @@ main( int argc, char** argv )
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user