cuda-samples/Samples/4_CUDA_Libraries/histEqualizationNPP/histEqualizationNPP.cpp
2022-01-13 11:35:24 +05:30

301 lines
9.5 KiB
C++

/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#pragma warning(disable : 4819)
// WIN32_LEAN_AND_MEAN is the macro <windows.h> documents for trimming rarely
// used APIs; the historical WINDOWS_LEAN_AND_MEAN spelling is kept as well so
// anything that tests for it keeps seeing it defined.
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef WINDOWS_LEAN_AND_MEAN
#define WINDOWS_LEAN_AND_MEAN
#endif
// NOMINMAX is defined before <windows.h> is included; guarded so a
// command-line -DNOMINMAX does not cause a redefinition warning.
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif
#include <Exceptions.h>
#include <ImageIO.h>
#include <ImagesCPU.h>
#include <ImagesNPP.h>
#include <helper_cuda.h>
#include <npp.h>
#include <string.h>
#include <fstream>
#include <iostream>
#include <string>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#define STRCASECMP _stricmp
#define STRNCASECMP _strnicmp
#else
#define STRCASECMP strcasecmp
#define STRNCASECMP strncasecmp
#endif
/**
 * Picks a CUDA device, reports its name on stderr and makes it current.
 *
 * Terminates the process with EXIT_FAILURE when the runtime reports zero
 * devices. Every CUDA runtime call is routed through checkCudaErrors so a
 * failing call is reported instead of being silently ignored.
 *
 * @param argc  argument count, forwarded to findCudaDevice
 * @param argv  argument vector, forwarded to findCudaDevice
 * @return the device index returned by findCudaDevice
 */
inline int cudaDeviceInit(int argc, const char **argv) {
  int deviceCount = 0;
  checkCudaErrors(cudaGetDeviceCount(&deviceCount));

  if (deviceCount == 0) {
    std::cerr << "CUDA error: no devices supporting CUDA." << std::endl;
    exit(EXIT_FAILURE);
  }

  int dev = findCudaDevice(argc, argv);

  // Check the query: deviceProp.name is printed right below and must not be
  // read from a struct that was never filled in.
  cudaDeviceProp deviceProp;
  checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
  std::cerr << "cudaSetDevice GPU" << dev << " = " << deviceProp.name
            << std::endl;

  checkCudaErrors(cudaSetDevice(dev));

  return dev;
}
bool printfNPPinfo(int argc, char *argv[]) {
const NppLibraryVersion *libVer = nppGetLibVersion();
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
libVer->build);
int driverVersion, runtimeVersion;
cudaDriverGetVersion(&driverVersion);
cudaRuntimeGetVersion(&runtimeVersion);
printf(" CUDA Driver Version: %d.%d\n", driverVersion / 1000,
(driverVersion % 100) / 10);
printf(" CUDA Runtime Version: %d.%d\n", runtimeVersion / 1000,
(runtimeVersion % 100) / 10);
// Min spec is SM 1.1 devices
bool bVal = checkCudaCapabilities(1, 1);
return bVal;
}
/**
 * Histogram-equalizes an 8-bit, single-channel image with NPP.
 *
 * Steps: pick a device, locate and load the input image, compute its
 * histogram on the device, build a cumulative lookup table on the host, apply
 * the table with nppiLUT_Linear_8u_C1R and save the result.
 *
 * Command-line flags read here:
 *   input   path of the image to process (default: teapot512.pgm, located
 *           through sdkFindFilePath)
 *   output  path of the result (default: the input path with its extension
 *           replaced by "_histEqualization.pgm")
 *
 * @return 0 on success; the process exits with EXIT_FAILURE when the input
 *         cannot be opened or an exception is caught, and with EXIT_SUCCESS
 *         when printfNPPinfo reports that the device check failed.
 */
int main(int argc, char *argv[]) {
  printf("%s Starting...\n\n", argv[0]);

  try {
    std::string sFilename;
    char *filePath = NULL;

    cudaDeviceInit(argc, (const char **)argv);

    if (printfNPPinfo(argc, argv) == false) {
      exit(EXIT_SUCCESS);
    }

    if (checkCmdLineFlag(argc, (const char **)argv, "input")) {
      getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
    } else {
      filePath = sdkFindFilePath("teapot512.pgm", argv[0]);
    }

    if (filePath) {
      sFilename = filePath;
    } else {
      sFilename = "teapot512.pgm";
    }

    // Probe the input up front so that a missing file is reported with a
    // plain message before any image loading is attempted.
    std::ifstream infile(sFilename.c_str(), std::ifstream::in);
    const bool inputReadable = infile.good();
    infile.close();

    if (!inputReadable) {
      std::cout << "histEqualizationNPP unable to open: <" << sFilename.c_str()
                << ">" << std::endl;
      exit(EXIT_FAILURE);
    }

    std::cout << "histEqualizationNPP opened: <" << sFilename.c_str()
              << "> successfully!" << std::endl;

    // Derive the default output name from the input name. Only a dot located
    // in the last path component counts as the extension separator, so that
    // inputs such as "./name" or "dir.d/name" are not truncated in the middle
    // of the directory part.
    std::string dstFileName = sFilename;
    const std::string::size_type dot = dstFileName.rfind('.');
    const std::string::size_type sep = dstFileName.find_last_of("/\\");

    if (dot != std::string::npos &&
        (sep == std::string::npos || dot > sep)) {
      dstFileName = dstFileName.substr(0, dot);
    }

    dstFileName += "_histEqualization.pgm";

    if (checkCmdLineFlag(argc, (const char **)argv, "output")) {
      char *outputFilePath = NULL;
      getCmdLineArgumentString(argc, (const char **)argv, "output",
                               &outputFilePath);

      // Assigning a null pointer to std::string is undefined behaviour; keep
      // the default name when no value was produced for the flag.
      if (outputFilePath) {
        dstFileName = outputFilePath;
      }
    }

    npp::ImageCPU_8u_C1 oHostSrc;
    npp::loadImage(sFilename, oHostSrc);
    npp::ImageNPP_8u_C1 oDeviceSrc(oHostSrc);

    //
    // allocate the device array for the histogram
    //

    // NOTE(review): with 255 bins spanning the levels 0..255, pixels of value
    // 255 look like they are left out of the histogram (which would explain
    // the "<=" in the assertion further down) - confirm this is intended.
    const int binCount = 255;
    const int levelCount = binCount + 1;  // levels array has one more element

    Npp32s *histDevice = 0;
    NPP_CHECK_CUDA(cudaMalloc((void **)&histDevice, binCount * sizeof(Npp32s)));

    //
    // compute histogram
    //

    NppiSize oSizeROI = {(int)oDeviceSrc.width(),
                         (int)oDeviceSrc.height()};  // full image

    // create device scratch buffer for nppiHistogram; the size query is
    // checked because its output sizes the allocation that follows
    int nDeviceBufferSize = 0;
    NPP_CHECK_NPP(nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount,
                                                        &nDeviceBufferSize));
    Npp8u *pDeviceBuffer = 0;
    NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize));

    // compute levels values on host
    Npp32s levelsHost[levelCount];
    NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount));

    // compute the histogram
    NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(
        oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount,
        0, binCount, pDeviceBuffer));

    // copy the histogram to host memory
    Npp32s histHost[binCount];
    NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s),
                              cudaMemcpyDeviceToHost));

    Npp32s lutHost[levelCount];

    // fill LUT
    {
      // Sanity check: the bins can never hold more samples than the ROI has
      // pixels.
      Npp32s totalSum = 0;

      for (int bin = 0; bin < binCount; ++bin) {
        totalSum += histHost[bin];
      }

      NPP_ASSERT(totalSum <= oSizeROI.width * oSizeROI.height);

      // Scale factor from a cumulative pixel count to the 0..0xFF range.
      float multiplier = 1.0f / float(oSizeROI.width * oSizeROI.height) * 0xFF;

      // Entry i holds the scaled count of all samples in the bins below i.
      Npp32s runningSum = 0;

      for (int bin = 0; bin < binCount; ++bin) {
        lutHost[bin] = (Npp32s)(runningSum * multiplier + 0.5f);
        runningSum += histHost[bin];
      }

      lutHost[binCount] = 0xFF;  // last entry is pinned to the maximum, 0xFF
    }

    //
    // apply LUT transformation to the image
    //
    // Create a device image for the result.
    npp::ImageNPP_8u_C1 oDeviceDst(oDeviceSrc.size());

#if CUDART_VERSION >= 5000
    // Note for CUDA 5.0, that nppiLUT_Linear_8u_C1R requires these pointers to
    // be in GPU device memory
    Npp32s *lutDevice = 0;
    Npp32s *lvlsDevice = 0;

    NPP_CHECK_CUDA(
        cudaMalloc((void **)&lutDevice, sizeof(Npp32s) * (levelCount)));
    NPP_CHECK_CUDA(
        cudaMalloc((void **)&lvlsDevice, sizeof(Npp32s) * (levelCount)));

    NPP_CHECK_CUDA(cudaMemcpy(lutDevice, lutHost, sizeof(Npp32s) * (levelCount),
                              cudaMemcpyHostToDevice));
    NPP_CHECK_CUDA(cudaMemcpy(lvlsDevice, levelsHost,
                              sizeof(Npp32s) * (levelCount),
                              cudaMemcpyHostToDevice));

    NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
        oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
        oDeviceDst.pitch(), oSizeROI,
        lutDevice,  // value and level arrays are in GPU device memory
        lvlsDevice, levelCount));

    NPP_CHECK_CUDA(cudaFree(lutDevice));
    NPP_CHECK_CUDA(cudaFree(lvlsDevice));
#else
    NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(
        oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(),
        oDeviceDst.pitch(), oSizeROI,
        lutHost,  // value and level arrays are in host memory
        levelsHost, levelCount));
#endif

    // copy the result image from the device into a new host image
    npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size());
    oDeviceDst.copyTo(oHostDst.data(), oHostDst.pitch());

    NPP_CHECK_CUDA(cudaFree(histDevice));
    NPP_CHECK_CUDA(cudaFree(pDeviceBuffer));

    // The pixel buffers of oDeviceSrc / oDeviceDst are deliberately not
    // passed to nppiFree() here: the image objects are still alive, so an
    // explicit free would leave them holding a dangling pointer and would
    // release the buffer a second time if saveImage() throws and the stack
    // is unwound.
    // NOTE(review): this relies on npp::ImageNPP releasing its buffer in its
    // destructor - confirm against ImagesNPP.h.

    // save the result
    npp::saveImage(dstFileName.c_str(), oHostDst);
    std::cout << "Saved image file " << dstFileName << std::endl;

    // Fall through to the return below rather than calling exit(), so the
    // image objects go out of scope normally; the exit status is unchanged.
  } catch (npp::Exception &rException) {
    std::cerr << "Program error! The following exception occurred: \n";
    std::cerr << rException << std::endl;
    std::cerr << "Aborting." << std::endl;
    exit(EXIT_FAILURE);
  } catch (...) {
    std::cerr << "Program error! An unknow type of exception occurred. \n";
    std::cerr << "Aborting." << std::endl;
    exit(EXIT_FAILURE);
  }

  return 0;
}