mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-07-02 04:41:59 +08:00
597 lines
28 KiB
C++
597 lines
28 KiB
C++
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
|
|
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
|
|
#define WINDOWS_LEAN_AND_MEAN
|
|
#define NOMINMAX
|
|
#include <windows.h>
|
|
#pragma warning(disable : 4819)
|
|
#endif
|
|
|
|
#include <fstream>
|
|
#include <helper_cuda.h>
|
|
#include <npp.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
// Note: To view these images we HIGHLY recommend ImageJ, which is freely available on the internet and
// works on most platforms, because it is one of the few image viewing apps that can display 32-bit
// integer image data. Although it normalizes the data to floating point values for viewing, it still
// provides a good representation of the relative brightness of each label value.
//
// The files read and written by this sample app use RAW image format, that is, the files contain only
// the image data itself with no image format information. When viewing RAW files with ImageJ, enter
// the image size and bit depth values that are part of the file name when ImageJ requests them.
//
|
|
|
|
#define NUMBER_OF_IMAGES 3
|
|
|
|
Npp8u *pInputImageDev[NUMBER_OF_IMAGES];
|
|
Npp8u *pInputImageHost[NUMBER_OF_IMAGES];
|
|
Npp8u *pSegmentationScratchBufferDev[NUMBER_OF_IMAGES];
|
|
Npp8u *pSegmentsDev[NUMBER_OF_IMAGES];
|
|
Npp8u *pSegmentsHost[NUMBER_OF_IMAGES];
|
|
Npp32u *pSegmentLabelsOutputBufferDev[NUMBER_OF_IMAGES];
|
|
Npp32u *pSegmentLabelsOutputBufferHost[NUMBER_OF_IMAGES];
|
|
|
|
void tearDown() // Clean up and tear down
|
|
{
|
|
for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
|
|
if (pSegmentLabelsOutputBufferDev[j] != 0)
|
|
cudaFree(pSegmentLabelsOutputBufferDev[j]);
|
|
if (pSegmentationScratchBufferDev[j] != 0)
|
|
cudaFree(pSegmentationScratchBufferDev[j]);
|
|
if (pSegmentsDev[j] != 0)
|
|
cudaFree(pSegmentsDev[j]);
|
|
if (pInputImageDev[j] != 0)
|
|
cudaFree(pInputImageDev[j]);
|
|
if (pSegmentLabelsOutputBufferHost[j] != 0)
|
|
free(pSegmentLabelsOutputBufferHost[j]);
|
|
if (pSegmentsHost[j] != 0)
|
|
free(pSegmentsHost[j]);
|
|
if (pInputImageHost[j] != 0)
|
|
free(pInputImageHost[j]);
|
|
}
|
|
}
|
|
|
|
// Output file names, one per input image (0 = teapot, 1 = CT skull, 2 = Rocks).
// These are plain std::string objects rather than references bound to temporaries, so each name
// owns its storage directly.

// Segmented images (8-bit).
const std::string SegmentsOutputFile0 = "teapot_Segments_8Way_512x512_8u.raw";
const std::string SegmentsOutputFile1 = "CT_skull_Segments_8Way_512x512_8u.raw";
const std::string SegmentsOutputFile2 = "Rocks_Segments_8Way_512x512_8u.raw";

// Segment-boundaries-only images (8-bit).
const std::string SegmentBoundariesOutputFile0 = "teapot_SegmentBoundaries_8Way_512x512_8u.raw";
const std::string SegmentBoundariesOutputFile1 = "CT_skull_SegmentBoundaries_8Way_512x512_8u.raw";
const std::string SegmentBoundariesOutputFile2 = "Rocks_SegmentBoundaries_8Way_512x512_8u.raw";

// Segmented images with contrasting boundaries (8-bit).
const std::string SegmentsWithContrastingBoundariesOutputFile0 =
    "teapot_SegmentsWithContrastingBoundaries_8Way_512x512_8u.raw";
const std::string SegmentsWithContrastingBoundariesOutputFile1 =
    "CT_skull_SegmentsWithContrastingBoundaries_8Way_512x512_8u.raw";
const std::string SegmentsWithContrastingBoundariesOutputFile2 =
    "Rocks_SegmentsWithContrastingBoundaries_8Way_512x512_8u.raw";

// Compressed segment label images (32-bit).
const std::string CompressedSegmentLabelsOutputFile0 = "teapot_CompressedSegmentLabels_8Way_512x512_32u.raw";
const std::string CompressedSegmentLabelsOutputFile1 = "CT_skull_CompressedSegmentLabels_8Way_512x512_32u.raw";
const std::string CompressedSegmentLabelsOutputFile2 = "Rocks_CompressedSegmentLabels_8Way_512x512_32u.raw";
|
|
|
|
// Loads one of the sample's RAW 8-bit grayscale input images into a caller-supplied buffer.
//
// Parameters:
//   pImage  - destination buffer; nWidth * nHeight bytes are read into it.
//   nWidth  - expected image width; only 512 is accepted.
//   nHeight - expected image height; only 512 is accepted.
//   nImage  - image selector: 0 = teapot, 1 = CT skull, 2 = Rocks.
//
// Returns 0 on success and -1 on failure (unknown selector, NULL buffer, unexpected size, file cannot
// be opened, or file shorter than nWidth * nHeight bytes). If sdkFindFilePath() cannot locate the
// file, the process terminates with EXIT_WAIVED.
int loadRaw8BitImage(Npp8u *pImage, int nWidth, int nHeight, int nImage)
{
    // Input file for each image selector.
    static const char *const aFileNames[] = {
        "teapot_512x512_8u_Gray.raw",
        "CT_skull_512x512_8u_Gray.raw",
        "Rocks_512x512_8u_Gray.raw",
    };
    const int nFileCount = static_cast<int>(sizeof(aFileNames) / sizeof(aFileNames[0]));

    if (nImage < 0 || nImage >= nFileCount || pImage == NULL) {
        printf("Input file load failed.\n");
        return -1;
    }

    // Every bundled image is 512x512; any other size would not match the file contents.
    if (nWidth != 512 || nHeight != 512)
        return -1;

    const char *fileName = aFileNames[nImage];
    // NOTE(review): if sdkFindFilePath returns heap-allocated storage it is never released here -
    // confirm against its declaration before adding a free().
    const char *InputFile = sdkFindFilePath(fileName, ".");
    if (InputFile == NULL) {
        printf("%s file not found.. exiting\n", fileName);
        exit(EXIT_WAIVED);
    }

    FILE *bmpFile = fopen(InputFile, "rb");
    if (bmpFile == NULL) {
        printf("Input file load failed.\n");
        return -1;
    }

    // Compute the byte count in size_t so the comparison with fread's result is unsigned on both
    // sides and the product cannot overflow int.
    const size_t nExpected = static_cast<size_t>(nWidth) * static_cast<size_t>(nHeight);
    const size_t nRead     = fread(pImage, 1, nExpected, bmpFile);
    fclose(bmpFile);

    if (nRead < nExpected) {
        printf("Input file load failed.\n");
        return -1;
    }

    printf("Input file load succeeded.\n");

    return 0;
}
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
|
|
size_t aSegmentationScratchBufferSize[NUMBER_OF_IMAGES];
|
|
int aSegmentLabelsOutputBufferSize[NUMBER_OF_IMAGES];
|
|
|
|
cudaError_t cudaError;
|
|
NppStatus nppStatus;
|
|
NppStreamContext nppStreamCtx;
|
|
FILE *bmpFile;
|
|
NppiNorm eNorm = nppiNormInf; // default to 8 way neighbor search
|
|
|
|
for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
|
|
pInputImageDev[j] = 0;
|
|
pInputImageHost[j] = 0;
|
|
pSegmentationScratchBufferDev[j] = 0;
|
|
pSegmentLabelsOutputBufferDev[j] = 0;
|
|
pSegmentLabelsOutputBufferHost[j] = 0;
|
|
pSegmentsDev[j] = 0;
|
|
pSegmentsHost[j] = 0;
|
|
}
|
|
|
|
nppStreamCtx.hStream =
|
|
0; // The NULL stream by default, set this to whatever your stream ID is if not the NULL stream.
|
|
|
|
cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
|
|
if (cudaError != cudaSuccess) {
|
|
printf("CUDA error: no devices supporting CUDA.\n");
|
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
|
}
|
|
|
|
const NppLibraryVersion *libVer = nppGetLibVersion();
|
|
|
|
printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, libVer->build);
|
|
|
|
int driverVersion, runtimeVersion;
|
|
cudaDriverGetVersion(&driverVersion);
|
|
cudaRuntimeGetVersion(&runtimeVersion);
|
|
|
|
printf("CUDA Driver Version: %d.%d\n", driverVersion / 1000, (driverVersion % 100) / 10);
|
|
printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion / 1000, (runtimeVersion % 100) / 10);
|
|
|
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
|
|
cudaDevAttrComputeCapabilityMajor,
|
|
nppStreamCtx.nCudaDeviceId);
|
|
if (cudaError != cudaSuccess)
|
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
|
|
|
cudaError = cudaDeviceGetAttribute(&nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
|
|
cudaDevAttrComputeCapabilityMinor,
|
|
nppStreamCtx.nCudaDeviceId);
|
|
if (cudaError != cudaSuccess)
|
|
return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
|
|
|
|
cudaError = cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
|
|
|
|
cudaDeviceProp oDeviceProperties;
|
|
|
|
cudaError = cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
|
|
|
|
nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
|
|
nppStreamCtx.nMaxThreadsPerMultiProcessor = oDeviceProperties.maxThreadsPerMultiProcessor;
|
|
nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
|
|
nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
|
|
|
|
NppiSize oSizeROI[NUMBER_OF_IMAGES];
|
|
|
|
for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
|
|
if (nImage == 0) {
|
|
oSizeROI[nImage].width = 512;
|
|
oSizeROI[nImage].height = 512;
|
|
}
|
|
else if (nImage == 1) {
|
|
oSizeROI[nImage].width = 512;
|
|
oSizeROI[nImage].height = 512;
|
|
}
|
|
else if (nImage == 2) {
|
|
oSizeROI[nImage].width = 512;
|
|
oSizeROI[nImage].height = 512;
|
|
}
|
|
|
|
// cudaMallocPitch OR cudaMalloc can be used here, in this sample case width == pitch.
|
|
|
|
cudaError = cudaMalloc((void **)&pInputImageDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height);
|
|
if (cudaError != cudaSuccess)
|
|
return NPP_MEMORY_ALLOCATION_ERR;
|
|
|
|
cudaError = cudaMalloc((void **)&pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height);
|
|
if (cudaError != cudaSuccess)
|
|
return NPP_MEMORY_ALLOCATION_ERR;
|
|
|
|
pInputImageHost[nImage] =
|
|
reinterpret_cast<Npp8u *>(malloc(oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height));
|
|
pSegmentsHost[nImage] =
|
|
reinterpret_cast<Npp8u *>(malloc(oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height));
|
|
|
|
nppStatus = nppiSegmentWatershedGetBufferSize_8u_C1R(oSizeROI[nImage], &aSegmentationScratchBufferSize[nImage]);
|
|
|
|
cudaError = cudaMalloc((void **)&pSegmentationScratchBufferDev[nImage], aSegmentationScratchBufferSize[nImage]);
|
|
if (cudaError != cudaSuccess)
|
|
return NPP_MEMORY_ALLOCATION_ERR;
|
|
|
|
// Output label marker buffers are only needed if you want to same the generated segmentation labels, they ARE
|
|
// compatible with NPP UF generated labels. Requesting segmentation output may slightly decrease segmentation
|
|
// function performance. Regardless of the pitch of the segmentation image the segment labels output buffer
|
|
// will have a pitch of oSizeROI[nImage].width * sizeof(Npp32u).
|
|
|
|
aSegmentLabelsOutputBufferSize[nImage] = oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height;
|
|
|
|
cudaError = cudaMalloc((void **)&pSegmentLabelsOutputBufferDev[nImage], aSegmentLabelsOutputBufferSize[nImage]);
|
|
if (cudaError != cudaSuccess)
|
|
return NPP_MEMORY_ALLOCATION_ERR;
|
|
|
|
pSegmentLabelsOutputBufferHost[nImage] =
|
|
reinterpret_cast<Npp32u *>(malloc(oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height));
|
|
|
|
if (loadRaw8BitImage(
|
|
pInputImageHost[nImage], oSizeROI[nImage].width * sizeof(Npp8u), oSizeROI[nImage].height, nImage)
|
|
== 0) {
|
|
cudaError = cudaMemcpy2DAsync(pInputImageDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
pInputImageHost[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].height,
|
|
cudaMemcpyHostToDevice,
|
|
nppStreamCtx.hStream);
|
|
|
|
// Make a second copy of the unaltered input image since this function works in place and we want to reuse
|
|
// the input image multiple times.
|
|
cudaError = cudaMemcpy2DAsync(pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
pInputImageHost[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].height,
|
|
cudaMemcpyHostToDevice,
|
|
nppStreamCtx.hStream);
|
|
|
|
nppStatus = nppiSegmentWatershed_8u_C1IR_Ctx(pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
pSegmentLabelsOutputBufferDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp32u),
|
|
eNorm,
|
|
NPP_WATERSHED_SEGMENT_BOUNDARIES_NONE,
|
|
oSizeROI[nImage],
|
|
pSegmentationScratchBufferDev[nImage],
|
|
nppStreamCtx);
|
|
|
|
if (nppStatus != NPP_SUCCESS) {
|
|
if (nImage == 0)
|
|
printf("Lena segments 8Way 512x512 8u failed.\n");
|
|
else if (nImage == 1)
|
|
printf("CT skull segments 8Way 512x512 8u failed.\n");
|
|
else if (nImage == 2)
|
|
printf("Rocks segments 8Way 512x512 8u failed.\n");
|
|
tearDown();
|
|
return -1;
|
|
}
|
|
|
|
// Now compress the label markers output to make them easier to view.
|
|
int nCompressedLabelsScratchBufferSize;
|
|
Npp8u *pCompressedLabelsScratchBufferDev;
|
|
|
|
nppStatus = nppiCompressMarkerLabelsGetBufferSize_32u_C1R(oSizeROI[nImage].width * oSizeROI[nImage].height,
|
|
&nCompressedLabelsScratchBufferSize);
|
|
if (nppStatus != NPP_NO_ERROR)
|
|
return nppStatus;
|
|
|
|
cudaError = cudaMalloc((void **)&pCompressedLabelsScratchBufferDev, nCompressedLabelsScratchBufferSize);
|
|
if (cudaError != cudaSuccess)
|
|
return NPP_MEMORY_ALLOCATION_ERR;
|
|
|
|
int nCompressedLabelCount = 0;
|
|
|
|
nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR_Ctx(pSegmentLabelsOutputBufferDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp32u),
|
|
oSizeROI[nImage],
|
|
oSizeROI[nImage].width * oSizeROI[nImage].height,
|
|
&nCompressedLabelCount,
|
|
pCompressedLabelsScratchBufferDev,
|
|
nppStreamCtx);
|
|
|
|
if (nppStatus != NPP_SUCCESS) {
|
|
if (nImage == 0)
|
|
printf("teapot_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
|
|
else if (nImage == 1)
|
|
printf("CT_Skull_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
|
|
else if (nImage == 2)
|
|
printf("Rocks_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
|
|
tearDown();
|
|
return -1;
|
|
}
|
|
|
|
// Copy segmented image to host
|
|
cudaError = cudaMemcpy2DAsync(pSegmentsHost[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].height,
|
|
cudaMemcpyDeviceToHost,
|
|
nppStreamCtx.hStream);
|
|
|
|
// Copy segment labels image to host
|
|
cudaError = cudaMemcpy2DAsync(pSegmentLabelsOutputBufferHost[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp32u),
|
|
pSegmentLabelsOutputBufferDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp32u),
|
|
oSizeROI[nImage].width * sizeof(Npp32u),
|
|
oSizeROI[nImage].height,
|
|
cudaMemcpyDeviceToHost,
|
|
nppStreamCtx.hStream);
|
|
|
|
// Wait host image read backs to complete, not necessary if no need to synchronize
|
|
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) != cudaSuccess) {
|
|
printf("Post segmentation cudaStreamSynchronize failed\n");
|
|
tearDown();
|
|
return -1;
|
|
}
|
|
|
|
// Free single image scratch buffer
|
|
cudaFree(pCompressedLabelsScratchBufferDev);
|
|
|
|
// Save default segments file.
|
|
if (nImage == 0)
|
|
bmpFile = fopen(SegmentsOutputFile0.c_str(), "wb");
|
|
else if (nImage == 1)
|
|
bmpFile = fopen(SegmentsOutputFile1.c_str(), "wb");
|
|
else if (nImage == 2)
|
|
bmpFile = fopen(SegmentsOutputFile2.c_str(), "wb");
|
|
|
|
if (bmpFile == NULL)
|
|
return -1;
|
|
size_t nSize = 0;
|
|
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
|
nSize += fwrite(
|
|
&pSegmentsHost[nImage][j * oSizeROI[nImage].width], sizeof(Npp8u), oSizeROI[nImage].width, bmpFile);
|
|
}
|
|
fclose(bmpFile);
|
|
|
|
if (nImage == 0)
|
|
printf("teapot_Segments_8Way_512x512_8u succeeded.\n");
|
|
else if (nImage == 1)
|
|
printf("CT_Skull_Segments_8Way_512x512_8u succeeded.\n");
|
|
else if (nImage == 2)
|
|
printf("Rocks_Segments_8Way_512x512_8u succeeded.\n");
|
|
|
|
// Save segment labels file.
|
|
if (nImage == 0)
|
|
bmpFile = fopen(CompressedSegmentLabelsOutputFile0.c_str(), "wb");
|
|
else if (nImage == 1)
|
|
bmpFile = fopen(CompressedSegmentLabelsOutputFile1.c_str(), "wb");
|
|
else if (nImage == 2)
|
|
bmpFile = fopen(CompressedSegmentLabelsOutputFile2.c_str(), "wb");
|
|
|
|
if (bmpFile == NULL)
|
|
return -1;
|
|
nSize = 0;
|
|
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
|
nSize += fwrite(&pSegmentLabelsOutputBufferHost[nImage][j * oSizeROI[nImage].width],
|
|
sizeof(Npp32u),
|
|
oSizeROI[nImage].width,
|
|
bmpFile);
|
|
}
|
|
fclose(bmpFile);
|
|
|
|
if (nImage == 0)
|
|
printf("teapot_CompressedSegmentLabels_8Way_512x512_32u succeeded.\n");
|
|
else if (nImage == 1)
|
|
printf("CT_Skull_CompressedSegmentLabels_8Way_512x512_32u succeeded.\n");
|
|
else if (nImage == 2)
|
|
printf("Rocks_CompressedSegmentLabels_8Way_512x512_32u succeeded.\n");
|
|
|
|
// Now generate a segment boundaries only output image
|
|
|
|
// Make a second copy of the unaltered input image since this function works in place and we want to reuse
|
|
// the input image multiple times.
|
|
cudaError = cudaMemcpy2DAsync(pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
pInputImageHost[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].height,
|
|
cudaMemcpyHostToDevice,
|
|
nppStreamCtx.hStream);
|
|
|
|
// We already generated segment labels images to skip that this time
|
|
nppStatus = nppiSegmentWatershed_8u_C1IR_Ctx(pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
0,
|
|
0,
|
|
eNorm,
|
|
NPP_WATERSHED_SEGMENT_BOUNDARIES_ONLY,
|
|
oSizeROI[nImage],
|
|
pSegmentationScratchBufferDev[nImage],
|
|
nppStreamCtx);
|
|
|
|
if (nppStatus != NPP_SUCCESS) {
|
|
if (nImage == 0)
|
|
printf("Lena segment boundaries 8Way 512x512 8u failed.\n");
|
|
else if (nImage == 1)
|
|
printf("CT skull segment boundaries 8Way 512x512 8u failed.\n");
|
|
else if (nImage == 2)
|
|
printf("Rocks segment boundaries 8Way 512x512 8u failed.\n");
|
|
tearDown();
|
|
return -1;
|
|
}
|
|
|
|
// Copy segment boundaries image to host
|
|
cudaError = cudaMemcpy2DAsync(pSegmentsHost[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].height,
|
|
cudaMemcpyDeviceToHost,
|
|
nppStreamCtx.hStream);
|
|
|
|
// Wait host image read backs to complete, not necessary if no need to synchronize
|
|
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) != cudaSuccess) {
|
|
printf("Post segmentation cudaStreamSynchronize failed\n");
|
|
tearDown();
|
|
return -1;
|
|
}
|
|
|
|
if (nImage == 0)
|
|
bmpFile = fopen(SegmentBoundariesOutputFile0.c_str(), "wb");
|
|
else if (nImage == 1)
|
|
bmpFile = fopen(SegmentBoundariesOutputFile1.c_str(), "wb");
|
|
else if (nImage == 2)
|
|
bmpFile = fopen(SegmentBoundariesOutputFile2.c_str(), "wb");
|
|
|
|
if (bmpFile == NULL)
|
|
return -1;
|
|
nSize = 0;
|
|
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
|
nSize += fwrite(
|
|
&pSegmentsHost[nImage][j * oSizeROI[nImage].width], sizeof(Npp8u), oSizeROI[nImage].width, bmpFile);
|
|
}
|
|
fclose(bmpFile);
|
|
|
|
if (nImage == 0)
|
|
printf("teapot_SegmentBoundaries_8Way_512x512_8u succeeded.\n");
|
|
else if (nImage == 1)
|
|
printf("CT_Skull_SegmentBoundaries_8Way_512x512_8u succeeded.\n");
|
|
else if (nImage == 2)
|
|
printf("Rocks_SegmentBoundaries_8Way_512x512_8u succeeded.\n");
|
|
|
|
// Now generate a segmented with contrasting boundaries output image
|
|
|
|
// Make a second copy of the unaltered input image since this function works in place and we want to reuse
|
|
// the input image multiple times.
|
|
cudaError = cudaMemcpy2DAsync(pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
pInputImageHost[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].height,
|
|
cudaMemcpyHostToDevice,
|
|
nppStreamCtx.hStream);
|
|
|
|
// We already generated segment labels images to skip that this time
|
|
nppStatus = nppiSegmentWatershed_8u_C1IR_Ctx(pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
0,
|
|
0,
|
|
eNorm,
|
|
NPP_WATERSHED_SEGMENT_BOUNDARIES_CONTRAST,
|
|
oSizeROI[nImage],
|
|
pSegmentationScratchBufferDev[nImage],
|
|
nppStreamCtx);
|
|
|
|
if (nppStatus != NPP_SUCCESS) {
|
|
if (nImage == 0)
|
|
printf("Lena segments with contrasting boundaries 8Way 512x512 8u failed.\n");
|
|
else if (nImage == 1)
|
|
printf("CT skull segments with contrasting boundaries 8Way 512x512 8u failed.\n");
|
|
else if (nImage == 2)
|
|
printf("Rocks segments with contrasting boundaries 8Way 512x512 8u failed.\n");
|
|
tearDown();
|
|
return -1;
|
|
}
|
|
|
|
// Copy segment boundaries image to host
|
|
cudaError = cudaMemcpy2DAsync(pSegmentsHost[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
pSegmentsDev[nImage],
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].width * sizeof(Npp8u),
|
|
oSizeROI[nImage].height,
|
|
cudaMemcpyDeviceToHost,
|
|
nppStreamCtx.hStream);
|
|
|
|
// Wait host image read backs to complete, not necessary if no need to synchronize
|
|
if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) != cudaSuccess) {
|
|
printf("Post segmentation cudaStreamSynchronize failed\n");
|
|
tearDown();
|
|
return -1;
|
|
}
|
|
|
|
if (nImage == 0)
|
|
bmpFile = fopen(SegmentsWithContrastingBoundariesOutputFile0.c_str(), "wb");
|
|
else if (nImage == 1)
|
|
bmpFile = fopen(SegmentsWithContrastingBoundariesOutputFile1.c_str(), "wb");
|
|
else if (nImage == 2)
|
|
bmpFile = fopen(SegmentsWithContrastingBoundariesOutputFile2.c_str(), "wb");
|
|
|
|
if (bmpFile == NULL)
|
|
return -1;
|
|
nSize = 0;
|
|
for (int j = 0; j < oSizeROI[nImage].height; j++) {
|
|
nSize += fwrite(
|
|
&pSegmentsHost[nImage][j * oSizeROI[nImage].width], sizeof(Npp8u), oSizeROI[nImage].width, bmpFile);
|
|
}
|
|
fclose(bmpFile);
|
|
|
|
if (nImage == 0)
|
|
printf("teapot_SegmentsWithContrastingBoundaries_8Way_512x512_8u succeeded.\n");
|
|
else if (nImage == 1)
|
|
printf("CT_Skull_SegmentsWithContrastingBoundaries_8Way_512x512_8u succeeded.\n");
|
|
else if (nImage == 2)
|
|
printf("Rocks_SegmentsWithContrastingBoundaries_8Way_512x512_8u succeeded.\n");
|
|
}
|
|
}
|
|
|
|
tearDown();
|
|
|
|
return 0;
|
|
}
|