621 lines
24 KiB
C++

/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cudaNvSci.h"
#include <condition_variable>
#include <cuda.h>
#include <iostream>
#include <thread>
std::mutex m_mutex;
std::condition_variable m_condVar;
bool workSubmitted = false;
class cudaNvSciSignal
{
private:
NvSciSyncModule m_syncModule;
NvSciBufModule m_bufModule;
NvSciSyncAttrList m_syncAttrList;
NvSciSyncFence *m_fence;
NvSciBufAttrList m_rawBufAttrList;
NvSciBufAttrList m_imageBufAttrList;
NvSciBufAttrList m_buffAttrListOut[2];
NvSciBufAttrKeyValuePair pairArrayOut[10];
cudaExternalMemory_t extMemRawBuf, extMemImageBuf;
cudaMipmappedArray_t d_mipmapArray;
cudaArray_t d_mipLevelArray;
cudaTextureObject_t texObject;
cudaExternalSemaphore_t signalSem;
cudaStream_t streamToRun;
int m_cudaDeviceId;
CUuuid m_devUUID;
uint64_t m_imageWidth;
uint64_t m_imageHeight;
void *d_outputBuf;
size_t m_bufSize;
public:
cudaNvSciSignal(NvSciBufModule bufModule,
NvSciSyncModule syncModule,
int cudaDeviceId,
int bufSize,
uint64_t imageWidth,
uint64_t imageHeight,
NvSciSyncFence *fence)
: m_syncModule(syncModule)
, m_bufModule(bufModule)
, m_cudaDeviceId(cudaDeviceId)
, d_outputBuf(NULL)
, m_bufSize(bufSize)
, m_imageWidth(imageWidth)
, m_imageHeight(imageHeight)
, m_fence(fence)
{
initCuda();
checkNvSciErrors(NvSciSyncAttrListCreate(m_syncModule, &m_syncAttrList));
checkNvSciErrors(NvSciBufAttrListCreate(m_bufModule, &m_rawBufAttrList));
checkNvSciErrors(NvSciBufAttrListCreate(m_bufModule, &m_imageBufAttrList));
setRawBufAttrList(m_bufSize);
setImageBufAttrList(m_imageWidth, m_imageHeight);
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(m_syncAttrList, m_cudaDeviceId, cudaNvSciSyncAttrSignal));
}
~cudaNvSciSignal()
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
checkCudaErrors(cudaFreeMipmappedArray(d_mipmapArray));
checkCudaErrors(cudaFree(d_outputBuf));
checkCudaErrors(cudaDestroyExternalSemaphore(signalSem));
checkCudaErrors(cudaDestroyExternalMemory(extMemRawBuf));
checkCudaErrors(cudaDestroyExternalMemory(extMemImageBuf));
checkCudaErrors(cudaDestroyTextureObject(texObject));
checkCudaErrors(cudaStreamDestroy(streamToRun));
}
void initCuda()
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
checkCudaErrors(cudaStreamCreateWithFlags(&streamToRun, cudaStreamNonBlocking));
int major = 0, minor = 0;
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, m_cudaDeviceId));
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, m_cudaDeviceId));
printf("[cudaNvSciSignal] GPU Device %d: \"%s\" with compute capability "
"%d.%d\n\n",
m_cudaDeviceId,
_ConvertSMVer2ArchName(major, minor),
major,
minor);
#ifdef cuDeviceGetUuid_v2
CUresult res = cuDeviceGetUuid_v2(&m_devUUID, m_cudaDeviceId);
#else
CUresult res = cuDeviceGetUuid(&m_devUUID, m_cudaDeviceId);
#endif
if (res != CUDA_SUCCESS) {
fprintf(stderr, "Driver API error = %04d \n", res);
exit(EXIT_FAILURE);
}
}
void setRawBufAttrList(uint64_t size)
{
NvSciBufType bufType = NvSciBufType_RawBuffer;
bool cpuAccess = false;
NvSciBufAttrValAccessPerm perm = NvSciBufAccessPerm_ReadWrite;
NvSciBufAttrKeyValuePair rawBufAttrs[] = {
{NvSciBufRawBufferAttrKey_Size, &size, sizeof(size)},
{NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType)},
{NvSciBufGeneralAttrKey_NeedCpuAccess, &cpuAccess, sizeof(cpuAccess)},
{NvSciBufGeneralAttrKey_RequiredPerm, &perm, sizeof(perm)},
{NvSciBufGeneralAttrKey_GpuId, &m_devUUID, sizeof(m_devUUID)},
};
checkNvSciErrors(NvSciBufAttrListSetAttrs(
m_rawBufAttrList, rawBufAttrs, sizeof(rawBufAttrs) / sizeof(NvSciBufAttrKeyValuePair)));
}
void setImageBufAttrList(uint32_t width, uint32_t height)
{
NvSciBufType bufType = NvSciBufType_Image;
NvSciBufAttrValImageLayoutType layout = NvSciBufImage_BlockLinearType;
NvSciBufAttrValAccessPerm perm = NvSciBufAccessPerm_ReadWrite;
uint32_t planeCount = 1;
uint32_t planeWidths[] = {width};
uint32_t planeHeights[] = {height};
uint64_t lrpad = 0, tbpad = 100;
bool cpuAccessFlag = false;
NvSciBufAttrValColorFmt planecolorfmts[] = {NvSciColor_B8G8R8A8};
NvSciBufAttrValColorStd planecolorstds[] = {NvSciColorStd_SRGB};
NvSciBufAttrValImageScanType planescantype[] = {NvSciBufScan_InterlaceType};
NvSciBufAttrKeyValuePair imgBufAttrs[] = {
{NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType)},
{NvSciBufImageAttrKey_PlaneCount, &planeCount, sizeof(planeCount)},
{NvSciBufImageAttrKey_Layout, &layout, sizeof(layout)},
{NvSciBufImageAttrKey_TopPadding, &tbpad, sizeof(tbpad)},
{NvSciBufImageAttrKey_BottomPadding, &tbpad, sizeof(tbpad)},
{NvSciBufImageAttrKey_LeftPadding, &lrpad, sizeof(lrpad)},
{NvSciBufImageAttrKey_RightPadding, &lrpad, sizeof(lrpad)},
{NvSciBufImageAttrKey_PlaneColorFormat, planecolorfmts, sizeof(planecolorfmts)},
{NvSciBufImageAttrKey_PlaneColorStd, planecolorstds, sizeof(planecolorstds)},
{NvSciBufImageAttrKey_PlaneWidth, planeWidths, sizeof(planeWidths)},
{NvSciBufImageAttrKey_PlaneHeight, planeHeights, sizeof(planeHeights)},
{NvSciBufGeneralAttrKey_NeedCpuAccess, &cpuAccessFlag, sizeof(cpuAccessFlag)},
{NvSciBufGeneralAttrKey_RequiredPerm, &perm, sizeof(perm)},
{NvSciBufImageAttrKey_PlaneScanType, planescantype, sizeof(planescantype)},
{NvSciBufGeneralAttrKey_GpuId, &m_devUUID, sizeof(m_devUUID)},
};
checkNvSciErrors(NvSciBufAttrListSetAttrs(
m_imageBufAttrList, imgBufAttrs, sizeof(imgBufAttrs) / sizeof(NvSciBufAttrKeyValuePair)));
}
NvSciSyncAttrList getNvSciSyncAttrList() { return m_syncAttrList; }
NvSciBufAttrList getNvSciRawBufAttrList() { return m_rawBufAttrList; }
NvSciBufAttrList getNvSciImageBufAttrList() { return m_imageBufAttrList; }
void runRotateImageAndSignal(unsigned char *imageData)
{
int numOfGPUs = 0;
checkCudaErrors(cudaGetDeviceCount(&numOfGPUs)); // For cuda init purpose
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
copyDataToImageArray(imageData);
createTexture();
float angle = 0.5f; // angle to rotate image by (in radians)
rotateKernel(texObject, angle, (unsigned int *)d_outputBuf, m_imageWidth, m_imageHeight, streamToRun);
signalExternalSemaphore();
}
void cudaImportNvSciSemaphore(NvSciSyncObj syncObj)
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
cudaExternalSemaphoreHandleDesc extSemDesc;
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = (void *)syncObj;
checkCudaErrors(cudaImportExternalSemaphore(&signalSem, &extSemDesc));
}
void signalExternalSemaphore()
{
cudaExternalSemaphoreSignalParams signalParams;
memset(&signalParams, 0, sizeof(signalParams));
// For cross-process signaler-waiter applications need to use NvSciIpc
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
// across process. This step is optional in single-process.
signalParams.params.nvSciSync.fence = (void *)m_fence;
signalParams.flags = 0;
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams, 1, streamToRun));
}
void cudaImportNvSciRawBuf(NvSciBufObj inputBufObj)
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &m_buffAttrListOut[0]));
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10);
pairArrayOut[0].key = NvSciBufRawBufferAttrKey_Size;
checkNvSciErrors(NvSciBufAttrListGetAttrs(m_buffAttrListOut[0], pairArrayOut, 1));
uint64_t size = *(uint64_t *)pairArrayOut[0].value;
cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = inputBufObj;
memHandleDesc.size = size;
checkCudaErrors(cudaImportExternalMemory(&extMemRawBuf, &memHandleDesc));
cudaExternalMemoryBufferDesc bufferDesc;
memset(&bufferDesc, 0, sizeof(bufferDesc));
bufferDesc.offset = 0;
bufferDesc.size = size;
m_bufSize = size;
checkCudaErrors(cudaExternalMemoryGetMappedBuffer(&d_outputBuf, extMemRawBuf, &bufferDesc));
}
void cudaImportNvSciImage(NvSciBufObj inputBufObj)
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &m_buffAttrListOut[1]));
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10);
pairArrayOut[0].key = NvSciBufImageAttrKey_Size;
pairArrayOut[1].key = NvSciBufImageAttrKey_Alignment;
pairArrayOut[2].key = NvSciBufImageAttrKey_PlaneCount;
pairArrayOut[3].key = NvSciBufImageAttrKey_PlaneWidth;
pairArrayOut[4].key = NvSciBufImageAttrKey_PlaneHeight;
checkNvSciErrors(NvSciBufAttrListGetAttrs(m_buffAttrListOut[1], pairArrayOut, 5));
uint64_t size = *(uint64_t *)pairArrayOut[0].value;
uint64_t alignment = *(uint64_t *)pairArrayOut[1].value;
uint64_t planeCount = *(uint64_t *)pairArrayOut[2].value;
uint64_t imageWidth = *(uint64_t *)pairArrayOut[3].value;
uint64_t imageHeight = *(uint64_t *)pairArrayOut[4].value;
cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = inputBufObj;
memHandleDesc.size = size;
checkCudaErrors(cudaImportExternalMemory(&extMemImageBuf, &memHandleDesc));
cudaExtent extent = {};
memset(&extent, 0, sizeof(extent));
extent.width = imageWidth;
extent.height = imageHeight;
extent.depth = 0;
cudaChannelFormatDesc desc;
desc.x = 8;
desc.y = 8;
desc.z = 8;
desc.w = 8;
desc.f = cudaChannelFormatKindUnsigned;
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
mipmapDesc.offset = 0;
mipmapDesc.formatDesc = desc;
mipmapDesc.extent = extent;
mipmapDesc.flags = 0;
mipmapDesc.numLevels = 1;
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&d_mipmapArray, extMemImageBuf, &mipmapDesc));
}
void copyDataToImageArray(unsigned char *imageData)
{
uint32_t mipLevelId = 0;
checkCudaErrors(cudaGetMipmappedArrayLevel(&d_mipLevelArray, d_mipmapArray, mipLevelId));
checkCudaErrors(cudaMemcpy2DToArrayAsync(d_mipLevelArray,
0,
0,
imageData,
m_imageWidth * sizeof(unsigned int),
m_imageWidth * sizeof(unsigned int),
m_imageHeight,
cudaMemcpyHostToDevice,
streamToRun));
}
void createTexture()
{
cudaResourceDesc texRes;
memset(&texRes, 0, sizeof(cudaResourceDesc));
texRes.resType = cudaResourceTypeArray;
texRes.res.array.array = d_mipLevelArray;
cudaTextureDesc texDescr;
memset(&texDescr, 0, sizeof(cudaTextureDesc));
texDescr.normalizedCoords = true;
texDescr.filterMode = cudaFilterModeLinear;
texDescr.addressMode[0] = cudaAddressModeWrap;
texDescr.addressMode[1] = cudaAddressModeWrap;
texDescr.readMode = cudaReadModeNormalizedFloat;
checkCudaErrors(cudaCreateTextureObject(&texObject, &texRes, &texDescr, NULL));
}
};
class cudaNvSciWait
{
private:
NvSciSyncModule m_syncModule;
NvSciBufModule m_bufModule;
NvSciSyncAttrList m_syncAttrList;
NvSciBufAttrList m_rawBufAttrList;
NvSciBufAttrList m_buffAttrListOut;
NvSciBufAttrKeyValuePair pairArrayOut[10];
NvSciSyncFence *m_fence;
cudaExternalMemory_t extMemRawBuf;
cudaExternalSemaphore_t waitSem;
cudaStream_t streamToRun;
int m_cudaDeviceId;
CUuuid m_devUUID;
void *d_outputBuf;
size_t m_bufSize;
size_t imageWidth;
size_t imageHeight;
public:
cudaNvSciWait(NvSciBufModule bufModule,
NvSciSyncModule syncModule,
int cudaDeviceId,
int bufSize,
NvSciSyncFence *fence)
: m_bufModule(bufModule)
, m_syncModule(syncModule)
, m_cudaDeviceId(cudaDeviceId)
, m_bufSize(bufSize)
, m_fence(fence)
{
initCuda();
checkNvSciErrors(NvSciSyncAttrListCreate(m_syncModule, &m_syncAttrList));
checkNvSciErrors(NvSciBufAttrListCreate(m_bufModule, &m_rawBufAttrList));
setRawBufAttrList(m_bufSize);
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(m_syncAttrList, m_cudaDeviceId, cudaNvSciSyncAttrWait));
}
~cudaNvSciWait()
{
checkCudaErrors(cudaStreamDestroy(streamToRun));
checkCudaErrors(cudaDestroyExternalSemaphore(waitSem));
checkCudaErrors(cudaDestroyExternalMemory(extMemRawBuf));
checkCudaErrors(cudaFree(d_outputBuf));
}
void initCuda()
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
checkCudaErrors(cudaStreamCreateWithFlags(&streamToRun, cudaStreamNonBlocking));
#ifdef cuDeviceGetUuid_v2
CUresult res = cuDeviceGetUuid_v2(&m_devUUID, m_cudaDeviceId);
#else
CUresult res = cuDeviceGetUuid(&m_devUUID, m_cudaDeviceId);
#endif
if (res != CUDA_SUCCESS) {
fprintf(stderr, "Driver API error = %04d \n", res);
exit(EXIT_FAILURE);
}
int major = 0, minor = 0;
checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, m_cudaDeviceId));
checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, m_cudaDeviceId));
printf("[cudaNvSciWait] GPU Device %d: \"%s\" with compute capability "
"%d.%d\n\n",
m_cudaDeviceId,
_ConvertSMVer2ArchName(major, minor),
major,
minor);
}
void setRawBufAttrList(uint64_t size)
{
NvSciBufType bufType = NvSciBufType_RawBuffer;
bool cpuAccess = false;
NvSciBufAttrValAccessPerm perm = NvSciBufAccessPerm_ReadWrite;
NvSciBufAttrKeyValuePair rawBufAttrs[] = {
{NvSciBufRawBufferAttrKey_Size, &size, sizeof(size)},
{NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType)},
{NvSciBufGeneralAttrKey_NeedCpuAccess, &cpuAccess, sizeof(cpuAccess)},
{NvSciBufGeneralAttrKey_RequiredPerm, &perm, sizeof(perm)},
{NvSciBufGeneralAttrKey_GpuId, &m_devUUID, sizeof(m_devUUID)},
};
checkNvSciErrors(NvSciBufAttrListSetAttrs(
m_rawBufAttrList, rawBufAttrs, sizeof(rawBufAttrs) / sizeof(NvSciBufAttrKeyValuePair)));
}
NvSciSyncAttrList getNvSciSyncAttrList() { return m_syncAttrList; }
NvSciBufAttrList getNvSciRawBufAttrList() { return m_rawBufAttrList; }
void runImageGrayscale(std::string image_filename, size_t imageWidth, size_t imageHeight)
{
int numOfGPUs = 0;
checkCudaErrors(cudaGetDeviceCount(&numOfGPUs)); // For cuda init purpose
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
waitExternalSemaphore();
launchGrayScaleKernel((unsigned int *)d_outputBuf, image_filename, imageWidth, imageHeight, streamToRun);
}
void cudaImportNvSciSemaphore(NvSciSyncObj syncObj)
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
cudaExternalSemaphoreHandleDesc extSemDesc;
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = (void *)syncObj;
checkCudaErrors(cudaImportExternalSemaphore(&waitSem, &extSemDesc));
}
void waitExternalSemaphore()
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
cudaExternalSemaphoreWaitParams waitParams;
memset(&waitParams, 0, sizeof(waitParams));
// For cross-process signaler-waiter applications need to use NvSciIpc
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
// across process. This step is optional in single-process.
waitParams.params.nvSciSync.fence = (void *)m_fence;
waitParams.flags = 0;
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, streamToRun));
}
void cudaImportNvSciRawBuf(NvSciBufObj inputBufObj)
{
checkCudaErrors(cudaSetDevice(m_cudaDeviceId));
checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &m_buffAttrListOut));
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10);
pairArrayOut[0].key = NvSciBufRawBufferAttrKey_Size;
checkNvSciErrors(NvSciBufAttrListGetAttrs(m_buffAttrListOut, pairArrayOut, 1));
uint64_t size = *(uint64_t *)pairArrayOut[0].value;
cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = inputBufObj;
memHandleDesc.size = size;
checkCudaErrors(cudaImportExternalMemory(&extMemRawBuf, &memHandleDesc));
cudaExternalMemoryBufferDesc bufferDesc;
memset(&bufferDesc, 0, sizeof(bufferDesc));
bufferDesc.offset = 0;
bufferDesc.size = size;
m_bufSize = size;
checkCudaErrors(cudaExternalMemoryGetMappedBuffer(&d_outputBuf, extMemRawBuf, &bufferDesc));
}
};
void thread_rotateAndSignal(cudaNvSciSignal *cudaNvSciSignalObj, unsigned char *imageData)
{
std::lock_guard<std::mutex> guard(m_mutex);
cudaNvSciSignalObj->runRotateImageAndSignal(imageData);
workSubmitted = true;
m_condVar.notify_one();
}
void thread_waitAndGrayscale(cudaNvSciWait *cudaNvSciWaitObj,
std::string image_filename,
size_t imageWidth,
size_t imageHeight)
{
// Acquire the lock
std::unique_lock<std::mutex> mlock(m_mutex);
m_condVar.wait(mlock, [] { return workSubmitted; });
cudaNvSciWaitObj->runImageGrayscale(image_filename, imageWidth, imageHeight);
}
cudaNvSci::cudaNvSci(int isMultiGPU, std::vector<int> &deviceIds, unsigned char *imageData, size_t width, size_t height)
: m_isMultiGPU(isMultiGPU)
, image_data(imageData)
, imageWidth(width)
, imageHeight(height)
{
if (isMultiGPU) {
m_cudaNvSciSignalDeviceId = deviceIds[0];
m_cudaNvSciWaitDeviceId = deviceIds[1];
}
else {
m_cudaNvSciSignalDeviceId = m_cudaNvSciWaitDeviceId = deviceIds[0];
}
m_bufSize = imageWidth * imageHeight * sizeof(unsigned int);
}
void cudaNvSci::initNvSci()
{
checkNvSciErrors(NvSciSyncModuleOpen(&syncModule));
checkNvSciErrors(NvSciBufModuleOpen(&buffModule));
fence = (NvSciSyncFence *)calloc(1, sizeof(NvSciSyncFence));
}
void cudaNvSci::runCudaNvSci(std::string &image_filename)
{
initNvSci();
cudaNvSciSignal rotateAndSignal(
buffModule, syncModule, m_cudaNvSciSignalDeviceId, m_bufSize, imageWidth, imageHeight, fence);
cudaNvSciWait waitAndGrayscale(buffModule, syncModule, m_cudaNvSciWaitDeviceId, m_bufSize, fence);
rawBufUnreconciledList[0] = rotateAndSignal.getNvSciRawBufAttrList();
rawBufUnreconciledList[1] = waitAndGrayscale.getNvSciRawBufAttrList();
createNvSciRawBufObj();
imageBufUnreconciledList[0] = rotateAndSignal.getNvSciImageBufAttrList();
createNvSciBufImageObj();
rotateAndSignal.cudaImportNvSciRawBuf(rawBufObj);
rotateAndSignal.cudaImportNvSciImage(imageBufObj);
waitAndGrayscale.cudaImportNvSciRawBuf(rawBufObj);
syncUnreconciledList[0] = rotateAndSignal.getNvSciSyncAttrList();
syncUnreconciledList[1] = waitAndGrayscale.getNvSciSyncAttrList();
createNvSciSyncObj();
rotateAndSignal.cudaImportNvSciSemaphore(syncObj);
waitAndGrayscale.cudaImportNvSciSemaphore(syncObj);
std::thread rotateThread(&thread_rotateAndSignal, &rotateAndSignal, image_data);
std::thread grayscaleThread(&thread_waitAndGrayscale, &waitAndGrayscale, image_filename, imageWidth, imageHeight);
rotateThread.join();
grayscaleThread.join();
}
void cudaNvSci::createNvSciRawBufObj()
{
int numAttrList = 2;
checkNvSciErrors(
NvSciBufAttrListReconcile(rawBufUnreconciledList, numAttrList, &rawBufReconciledList, &buffConflictList));
checkNvSciErrors(NvSciBufObjAlloc(rawBufReconciledList, &rawBufObj));
printf("created NvSciBufObj\n");
}
void cudaNvSci::createNvSciBufImageObj()
{
int numAttrList = 1;
checkNvSciErrors(NvSciBufAttrListReconcile(
imageBufUnreconciledList, numAttrList, &imageBufReconciledList, &imageBufConflictList));
checkNvSciErrors(NvSciBufObjAlloc(imageBufReconciledList, &imageBufObj));
printf("created NvSciBufImageObj\n");
}
void cudaNvSci::createNvSciSyncObj()
{
int numAttrList = 2;
checkNvSciErrors(
NvSciSyncAttrListReconcile(syncUnreconciledList, numAttrList, &syncReconciledList, &syncConflictList));
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
printf("created NvSciSyncObj\n");
}