cudaNvSciNvMedia plane offset correction

This commit is contained in:
Rutwik Choughule 2021-06-10 17:33:24 +05:30
parent ba5a483c6e
commit 2aeaf51b11
7 changed files with 907 additions and 863 deletions

View File

@ -36,7 +36,6 @@
// Enable this to 1 if require cuda processed output to ppm file. // Enable this to 1 if require cuda processed output to ppm file.
#define WRITE_OUTPUT_IMAGE 0 #define WRITE_OUTPUT_IMAGE 0
#define checkNvSciErrors(call) \ #define checkNvSciErrors(call) \
do { \ do { \
NvSciError _status = call; \ NvSciError _status = call; \
@ -48,325 +47,382 @@
fflush(stdout); \ fflush(stdout); \
exit(EXIT_FAILURE); \ exit(EXIT_FAILURE); \
} \ } \
} while (0) } while (0)
__global__ static void yuvToGrayscale(cudaSurfaceObject_t surfaceObject, unsigned int *dstImage, int32_t imageWidth, int32_t imageHeight) __global__ static void yuvToGrayscale(cudaSurfaceObject_t surfaceObject,
{ unsigned int *dstImage,
size_t x = blockIdx.x*blockDim.x + threadIdx.x; int32_t imageWidth, int32_t imageHeight) {
size_t y = blockIdx.y*blockDim.y + threadIdx.y; size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
uchar4* dstImageUchar4 = (uchar4*)dstImage; uchar4 *dstImageUchar4 = (uchar4 *)dstImage;
for ( ; x < imageWidth && y < imageHeight; x += gridDim.x*blockDim.x, y += gridDim.y*blockDim.y) for (; x < imageWidth && y < imageHeight;
{ x += gridDim.x * blockDim.x, y += gridDim.y * blockDim.y) {
int colInBytes = x * sizeof(unsigned char); int colInBytes = x * sizeof(unsigned char);
unsigned char luma = surf2Dread<unsigned char>(surfaceObject, colInBytes, y); unsigned char luma =
uchar4 grayscalePix = make_uchar4(luma, luma, luma, 0); surf2Dread<unsigned char>(surfaceObject, colInBytes, y);
uchar4 grayscalePix = make_uchar4(luma, luma, luma, 0);
dstImageUchar4[y*imageWidth + x] = grayscalePix; dstImageUchar4[y * imageWidth + x] = grayscalePix;
} }
} }
static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem, NvSciSyncObj &syncObj) static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem,
{ NvSciSyncObj &syncObj) {
cudaExternalSemaphoreHandleDesc extSemDesc; cudaExternalSemaphoreHandleDesc extSemDesc;
memset(&extSemDesc, 0, sizeof(extSemDesc)); memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync; extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = (void *)syncObj; extSemDesc.handle.nvSciSyncObj = (void *)syncObj;
checkCudaErrors(cudaImportExternalSemaphore(&extSem, &extSemDesc)); checkCudaErrors(cudaImportExternalSemaphore(&extSem, &extSemDesc));
} }
static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem, NvSciSyncFence *fence, static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem,
cudaStream_t stream) { NvSciSyncFence *fence, cudaStream_t stream) {
cudaExternalSemaphoreWaitParams waitParams; cudaExternalSemaphoreWaitParams waitParams;
memset(&waitParams, 0, sizeof(waitParams)); memset(&waitParams, 0, sizeof(waitParams));
// For cross-process signaler-waiter applications need to use NvSciIpc // For cross-process signaler-waiter applications need to use NvSciIpc
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence // and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
// across process. This step is optional in single-process. // across process. This step is optional in single-process.
waitParams.params.nvSciSync.fence = (void *)fence; waitParams.params.nvSciSync.fence = (void *)fence;
waitParams.flags = 0; waitParams.flags = 0;
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, stream)); checkCudaErrors(
cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, stream));
} }
static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem, NvSciSyncFence *fence, static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem,
cudaStream_t stream) { NvSciSyncFence *fence,
cudaExternalSemaphoreSignalParams signalParams; cudaStream_t stream) {
memset(&signalParams, 0, sizeof(signalParams)); cudaExternalSemaphoreSignalParams signalParams;
// For cross-process signaler-waiter applications need to use NvSciIpc memset(&signalParams, 0, sizeof(signalParams));
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence // For cross-process signaler-waiter applications need to use NvSciIpc
// across process. This step is optional in single-process. // and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
signalParams.params.nvSciSync.fence = (void *)fence; // across process. This step is optional in single-process.
signalParams.flags = 0; signalParams.params.nvSciSync.fence = (void *)fence;
signalParams.flags = 0;
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams, checkCudaErrors(
1, stream)); cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams, 1, stream));
} }
static void yuvToGrayscaleCudaKernel(cudaExternalResInterop &cudaExtResObj,
static void yuvToGrayscaleCudaKernel(cudaExternalResInterop& cudaExtResObj, int32_t imageWidth, int32_t imageHeight) int32_t imageWidth, int32_t imageHeight) {
{
#if WRITE_OUTPUT_IMAGE #if WRITE_OUTPUT_IMAGE
unsigned int *h_dstImage; unsigned int *h_dstImage;
checkCudaErrors(cudaMallocHost(&h_dstImage, sizeof(unsigned int)*imageHeight*imageWidth)); checkCudaErrors(cudaMallocHost(
&h_dstImage, sizeof(unsigned int) * imageHeight * imageWidth));
#endif #endif
dim3 block(16, 16, 1); dim3 block(16, 16, 1);
dim3 grid((imageWidth/block.x)+1, (imageHeight/block.y)+1, 1); dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1);
yuvToGrayscale<<<grid, block, 0, cudaExtResObj.stream>>>(cudaExtResObj.cudaSurfaceNvmediaBuf[0], cudaExtResObj.d_outputImage, imageWidth, imageHeight); yuvToGrayscale<<<grid, block, 0, cudaExtResObj.stream>>>(
cudaExtResObj.cudaSurfaceNvmediaBuf[0], cudaExtResObj.d_outputImage,
imageWidth, imageHeight);
#if WRITE_OUTPUT_IMAGE #if WRITE_OUTPUT_IMAGE
checkCudaErrors(cudaMemcpyAsync(h_dstImage, cudaExtResObj.d_outputImage, sizeof(unsigned int)*imageHeight*imageWidth, cudaMemcpyDeviceToHost, cudaExtResObj.stream)); checkCudaErrors(
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream)); cudaMemcpyAsync(h_dstImage, cudaExtResObj.d_outputImage,
char outputFilename[1024]; sizeof(unsigned int) * imageHeight * imageWidth,
std::string image_filename = "Grayscale"; cudaMemcpyDeviceToHost, cudaExtResObj.stream));
strcpy(outputFilename, image_filename.c_str()); checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
strcpy(outputFilename + image_filename.length(), "_nvsci_out.ppm"); char outputFilename[1024];
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth, imageHeight); std::string image_filename = "Grayscale";
printf("Wrote '%s'\n", outputFilename); strcpy(outputFilename, image_filename.c_str());
checkCudaErrors(cudaFreeHost(h_dstImage)); strcpy(outputFilename + image_filename.length(), "_nvsci_out.ppm");
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth,
imageHeight);
printf("Wrote '%s'\n", outputFilename);
checkCudaErrors(cudaFreeHost(h_dstImage));
#endif #endif
} }
static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj, NvSciBufObj& inputBufObj) static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj,
{ NvSciBufObj &inputBufObj) {
NvSciBufModule module = NULL; NvSciBufModule module = NULL;
NvSciBufAttrList attrlist = NULL; NvSciBufAttrList attrlist = NULL;
NvSciBufAttrKeyValuePair pairArrayOut[10]; NvSciBufAttrKeyValuePair pairArrayOut[10];
checkNvSciErrors(NvSciBufModuleOpen(&module)); checkNvSciErrors(NvSciBufModuleOpen(&module));
checkNvSciErrors(NvSciBufAttrListCreate(module, &attrlist)); checkNvSciErrors(NvSciBufAttrListCreate(module, &attrlist));
checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &attrlist)); checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &attrlist));
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10); memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10);
int numAttrs = 0; int numAttrs = 0;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Size; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Size;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneChannelCount; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneChannelCount;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneCount; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneCount;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneWidth; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneWidth;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneHeight; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneHeight;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Layout; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Layout;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneBitsPerPixel; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneOffset;
checkNvSciErrors(NvSciBufAttrListGetAttrs(attrlist, pairArrayOut, numAttrs)); checkNvSciErrors(NvSciBufAttrListGetAttrs(attrlist, pairArrayOut, numAttrs));
uint64_t size = *(uint64_t *)pairArrayOut[0].value; uint64_t size = *(uint64_t *)pairArrayOut[0].value;
uint8_t channelCount = *(uint8_t *)pairArrayOut[1].value; uint8_t channelCount = *(uint8_t *)pairArrayOut[1].value;
cudaExtResObj.planeCount = *(int32_t *)pairArrayOut[2].value; cudaExtResObj.planeCount = *(int32_t *)pairArrayOut[2].value;
cudaExtResObj.imageWidth = (int32_t*) malloc(sizeof(int32_t)*cudaExtResObj.planeCount); cudaExtResObj.imageWidth =
cudaExtResObj.imageHeight = (int32_t*) malloc(sizeof(int32_t)*cudaExtResObj.planeCount); (int32_t *)malloc(sizeof(int32_t) * cudaExtResObj.planeCount);
cudaExtResObj.imageHeight =
(int32_t *)malloc(sizeof(int32_t) * cudaExtResObj.planeCount);
cudaExtResObj.planeOffset =
(uint64_t *)malloc(sizeof(uint64_t) * cudaExtResObj.planeCount);
memcpy(cudaExtResObj.imageWidth, (int32_t *)pairArrayOut[3].value, cudaExtResObj.planeCount * sizeof(int32_t)); memcpy(cudaExtResObj.imageWidth, (int32_t *)pairArrayOut[3].value,
memcpy(cudaExtResObj.imageHeight, (int32_t *)pairArrayOut[4].value, cudaExtResObj.planeCount * sizeof(int32_t)); cudaExtResObj.planeCount * sizeof(int32_t));
memcpy(cudaExtResObj.imageHeight, (int32_t *)pairArrayOut[4].value,
cudaExtResObj.planeCount * sizeof(int32_t));
memcpy(cudaExtResObj.planeOffset, (uint64_t *)pairArrayOut[7].value,
cudaExtResObj.planeCount * sizeof(uint64_t));
NvSciBufAttrValImageLayoutType layout = *(NvSciBufAttrValImageLayoutType *)pairArrayOut[5].value; NvSciBufAttrValImageLayoutType layout =
uint32_t bitsPerPixel = *(uint32_t*)pairArrayOut[6].value; *(NvSciBufAttrValImageLayoutType *)pairArrayOut[5].value;
uint32_t bitsPerPixel = *(uint32_t *)pairArrayOut[6].value;
if (layout != NvSciBufImage_BlockLinearType) { if (layout != NvSciBufImage_BlockLinearType) {
printf("Image layout is not block linear.. waiving execution\n"); printf("Image layout is not block linear.. waiving execution\n");
exit(EXIT_WAIVED); exit(EXIT_WAIVED);
}
cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = inputBufObj;
memHandleDesc.size = size;
checkCudaErrors(
cudaImportExternalMemory(&cudaExtResObj.extMemImageBuf, &memHandleDesc));
cudaExtResObj.d_mipmapArray = (cudaMipmappedArray_t *)malloc(
sizeof(cudaMipmappedArray_t) * cudaExtResObj.planeCount);
for (int i = 0; i < cudaExtResObj.planeCount; i++) {
cudaExtent extent = {};
memset(&extent, 0, sizeof(extent));
extent.width = cudaExtResObj.imageWidth[i];
extent.height = cudaExtResObj.imageHeight[i];
extent.depth = 0;
cudaChannelFormatDesc desc;
switch (channelCount) {
case 1:
default:
desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0,
cudaChannelFormatKindUnsigned);
break;
case 2:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0,
cudaChannelFormatKindUnsigned);
break;
case 3:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel,
0, cudaChannelFormatKindUnsigned);
break;
case 4:
desc =
cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel,
bitsPerPixel, cudaChannelFormatKindUnsigned);
break;
} }
cudaExternalMemoryHandleDesc memHandleDesc; cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
memset(&memHandleDesc, 0, sizeof(memHandleDesc)); mipmapDesc.offset = cudaExtResObj.planeOffset[i];
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf; mipmapDesc.formatDesc = desc;
memHandleDesc.handle.nvSciBufObject = inputBufObj; mipmapDesc.extent = extent;
memHandleDesc.size = size; mipmapDesc.flags = 0;
checkCudaErrors(cudaImportExternalMemory(&cudaExtResObj.extMemImageBuf, &memHandleDesc)); mipmapDesc.numLevels = 1;
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(
cudaExtResObj.d_mipmapArray = (cudaMipmappedArray_t*) malloc(sizeof(cudaMipmappedArray_t) * cudaExtResObj.planeCount); &cudaExtResObj.d_mipmapArray[i], cudaExtResObj.extMemImageBuf,
&mipmapDesc));
for (int i = 0; i < cudaExtResObj.planeCount; i++) { }
cudaExtent extent = {};
memset(&extent, 0, sizeof(extent));
extent.width = cudaExtResObj.imageWidth[i];
extent.height = cudaExtResObj.imageHeight[i];
extent.depth = 0;
cudaChannelFormatDesc desc;
switch (channelCount) {
case 1:
default:
desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0, cudaChannelFormatKindUnsigned);
break;
case 2:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0, cudaChannelFormatKindUnsigned);
break;
case 3:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel, 0, cudaChannelFormatKindUnsigned);
break;
case 4:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel, bitsPerPixel, cudaChannelFormatKindUnsigned);
break;
}
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
mipmapDesc.offset = 0;
mipmapDesc.formatDesc = desc;
mipmapDesc.extent = extent;
mipmapDesc.flags = 0;
mipmapDesc.numLevels = 1;
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&cudaExtResObj.d_mipmapArray[i], cudaExtResObj.extMemImageBuf, &mipmapDesc));
}
} }
static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray) static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray) {
{ cudaResourceDesc resourceDesc;
cudaResourceDesc resourceDesc; memset(&resourceDesc, 0, sizeof(resourceDesc));
memset(&resourceDesc, 0, sizeof(resourceDesc)); resourceDesc.resType = cudaResourceTypeArray;
resourceDesc.resType = cudaResourceTypeArray; resourceDesc.res.array.array = d_mipLevelArray;
resourceDesc.res.array.array = d_mipLevelArray;
cudaSurfaceObject_t surfaceObject; cudaSurfaceObject_t surfaceObject;
checkCudaErrors(cudaCreateSurfaceObject(&surfaceObject, &resourceDesc)); checkCudaErrors(cudaCreateSurfaceObject(&surfaceObject, &resourceDesc));
return surfaceObject; return surfaceObject;
} }
static cudaStream_t createCudaStream(int deviceId) static cudaStream_t createCudaStream(int deviceId) {
{ checkCudaErrors(cudaSetDevice(deviceId));
checkCudaErrors(cudaSetDevice(deviceId)); cudaStream_t stream;
cudaStream_t stream; checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); return stream;
return stream;
} }
// CUDA setup buffers/synchronization objects for interop via NvSci API. // CUDA setup buffers/synchronization objects for interop via NvSci API.
void setupCuda(cudaExternalResInterop& cudaExtResObj, NvSciBufObj& inputBufObj, void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj,
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj, int deviceId) NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj,
{ int deviceId) {
checkCudaErrors(cudaSetDevice(deviceId)); checkCudaErrors(cudaSetDevice(deviceId));
cudaImportNvSciSync(cudaExtResObj.waitSem, syncObj); cudaImportNvSciSync(cudaExtResObj.waitSem, syncObj);
cudaImportNvSciSync(cudaExtResObj.signalSem, cudaSignalerSyncObj); cudaImportNvSciSync(cudaExtResObj.signalSem, cudaSignalerSyncObj);
cudaImportNvSciImage(cudaExtResObj, inputBufObj); cudaImportNvSciImage(cudaExtResObj, inputBufObj);
cudaExtResObj.d_mipLevelArray = (cudaArray_t *) malloc(sizeof(cudaArray_t) * cudaExtResObj.planeCount); cudaExtResObj.d_mipLevelArray =
cudaExtResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *) malloc(sizeof(cudaSurfaceObject_t) * cudaExtResObj.planeCount); (cudaArray_t *)malloc(sizeof(cudaArray_t) * cudaExtResObj.planeCount);
cudaExtResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *)malloc(
sizeof(cudaSurfaceObject_t) * cudaExtResObj.planeCount);
for (int i = 0; i < cudaExtResObj.planeCount; ++i) { for (int i = 0; i < cudaExtResObj.planeCount; ++i) {
uint32_t mipLevelId = 0; uint32_t mipLevelId = 0;
checkCudaErrors(cudaGetMipmappedArrayLevel(&cudaExtResObj.d_mipLevelArray[i], cudaExtResObj.d_mipmapArray[i], mipLevelId)); checkCudaErrors(
cudaExtResObj.cudaSurfaceNvmediaBuf[i] = createCudaSurface(cudaExtResObj.d_mipLevelArray[i]); cudaGetMipmappedArrayLevel(&cudaExtResObj.d_mipLevelArray[i],
} cudaExtResObj.d_mipmapArray[i], mipLevelId));
cudaExtResObj.cudaSurfaceNvmediaBuf[i] =
createCudaSurface(cudaExtResObj.d_mipLevelArray[i]);
}
cudaExtResObj.stream = createCudaStream(deviceId); cudaExtResObj.stream = createCudaStream(deviceId);
checkCudaErrors(cudaMalloc(&cudaExtResObj.d_outputImage, sizeof(unsigned int) * cudaExtResObj.imageWidth[0] * cudaExtResObj.imageHeight[0])); checkCudaErrors(cudaMalloc(&cudaExtResObj.d_outputImage,
sizeof(unsigned int) *
cudaExtResObj.imageWidth[0] *
cudaExtResObj.imageHeight[0]));
} }
// CUDA clean up buffers used **with** NvSci API. // CUDA clean up buffers used **with** NvSci API.
void cleanupCuda(cudaExternalResInterop& cudaExtResObj) void cleanupCuda(cudaExternalResInterop &cudaExtResObj) {
{ for (int i = 0; i < cudaExtResObj.planeCount; i++) {
for (int i=0; i < cudaExtResObj.planeCount; i++) { checkCudaErrors(
checkCudaErrors(cudaDestroySurfaceObject(cudaExtResObj.cudaSurfaceNvmediaBuf[i])); cudaDestroySurfaceObject(cudaExtResObj.cudaSurfaceNvmediaBuf[i]));
checkCudaErrors(cudaFreeMipmappedArray(cudaExtResObj.d_mipmapArray[i])); checkCudaErrors(cudaFreeMipmappedArray(cudaExtResObj.d_mipmapArray[i]));
} }
free(cudaExtResObj.d_mipmapArray); free(cudaExtResObj.d_mipmapArray);
free(cudaExtResObj.d_mipLevelArray); free(cudaExtResObj.d_mipLevelArray);
free(cudaExtResObj.cudaSurfaceNvmediaBuf); free(cudaExtResObj.cudaSurfaceNvmediaBuf);
free(cudaExtResObj.imageWidth); free(cudaExtResObj.imageWidth);
free(cudaExtResObj.imageHeight); free(cudaExtResObj.imageHeight);
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.waitSem)); checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.waitSem));
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.signalSem)); checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.signalSem));
checkCudaErrors(cudaDestroyExternalMemory(cudaExtResObj.extMemImageBuf)); checkCudaErrors(cudaDestroyExternalMemory(cudaExtResObj.extMemImageBuf));
checkCudaErrors(cudaStreamDestroy(cudaExtResObj.stream)); checkCudaErrors(cudaStreamDestroy(cudaExtResObj.stream));
checkCudaErrors(cudaFree(cudaExtResObj.d_outputImage)); checkCudaErrors(cudaFree(cudaExtResObj.d_outputImage));
} }
void runCudaOperation(cudaExternalResInterop& cudaExtResObj, NvSciSyncFence *cudaWaitFence, void runCudaOperation(cudaExternalResInterop &cudaExtResObj,
NvSciSyncFence *cudaSignalFence, int deviceId, int iterations) NvSciSyncFence *cudaWaitFence,
{ NvSciSyncFence *cudaSignalFence, int deviceId,
checkCudaErrors(cudaSetDevice(deviceId)); int iterations) {
static int64_t launch = 0; checkCudaErrors(cudaSetDevice(deviceId));
static int64_t launch = 0;
waitExternalSemaphore(cudaExtResObj.waitSem, cudaWaitFence, cudaExtResObj.stream); waitExternalSemaphore(cudaExtResObj.waitSem, cudaWaitFence,
cudaExtResObj.stream);
// run cuda kernel over surface object of the LUMA surface part to extract grayscale. // run cuda kernel over surface object of the LUMA surface part to extract
yuvToGrayscaleCudaKernel(cudaExtResObj, cudaExtResObj.imageWidth[0], cudaExtResObj.imageHeight[0]); // grayscale.
yuvToGrayscaleCudaKernel(cudaExtResObj, cudaExtResObj.imageWidth[0],
cudaExtResObj.imageHeight[0]);
// signal fence till the second last iterations for NvMedia2DBlit to wait for cuda signal // signal fence till the second last iterations for NvMedia2DBlit to wait for
// and for final iteration as there is no corresponding NvMedia operation pending // cuda signal and for final iteration as there is no corresponding NvMedia
// therefore we end with cudaStreamSynchronize() // operation pending therefore we end with cudaStreamSynchronize()
if (launch < iterations-1) { if (launch < iterations - 1) {
signalExternalSemaphore(cudaExtResObj.signalSem, cudaSignalFence, cudaExtResObj.stream); signalExternalSemaphore(cudaExtResObj.signalSem, cudaSignalFence,
} cudaExtResObj.stream);
else { } else {
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream)); checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
} }
launch++; launch++;
} }
// CUDA imports and operates on NvSci buffer/synchronization objects // CUDA imports and operates on NvSci buffer/synchronization objects
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId) void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId) {
{ checkCudaErrors(cudaSetDevice(deviceId));
checkCudaErrors(cudaSetDevice(deviceId)); cudaResObj.d_yuvArray =
cudaResObj.d_yuvArray = (cudaArray_t *) malloc(sizeof(cudaArray_t) * ctx->numSurfaces); (cudaArray_t *)malloc(sizeof(cudaArray_t) * ctx->numSurfaces);
cudaResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t*) malloc(sizeof(cudaSurfaceObject_t) * ctx->numSurfaces); cudaResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *)malloc(
cudaChannelFormatDesc channelDesc; sizeof(cudaSurfaceObject_t) * ctx->numSurfaces);
switch (ctx->bytesPerPixel) { cudaChannelFormatDesc channelDesc;
case 1: switch (ctx->bytesPerPixel) {
default: case 1:
channelDesc = cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned); default:
break; channelDesc =
} cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
break;
}
for(int k = 0; k < ctx->numSurfaces; k++) { for (int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(cudaMallocArray(&cudaResObj.d_yuvArray[k], &channelDesc, ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel, checkCudaErrors(cudaMallocArray(
ctx->heightSurface * ctx->yScalePtr[k])); &cudaResObj.d_yuvArray[k], &channelDesc,
cudaResObj.cudaSurfaceNvmediaBuf[k] = createCudaSurface(cudaResObj.d_yuvArray[k]); ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
} ctx->heightSurface * ctx->yScalePtr[k]));
checkCudaErrors(cudaMalloc(&cudaResObj.d_outputImage, sizeof(unsigned int) * ctx->widthSurface * ctx->heightSurface)); cudaResObj.cudaSurfaceNvmediaBuf[k] =
createCudaSurface(cudaResObj.d_yuvArray[k]);
}
checkCudaErrors(cudaMalloc(
&cudaResObj.d_outputImage,
sizeof(unsigned int) * ctx->widthSurface * ctx->heightSurface));
cudaResObj.stream = createCudaStream(deviceId); cudaResObj.stream = createCudaStream(deviceId);
} }
// CUDA clean up buffers used **without** NvSci API. // CUDA clean up buffers used **without** NvSci API.
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj) void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj) {
{ for (int k = 0; k < ctx->numSurfaces; k++) {
for(int k = 0; k < ctx->numSurfaces; k++) { checkCudaErrors(
checkCudaErrors(cudaDestroySurfaceObject(cudaResObj.cudaSurfaceNvmediaBuf[k])); cudaDestroySurfaceObject(cudaResObj.cudaSurfaceNvmediaBuf[k]));
checkCudaErrors(cudaFreeArray(cudaResObj.d_yuvArray[k])); checkCudaErrors(cudaFreeArray(cudaResObj.d_yuvArray[k]));
} }
free(cudaResObj.cudaSurfaceNvmediaBuf); free(cudaResObj.cudaSurfaceNvmediaBuf);
checkCudaErrors(cudaStreamDestroy(cudaResObj.stream)); checkCudaErrors(cudaStreamDestroy(cudaResObj.stream));
checkCudaErrors(cudaFree(cudaResObj.d_outputImage)); checkCudaErrors(cudaFree(cudaResObj.d_outputImage));
} }
static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj, int deviceId, int32_t imageWidth, int32_t imageHeight) static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj,
{ int deviceId, int32_t imageWidth,
int32_t imageHeight) {
#if WRITE_OUTPUT_IMAGE #if WRITE_OUTPUT_IMAGE
unsigned int *h_dstImage; unsigned int *h_dstImage;
checkCudaErrors(cudaMallocHost(&h_dstImage, sizeof(unsigned int)*imageHeight*imageWidth)); checkCudaErrors(cudaMallocHost(
&h_dstImage, sizeof(unsigned int) * imageHeight * imageWidth));
#endif #endif
dim3 block(16, 16, 1); dim3 block(16, 16, 1);
dim3 grid((imageWidth/block.x)+1, (imageHeight/block.y)+1, 1); dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1);
yuvToGrayscale<<<grid, block, 0, cudaResObj.stream>>>(cudaResObj.cudaSurfaceNvmediaBuf[0], cudaResObj.d_outputImage, imageWidth, imageHeight); yuvToGrayscale<<<grid, block, 0, cudaResObj.stream>>>(
cudaResObj.cudaSurfaceNvmediaBuf[0], cudaResObj.d_outputImage, imageWidth,
imageHeight);
#if WRITE_OUTPUT_IMAGE #if WRITE_OUTPUT_IMAGE
checkCudaErrors(cudaMemcpyAsync(h_dstImage, cudaResObj.d_outputImage, sizeof(unsigned int)*imageHeight*imageWidth, cudaMemcpyDeviceToHost, cudaResObj.stream)); checkCudaErrors(
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream)); cudaMemcpyAsync(h_dstImage, cudaResObj.d_outputImage,
char outputFilename[1024]; sizeof(unsigned int) * imageHeight * imageWidth,
std::string image_filename = "Grayscale"; cudaMemcpyDeviceToHost, cudaResObj.stream));
strcpy(outputFilename, image_filename.c_str()); checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
strcpy(outputFilename + image_filename.length(), "_non-nvsci_out.ppm"); char outputFilename[1024];
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth, imageHeight); std::string image_filename = "Grayscale";
printf("Wrote '%s'\n", outputFilename); strcpy(outputFilename, image_filename.c_str());
checkCudaErrors(cudaFreeHost(h_dstImage)); strcpy(outputFilename + image_filename.length(), "_non-nvsci_out.ppm");
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth,
imageHeight);
printf("Wrote '%s'\n", outputFilename);
checkCudaErrors(cudaFreeHost(h_dstImage));
#else #else
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream)); checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
#endif #endif
} }
// CUDA operates **without** NvSci APIs buffer/synchronization objects. // CUDA operates **without** NvSci APIs buffer/synchronization objects.
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId) void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj,
{ int deviceId) {
for(int k = 0; k < ctx->numSurfaces; k++) { for (int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(cudaMemcpy2DToArray(cudaResObj.d_yuvArray[k], 0, 0, ctx->dstBuff[k], checkCudaErrors(cudaMemcpy2DToArray(
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel, cudaResObj.d_yuvArray[k], 0, 0, ctx->dstBuff[k],
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel, ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
ctx->heightSurface * ctx->yScalePtr[k], cudaMemcpyHostToDevice)); ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
} ctx->heightSurface * ctx->yScalePtr[k], cudaMemcpyHostToDevice));
// run cuda kernel over surface object of the LUMA surface part to extract grayscale. }
yuvToGrayscaleCudaKernelNonNvSci(cudaResObj, deviceId, ctx->widthSurface, ctx->heightSurface); // run cuda kernel over surface object of the LUMA surface part to extract
// grayscale.
yuvToGrayscaleCudaKernelNonNvSci(cudaResObj, deviceId, ctx->widthSurface,
ctx->heightSurface);
} }

View File

@ -25,7 +25,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef __CUDA_BUFIMPORT_KERNEL_H__ #ifndef __CUDA_BUFIMPORT_KERNEL_H__
#define __CUDA_BUFIMPORT_KERNEL_H__ #define __CUDA_BUFIMPORT_KERNEL_H__
@ -35,38 +34,39 @@
#include "nvscisync.h" #include "nvscisync.h"
#include "nvmedia_utils/cmdline.h" #include "nvmedia_utils/cmdline.h"
struct cudaExternalResInterop struct cudaExternalResInterop {
{ cudaMipmappedArray_t *d_mipmapArray;
cudaMipmappedArray_t *d_mipmapArray; cudaArray_t *d_mipLevelArray;
cudaArray_t *d_mipLevelArray; cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf; cudaStream_t stream;
cudaStream_t stream; cudaExternalMemory_t extMemImageBuf;
cudaExternalMemory_t extMemImageBuf; cudaExternalSemaphore_t waitSem;
cudaExternalSemaphore_t waitSem; cudaExternalSemaphore_t signalSem;
cudaExternalSemaphore_t signalSem;
int32_t planeCount; int32_t planeCount;
int32_t *imageWidth; uint64_t *planeOffset;
int32_t *imageHeight; int32_t *imageWidth;
unsigned int *d_outputImage; int32_t *imageHeight;
unsigned int *d_outputImage;
}; };
struct cudaResources struct cudaResources {
{ cudaArray_t *d_yuvArray;
cudaArray_t *d_yuvArray; cudaStream_t stream;
cudaStream_t stream; cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf; unsigned int *d_outputImage;
unsigned int *d_outputImage;
}; };
void runCudaOperation(cudaExternalResInterop& cudaExtResObj, NvSciSyncFence *fence, void runCudaOperation(cudaExternalResInterop &cudaExtResObj,
NvSciSyncFence *cudaSignalfence, int deviceId, int iterations); NvSciSyncFence *fence, NvSciSyncFence *cudaSignalfence,
int deviceId, int iterations);
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId); void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId);
void setupCuda(cudaExternalResInterop& cudaExtResObj, NvSciBufObj& inputBufObj, void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj,
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj, int deviceId); NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj,
int deviceId);
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId); void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId);
void cleanupCuda(cudaExternalResInterop& cudaObjs); void cleanupCuda(cudaExternalResInterop &cudaObjs);
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj); void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj);
#endif #endif

View File

@ -55,144 +55,156 @@
fflush(stdout); \ fflush(stdout); \
exit(EXIT_FAILURE); \ exit(EXIT_FAILURE); \
} \ } \
} while (0) } while (0)
static void cleanup(Blit2DTest* ctx, NvMediaStatus status) static void cleanup(Blit2DTest* ctx, NvMediaStatus status) {
{ if (ctx->i2d != NULL) {
if (ctx->i2d != NULL) { NvMedia2DDestroy(ctx->i2d);
NvMedia2DDestroy(ctx->i2d); }
}
if (ctx->device != NULL) { if (ctx->device != NULL) {
NvMediaDeviceDestroy(ctx->device); NvMediaDeviceDestroy(ctx->device);
} }
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
} }
int main(int argc, char* argv[]) {
TestArgs args;
Blit2DTest ctx;
NvMediaStatus status = NVMEDIA_STATUS_ERROR;
NvSciSyncFence nvMediaSignalerFence = NvSciSyncFenceInitializer;
NvSciSyncFence cudaSignalerFence = NvSciSyncFenceInitializer;
int main (int argc, char *argv[]) int cudaDeviceId;
{ uint64_t startTime, endTime;
TestArgs args; uint64_t operationStartTime, operationEndTime;
Blit2DTest ctx; double processingTime;
NvMediaStatus status = NVMEDIA_STATUS_ERROR;
NvSciSyncFence nvMediaSignalerFence = NvSciSyncFenceInitializer;
NvSciSyncFence cudaSignalerFence = NvSciSyncFenceInitializer;
int cudaDeviceId; /* Read configuration from command line and config file */
uint64_t startTime, endTime; memset(&args, 0, sizeof(TestArgs));
uint64_t operationStartTime, operationEndTime; memset(&ctx, 0, sizeof(Blit2DTest));
double processingTime;
/* Read configuration from command line and config file */ /* ParseArgs parses the command line and the 2D configuration file and
memset(&args, 0, sizeof(TestArgs)); * populates all initParams and run time configuration in to appropriate
memset(&ctx, 0, sizeof(Blit2DTest)); * structures within args
*/
/* ParseArgs parses the command line and the 2D configuration file and populates all initParams if (ParseArgs(argc, argv, &args)) {
* and run time configuration in to appropriate structures within args PrintUsage();
*/ return -1;
if (ParseArgs(argc, argv, &args)) { }
PrintUsage(); /* Check version */
return -1; NvMediaVersion version;
} status = NvMedia2DGetVersion(&version);
/* Check version */ if (status == NVMEDIA_STATUS_OK) {
NvMediaVersion version; printf("Library version: %u.%u\n", version.major, version.minor);
status = NvMedia2DGetVersion(&version); printf("Header version: %u.%u\n", NVMEDIA_2D_VERSION_MAJOR,
if (status == NVMEDIA_STATUS_OK) { NVMEDIA_2D_VERSION_MINOR);
printf("Library version: %u.%u\n", version.major, version.minor); if ((version.major != NVMEDIA_2D_VERSION_MAJOR) ||
printf("Header version: %u.%u\n", NVMEDIA_2D_VERSION_MAJOR, NVMEDIA_2D_VERSION_MINOR); (version.minor != NVMEDIA_2D_VERSION_MINOR)) {
if ((version.major != NVMEDIA_2D_VERSION_MAJOR) || printf("Library and Header mismatch!\n");
(version.minor != NVMEDIA_2D_VERSION_MINOR)) { cleanup(&ctx, status);
printf("Library and Header mismatch!\n");
cleanup(&ctx, status);
}
} }
}
// Create NvMedia device // Create NvMedia device
ctx.device = NvMediaDeviceCreate(); ctx.device = NvMediaDeviceCreate();
if(!ctx.device) { if (!ctx.device) {
printf("%s: Failed to create NvMedia device\n", __func__); printf("%s: Failed to create NvMedia device\n", __func__);
cleanup(&ctx, status); cleanup(&ctx, status);
} }
// Create 2D blitter // Create 2D blitter
ctx.i2d = NvMedia2DCreate(ctx.device); ctx.i2d = NvMedia2DCreate(ctx.device);
if(!ctx.i2d) { if (!ctx.i2d) {
printf("%s: Failed to create NvMedia 2D i2d\n", __func__); printf("%s: Failed to create NvMedia 2D i2d\n", __func__);
cleanup(&ctx, status); cleanup(&ctx, status);
} }
cudaDeviceId = findCudaDevice(argc, (const char**)argv); cudaDeviceId = findCudaDevice(argc, (const char**)argv);
// NvMedia-CUDA operations without NvSCI APIs starts // NvMedia-CUDA operations without NvSCI APIs starts
cudaResources cudaResObj; cudaResources cudaResObj;
GetTimeMicroSec(&startTime); GetTimeMicroSec(&startTime);
setupNvMedia(&args, &ctx); setupNvMedia(&args, &ctx);
setupCuda(&ctx, cudaResObj, cudaDeviceId); setupCuda(&ctx, cudaResObj, cudaDeviceId);
GetTimeMicroSec(&operationStartTime); GetTimeMicroSec(&operationStartTime);
for (int i = 0; i < args.iterations; i++) for (int i = 0; i < args.iterations; i++) {
{ runNvMediaBlit2D(&args, &ctx);
runNvMediaBlit2D(&args, &ctx); runCudaOperation(&ctx, cudaResObj, cudaDeviceId);
runCudaOperation(&ctx, cudaResObj, cudaDeviceId); }
} GetTimeMicroSec(&operationEndTime);
GetTimeMicroSec(&operationEndTime);
cleanupNvMedia(&ctx); cleanupNvMedia(&ctx);
cleanupCuda(&ctx, cudaResObj); cleanupCuda(&ctx, cudaResObj);
GetTimeMicroSec(&endTime); GetTimeMicroSec(&endTime);
// NvMedia-CUDA operations without NvSCI APIs ends // NvMedia-CUDA operations without NvSCI APIs ends
processingTime = (double)(operationEndTime - operationStartTime)/1000.0; processingTime = (double)(operationEndTime - operationStartTime) / 1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations without NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations); printf(
processingTime = (double)(endTime - startTime)/1000.0; "Overall Processing time of NvMedia-CUDA Operations without NvSCI APIs "
printf("Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup without NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations); "%.4f ms with %zu iterations\n",
processingTime, args.iterations);
processingTime = (double)(endTime - startTime) / 1000.0;
printf(
"Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup "
"without NvSCI APIs %.4f ms with %zu iterations\n",
processingTime, args.iterations);
NvSciBufObj dstNvSciBufobj, srcNvSciBufobj; NvSciBufObj dstNvSciBufobj, srcNvSciBufobj;
NvSciSyncObj nvMediaSignalerSyncObj, cudaSignalerSyncObj; NvSciSyncObj nvMediaSignalerSyncObj, cudaSignalerSyncObj;
cudaExternalResInterop cudaExtResObj; cudaExternalResInterop cudaExtResObj;
// NvMedia-CUDA operations via interop with NvSCI APIs starts // NvMedia-CUDA operations via interop with NvSCI APIs starts
GetTimeMicroSec(&startTime); GetTimeMicroSec(&startTime);
setupNvMediaSignalerNvSciSync(&ctx, nvMediaSignalerSyncObj, cudaDeviceId); setupNvMediaSignalerNvSciSync(&ctx, nvMediaSignalerSyncObj, cudaDeviceId);
setupCudaSignalerNvSciSync(&ctx, cudaSignalerSyncObj, cudaDeviceId); setupCudaSignalerNvSciSync(&ctx, cudaSignalerSyncObj, cudaDeviceId);
setupNvMedia(&args, &ctx, srcNvSciBufobj, dstNvSciBufobj, nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId); setupNvMedia(&args, &ctx, srcNvSciBufobj, dstNvSciBufobj,
setupCuda(cudaExtResObj, dstNvSciBufobj, nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId); nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId);
setupCuda(cudaExtResObj, dstNvSciBufobj, nvMediaSignalerSyncObj,
cudaSignalerSyncObj, cudaDeviceId);
GetTimeMicroSec(&operationStartTime); GetTimeMicroSec(&operationStartTime);
for (int i = 0; i < args.iterations; i++) for (int i = 0; i < args.iterations; i++) {
{ runNvMediaBlit2D(&args, &ctx, nvMediaSignalerSyncObj, &cudaSignalerFence,
runNvMediaBlit2D(&args, &ctx, nvMediaSignalerSyncObj, &cudaSignalerFence, &nvMediaSignalerFence); &nvMediaSignalerFence);
runCudaOperation(cudaExtResObj, &nvMediaSignalerFence, &cudaSignalerFence, cudaDeviceId, args.iterations); runCudaOperation(cudaExtResObj, &nvMediaSignalerFence, &cudaSignalerFence,
} cudaDeviceId, args.iterations);
GetTimeMicroSec(&operationEndTime); }
GetTimeMicroSec(&operationEndTime);
cleanupNvMedia(&ctx, nvMediaSignalerSyncObj, cudaSignalerSyncObj); cleanupNvMedia(&ctx, nvMediaSignalerSyncObj, cudaSignalerSyncObj);
cleanupCuda(cudaExtResObj); cleanupCuda(cudaExtResObj);
cleanupNvSciSync(nvMediaSignalerSyncObj); cleanupNvSciSync(nvMediaSignalerSyncObj);
cleanupNvSciSync(cudaSignalerSyncObj); cleanupNvSciSync(cudaSignalerSyncObj);
cleanupNvSciBuf(srcNvSciBufobj); cleanupNvSciBuf(srcNvSciBufobj);
cleanupNvSciBuf(dstNvSciBufobj); cleanupNvSciBuf(dstNvSciBufobj);
GetTimeMicroSec(&endTime); GetTimeMicroSec(&endTime);
// NvMedia-CUDA operations via interop with NvSCI APIs ends // NvMedia-CUDA operations via interop with NvSCI APIs ends
processingTime = (double)(operationEndTime - operationStartTime)/1000.0; processingTime = (double)(operationEndTime - operationStartTime) / 1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations with NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations); printf(
processingTime = (double)(endTime - startTime)/1000.0; "Overall Processing time of NvMedia-CUDA Operations with NvSCI APIs %.4f "
printf("Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup with NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations); "ms with %zu iterations\n",
processingTime, args.iterations);
processingTime = (double)(endTime - startTime) / 1000.0;
printf(
"Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup "
"with NvSCI APIs %.4f ms with %zu iterations\n",
processingTime, args.iterations);
if (ctx.i2d != NULL) { if (ctx.i2d != NULL) {
NvMedia2DDestroy(ctx.i2d); NvMedia2DDestroy(ctx.i2d);
} }
if (ctx.device != NULL) { if (ctx.device != NULL) {
NvMediaDeviceDestroy(ctx.device); NvMediaDeviceDestroy(ctx.device);
} }
if (status == NVMEDIA_STATUS_OK) { if (status == NVMEDIA_STATUS_OK) {
return 0; return 0;
} } else {
else { return 1;
return 1; }
}
} }

View File

@ -38,463 +38,434 @@
#include "nvmedia_2d_nvscisync.h" #include "nvmedia_2d_nvscisync.h"
#include "nvsci_setup.h" #include "nvsci_setup.h"
/* Create an NvMediaImage backed by a newly set-up NvSciBuf object.
 *
 * device       - NvMedia device used for attribute filling and image creation.
 * type         - NvMedia surface type of the image.
 * attrs        - surface allocation attributes.
 * numAttrs     - number of entries in attrs.
 * flags        - currently unused; 0 is passed to NvMediaImageFillNvSciBufAttrs.
 * bufobj       - receives the buffer object set up by setupNvSciBuf().
 * cudaDeviceId - forwarded to setupNvSciBuf().
 *
 * Returns the created image, or NULL on failure. On failure every resource
 * acquired by this function is released again.
 */
NvMediaImage *NvMediaImageCreateUsingNvScibuf(NvMediaDevice *device,
                                              NvMediaSurfaceType type,
                                              const NvMediaSurfAllocAttr *attrs,
                                              uint32_t numAttrs, uint32_t flags,
                                              NvSciBufObj &bufobj,
                                              int cudaDeviceId) {
  NvSciBufModule module = NULL;
  NvSciError err = NvSciError_Success;
  NvMediaStatus status = NVMEDIA_STATUS_OK;
  NvSciBufAttrList attrlist = NULL;
  NvSciBufAttrValAccessPerm access_perm = NvSciBufAccessPerm_ReadWrite;
  NvSciBufAttrKeyValuePair attr_kvp = {NvSciBufGeneralAttrKey_RequiredPerm,
                                       &access_perm, sizeof(access_perm)};
  NvMediaImage *image = NULL;
  /* Set once setupNvSciBuf() has run, so the failure path never frees a
   * buffer handle this function did not set up (callers may pass an
   * uninitialised bufobj). */
  bool bufCreated = false;

  (void)flags;

  err = NvSciBufModuleOpen(&module);
  if (err != NvSciError_Success) {
    printf("%s: NvSciBuffModuleOpen failed. Error: %d \n", __func__, err);
    goto fail_cleanup;
  }

  err = NvSciBufAttrListCreate(module, &attrlist);
  if (err != NvSciError_Success) {
    printf("%s: SciBufAttrListCreate failed. Error: %d \n", __func__, err);
    goto fail_cleanup;
  }

  /* Request read/write access permission on the buffer. */
  err = NvSciBufAttrListSetAttrs(attrlist, &attr_kvp, 1);
  if (err != NvSciError_Success) {
    printf("%s: AccessPermSetAttr failed. Error: %d \n", __func__, err);
    goto fail_cleanup;
  }

  status =
      NvMediaImageFillNvSciBufAttrs(device, type, attrs, numAttrs, 0, attrlist);
  if (status != NVMEDIA_STATUS_OK) {
    /* Report the NvMedia status; err is still NvSciError_Success here. */
    printf("%s: ImageFillSciBufAttrs failed. Error: %d \n", __func__, status);
    goto fail_cleanup;
  }

  setupNvSciBuf(bufobj, attrlist, cudaDeviceId);
  bufCreated = true;

  status = NvMediaImageCreateFromNvSciBuf(device, bufobj, &image);
  if (status != NVMEDIA_STATUS_OK) {
    printf("%s: ImageCreatefromSciBuf failed. Error: %d \n", __func__, status);
    goto fail_cleanup;
  }

  NvSciBufAttrListFree(attrlist);

  if (module != NULL) {
    NvSciBufModuleClose(module);
  }

  return image;

fail_cleanup:
  if (attrlist != NULL) {
    NvSciBufAttrListFree(attrlist);
  }
  if (bufCreated && bufobj != NULL) {
    NvSciBufObjFree(bufobj);
    bufobj = NULL;
  }

  if (module != NULL) {
    NvSciBufModuleClose(module);
  }
  if (image != NULL) {
    NvMediaImageDestroy(image);
  }
  return NULL;
}
/* Create NvMediaImage surface based on the input attributes. /* Create NvMediaImage surface based on the input attributes.
* Returns NVMEDIA_STATUS_OK on success * Returns NVMEDIA_STATUS_OK on success
*/ */
static NvMediaStatus static NvMediaStatus createSurface(Blit2DTest *ctx,
createSurface(Blit2DTest *ctx, NvMediaSurfFormatAttr *surfFormatAttrs,
NvMediaSurfFormatAttr *surfFormatAttrs, NvMediaSurfAllocAttr *surfAllocAttrs,
NvMediaSurfAllocAttr *surfAllocAttrs, uint32_t numSurfAllocAttrs,
uint32_t numSurfAllocAttrs, NvMediaImage **image, NvSciBufObj &bufObj,
NvMediaImage **image, int cudaDeviceId) {
NvSciBufObj &bufObj, NvMediaSurfaceType surfType;
int cudaDeviceId)
{
NvMediaSurfaceType surfType;
/* create source image */ /* create source image */
surfType = NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX); surfType =
*image = NvMediaImageCreateUsingNvScibuf(ctx->device, /* device */ NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
surfType, /* surface type */ *image = NvMediaImageCreateUsingNvScibuf(ctx->device, /* device */
surfAllocAttrs, surfType, /* surface type */
numSurfAllocAttrs, surfAllocAttrs, numSurfAllocAttrs, 0,
0, bufObj, cudaDeviceId);
bufObj,
cudaDeviceId);
if(*image == NULL) { if (*image == NULL) {
printf ("Unable to create image\n"); printf("Unable to create image\n");
return NVMEDIA_STATUS_ERROR; return NVMEDIA_STATUS_ERROR;
} }
InitImage (*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value); InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n", /* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value, surfType);*/ __func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value,
surfType);*/
return NVMEDIA_STATUS_OK; return NVMEDIA_STATUS_OK;
} }
/* Create NvMediaImage surface based on the input attributes. /* Create NvMediaImage surface based on the input attributes.
* Returns NVMEDIA_STATUS_OK on success * Returns NVMEDIA_STATUS_OK on success
*/ */
static NvMediaStatus static NvMediaStatus createSurfaceNonNvSCI(
createSurfaceNonNvSCI(Blit2DTest *ctx, Blit2DTest *ctx, NvMediaSurfFormatAttr *surfFormatAttrs,
NvMediaSurfFormatAttr *surfFormatAttrs, NvMediaSurfAllocAttr *surfAllocAttrs, uint32_t numSurfAllocAttrs,
NvMediaSurfAllocAttr *surfAllocAttrs, NvMediaImage **image) {
uint32_t numSurfAllocAttrs, NvMediaSurfaceType surfType;
NvMediaImage **image)
{
NvMediaSurfaceType surfType;
/* create source image */ /* create source image */
surfType = NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX); surfType =
NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
*image = NvMediaImageCreateNew(ctx->device, surfType, surfAllocAttrs, numSurfAllocAttrs, 0); *image = NvMediaImageCreateNew(ctx->device, surfType, surfAllocAttrs,
numSurfAllocAttrs, 0);
if(*image == NULL) { if (*image == NULL) {
printf ("Unable to create image\n"); printf("Unable to create image\n");
return NVMEDIA_STATUS_ERROR; return NVMEDIA_STATUS_ERROR;
} }
InitImage (*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value); InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n", /* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value, surfType);*/ __func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value,
surfType);*/
return NVMEDIA_STATUS_OK; return NVMEDIA_STATUS_OK;
} }
static void destroySurface(NvMediaImage *image) { NvMediaImageDestroy(image); }
static void destroySurface(NvMediaImage *image) static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs *args,
{ NvSciSyncObj &nvMediaSignalerSyncObj,
NvMediaImageDestroy(image); NvSciSyncFence *preSyncFence,
} NvSciSyncFence *fence) {
NvMediaStatus status;
NvMediaImageSurfaceMap surfaceMap;
status = ReadImage(args->inputFileName, /* fileName */
0, /* frameNum */
args->srcSurfAllocAttrs[0].value, /* source image width */
args->srcSurfAllocAttrs[1].value, /* source image height */
ctx->srcImage, /* srcImage */
NVMEDIA_TRUE, /* uvOrderFlag */
1, /* bytesPerPixel */
MSB_ALIGNED); /* pixelAlignment */
static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs* args, NvSciSyncObj &nvMediaSignalerSyncObj, if (status != NVMEDIA_STATUS_OK) {
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence) printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
{ return status;
NvMediaStatus status; }
NvMediaImageSurfaceMap surfaceMap;
status = ReadImage(args->inputFileName, /* fileName */ if ((args->srcRect.x1 <= args->srcRect.x0) ||
0, /* frameNum */ (args->srcRect.y1 <= args->srcRect.y0)) {
args->srcSurfAllocAttrs[0].value, /* source image width */ ctx->srcRect = NULL;
args->srcSurfAllocAttrs[1].value, /* source image height */ } else {
ctx->srcImage, /* srcImage */ ctx->srcRect = &(args->srcRect);
NVMEDIA_TRUE, /* uvOrderFlag */ }
1, /* bytesPerPixel */
MSB_ALIGNED); /* pixelAlignment */
if ((args->dstRect.x1 <= args->dstRect.x0) ||
(args->dstRect.y1 <= args->dstRect.y0)) {
ctx->dstRect = NULL;
} else {
ctx->dstRect = &(args->dstRect);
}
static int64_t launch = 0;
// Start inserting pre-fence from second launch inorder to for NvMedia2Blit to
// wait
// for cuda signal on fence.
if (launch) {
status = NvMedia2DInsertPreNvSciSyncFence(ctx->i2d, preSyncFence);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status); printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__,
return status; status);
return status;
} }
NvSciSyncFenceClear(preSyncFence);
}
launch++;
if ((args->srcRect.x1 <= args->srcRect.x0) || (args->srcRect.y1 <= args->srcRect.y0)) { status = NvMedia2DSetNvSciSyncObjforEOF(ctx->i2d, nvMediaSignalerSyncObj);
ctx->srcRect = NULL; if (status != NVMEDIA_STATUS_OK) {
} else { printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__,
ctx->srcRect = &(args->srcRect); status);
} return status;
}
if ((args->dstRect.x1 <= args->dstRect.x0) || (args->dstRect.y1 <= args->dstRect.y0)) { /* 2DBlit processing on input image */
ctx->dstRect = NULL; status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
} else { ctx->dstImage, /* dstSurface */
ctx->dstRect = &(args->dstRect); ctx->dstRect, /* dstRect */
} ctx->srcImage, /* srcSurface */
ctx->srcRect, /* srcRect */
&args->blitParams, /* params */
NULL); /* paramsOut */
static int64_t launch = 0; if (status != NVMEDIA_STATUS_OK) {
// Start inserting pre-fence from second launch inorder to for NvMedia2Blit to wait printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
// for cuda signal on fence. return status;
if (launch) }
{
status = NvMedia2DInsertPreNvSciSyncFence(ctx->i2d, preSyncFence);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__, status);
return status;
}
NvSciSyncFenceClear(preSyncFence);
}
launch++;
status = NvMedia2DSetNvSciSyncObjforEOF(ctx->i2d, nvMediaSignalerSyncObj); status =
if(status != NVMEDIA_STATUS_OK) { NvMedia2DGetEOFNvSciSyncFence(ctx->i2d, nvMediaSignalerSyncObj, fence);
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__, status); if (status != NVMEDIA_STATUS_OK) {
return status; printf("%s: NvMedia2DGetEOFNvSciSyncFence failed: %d\n", __func__, status);
} return status;
}
/* 2DBlit processing on input image */ return NVMEDIA_STATUS_OK;
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
ctx->dstImage, /* dstSurface */
ctx->dstRect, /* dstRect */
ctx->srcImage, /* srcSurface */
ctx->srcRect, /* srcRect */
&args->blitParams, /* params */
NULL); /* paramsOut */
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
return status;
}
status = NvMedia2DGetEOFNvSciSyncFence(ctx->i2d, nvMediaSignalerSyncObj, fence);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DGetEOFNvSciSyncFence failed: %d\n", __func__, status);
return status;
}
return NVMEDIA_STATUS_OK;
} }
/* Read the input file into the source image, blit it to the destination image
 * and copy the result into the buffer held by ctx (no NvSci involved).
 *
 * ctx  - test context (blitter, source/destination images, output buffer).
 * args - parsed arguments (file name, rectangles, blit parameters).
 *
 * Returns NVMEDIA_STATUS_OK on success or the status of the failing ReadImage
 * or NvMedia2DBlitEx call.
 */
static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs *args) {
  NvMediaStatus status;

  status = ReadImage(args->inputFileName,              /* fileName */
                     0,                                /* frameNum */
                     args->srcSurfAllocAttrs[0].value, /* source image width */
                     args->srcSurfAllocAttrs[1].value, /* source image height */
                     ctx->srcImage,                    /* srcImage */
                     NVMEDIA_TRUE,                     /* uvOrderFlag */
                     1,                                /* bytesPerPixel */
                     MSB_ALIGNED);                     /* pixelAlignment */

  if (status != NVMEDIA_STATUS_OK) {
    printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
    return status;
  }

  /* An empty or inverted rectangle is passed to the blit as NULL. */
  if ((args->srcRect.x1 <= args->srcRect.x0) ||
      (args->srcRect.y1 <= args->srcRect.y0)) {
    ctx->srcRect = NULL;
  } else {
    ctx->srcRect = &(args->srcRect);
  }

  if ((args->dstRect.x1 <= args->dstRect.x0) ||
      (args->dstRect.y1 <= args->dstRect.y0)) {
    ctx->dstRect = NULL;
  } else {
    ctx->dstRect = &(args->dstRect);
  }

  /* 2DBlit processing on input image */
  status = NvMedia2DBlitEx(ctx->i2d,          /* i2d */
                           ctx->dstImage,     /* dstSurface */
                           ctx->dstRect,      /* dstRect */
                           ctx->srcImage,     /* srcSurface */
                           ctx->srcRect,      /* srcRect */
                           &args->blitParams, /* params */
                           NULL);             /* paramsOut */
  if (status != NVMEDIA_STATUS_OK) {
    printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
    return status;
  }

  /* Write output image into buffer */
  ctx->bytesPerPixel = 1;
  WriteImageToAllocatedBuffer(ctx, ctx->dstImage, NVMEDIA_TRUE, NVMEDIA_FALSE,
                              ctx->bytesPerPixel);

  return NVMEDIA_STATUS_OK;
}
/* Unregister and destroy the source and destination images held by ctx, then
 * terminate the process when status reports a failure.
 *
 * ctx    - test context; srcImage and dstImage are released if non-NULL and
 *          reset to NULL afterwards.
 * status - defaults to NVMEDIA_STATUS_OK; any other value makes this function
 *          call exit(EXIT_FAILURE) after releasing the images.
 */
static void cleanup(Blit2DTest *ctx, NvMediaStatus status = NVMEDIA_STATUS_OK) {
  if (ctx->srcImage != NULL) {
    NvMedia2DImageUnRegister(ctx->i2d, ctx->srcImage);
    destroySurface(ctx->srcImage);
    /* Setup and cleanup run more than once per process; clearing the pointer
     * keeps a later cleanup from releasing the same image again. */
    ctx->srcImage = NULL;
  }
  if (ctx->dstImage != NULL) {
    NvMedia2DImageUnRegister(ctx->i2d, ctx->dstImage);
    destroySurface(ctx->dstImage);
    ctx->dstImage = NULL;
  }
  if (status != NVMEDIA_STATUS_OK) {
    exit(EXIT_FAILURE);
  }
}
/* Tear down the NvSci-based NvMedia resources: release the images, unregister
 * both sync objects from the 2D blitter and deinitialise the NvMedia image
 * NvSciBuf support.
 *
 * ctx        - test context holding the blitter and images.
 * syncObj    - first sync object to unregister from the blitter.
 * preSyncObj - second sync object to unregister from the blitter.
 *
 * Exits the process with EXIT_FAILURE if either unregistration fails.
 */
void cleanupNvMedia(Blit2DTest *ctx, NvSciSyncObj &syncObj,
                    NvSciSyncObj &preSyncObj) {
  NvMediaStatus status;
  cleanup(ctx);

  status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, syncObj);
  if (status != NVMEDIA_STATUS_OK) {
    printf("%s: NvMedia2DUnregisterNvSciSyncObj failed for syncObj\n",
           __func__);
    exit(EXIT_FAILURE);
  }

  status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, preSyncObj);
  if (status != NVMEDIA_STATUS_OK) {
    printf("%s: NvMedia2DUnregisterNvSciSyncObj failed for preSyncObj\n",
           __func__);
    exit(EXIT_FAILURE);
  }

  NvMediaImageNvSciBufDeinit();
}
/* Tear down the non-NvSci NvMedia resources: release the images and free the
 * host-side output buffers stored in ctx.
 *
 * The freed pointers are reset to NULL so that calling this function again on
 * the same context is harmless (free(NULL) is a no-op).
 */
void cleanupNvMedia(Blit2DTest *ctx) {
  cleanup(ctx);

  free(ctx->dstBuffPitches);
  ctx->dstBuffPitches = NULL;

  free(ctx->dstBuffer);
  ctx->dstBuffer = NULL;

  free(ctx->dstBuff);
  ctx->dstBuff = NULL;
}
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj &srcNvSciBufobj, void setupNvMedia(TestArgs *args, Blit2DTest *ctx, NvSciBufObj &srcNvSciBufobj,
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj, NvSciBufObj &dstNvSciBufobj, NvSciSyncObj &syncObj,
int cudaDeviceId) NvSciSyncObj &preSyncObj, int cudaDeviceId) {
{ NvMediaStatus status;
NvMediaStatus status; status = NvMediaImageNvSciBufInit();
status = NvMediaImageNvSciBufInit(); if (status != NVMEDIA_STATUS_OK) {
if(status != NVMEDIA_STATUS_OK) { printf("%s: NvMediaImageSciBufInit failed\n", __func__);
printf("%s: NvMediaImageSciBufInit failed\n",__func__); cleanup(ctx, status);
cleanup(ctx, status); }
}
// Create source surface // Create source surface
status = createSurface(ctx, status = createSurface(ctx, args->srcSurfFormatAttrs, args->srcSurfAllocAttrs,
args->srcSurfFormatAttrs, args->numSurfAllocAttrs, &ctx->srcImage,
args->srcSurfAllocAttrs, srcNvSciBufobj, cudaDeviceId);
args->numSurfAllocAttrs, if (status != NVMEDIA_STATUS_OK) {
&ctx->srcImage, printf("%s: Unable to create buffer pools\n", __func__);
srcNvSciBufobj, cleanup(ctx, status);
cudaDeviceId); }
if(status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
// Create destination surface // Create destination surface
status = createSurface(ctx, status = createSurface(ctx, args->dstSurfFormatAttrs, args->dstSurfAllocAttrs,
args->dstSurfFormatAttrs, args->numSurfAllocAttrs, &ctx->dstImage,
args->dstSurfAllocAttrs, dstNvSciBufobj, cudaDeviceId);
args->numSurfAllocAttrs, if (status != NVMEDIA_STATUS_OK) {
&ctx->dstImage, printf("%s: Unable to create buffer pools\n", __func__);
dstNvSciBufobj, cleanup(ctx, status);
cudaDeviceId); }
if(status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
//Register source Surface // Register source Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ); status =
if ( status != NVMEDIA_STATUS_OK) { NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
printf("%s: Unable to register source surface\n", __func__); if (status != NVMEDIA_STATUS_OK) {
cleanup(ctx, status); printf("%s: Unable to register source surface\n", __func__);
} cleanup(ctx, status);
//Register destination Surface }
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage, NVMEDIA_ACCESS_MODE_READ_WRITE); // Register destination Surface
if ( status != NVMEDIA_STATUS_OK) { status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage,
printf("%s: Unable to register destination surface\n", __func__); NVMEDIA_ACCESS_MODE_READ_WRITE);
cleanup(ctx, status); if (status != NVMEDIA_STATUS_OK) {
} printf("%s: Unable to register destination surface\n", __func__);
cleanup(ctx, status);
}
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_EOFSYNCOBJ, syncObj); status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_EOFSYNCOBJ, syncObj);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__); printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
} }
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_PRESYNCOBJ, preSyncObj); status =
if (status != NVMEDIA_STATUS_OK) { NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_PRESYNCOBJ, preSyncObj);
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__); if (status != NVMEDIA_STATUS_OK) {
} printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
}
} }
// Set up the NvMedia side without NvSciBuf: create and register the source
// and destination images, then allocate the host buffer the blit result is
// written to. Each failing step goes through cleanup(), which exits.
void setupNvMedia(TestArgs *args, Blit2DTest *ctx) {
  // Source image: create it from the source format/allocation attributes.
  NvMediaStatus rc = createSurfaceNonNvSCI(
      ctx, args->srcSurfFormatAttrs, args->srcSurfAllocAttrs,
      args->numSurfAllocAttrs, &ctx->srcImage);
  if (NVMEDIA_STATUS_OK != rc) {
    printf("%s: Unable to create buffer pools\n", __func__);
    cleanup(ctx, rc);
  }

  // Destination image: same procedure with the destination attributes.
  rc = createSurfaceNonNvSCI(ctx, args->dstSurfFormatAttrs,
                             args->dstSurfAllocAttrs, args->numSurfAllocAttrs,
                             &ctx->dstImage);
  if (NVMEDIA_STATUS_OK != rc) {
    printf("%s: Unable to create buffer pools\n", __func__);
    cleanup(ctx, rc);
  }

  // The blitter only reads from the source image ...
  rc = NvMedia2DImageRegister(ctx->i2d, ctx->srcImage,
                              NVMEDIA_ACCESS_MODE_READ);
  if (NVMEDIA_STATUS_OK != rc) {
    printf("%s: Unable to register source surface\n", __func__);
    cleanup(ctx, rc);
  }

  // ... and gets read/write access to the destination image.
  rc = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage,
                              NVMEDIA_ACCESS_MODE_READ_WRITE);
  if (NVMEDIA_STATUS_OK != rc) {
    printf("%s: Unable to register destination surface\n", __func__);
    cleanup(ctx, rc);
  }

  // Allocate buffer for writing image & set image parameters in Blit2DTest.
  ctx->bytesPerPixel = 1;
  AllocateBufferToWriteImage(ctx, ctx->dstImage,
                             NVMEDIA_TRUE,   /* uvOrderFlag */
                             NVMEDIA_FALSE); /* appendFlag */
}
// Run one non-NvSci blit. On failure the error is reported and cleanup() is
// invoked with the failing status, which exits the process.
void runNvMediaBlit2D(TestArgs *args, Blit2DTest *ctx) {
  const NvMediaStatus blitResult = blit2DImageNonNvSCI(ctx, args);
  if (blitResult == NVMEDIA_STATUS_OK) {
    return;
  }

  printf("%s: Blit2D failed\n", __func__);
  cleanup(ctx, blitResult);
}
// Run one NvSci-synchronised blit. preSyncFence and fence are forwarded
// unchanged to blit2DImage(). On failure the error is reported and cleanup()
// is invoked with the failing status, which exits the process.
void runNvMediaBlit2D(TestArgs *args, Blit2DTest *ctx,
                      NvSciSyncObj &nvMediaSignalerSyncObj,
                      NvSciSyncFence *preSyncFence, NvSciSyncFence *fence) {
  const NvMediaStatus blitResult =
      blit2DImage(ctx, args, nvMediaSignalerSyncObj, preSyncFence, fence);
  if (blitResult == NVMEDIA_STATUS_OK) {
    return;
  }

  printf("%s: Blit2D failed\n", __func__);
  cleanup(ctx, blitResult);
}

View File

@ -25,7 +25,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef __NVMEDIA_PRODUCER_H__ #ifndef __NVMEDIA_PRODUCER_H__
#define __NVMEDIA_PRODUCER_H__ #define __NVMEDIA_PRODUCER_H__
#include "nvmedia_utils/cmdline.h" #include "nvmedia_utils/cmdline.h"
@ -36,13 +35,14 @@
#include "nvmedia_image_nvscibuf.h" #include "nvmedia_image_nvscibuf.h"
#include "nvscisync.h" #include "nvscisync.h"
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj &syncObj, void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj& syncObj,
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence); NvSciSyncFence* preSyncFence, NvSciSyncFence* fence);
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx); void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx);
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj &srcNvSciBufobj, void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj& srcNvSciBufobj,
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj, NvSciBufObj& dstNvSciBufobj, NvSciSyncObj& syncObj,
int cudaDeviceId); NvSciSyncObj& preSyncObj, int cudaDeviceId);
void setupNvMedia(TestArgs* args, Blit2DTest* ctx); void setupNvMedia(TestArgs* args, Blit2DTest* ctx);
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj); void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj& syncObj,
NvSciSyncObj& preSyncObj);
void cleanupNvMedia(Blit2DTest* ctx); void cleanupNvMedia(Blit2DTest* ctx);
#endif #endif

View File

@ -32,7 +32,6 @@
#include "nvsci_setup.h" #include "nvsci_setup.h"
#include "nvmedia_2d_nvscisync.h" #include "nvmedia_2d_nvscisync.h"
#define checkNvSciErrors(call) \ #define checkNvSciErrors(call) \
do { \ do { \
NvSciError _status = call; \ NvSciError _status = call; \
@ -44,111 +43,115 @@
fflush(stdout); \ fflush(stdout); \
exit(EXIT_FAILURE); \ exit(EXIT_FAILURE); \
} \ } \
} while (0) } while (0)
/**
 * Allocates an NvSciSync object for which NvMedia 2D is the signaler and CUDA
 * is the waiter.
 *
 * A signaler attribute list is filled by NvMedia2DFillNvSciSyncAttrList() and
 * a waiter attribute list by cudaDeviceGetNvSciSyncAttributes(); the two are
 * reconciled and syncObj is allocated from the reconciled list. NvSci and
 * NvMedia failures terminate the process through exit(EXIT_FAILURE).
 *
 * @param ctx          Blit context; ctx->i2d supplies the NvMedia attributes.
 * @param syncObj      Receives the allocated NvSciSync object.
 * @param cudaDeviceId CUDA device that is made current and whose wait
 *                     attributes are requested.
 */
void setupNvMediaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
                                   int cudaDeviceId) {
  NvSciSyncModule sciSyncModule;
  checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));

  NvSciSyncAttrList signalerAttrList = nullptr;
  NvSciSyncAttrList waiterAttrList = nullptr;
  NvSciSyncAttrList syncUnreconciledList[2];
  // Initialised to nullptr so the null checks further down never read an
  // indeterminate handle if reconciliation leaves an output untouched.
  NvSciSyncAttrList syncReconciledList = nullptr;
  NvSciSyncAttrList syncConflictList = nullptr;

  checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
  checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));

  NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(
      ctx->i2d, signalerAttrList, NVMEDIA_SIGNALER);
  if (status != NVMEDIA_STATUS_OK) {
    printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__);
    exit(EXIT_FAILURE);
  }

  checkCudaErrors(cudaSetDevice(cudaDeviceId));
  checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(waiterAttrList, cudaDeviceId,
                                                   cudaNvSciSyncAttrWait));

  syncUnreconciledList[0] = signalerAttrList;
  syncUnreconciledList[1] = waiterAttrList;
  checkNvSciErrors(NvSciSyncAttrListReconcile(
      syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
  checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));

  // Release every attribute list created here; the reconciled list was
  // previously leaked.
  // NOTE(review): relies on the sync object keeping its own reference to the
  // reconciled list, as in the NvSciSync documentation samples - confirm.
  NvSciSyncAttrListFree(signalerAttrList);
  NvSciSyncAttrListFree(waiterAttrList);
  if (syncReconciledList != nullptr) {
    NvSciSyncAttrListFree(syncReconciledList);
  }
  if (syncConflictList != nullptr) {
    NvSciSyncAttrListFree(syncConflictList);
  }
}
/**
 * Allocates an NvSciSync object for which CUDA is the signaler and NvMedia 2D
 * is the waiter.
 *
 * A waiter attribute list is filled by NvMedia2DFillNvSciSyncAttrList() and a
 * signaler attribute list by cudaDeviceGetNvSciSyncAttributes(); the two are
 * reconciled and syncObj is allocated from the reconciled list. NvSci and
 * NvMedia failures terminate the process through exit(EXIT_FAILURE).
 *
 * @param ctx          Blit context; ctx->i2d supplies the NvMedia attributes.
 * @param syncObj      Receives the allocated NvSciSync object.
 * @param cudaDeviceId CUDA device that is made current and whose signal
 *                     attributes are requested.
 */
void setupCudaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
                                int cudaDeviceId) {
  NvSciSyncModule sciSyncModule;
  checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));

  NvSciSyncAttrList signalerAttrList = nullptr;
  NvSciSyncAttrList waiterAttrList = nullptr;
  NvSciSyncAttrList syncUnreconciledList[2];
  // Initialised to nullptr so the null checks further down never read an
  // indeterminate handle if reconciliation leaves an output untouched.
  NvSciSyncAttrList syncReconciledList = nullptr;
  NvSciSyncAttrList syncConflictList = nullptr;

  checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
  checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));

  NvMediaStatus status =
      NvMedia2DFillNvSciSyncAttrList(ctx->i2d, waiterAttrList, NVMEDIA_WAITER);
  if (status != NVMEDIA_STATUS_OK) {
    printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__);
    exit(EXIT_FAILURE);
  }

  checkCudaErrors(cudaSetDevice(cudaDeviceId));
  checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(
      signalerAttrList, cudaDeviceId, cudaNvSciSyncAttrSignal));

  syncUnreconciledList[0] = signalerAttrList;
  syncUnreconciledList[1] = waiterAttrList;
  checkNvSciErrors(NvSciSyncAttrListReconcile(
      syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
  checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));

  // Release every attribute list created here; the reconciled list was
  // previously leaked.
  // NOTE(review): relies on the sync object keeping its own reference to the
  // reconciled list, as in the NvSciSync documentation samples - confirm.
  NvSciSyncAttrListFree(signalerAttrList);
  NvSciSyncAttrListFree(waiterAttrList);
  if (syncReconciledList != nullptr) {
    NvSciSyncAttrListFree(syncReconciledList);
  }
  if (syncConflictList != nullptr) {
    NvSciSyncAttrListFree(syncConflictList);
  }
}
/**
 * Adds the CUDA GPU id to an NvMedia buffer attribute list, then reconciles
 * the list and allocates an NvSciBuf object from it.
 *
 * The UUID obtained from cuDeviceGetUuid() is stored under
 * NvSciBufGeneralAttrKey_GpuId in nvmediaAttrlist, which is then passed as
 * the only unreconciled list to NvSciBufAttrListReconcileAndObjAlloc().
 * Driver API and NvSci failures terminate the process through
 * exit(EXIT_FAILURE).
 *
 * @param bufobj          Receives the allocated NvSciBuf object.
 * @param nvmediaAttrlist Attribute list to extend with the GPU id; modified
 *                        in place.
 * @param cudaDeviceId    Value passed to cuDeviceGetUuid() to identify the
 *                        device.
 */
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist,
                   int cudaDeviceId) {
  CUuuid devUUID;
  // Initialised to NULL so the check after reconciliation never reads an
  // indeterminate handle if the call leaves this output untouched.
  NvSciBufAttrList conflictlist = NULL;
  NvSciBufAttrList bufUnreconciledAttrlist[1];

  CUresult res = cuDeviceGetUuid(&devUUID, cudaDeviceId);
  if (res != CUDA_SUCCESS) {
    fprintf(stderr, "Driver API error = %04d \n", res);
    exit(EXIT_FAILURE);
  }

  NvSciBufAttrKeyValuePair attr_gpuid[] = {NvSciBufGeneralAttrKey_GpuId,
                                           &devUUID, sizeof(devUUID)};

  // set CUDA GPU ID to attribute list
  checkNvSciErrors(NvSciBufAttrListSetAttrs(
      nvmediaAttrlist, attr_gpuid,
      sizeof(attr_gpuid) / sizeof(NvSciBufAttrKeyValuePair)));

  bufUnreconciledAttrlist[0] = nvmediaAttrlist;

  checkNvSciErrors(NvSciBufAttrListReconcileAndObjAlloc(
      bufUnreconciledAttrlist, 1, &bufobj, &conflictlist));
  if (conflictlist != NULL) {
    NvSciBufAttrListFree(conflictlist);
  }
}
/**
 * Frees an NvSciBuf object if the handle is non-null.
 *
 * @param Bufobj Buffer object to pass to NvSciBufObjFree(); a NULL handle is
 *               ignored. The handle itself is not reset.
 */
void cleanupNvSciBuf(NvSciBufObj &Bufobj) {
  if (Bufobj == NULL) {
    return;
  }
  NvSciBufObjFree(Bufobj);
}
/**
 * Frees an NvSciSync object if the handle is non-null.
 *
 * The guard tests the handle itself. Comparing the function name
 * NvSciSyncObjFree against NULL tests the function's address, which is
 * always non-null, so a null handle would still reach the free call.
 *
 * @param syncObj Sync object to pass to NvSciSyncObjFree(); a NULL handle is
 *                ignored. The handle itself is not reset.
 */
void cleanupNvSciSync(NvSciSyncObj &syncObj) {
  if (syncObj != NULL) {
    NvSciSyncObjFree(syncObj);
  }
}

View File

@ -25,16 +25,18 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef __NVSCI_SETUP_H__ #ifndef __NVSCI_SETUP_H__
#define __NVSCI_SETUP_H__ #define __NVSCI_SETUP_H__
#include "nvmedia_utils/cmdline.h" #include "nvmedia_utils/cmdline.h"
#include <nvscibuf.h> #include <nvscibuf.h>
#include <nvscisync.h> #include <nvscisync.h>
void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId); void setupNvMediaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId); int cudaDeviceId);
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist, int cudaDeviceId); void setupCudaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
int cudaDeviceId);
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist,
int cudaDeviceId);
void cleanupNvSciBuf(NvSciBufObj &Bufobj); void cleanupNvSciBuf(NvSciBufObj &Bufobj);
void cleanupNvSciSync(NvSciSyncObj &syncObj); void cleanupNvSciSync(NvSciSyncObj &syncObj);
#endif #endif