cudaNvSciNvMedia plane offset correction

This commit is contained in:
Rutwik Choughule 2021-06-10 17:33:24 +05:30
parent ba5a483c6e
commit 2aeaf51b11
7 changed files with 907 additions and 863 deletions

View File

@ -36,7 +36,6 @@
// Enable this to 1 if require cuda processed output to ppm file. // Enable this to 1 if require cuda processed output to ppm file.
#define WRITE_OUTPUT_IMAGE 0 #define WRITE_OUTPUT_IMAGE 0
#define checkNvSciErrors(call) \ #define checkNvSciErrors(call) \
do { \ do { \
NvSciError _status = call; \ NvSciError _status = call; \
@ -50,24 +49,26 @@
} \ } \
} while (0) } while (0)
// Reads one 8-bit sample per pixel from `surfaceObject` and writes it to
// `dstImage` as a uchar4 (sample, sample, sample, 0), i.e. a grayscale pixel.
//
// Launch layout: a 2D grid of 2D blocks. Each thread starts at its global
// (x, y) position and strides by the full grid extent in each dimension
// independently, so every pixel is visited exactly once whatever the grid
// size is.
//
// Preconditions: `dstImage` must hold at least imageWidth * imageHeight
// 32-bit elements, stored row-major with a row length of imageWidth.
// Non-positive dimensions make the kernel a no-op.
__global__ static void yuvToGrayscale(cudaSurfaceObject_t surfaceObject,
                                      unsigned int *dstImage,
                                      int32_t imageWidth, int32_t imageHeight) {
  // Reject non-positive sizes before converting to size_t; a negative value
  // would otherwise turn into a huge unsigned bound.
  if (imageWidth <= 0 || imageHeight <= 0) {
    return;
  }

  const size_t width = (size_t)imageWidth;
  const size_t height = (size_t)imageHeight;
  const size_t strideX = (size_t)gridDim.x * blockDim.x;
  const size_t strideY = (size_t)gridDim.y * blockDim.y;

  uchar4 *dstImageUchar4 = (uchar4 *)dstImage;

  // x and y advance in separate loops. Advancing both in one loop would only
  // walk the diagonal tiles when the grid is smaller than the image.
  for (size_t y = (size_t)blockIdx.y * blockDim.y + threadIdx.y; y < height;
       y += strideY) {
    for (size_t x = (size_t)blockIdx.x * blockDim.x + threadIdx.x; x < width;
         x += strideX) {
      // surf2Dread takes the x coordinate in bytes.
      int colInBytes = (int)(x * sizeof(unsigned char));
      unsigned char luma =
          surf2Dread<unsigned char>(surfaceObject, colInBytes, (int)y);
      dstImageUchar4[y * width + x] = make_uchar4(luma, luma, luma, 0);
    }
  }
}
static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem, NvSciSyncObj &syncObj) static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem,
{ NvSciSyncObj &syncObj) {
cudaExternalSemaphoreHandleDesc extSemDesc; cudaExternalSemaphoreHandleDesc extSemDesc;
memset(&extSemDesc, 0, sizeof(extSemDesc)); memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync; extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
@ -76,8 +77,8 @@ static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem, NvSciSyncObj &s
checkCudaErrors(cudaImportExternalSemaphore(&extSem, &extSemDesc)); checkCudaErrors(cudaImportExternalSemaphore(&extSem, &extSemDesc));
} }
static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem, NvSciSyncFence *fence, static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem,
cudaStream_t stream) { NvSciSyncFence *fence, cudaStream_t stream) {
cudaExternalSemaphoreWaitParams waitParams; cudaExternalSemaphoreWaitParams waitParams;
memset(&waitParams, 0, sizeof(waitParams)); memset(&waitParams, 0, sizeof(waitParams));
// For cross-process signaler-waiter applications need to use NvSciIpc // For cross-process signaler-waiter applications need to use NvSciIpc
@ -86,10 +87,12 @@ static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem, NvSciSyncFen
waitParams.params.nvSciSync.fence = (void *)fence; waitParams.params.nvSciSync.fence = (void *)fence;
waitParams.flags = 0; waitParams.flags = 0;
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, stream)); checkCudaErrors(
cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, stream));
} }
static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem, NvSciSyncFence *fence, static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem,
NvSciSyncFence *fence,
cudaStream_t stream) { cudaStream_t stream) {
cudaExternalSemaphoreSignalParams signalParams; cudaExternalSemaphoreSignalParams signalParams;
memset(&signalParams, 0, sizeof(signalParams)); memset(&signalParams, 0, sizeof(signalParams));
@ -99,37 +102,43 @@ static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem, NvSciSyn
signalParams.params.nvSciSync.fence = (void *)fence; signalParams.params.nvSciSync.fence = (void *)fence;
signalParams.flags = 0; signalParams.flags = 0;
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams, checkCudaErrors(
1, stream)); cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams, 1, stream));
} }
// Launches yuvToGrayscale on cudaExtResObj.stream, reading from the first
// surface object (cudaSurfaceNvmediaBuf[0]) and writing to d_outputImage.
//
// imageWidth / imageHeight: dimensions, in pixels, passed to the kernel and
// used to size the launch grid.
//
// When WRITE_OUTPUT_IMAGE is non-zero the result is also copied to pinned
// host memory, the stream is synchronized, and the image is saved as
// "Grayscale_nvsci_out.ppm". Otherwise the function returns without
// synchronizing the stream.
static void yuvToGrayscaleCudaKernel(cudaExternalResInterop &cudaExtResObj,
                                     int32_t imageWidth, int32_t imageHeight) {
#if WRITE_OUTPUT_IMAGE
  unsigned int *h_dstImage;
  checkCudaErrors(cudaMallocHost(
      &h_dstImage, sizeof(unsigned int) * imageHeight * imageWidth));
#endif
  dim3 block(16, 16, 1);
  // One block more than the integer quotient in each dimension, so a partial
  // tile at the right/bottom edge is covered; the kernel bounds-checks itself.
  dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1);

  yuvToGrayscale<<<grid, block, 0, cudaExtResObj.stream>>>(
      cudaExtResObj.cudaSurfaceNvmediaBuf[0], cudaExtResObj.d_outputImage,
      imageWidth, imageHeight);
  // A kernel launch returns no status itself; query it explicitly so a bad
  // launch configuration is reported here rather than at a later API call.
  checkCudaErrors(cudaGetLastError());

#if WRITE_OUTPUT_IMAGE
  checkCudaErrors(
      cudaMemcpyAsync(h_dstImage, cudaExtResObj.d_outputImage,
                      sizeof(unsigned int) * imageHeight * imageWidth,
                      cudaMemcpyDeviceToHost, cudaExtResObj.stream));
  // The host buffer is only valid once the async copy has completed.
  checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
  char outputFilename[1024];
  std::string image_filename = "Grayscale";
  // Bounded formatting instead of chained strcpy calls.
  snprintf(outputFilename, sizeof(outputFilename), "%s_nvsci_out.ppm",
           image_filename.c_str());
  sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth,
                imageHeight);
  printf("Wrote '%s'\n", outputFilename);
  checkCudaErrors(cudaFreeHost(h_dstImage));
#endif
}
static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj, NvSciBufObj& inputBufObj) static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj,
{ NvSciBufObj &inputBufObj) {
NvSciBufModule module = NULL; NvSciBufModule module = NULL;
NvSciBufAttrList attrlist = NULL; NvSciBufAttrList attrlist = NULL;
NvSciBufAttrKeyValuePair pairArrayOut[10]; NvSciBufAttrKeyValuePair pairArrayOut[10];
@ -148,19 +157,29 @@ static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj, NvSciBuf
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneHeight; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneHeight;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Layout; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Layout;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneBitsPerPixel; pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneOffset;
checkNvSciErrors(NvSciBufAttrListGetAttrs(attrlist, pairArrayOut, numAttrs)); checkNvSciErrors(NvSciBufAttrListGetAttrs(attrlist, pairArrayOut, numAttrs));
uint64_t size = *(uint64_t *)pairArrayOut[0].value; uint64_t size = *(uint64_t *)pairArrayOut[0].value;
uint8_t channelCount = *(uint8_t *)pairArrayOut[1].value; uint8_t channelCount = *(uint8_t *)pairArrayOut[1].value;
cudaExtResObj.planeCount = *(int32_t *)pairArrayOut[2].value; cudaExtResObj.planeCount = *(int32_t *)pairArrayOut[2].value;
cudaExtResObj.imageWidth = (int32_t*) malloc(sizeof(int32_t)*cudaExtResObj.planeCount); cudaExtResObj.imageWidth =
cudaExtResObj.imageHeight = (int32_t*) malloc(sizeof(int32_t)*cudaExtResObj.planeCount); (int32_t *)malloc(sizeof(int32_t) * cudaExtResObj.planeCount);
cudaExtResObj.imageHeight =
(int32_t *)malloc(sizeof(int32_t) * cudaExtResObj.planeCount);
cudaExtResObj.planeOffset =
(uint64_t *)malloc(sizeof(uint64_t) * cudaExtResObj.planeCount);
memcpy(cudaExtResObj.imageWidth, (int32_t *)pairArrayOut[3].value, cudaExtResObj.planeCount * sizeof(int32_t)); memcpy(cudaExtResObj.imageWidth, (int32_t *)pairArrayOut[3].value,
memcpy(cudaExtResObj.imageHeight, (int32_t *)pairArrayOut[4].value, cudaExtResObj.planeCount * sizeof(int32_t)); cudaExtResObj.planeCount * sizeof(int32_t));
memcpy(cudaExtResObj.imageHeight, (int32_t *)pairArrayOut[4].value,
cudaExtResObj.planeCount * sizeof(int32_t));
memcpy(cudaExtResObj.planeOffset, (uint64_t *)pairArrayOut[7].value,
cudaExtResObj.planeCount * sizeof(uint64_t));
NvSciBufAttrValImageLayoutType layout = *(NvSciBufAttrValImageLayoutType *)pairArrayOut[5].value; NvSciBufAttrValImageLayoutType layout =
*(NvSciBufAttrValImageLayoutType *)pairArrayOut[5].value;
uint32_t bitsPerPixel = *(uint32_t *)pairArrayOut[6].value; uint32_t bitsPerPixel = *(uint32_t *)pairArrayOut[6].value;
if (layout != NvSciBufImage_BlockLinearType) { if (layout != NvSciBufImage_BlockLinearType) {
@ -173,9 +192,11 @@ static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj, NvSciBuf
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf; memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = inputBufObj; memHandleDesc.handle.nvSciBufObject = inputBufObj;
memHandleDesc.size = size; memHandleDesc.size = size;
checkCudaErrors(cudaImportExternalMemory(&cudaExtResObj.extMemImageBuf, &memHandleDesc)); checkCudaErrors(
cudaImportExternalMemory(&cudaExtResObj.extMemImageBuf, &memHandleDesc));
cudaExtResObj.d_mipmapArray = (cudaMipmappedArray_t*) malloc(sizeof(cudaMipmappedArray_t) * cudaExtResObj.planeCount); cudaExtResObj.d_mipmapArray = (cudaMipmappedArray_t *)malloc(
sizeof(cudaMipmappedArray_t) * cudaExtResObj.planeCount);
for (int i = 0; i < cudaExtResObj.planeCount; i++) { for (int i = 0; i < cudaExtResObj.planeCount; i++) {
cudaExtent extent = {}; cudaExtent extent = {};
@ -187,31 +208,37 @@ static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj, NvSciBuf
switch (channelCount) { switch (channelCount) {
case 1: case 1:
default: default:
desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0, cudaChannelFormatKindUnsigned); desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0,
cudaChannelFormatKindUnsigned);
break; break;
case 2: case 2:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0, cudaChannelFormatKindUnsigned); desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0,
cudaChannelFormatKindUnsigned);
break; break;
case 3: case 3:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel, 0, cudaChannelFormatKindUnsigned); desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel,
0, cudaChannelFormatKindUnsigned);
break; break;
case 4: case 4:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel, bitsPerPixel, cudaChannelFormatKindUnsigned); desc =
cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel,
bitsPerPixel, cudaChannelFormatKindUnsigned);
break; break;
} }
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0}; cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
mipmapDesc.offset = 0; mipmapDesc.offset = cudaExtResObj.planeOffset[i];
mipmapDesc.formatDesc = desc; mipmapDesc.formatDesc = desc;
mipmapDesc.extent = extent; mipmapDesc.extent = extent;
mipmapDesc.flags = 0; mipmapDesc.flags = 0;
mipmapDesc.numLevels = 1; mipmapDesc.numLevels = 1;
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&cudaExtResObj.d_mipmapArray[i], cudaExtResObj.extMemImageBuf, &mipmapDesc)); checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(
&cudaExtResObj.d_mipmapArray[i], cudaExtResObj.extMemImageBuf,
&mipmapDesc));
} }
} }
static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray) static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray) {
{
cudaResourceDesc resourceDesc; cudaResourceDesc resourceDesc;
memset(&resourceDesc, 0, sizeof(resourceDesc)); memset(&resourceDesc, 0, sizeof(resourceDesc));
resourceDesc.resType = cudaResourceTypeArray; resourceDesc.resType = cudaResourceTypeArray;
@ -222,8 +249,7 @@ static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray)
return surfaceObject; return surfaceObject;
} }
static cudaStream_t createCudaStream(int deviceId) static cudaStream_t createCudaStream(int deviceId) {
{
checkCudaErrors(cudaSetDevice(deviceId)); checkCudaErrors(cudaSetDevice(deviceId));
cudaStream_t stream; cudaStream_t stream;
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
@ -232,31 +258,39 @@ static cudaStream_t createCudaStream(int deviceId)
// Prepares the CUDA-side resources for the NvSci interop path on `deviceId`:
//   1. imports `syncObj` as the semaphore CUDA waits on and
//      `cudaSignalerSyncObj` as the semaphore CUDA signals,
//   2. imports `inputBufObj` as external memory (cudaImportNvSciImage fills in
//      planeCount, the per-plane sizes and d_mipmapArray),
//   3. creates one surface object per plane over mip level 0,
//   4. creates the stream and allocates the device output buffer, sized from
//      the dimensions of plane 0 with one unsigned int per pixel.
void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj,
               NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj,
               int deviceId) {
  checkCudaErrors(cudaSetDevice(deviceId));

  cudaImportNvSciSync(cudaExtResObj.waitSem, syncObj);
  cudaImportNvSciSync(cudaExtResObj.signalSem, cudaSignalerSyncObj);

  cudaImportNvSciImage(cudaExtResObj, inputBufObj);

  // planeCount is only meaningful after the image import above.
  const int32_t numPlanes = cudaExtResObj.planeCount;
  cudaExtResObj.d_mipLevelArray =
      (cudaArray_t *)malloc(sizeof(cudaArray_t) * numPlanes);
  cudaExtResObj.cudaSurfaceNvmediaBuf =
      (cudaSurfaceObject_t *)malloc(sizeof(cudaSurfaceObject_t) * numPlanes);

  const uint32_t baseLevel = 0;
  for (int32_t plane = 0; plane < numPlanes; ++plane) {
    cudaArray_t &levelArray = cudaExtResObj.d_mipLevelArray[plane];
    checkCudaErrors(cudaGetMipmappedArrayLevel(
        &levelArray, cudaExtResObj.d_mipmapArray[plane], baseLevel));
    cudaExtResObj.cudaSurfaceNvmediaBuf[plane] = createCudaSurface(levelArray);
  }

  cudaExtResObj.stream = createCudaStream(deviceId);

  const size_t outputBytes = sizeof(unsigned int) *
                             cudaExtResObj.imageWidth[0] *
                             cudaExtResObj.imageHeight[0];
  checkCudaErrors(cudaMalloc(&cudaExtResObj.d_outputImage, outputBytes));
}
// CUDA clean up buffers used **with** NvSci API. // CUDA clean up buffers used **with** NvSci API.
void cleanupCuda(cudaExternalResInterop& cudaExtResObj) void cleanupCuda(cudaExternalResInterop &cudaExtResObj) {
{
for (int i = 0; i < cudaExtResObj.planeCount; i++) { for (int i = 0; i < cudaExtResObj.planeCount; i++) {
checkCudaErrors(cudaDestroySurfaceObject(cudaExtResObj.cudaSurfaceNvmediaBuf[i])); checkCudaErrors(
cudaDestroySurfaceObject(cudaExtResObj.cudaSurfaceNvmediaBuf[i]));
checkCudaErrors(cudaFreeMipmappedArray(cudaExtResObj.d_mipmapArray[i])); checkCudaErrors(cudaFreeMipmappedArray(cudaExtResObj.d_mipmapArray[i]));
} }
free(cudaExtResObj.d_mipmapArray); free(cudaExtResObj.d_mipmapArray);
@ -271,58 +305,69 @@ void cleanupCuda(cudaExternalResInterop& cudaExtResObj)
checkCudaErrors(cudaFree(cudaExtResObj.d_outputImage)); checkCudaErrors(cudaFree(cudaExtResObj.d_outputImage));
} }
// Runs one iteration of the NvSci interop path on `deviceId`: makes the stream
// wait on `cudaWaitFence`, launches the grayscale conversion for plane 0, then
// either signals `cudaSignalFence` or synchronizes the stream.
//
// `iterations` is the total number of calls the caller intends to make. A
// function-local static counter tracks how many calls have happened so far and
// is never reset.
void runCudaOperation(cudaExternalResInterop &cudaExtResObj,
                      NvSciSyncFence *cudaWaitFence,
                      NvSciSyncFence *cudaSignalFence, int deviceId,
                      int iterations) {
  static int64_t launchCount = 0;

  checkCudaErrors(cudaSetDevice(deviceId));

  waitExternalSemaphore(cudaExtResObj.waitSem, cudaWaitFence,
                        cudaExtResObj.stream);

  // Run the kernel over the surface object of the LUMA plane to extract
  // grayscale.
  yuvToGrayscaleCudaKernel(cudaExtResObj, cudaExtResObj.imageWidth[0],
                           cudaExtResObj.imageHeight[0]);

  // Every call except the last signals the fence NvMedia2DBlit waits on. The
  // last call has no NvMedia operation pending behind it, so it ends with a
  // stream synchronize instead.
  const bool isFinalLaunch = launchCount >= iterations - 1;
  if (isFinalLaunch) {
    checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
  } else {
    signalExternalSemaphore(cudaExtResObj.signalSem, cudaSignalFence,
                            cudaExtResObj.stream);
  }
  ++launchCount;
}
// Allocates the CUDA resources for the path that does not use NvSci: one CUDA
// array and one surface object per surface in `ctx`, the device output buffer
// (one unsigned int per pixel of widthSurface x heightSurface), and the
// stream. All of it is created on `deviceId`.
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId) {
  checkCudaErrors(cudaSetDevice(deviceId));

  cudaResObj.d_yuvArray =
      (cudaArray_t *)malloc(sizeof(cudaArray_t) * ctx->numSurfaces);
  cudaResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *)malloc(
      sizeof(cudaSurfaceObject_t) * ctx->numSurfaces);

  // Every value of ctx->bytesPerPixel currently maps to a single unsigned
  // 8-bit channel.
  cudaChannelFormatDesc channelDesc =
      cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);

  for (int surfaceIdx = 0; surfaceIdx < ctx->numSurfaces; ++surfaceIdx) {
    // Array extent for this surface: the base surface size scaled by the
    // per-surface factors.
    const size_t arrayWidth =
        ctx->widthSurface * ctx->xScalePtr[surfaceIdx] * ctx->bytesPerPixel;
    const size_t arrayHeight = ctx->heightSurface * ctx->yScalePtr[surfaceIdx];

    cudaArray_t &yuvArray = cudaResObj.d_yuvArray[surfaceIdx];
    checkCudaErrors(
        cudaMallocArray(&yuvArray, &channelDesc, arrayWidth, arrayHeight));
    cudaResObj.cudaSurfaceNvmediaBuf[surfaceIdx] = createCudaSurface(yuvArray);
  }

  checkCudaErrors(cudaMalloc(
      &cudaResObj.d_outputImage,
      sizeof(unsigned int) * ctx->widthSurface * ctx->heightSurface));

  cudaResObj.stream = createCudaStream(deviceId);
}
// CUDA clean up buffers used **without** NvSci API. // CUDA clean up buffers used **without** NvSci API.
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj) void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj) {
{
for (int k = 0; k < ctx->numSurfaces; k++) { for (int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(cudaDestroySurfaceObject(cudaResObj.cudaSurfaceNvmediaBuf[k])); checkCudaErrors(
cudaDestroySurfaceObject(cudaResObj.cudaSurfaceNvmediaBuf[k]));
checkCudaErrors(cudaFreeArray(cudaResObj.d_yuvArray[k])); checkCudaErrors(cudaFreeArray(cudaResObj.d_yuvArray[k]));
} }
@ -332,25 +377,33 @@ void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj)
checkCudaErrors(cudaFree(cudaResObj.d_outputImage)); checkCudaErrors(cudaFree(cudaResObj.d_outputImage));
} }
static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj, int deviceId, int32_t imageWidth, int32_t imageHeight) static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj,
{ int deviceId, int32_t imageWidth,
int32_t imageHeight) {
#if WRITE_OUTPUT_IMAGE #if WRITE_OUTPUT_IMAGE
unsigned int *h_dstImage; unsigned int *h_dstImage;
checkCudaErrors(cudaMallocHost(&h_dstImage, sizeof(unsigned int)*imageHeight*imageWidth)); checkCudaErrors(cudaMallocHost(
&h_dstImage, sizeof(unsigned int) * imageHeight * imageWidth));
#endif #endif
dim3 block(16, 16, 1); dim3 block(16, 16, 1);
dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1); dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1);
yuvToGrayscale<<<grid, block, 0, cudaResObj.stream>>>(cudaResObj.cudaSurfaceNvmediaBuf[0], cudaResObj.d_outputImage, imageWidth, imageHeight); yuvToGrayscale<<<grid, block, 0, cudaResObj.stream>>>(
cudaResObj.cudaSurfaceNvmediaBuf[0], cudaResObj.d_outputImage, imageWidth,
imageHeight);
#if WRITE_OUTPUT_IMAGE #if WRITE_OUTPUT_IMAGE
checkCudaErrors(cudaMemcpyAsync(h_dstImage, cudaResObj.d_outputImage, sizeof(unsigned int)*imageHeight*imageWidth, cudaMemcpyDeviceToHost, cudaResObj.stream)); checkCudaErrors(
cudaMemcpyAsync(h_dstImage, cudaResObj.d_outputImage,
sizeof(unsigned int) * imageHeight * imageWidth,
cudaMemcpyDeviceToHost, cudaResObj.stream));
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream)); checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
char outputFilename[1024]; char outputFilename[1024];
std::string image_filename = "Grayscale"; std::string image_filename = "Grayscale";
strcpy(outputFilename, image_filename.c_str()); strcpy(outputFilename, image_filename.c_str());
strcpy(outputFilename + image_filename.length(), "_non-nvsci_out.ppm"); strcpy(outputFilename + image_filename.length(), "_non-nvsci_out.ppm");
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth, imageHeight); sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth,
imageHeight);
printf("Wrote '%s'\n", outputFilename); printf("Wrote '%s'\n", outputFilename);
checkCudaErrors(cudaFreeHost(h_dstImage)); checkCudaErrors(cudaFreeHost(h_dstImage));
#else #else
@ -359,14 +412,17 @@ static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj, int devi
} }
// Runs one iteration of the path that does **not** use NvSci buffer or
// synchronization objects: copies every surface in ctx->dstBuff into its CUDA
// array with a blocking host-to-device copy, then runs the grayscale
// conversion.
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj,
                      int deviceId) {
  for (int surfaceIdx = 0; surfaceIdx < ctx->numSurfaces; ++surfaceIdx) {
    // The source is passed as tightly packed: its pitch equals the copied row
    // width.
    const size_t rowBytes =
        ctx->widthSurface * ctx->xScalePtr[surfaceIdx] * ctx->bytesPerPixel;
    const size_t rowCount = ctx->heightSurface * ctx->yScalePtr[surfaceIdx];

    checkCudaErrors(cudaMemcpy2DToArray(
        cudaResObj.d_yuvArray[surfaceIdx], 0, 0, ctx->dstBuff[surfaceIdx],
        rowBytes, rowBytes, rowCount, cudaMemcpyHostToDevice));
  }

  // Run the kernel over the surface object of the LUMA plane to extract
  // grayscale.
  yuvToGrayscaleCudaKernelNonNvSci(cudaResObj, deviceId, ctx->widthSurface,
                                   ctx->heightSurface);
}

View File

@ -25,7 +25,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef __CUDA_BUFIMPORT_KERNEL_H__ #ifndef __CUDA_BUFIMPORT_KERNEL_H__
#define __CUDA_BUFIMPORT_KERNEL_H__ #define __CUDA_BUFIMPORT_KERNEL_H__
@ -35,8 +34,7 @@
#include "nvscisync.h" #include "nvscisync.h"
#include "nvmedia_utils/cmdline.h" #include "nvmedia_utils/cmdline.h"
struct cudaExternalResInterop struct cudaExternalResInterop {
{
cudaMipmappedArray_t *d_mipmapArray; cudaMipmappedArray_t *d_mipmapArray;
cudaArray_t *d_mipLevelArray; cudaArray_t *d_mipLevelArray;
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf; cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
@ -46,25 +44,27 @@ struct cudaExternalResInterop
cudaExternalSemaphore_t signalSem; cudaExternalSemaphore_t signalSem;
int32_t planeCount; int32_t planeCount;
uint64_t *planeOffset;
int32_t *imageWidth; int32_t *imageWidth;
int32_t *imageHeight; int32_t *imageHeight;
unsigned int *d_outputImage; unsigned int *d_outputImage;
}; };
// CUDA resources for the path that does not use NvSci. Filled in by
// setupCuda(Blit2DTest *, cudaResources &, int) and released by the matching
// cleanupCuda overload.
struct cudaResources {
  // One CUDA array per surface (ctx->numSurfaces entries).
  cudaArray_t *d_yuvArray;
  // Stream on which the grayscale kernel is launched.
  cudaStream_t stream;
  // One surface object per entry of d_yuvArray.
  cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
  // Device buffer receiving the grayscale result, one unsigned int per pixel.
  unsigned int *d_outputImage;
};
void runCudaOperation(cudaExternalResInterop& cudaExtResObj, NvSciSyncFence *fence, void runCudaOperation(cudaExternalResInterop &cudaExtResObj,
NvSciSyncFence *cudaSignalfence, int deviceId, int iterations); NvSciSyncFence *fence, NvSciSyncFence *cudaSignalfence,
int deviceId, int iterations);
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId); void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId);
void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj, void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj,
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj, int deviceId); NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj,
int deviceId);
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId); void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId);
void cleanupCuda(cudaExternalResInterop &cudaObjs); void cleanupCuda(cudaExternalResInterop &cudaObjs);
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj); void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj);

View File

@ -57,8 +57,7 @@
} \ } \
} while (0) } while (0)
static void cleanup(Blit2DTest* ctx, NvMediaStatus status) static void cleanup(Blit2DTest* ctx, NvMediaStatus status) {
{
if (ctx->i2d != NULL) { if (ctx->i2d != NULL) {
NvMedia2DDestroy(ctx->i2d); NvMedia2DDestroy(ctx->i2d);
} }
@ -71,9 +70,7 @@ static void cleanup(Blit2DTest* ctx, NvMediaStatus status)
} }
} }
int main(int argc, char* argv[]) {
int main (int argc, char *argv[])
{
TestArgs args; TestArgs args;
Blit2DTest ctx; Blit2DTest ctx;
NvMediaStatus status = NVMEDIA_STATUS_ERROR; NvMediaStatus status = NVMEDIA_STATUS_ERROR;
@ -89,8 +86,9 @@ int main (int argc, char *argv[])
memset(&args, 0, sizeof(TestArgs)); memset(&args, 0, sizeof(TestArgs));
memset(&ctx, 0, sizeof(Blit2DTest)); memset(&ctx, 0, sizeof(Blit2DTest));
/* ParseArgs parses the command line and the 2D configuration file and populates all initParams /* ParseArgs parses the command line and the 2D configuration file and
* and run time configuration in to appropriate structures within args * populates all initParams and run time configuration in to appropriate
* structures within args
*/ */
if (ParseArgs(argc, argv, &args)) { if (ParseArgs(argc, argv, &args)) {
PrintUsage(); PrintUsage();
@ -101,7 +99,8 @@ int main (int argc, char *argv[])
status = NvMedia2DGetVersion(&version); status = NvMedia2DGetVersion(&version);
if (status == NVMEDIA_STATUS_OK) { if (status == NVMEDIA_STATUS_OK) {
printf("Library version: %u.%u\n", version.major, version.minor); printf("Library version: %u.%u\n", version.major, version.minor);
printf("Header version: %u.%u\n", NVMEDIA_2D_VERSION_MAJOR, NVMEDIA_2D_VERSION_MINOR); printf("Header version: %u.%u\n", NVMEDIA_2D_VERSION_MAJOR,
NVMEDIA_2D_VERSION_MINOR);
if ((version.major != NVMEDIA_2D_VERSION_MAJOR) || if ((version.major != NVMEDIA_2D_VERSION_MAJOR) ||
(version.minor != NVMEDIA_2D_VERSION_MINOR)) { (version.minor != NVMEDIA_2D_VERSION_MINOR)) {
printf("Library and Header mismatch!\n"); printf("Library and Header mismatch!\n");
@ -132,8 +131,7 @@ int main (int argc, char *argv[])
setupCuda(&ctx, cudaResObj, cudaDeviceId); setupCuda(&ctx, cudaResObj, cudaDeviceId);
GetTimeMicroSec(&operationStartTime); GetTimeMicroSec(&operationStartTime);
for (int i = 0; i < args.iterations; i++) for (int i = 0; i < args.iterations; i++) {
{
runNvMediaBlit2D(&args, &ctx); runNvMediaBlit2D(&args, &ctx);
runCudaOperation(&ctx, cudaResObj, cudaDeviceId); runCudaOperation(&ctx, cudaResObj, cudaDeviceId);
} }
@ -145,9 +143,15 @@ int main (int argc, char *argv[])
// NvMedia-CUDA operations without NvSCI APIs ends // NvMedia-CUDA operations without NvSCI APIs ends
processingTime = (double)(operationEndTime - operationStartTime) / 1000.0; processingTime = (double)(operationEndTime - operationStartTime) / 1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations without NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations); printf(
"Overall Processing time of NvMedia-CUDA Operations without NvSCI APIs "
"%.4f ms with %zu iterations\n",
processingTime, args.iterations);
processingTime = (double)(endTime - startTime) / 1000.0; processingTime = (double)(endTime - startTime) / 1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup without NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations); printf(
"Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup "
"without NvSCI APIs %.4f ms with %zu iterations\n",
processingTime, args.iterations);
NvSciBufObj dstNvSciBufobj, srcNvSciBufobj; NvSciBufObj dstNvSciBufobj, srcNvSciBufobj;
NvSciSyncObj nvMediaSignalerSyncObj, cudaSignalerSyncObj; NvSciSyncObj nvMediaSignalerSyncObj, cudaSignalerSyncObj;
@ -156,14 +160,17 @@ int main (int argc, char *argv[])
GetTimeMicroSec(&startTime); GetTimeMicroSec(&startTime);
setupNvMediaSignalerNvSciSync(&ctx, nvMediaSignalerSyncObj, cudaDeviceId); setupNvMediaSignalerNvSciSync(&ctx, nvMediaSignalerSyncObj, cudaDeviceId);
setupCudaSignalerNvSciSync(&ctx, cudaSignalerSyncObj, cudaDeviceId); setupCudaSignalerNvSciSync(&ctx, cudaSignalerSyncObj, cudaDeviceId);
setupNvMedia(&args, &ctx, srcNvSciBufobj, dstNvSciBufobj, nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId); setupNvMedia(&args, &ctx, srcNvSciBufobj, dstNvSciBufobj,
setupCuda(cudaExtResObj, dstNvSciBufobj, nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId); nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId);
setupCuda(cudaExtResObj, dstNvSciBufobj, nvMediaSignalerSyncObj,
cudaSignalerSyncObj, cudaDeviceId);
GetTimeMicroSec(&operationStartTime); GetTimeMicroSec(&operationStartTime);
for (int i = 0; i < args.iterations; i++) for (int i = 0; i < args.iterations; i++) {
{ runNvMediaBlit2D(&args, &ctx, nvMediaSignalerSyncObj, &cudaSignalerFence,
runNvMediaBlit2D(&args, &ctx, nvMediaSignalerSyncObj, &cudaSignalerFence, &nvMediaSignalerFence); &nvMediaSignalerFence);
runCudaOperation(cudaExtResObj, &nvMediaSignalerFence, &cudaSignalerFence, cudaDeviceId, args.iterations); runCudaOperation(cudaExtResObj, &nvMediaSignalerFence, &cudaSignalerFence,
cudaDeviceId, args.iterations);
} }
GetTimeMicroSec(&operationEndTime); GetTimeMicroSec(&operationEndTime);
@ -177,9 +184,15 @@ int main (int argc, char *argv[])
// NvMedia-CUDA operations via interop with NvSCI APIs ends // NvMedia-CUDA operations via interop with NvSCI APIs ends
processingTime = (double)(operationEndTime - operationStartTime) / 1000.0; processingTime = (double)(operationEndTime - operationStartTime) / 1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations with NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations); printf(
"Overall Processing time of NvMedia-CUDA Operations with NvSCI APIs %.4f "
"ms with %zu iterations\n",
processingTime, args.iterations);
processingTime = (double)(endTime - startTime) / 1000.0; processingTime = (double)(endTime - startTime) / 1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup with NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations); printf(
"Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup "
"with NvSCI APIs %.4f ms with %zu iterations\n",
processingTime, args.iterations);
if (ctx.i2d != NULL) { if (ctx.i2d != NULL) {
NvMedia2DDestroy(ctx.i2d); NvMedia2DDestroy(ctx.i2d);
@ -191,8 +204,7 @@ int main (int argc, char *argv[])
if (status == NVMEDIA_STATUS_OK) { if (status == NVMEDIA_STATUS_OK) {
return 0; return 0;
} } else {
else {
return 1; return 1;
} }
} }

View File

@ -38,16 +38,12 @@
#include "nvmedia_2d_nvscisync.h" #include "nvmedia_2d_nvscisync.h"
#include "nvsci_setup.h" #include "nvsci_setup.h"
NvMediaImage * NvMediaImage *NvMediaImageCreateUsingNvScibuf(NvMediaDevice *device,
NvMediaImageCreateUsingNvScibuf(
NvMediaDevice *device,
NvMediaSurfaceType type, NvMediaSurfaceType type,
const NvMediaSurfAllocAttr *attrs, const NvMediaSurfAllocAttr *attrs,
uint32_t numAttrs, uint32_t numAttrs, uint32_t flags,
uint32_t flags,
NvSciBufObj &bufobj, NvSciBufObj &bufobj,
int cudaDeviceId) int cudaDeviceId) {
{
NvSciBufModule module = NULL; NvSciBufModule module = NULL;
NvSciError err = NvSciError_Success; NvSciError err = NvSciError_Success;
NvMediaStatus status = NVMEDIA_STATUS_OK; NvMediaStatus status = NVMEDIA_STATUS_OK;
@ -55,8 +51,7 @@ NvMediaImageCreateUsingNvScibuf(
NvSciBufAttrList conflictlist = NULL; NvSciBufAttrList conflictlist = NULL;
NvSciBufAttrValAccessPerm access_perm = NvSciBufAccessPerm_ReadWrite; NvSciBufAttrValAccessPerm access_perm = NvSciBufAccessPerm_ReadWrite;
NvSciBufAttrKeyValuePair attr_kvp = {NvSciBufGeneralAttrKey_RequiredPerm, NvSciBufAttrKeyValuePair attr_kvp = {NvSciBufGeneralAttrKey_RequiredPerm,
&access_perm, &access_perm, sizeof(access_perm)};
sizeof(access_perm)};
NvSciBufAttrKeyValuePair pairArrayOut[10]; NvSciBufAttrKeyValuePair pairArrayOut[10];
NvMediaImage *image = NULL; NvMediaImage *image = NULL;
@ -79,13 +74,8 @@ NvMediaImageCreateUsingNvScibuf(
goto fail_cleanup; goto fail_cleanup;
} }
status = NvMediaImageFillNvSciBufAttrs(device, status =
type, NvMediaImageFillNvSciBufAttrs(device, type, attrs, numAttrs, 0, attrlist);
attrs,
numAttrs,
0,
attrlist);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: ImageFillSciBufAttrs failed. Error: %d \n", __func__, err); printf("%s: ImageFillSciBufAttrs failed. Error: %d \n", __func__, err);
@ -94,9 +84,7 @@ NvMediaImageCreateUsingNvScibuf(
setupNvSciBuf(bufobj, attrlist, cudaDeviceId); setupNvSciBuf(bufobj, attrlist, cudaDeviceId);
status = NvMediaImageCreateFromNvSciBuf(device, status = NvMediaImageCreateFromNvSciBuf(device, bufobj, &image);
bufobj,
&image);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: ImageCreatefromSciBuf failed. Error: %d \n", __func__, err); printf("%s: ImageCreatefromSciBuf failed. Error: %d \n", __func__, err);
@ -130,26 +118,21 @@ fail_cleanup:
/* Create NvMediaImage surface based on the input attributes. /* Create NvMediaImage surface based on the input attributes.
* Returns NVMEDIA_STATUS_OK on success * Returns NVMEDIA_STATUS_OK on success
*/ */
static NvMediaStatus static NvMediaStatus createSurface(Blit2DTest *ctx,
createSurface(Blit2DTest *ctx,
NvMediaSurfFormatAttr *surfFormatAttrs, NvMediaSurfFormatAttr *surfFormatAttrs,
NvMediaSurfAllocAttr *surfAllocAttrs, NvMediaSurfAllocAttr *surfAllocAttrs,
uint32_t numSurfAllocAttrs, uint32_t numSurfAllocAttrs,
NvMediaImage **image, NvMediaImage **image, NvSciBufObj &bufObj,
NvSciBufObj &bufObj, int cudaDeviceId) {
int cudaDeviceId)
{
NvMediaSurfaceType surfType; NvMediaSurfaceType surfType;
/* create source image */ /* create source image */
surfType = NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX); surfType =
NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
*image = NvMediaImageCreateUsingNvScibuf(ctx->device, /* device */ *image = NvMediaImageCreateUsingNvScibuf(ctx->device, /* device */
surfType, /* surface type */ surfType, /* surface type */
surfAllocAttrs, surfAllocAttrs, numSurfAllocAttrs, 0,
numSurfAllocAttrs, bufObj, cudaDeviceId);
0,
bufObj,
cudaDeviceId);
if (*image == NULL) { if (*image == NULL) {
printf("Unable to create image\n"); printf("Unable to create image\n");
@ -158,7 +141,8 @@ createSurface(Blit2DTest *ctx,
InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value); InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n", /* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value, surfType);*/ __func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value,
surfType);*/
return NVMEDIA_STATUS_OK; return NVMEDIA_STATUS_OK;
} }
@ -166,19 +150,18 @@ createSurface(Blit2DTest *ctx,
/* Create NvMediaImage surface based on the input attributes. /* Create NvMediaImage surface based on the input attributes.
* Returns NVMEDIA_STATUS_OK on success * Returns NVMEDIA_STATUS_OK on success
*/ */
static NvMediaStatus static NvMediaStatus createSurfaceNonNvSCI(
createSurfaceNonNvSCI(Blit2DTest *ctx, Blit2DTest *ctx, NvMediaSurfFormatAttr *surfFormatAttrs,
NvMediaSurfFormatAttr *surfFormatAttrs, NvMediaSurfAllocAttr *surfAllocAttrs, uint32_t numSurfAllocAttrs,
NvMediaSurfAllocAttr *surfAllocAttrs, NvMediaImage **image) {
uint32_t numSurfAllocAttrs,
NvMediaImage **image)
{
NvMediaSurfaceType surfType; NvMediaSurfaceType surfType;
/* create source image */ /* create source image */
surfType = NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX); surfType =
NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
*image = NvMediaImageCreateNew(ctx->device, surfType, surfAllocAttrs, numSurfAllocAttrs, 0); *image = NvMediaImageCreateNew(ctx->device, surfType, surfAllocAttrs,
numSurfAllocAttrs, 0);
if (*image == NULL) { if (*image == NULL) {
printf("Unable to create image\n"); printf("Unable to create image\n");
@ -187,21 +170,18 @@ createSurfaceNonNvSCI(Blit2DTest *ctx,
InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value); InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n", /* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value, surfType);*/ __func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value,
surfType);*/
return NVMEDIA_STATUS_OK; return NVMEDIA_STATUS_OK;
} }
/* Releases an NvMediaImage by passing it to NvMediaImageDestroy.
 * The pointer is forwarded as-is; no NULL check is done here.
 */
static void destroySurface(NvMediaImage *image) {
  NvMediaImageDestroy(image);
}

static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs* args, NvSciSyncObj &nvMediaSignalerSyncObj,
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence)
{
NvMediaStatus status; NvMediaStatus status;
NvMediaImageSurfaceMap surfaceMap; NvMediaImageSurfaceMap surfaceMap;
@ -219,26 +199,29 @@ static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs* args, NvSciSyncObj &
return status; return status;
} }
if ((args->srcRect.x1 <= args->srcRect.x0) || (args->srcRect.y1 <= args->srcRect.y0)) { if ((args->srcRect.x1 <= args->srcRect.x0) ||
(args->srcRect.y1 <= args->srcRect.y0)) {
ctx->srcRect = NULL; ctx->srcRect = NULL;
} else { } else {
ctx->srcRect = &(args->srcRect); ctx->srcRect = &(args->srcRect);
} }
if ((args->dstRect.x1 <= args->dstRect.x0) || (args->dstRect.y1 <= args->dstRect.y0)) { if ((args->dstRect.x1 <= args->dstRect.x0) ||
(args->dstRect.y1 <= args->dstRect.y0)) {
ctx->dstRect = NULL; ctx->dstRect = NULL;
} else { } else {
ctx->dstRect = &(args->dstRect); ctx->dstRect = &(args->dstRect);
} }
static int64_t launch = 0; static int64_t launch = 0;
// Insert the pre-fence only from the second launch onward, so that the
// NvMedia 2D blit waits for the CUDA signal on the fence.
if (launch) if (launch) {
{
status = NvMedia2DInsertPreNvSciSyncFence(ctx->i2d, preSyncFence); status = NvMedia2DInsertPreNvSciSyncFence(ctx->i2d, preSyncFence);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__, status); printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__,
status);
return status; return status;
} }
NvSciSyncFenceClear(preSyncFence); NvSciSyncFenceClear(preSyncFence);
@ -247,7 +230,8 @@ static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs* args, NvSciSyncObj &
status = NvMedia2DSetNvSciSyncObjforEOF(ctx->i2d, nvMediaSignalerSyncObj); status = NvMedia2DSetNvSciSyncObjforEOF(ctx->i2d, nvMediaSignalerSyncObj);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__, status); printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__,
status);
return status; return status;
} }
@ -265,7 +249,8 @@ static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs* args, NvSciSyncObj &
return status; return status;
} }
status = NvMedia2DGetEOFNvSciSyncFence(ctx->i2d, nvMediaSignalerSyncObj, fence); status =
NvMedia2DGetEOFNvSciSyncFence(ctx->i2d, nvMediaSignalerSyncObj, fence);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DGetEOFNvSciSyncFence failed: %d\n", __func__, status); printf("%s: NvMedia2DGetEOFNvSciSyncFence failed: %d\n", __func__, status);
return status; return status;
@ -274,8 +259,7 @@ static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs* args, NvSciSyncObj &
return NVMEDIA_STATUS_OK; return NVMEDIA_STATUS_OK;
} }
static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs* args) static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs *args) {
{
NvMediaStatus status; NvMediaStatus status;
NvMediaImageSurfaceMap surfaceMap; NvMediaImageSurfaceMap surfaceMap;
@ -293,13 +277,15 @@ static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs* args)
return status; return status;
} }
if ((args->srcRect.x1 <= args->srcRect.x0) || (args->srcRect.y1 <= args->srcRect.y0)) { if ((args->srcRect.x1 <= args->srcRect.x0) ||
(args->srcRect.y1 <= args->srcRect.y0)) {
ctx->srcRect = NULL; ctx->srcRect = NULL;
} else { } else {
ctx->srcRect = &(args->srcRect); ctx->srcRect = &(args->srcRect);
} }
if ((args->dstRect.x1 <= args->dstRect.x0) || (args->dstRect.y1 <= args->dstRect.y0)) { if ((args->dstRect.x1 <= args->dstRect.x0) ||
(args->dstRect.y1 <= args->dstRect.y0)) {
ctx->dstRect = NULL; ctx->dstRect = NULL;
} else { } else {
ctx->dstRect = &(args->dstRect); ctx->dstRect = &(args->dstRect);
@ -320,16 +306,13 @@ static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs* args)
/* Write output image into buffer */ /* Write output image into buffer */
ctx->bytesPerPixel = 1; ctx->bytesPerPixel = 1;
WriteImageToAllocatedBuffer(ctx, ctx->dstImage, WriteImageToAllocatedBuffer(ctx, ctx->dstImage, NVMEDIA_TRUE, NVMEDIA_FALSE,
NVMEDIA_TRUE,
NVMEDIA_FALSE,
ctx->bytesPerPixel); ctx->bytesPerPixel);
return NVMEDIA_STATUS_OK; return NVMEDIA_STATUS_OK;
} }
static void cleanup(Blit2DTest* ctx, NvMediaStatus status = NVMEDIA_STATUS_OK) static void cleanup(Blit2DTest *ctx, NvMediaStatus status = NVMEDIA_STATUS_OK) {
{
if (ctx->srcImage != NULL) { if (ctx->srcImage != NULL) {
NvMedia2DImageUnRegister(ctx->i2d, ctx->srcImage); NvMedia2DImageUnRegister(ctx->i2d, ctx->srcImage);
destroySurface(ctx->srcImage); destroySurface(ctx->srcImage);
@ -343,8 +326,8 @@ static void cleanup(Blit2DTest* ctx, NvMediaStatus status = NVMEDIA_STATUS_OK)
} }
} }
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj) void cleanupNvMedia(Blit2DTest *ctx, NvSciSyncObj &syncObj,
{ NvSciSyncObj &preSyncObj) {
NvMediaStatus status; NvMediaStatus status;
cleanup(ctx); cleanup(ctx);
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, syncObj); status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, syncObj);
@ -360,8 +343,7 @@ void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj &syncObj, NvSciSyncObj &preSyn
NvMediaImageNvSciBufDeinit(); NvMediaImageNvSciBufDeinit();
} }
void cleanupNvMedia(Blit2DTest* ctx) void cleanupNvMedia(Blit2DTest *ctx) {
{
cleanup(ctx); cleanup(ctx);
free(ctx->dstBuffPitches); free(ctx->dstBuffPitches);
free(ctx->dstBuffer); free(ctx->dstBuffer);
@ -369,9 +351,8 @@ void cleanupNvMedia(Blit2DTest* ctx)
} }
void setupNvMedia(TestArgs *args, Blit2DTest *ctx, NvSciBufObj &srcNvSciBufobj, void setupNvMedia(TestArgs *args, Blit2DTest *ctx, NvSciBufObj &srcNvSciBufobj,
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj, NvSciBufObj &dstNvSciBufobj, NvSciSyncObj &syncObj,
int cudaDeviceId) NvSciSyncObj &preSyncObj, int cudaDeviceId) {
{
NvMediaStatus status; NvMediaStatus status;
status = NvMediaImageNvSciBufInit(); status = NvMediaImageNvSciBufInit();
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
@ -380,39 +361,33 @@ void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj &srcNvSciBufobj,
} }
// Create source surface // Create source surface
status = createSurface(ctx, status = createSurface(ctx, args->srcSurfFormatAttrs, args->srcSurfAllocAttrs,
args->srcSurfFormatAttrs, args->numSurfAllocAttrs, &ctx->srcImage,
args->srcSurfAllocAttrs, srcNvSciBufobj, cudaDeviceId);
args->numSurfAllocAttrs,
&ctx->srcImage,
srcNvSciBufobj,
cudaDeviceId);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__); printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);
} }
// Create destination surface // Create destination surface
status = createSurface(ctx, status = createSurface(ctx, args->dstSurfFormatAttrs, args->dstSurfAllocAttrs,
args->dstSurfFormatAttrs, args->numSurfAllocAttrs, &ctx->dstImage,
args->dstSurfAllocAttrs, dstNvSciBufobj, cudaDeviceId);
args->numSurfAllocAttrs,
&ctx->dstImage,
dstNvSciBufobj,
cudaDeviceId);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__); printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);
} }
// Register source Surface // Register source Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ); status =
NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register source surface\n", __func__); printf("%s: Unable to register source surface\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);
} }
// Register destination Surface // Register destination Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage, NVMEDIA_ACCESS_MODE_READ_WRITE); status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage,
NVMEDIA_ACCESS_MODE_READ_WRITE);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register destination surface\n", __func__); printf("%s: Unable to register destination surface\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);
@ -423,48 +398,46 @@ void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj &srcNvSciBufobj,
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__); printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
} }
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_PRESYNCOBJ, preSyncObj); status =
NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_PRESYNCOBJ, preSyncObj);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__); printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
} }
} }
// Create NvMedia src & dst image without NvSciBuf // Create NvMedia src & dst image without NvSciBuf
void setupNvMedia(TestArgs* args, Blit2DTest* ctx) void setupNvMedia(TestArgs *args, Blit2DTest *ctx) {
{
NvMediaStatus status; NvMediaStatus status;
// Create source surface // Create source surface
status = createSurfaceNonNvSCI(ctx, status = createSurfaceNonNvSCI(ctx, args->srcSurfFormatAttrs,
args->srcSurfFormatAttrs,
args->srcSurfAllocAttrs, args->srcSurfAllocAttrs,
args->numSurfAllocAttrs, args->numSurfAllocAttrs, &ctx->srcImage);
&ctx->srcImage);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__); printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);
} }
// Create destination surface // Create destination surface
status = createSurfaceNonNvSCI(ctx, status = createSurfaceNonNvSCI(ctx, args->dstSurfFormatAttrs,
args->dstSurfFormatAttrs,
args->dstSurfAllocAttrs, args->dstSurfAllocAttrs,
args->numSurfAllocAttrs, args->numSurfAllocAttrs, &ctx->dstImage);
&ctx->dstImage);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__); printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);
} }
// Register source Surface // Register source Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ); status =
NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register source surface\n", __func__); printf("%s: Unable to register source surface\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);
} }
// Register destination Surface // Register destination Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage, NVMEDIA_ACCESS_MODE_READ_WRITE); status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage,
NVMEDIA_ACCESS_MODE_READ_WRITE);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register destination surface\n", __func__); printf("%s: Unable to register destination surface\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);
@ -472,14 +445,11 @@ void setupNvMedia(TestArgs* args, Blit2DTest* ctx)
// Allocate buffer for writing image & set image parameters in Blit2DTest. // Allocate buffer for writing image & set image parameters in Blit2DTest.
ctx->bytesPerPixel = 1; ctx->bytesPerPixel = 1;
AllocateBufferToWriteImage(ctx, AllocateBufferToWriteImage(ctx, ctx->dstImage, NVMEDIA_TRUE, /* uvOrderFlag */
ctx->dstImage,
NVMEDIA_TRUE, /* uvOrderFlag */
NVMEDIA_FALSE); /* appendFlag */ NVMEDIA_FALSE); /* appendFlag */
} }
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx) void runNvMediaBlit2D(TestArgs *args, Blit2DTest *ctx) {
{
// Blit2D function // Blit2D function
NvMediaStatus status = blit2DImageNonNvSCI(ctx, args); NvMediaStatus status = blit2DImageNonNvSCI(ctx, args);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
@ -488,11 +458,12 @@ void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx)
} }
} }
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj &nvMediaSignalerSyncObj, void runNvMediaBlit2D(TestArgs *args, Blit2DTest *ctx,
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence) NvSciSyncObj &nvMediaSignalerSyncObj,
{ NvSciSyncFence *preSyncFence, NvSciSyncFence *fence) {
// Blit2D function // Blit2D function
NvMediaStatus status = blit2DImage(ctx, args, nvMediaSignalerSyncObj, preSyncFence, fence); NvMediaStatus status =
blit2DImage(ctx, args, nvMediaSignalerSyncObj, preSyncFence, fence);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: Blit2D failed\n", __func__); printf("%s: Blit2D failed\n", __func__);
cleanup(ctx, status); cleanup(ctx, status);

View File

@ -25,7 +25,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef __NVMEDIA_PRODUCER_H__ #ifndef __NVMEDIA_PRODUCER_H__
#define __NVMEDIA_PRODUCER_H__ #define __NVMEDIA_PRODUCER_H__
#include "nvmedia_utils/cmdline.h" #include "nvmedia_utils/cmdline.h"
@ -40,9 +39,10 @@ void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj &syncObj,
NvSciSyncFence* preSyncFence, NvSciSyncFence* fence); NvSciSyncFence* preSyncFence, NvSciSyncFence* fence);
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx); void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx);
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj& srcNvSciBufobj, void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj& srcNvSciBufobj,
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj, NvSciBufObj& dstNvSciBufobj, NvSciSyncObj& syncObj,
int cudaDeviceId); NvSciSyncObj& preSyncObj, int cudaDeviceId);
void setupNvMedia(TestArgs* args, Blit2DTest* ctx); void setupNvMedia(TestArgs* args, Blit2DTest* ctx);
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj); void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj& syncObj,
NvSciSyncObj& preSyncObj);
void cleanupNvMedia(Blit2DTest* ctx); void cleanupNvMedia(Blit2DTest* ctx);
#endif #endif

View File

@ -32,7 +32,6 @@
#include "nvsci_setup.h" #include "nvsci_setup.h"
#include "nvmedia_2d_nvscisync.h" #include "nvmedia_2d_nvscisync.h"
#define checkNvSciErrors(call) \ #define checkNvSciErrors(call) \
do { \ do { \
NvSciError _status = call; \ NvSciError _status = call; \
@ -46,8 +45,8 @@
} \ } \
} while (0) } while (0)
void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId) void setupNvMediaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
{ int cudaDeviceId) {
NvSciSyncModule sciSyncModule; NvSciSyncModule sciSyncModule;
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule)); checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
NvSciSyncAttrList signalerAttrList, waiterAttrList; NvSciSyncAttrList signalerAttrList, waiterAttrList;
@ -57,18 +56,21 @@ void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int c
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList)); checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList)); checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(ctx->i2d, signalerAttrList, NVMEDIA_SIGNALER); NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(
ctx->i2d, signalerAttrList, NVMEDIA_SIGNALER);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__); printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
checkCudaErrors(cudaSetDevice(cudaDeviceId)); checkCudaErrors(cudaSetDevice(cudaDeviceId));
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(waiterAttrList, cudaDeviceId, cudaNvSciSyncAttrWait)); checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(waiterAttrList, cudaDeviceId,
cudaNvSciSyncAttrWait));
syncUnreconciledList[0] = signalerAttrList; syncUnreconciledList[0] = signalerAttrList;
syncUnreconciledList[1] = waiterAttrList; syncUnreconciledList[1] = waiterAttrList;
checkNvSciErrors(NvSciSyncAttrListReconcile(syncUnreconciledList, 2, &syncReconciledList, &syncConflictList)); checkNvSciErrors(NvSciSyncAttrListReconcile(
syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj)); checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
NvSciSyncAttrListFree(signalerAttrList); NvSciSyncAttrListFree(signalerAttrList);
@ -78,8 +80,8 @@ void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int c
} }
} }
void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId) void setupCudaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
{ int cudaDeviceId) {
NvSciSyncModule sciSyncModule; NvSciSyncModule sciSyncModule;
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule)); checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
NvSciSyncAttrList signalerAttrList, waiterAttrList; NvSciSyncAttrList signalerAttrList, waiterAttrList;
@ -89,18 +91,21 @@ void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cuda
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList)); checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList)); checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(ctx->i2d, waiterAttrList, NVMEDIA_WAITER); NvMediaStatus status =
NvMedia2DFillNvSciSyncAttrList(ctx->i2d, waiterAttrList, NVMEDIA_WAITER);
if (status != NVMEDIA_STATUS_OK) { if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__); printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
checkCudaErrors(cudaSetDevice(cudaDeviceId)); checkCudaErrors(cudaSetDevice(cudaDeviceId));
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(signalerAttrList, cudaDeviceId, cudaNvSciSyncAttrSignal)); checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(
signalerAttrList, cudaDeviceId, cudaNvSciSyncAttrSignal));
syncUnreconciledList[0] = signalerAttrList; syncUnreconciledList[0] = signalerAttrList;
syncUnreconciledList[1] = waiterAttrList; syncUnreconciledList[1] = waiterAttrList;
checkNvSciErrors(NvSciSyncAttrListReconcile(syncUnreconciledList, 2, &syncReconciledList, &syncConflictList)); checkNvSciErrors(NvSciSyncAttrListReconcile(
syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj)); checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
NvSciSyncAttrListFree(signalerAttrList); NvSciSyncAttrListFree(signalerAttrList);
@ -110,9 +115,8 @@ void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cuda
} }
} }
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist,
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist, int cudaDeviceId) int cudaDeviceId) {
{
CUuuid devUUID; CUuuid devUUID;
NvSciBufAttrList conflictlist; NvSciBufAttrList conflictlist;
NvSciBufAttrList bufUnreconciledAttrlist[1]; NvSciBufAttrList bufUnreconciledAttrlist[1];
@ -123,31 +127,30 @@ void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist, int c
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
NvSciBufAttrKeyValuePair attr_gpuid[] = {NvSciBufGeneralAttrKey_GpuId, &devUUID, sizeof(devUUID)}; NvSciBufAttrKeyValuePair attr_gpuid[] = {NvSciBufGeneralAttrKey_GpuId,
&devUUID, sizeof(devUUID)};
// set CUDA GPU ID to attribute list // set CUDA GPU ID to attribute list
checkNvSciErrors(NvSciBufAttrListSetAttrs(nvmediaAttrlist, attr_gpuid, sizeof(attr_gpuid)/sizeof(NvSciBufAttrKeyValuePair))); checkNvSciErrors(NvSciBufAttrListSetAttrs(
nvmediaAttrlist, attr_gpuid,
sizeof(attr_gpuid) / sizeof(NvSciBufAttrKeyValuePair)));
bufUnreconciledAttrlist[0] = nvmediaAttrlist; bufUnreconciledAttrlist[0] = nvmediaAttrlist;
checkNvSciErrors(NvSciBufAttrListReconcileAndObjAlloc(bufUnreconciledAttrlist, checkNvSciErrors(NvSciBufAttrListReconcileAndObjAlloc(
1, bufUnreconciledAttrlist, 1, &bufobj, &conflictlist));
&bufobj,
&conflictlist));
if (conflictlist != NULL) { if (conflictlist != NULL) {
NvSciBufAttrListFree(conflictlist); NvSciBufAttrListFree(conflictlist);
} }
} }
// Frees the NvSciBuf object referenced by Bufobj via NvSciBufObjFree.
// A NULL handle is ignored. The handle itself is not reset afterwards.
void cleanupNvSciBuf(NvSciBufObj &Bufobj) {
  if (Bufobj == NULL) {
    return;
  }
  NvSciBufObjFree(Bufobj);
}
void cleanupNvSciSync(NvSciSyncObj &syncObj) void cleanupNvSciSync(NvSciSyncObj &syncObj) {
{
if (NvSciSyncObjFree != NULL) { if (NvSciSyncObjFree != NULL) {
NvSciSyncObjFree(syncObj); NvSciSyncObjFree(syncObj);
} }

View File

@ -25,16 +25,18 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef __NVSCI_SETUP_H__ #ifndef __NVSCI_SETUP_H__
#define __NVSCI_SETUP_H__ #define __NVSCI_SETUP_H__
#include "nvmedia_utils/cmdline.h" #include "nvmedia_utils/cmdline.h"
#include <nvscibuf.h> #include <nvscibuf.h>
#include <nvscisync.h> #include <nvscisync.h>
void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId); void setupNvMediaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId); int cudaDeviceId);
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist, int cudaDeviceId); void setupCudaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
int cudaDeviceId);
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist,
int cudaDeviceId);
void cleanupNvSciBuf(NvSciBufObj &Bufobj); void cleanupNvSciBuf(NvSciBufObj &Bufobj);
void cleanupNvSciSync(NvSciSyncObj &syncObj); void cleanupNvSciSync(NvSciSyncObj &syncObj);
#endif #endif