cudaNvSciNvMedia plane offset correction

This commit is contained in:
Rutwik Choughule 2021-06-10 17:33:24 +05:30
parent ba5a483c6e
commit 2aeaf51b11
7 changed files with 907 additions and 863 deletions

View File

@ -36,7 +36,6 @@
// Enable this to 1 if require cuda processed output to ppm file.
#define WRITE_OUTPUT_IMAGE 0
#define checkNvSciErrors(call) \
do { \
NvSciError _status = call; \
@ -48,325 +47,382 @@
fflush(stdout); \
exit(EXIT_FAILURE); \
} \
} while (0)
} while (0)
__global__ static void yuvToGrayscale(cudaSurfaceObject_t surfaceObject, unsigned int *dstImage, int32_t imageWidth, int32_t imageHeight)
{
size_t x = blockIdx.x*blockDim.x + threadIdx.x;
size_t y = blockIdx.y*blockDim.y + threadIdx.y;
__global__ static void yuvToGrayscale(cudaSurfaceObject_t surfaceObject,
unsigned int *dstImage,
int32_t imageWidth, int32_t imageHeight) {
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
uchar4* dstImageUchar4 = (uchar4*)dstImage;
for ( ; x < imageWidth && y < imageHeight; x += gridDim.x*blockDim.x, y += gridDim.y*blockDim.y)
{
int colInBytes = x * sizeof(unsigned char);
unsigned char luma = surf2Dread<unsigned char>(surfaceObject, colInBytes, y);
uchar4 grayscalePix = make_uchar4(luma, luma, luma, 0);
uchar4 *dstImageUchar4 = (uchar4 *)dstImage;
for (; x < imageWidth && y < imageHeight;
x += gridDim.x * blockDim.x, y += gridDim.y * blockDim.y) {
int colInBytes = x * sizeof(unsigned char);
unsigned char luma =
surf2Dread<unsigned char>(surfaceObject, colInBytes, y);
uchar4 grayscalePix = make_uchar4(luma, luma, luma, 0);
dstImageUchar4[y*imageWidth + x] = grayscalePix;
}
dstImageUchar4[y * imageWidth + x] = grayscalePix;
}
}
static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem, NvSciSyncObj &syncObj)
{
cudaExternalSemaphoreHandleDesc extSemDesc;
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = (void *)syncObj;
static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem,
NvSciSyncObj &syncObj) {
cudaExternalSemaphoreHandleDesc extSemDesc;
memset(&extSemDesc, 0, sizeof(extSemDesc));
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
extSemDesc.handle.nvSciSyncObj = (void *)syncObj;
checkCudaErrors(cudaImportExternalSemaphore(&extSem, &extSemDesc));
checkCudaErrors(cudaImportExternalSemaphore(&extSem, &extSemDesc));
}
static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem, NvSciSyncFence *fence,
cudaStream_t stream) {
cudaExternalSemaphoreWaitParams waitParams;
memset(&waitParams, 0, sizeof(waitParams));
// For cross-process signaler-waiter applications need to use NvSciIpc
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
// across process. This step is optional in single-process.
waitParams.params.nvSciSync.fence = (void *)fence;
waitParams.flags = 0;
static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem,
NvSciSyncFence *fence, cudaStream_t stream) {
cudaExternalSemaphoreWaitParams waitParams;
memset(&waitParams, 0, sizeof(waitParams));
// For cross-process signaler-waiter applications need to use NvSciIpc
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
// across process. This step is optional in single-process.
waitParams.params.nvSciSync.fence = (void *)fence;
waitParams.flags = 0;
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, stream));
checkCudaErrors(
cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, stream));
}
static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem, NvSciSyncFence *fence,
cudaStream_t stream) {
cudaExternalSemaphoreSignalParams signalParams;
memset(&signalParams, 0, sizeof(signalParams));
// For cross-process signaler-waiter applications need to use NvSciIpc
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
// across process. This step is optional in single-process.
signalParams.params.nvSciSync.fence = (void *)fence;
signalParams.flags = 0;
static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem,
NvSciSyncFence *fence,
cudaStream_t stream) {
cudaExternalSemaphoreSignalParams signalParams;
memset(&signalParams, 0, sizeof(signalParams));
// For cross-process signaler-waiter applications need to use NvSciIpc
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
// across process. This step is optional in single-process.
signalParams.params.nvSciSync.fence = (void *)fence;
signalParams.flags = 0;
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams,
1, stream));
checkCudaErrors(
cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams, 1, stream));
}
static void yuvToGrayscaleCudaKernel(cudaExternalResInterop& cudaExtResObj, int32_t imageWidth, int32_t imageHeight)
{
static void yuvToGrayscaleCudaKernel(cudaExternalResInterop &cudaExtResObj,
int32_t imageWidth, int32_t imageHeight) {
#if WRITE_OUTPUT_IMAGE
unsigned int *h_dstImage;
checkCudaErrors(cudaMallocHost(&h_dstImage, sizeof(unsigned int)*imageHeight*imageWidth));
unsigned int *h_dstImage;
checkCudaErrors(cudaMallocHost(
&h_dstImage, sizeof(unsigned int) * imageHeight * imageWidth));
#endif
dim3 block(16, 16, 1);
dim3 grid((imageWidth/block.x)+1, (imageHeight/block.y)+1, 1);
dim3 block(16, 16, 1);
dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1);
yuvToGrayscale<<<grid, block, 0, cudaExtResObj.stream>>>(cudaExtResObj.cudaSurfaceNvmediaBuf[0], cudaExtResObj.d_outputImage, imageWidth, imageHeight);
yuvToGrayscale<<<grid, block, 0, cudaExtResObj.stream>>>(
cudaExtResObj.cudaSurfaceNvmediaBuf[0], cudaExtResObj.d_outputImage,
imageWidth, imageHeight);
#if WRITE_OUTPUT_IMAGE
checkCudaErrors(cudaMemcpyAsync(h_dstImage, cudaExtResObj.d_outputImage, sizeof(unsigned int)*imageHeight*imageWidth, cudaMemcpyDeviceToHost, cudaExtResObj.stream));
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
char outputFilename[1024];
std::string image_filename = "Grayscale";
strcpy(outputFilename, image_filename.c_str());
strcpy(outputFilename + image_filename.length(), "_nvsci_out.ppm");
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth, imageHeight);
printf("Wrote '%s'\n", outputFilename);
checkCudaErrors(cudaFreeHost(h_dstImage));
checkCudaErrors(
cudaMemcpyAsync(h_dstImage, cudaExtResObj.d_outputImage,
sizeof(unsigned int) * imageHeight * imageWidth,
cudaMemcpyDeviceToHost, cudaExtResObj.stream));
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
char outputFilename[1024];
std::string image_filename = "Grayscale";
strcpy(outputFilename, image_filename.c_str());
strcpy(outputFilename + image_filename.length(), "_nvsci_out.ppm");
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth,
imageHeight);
printf("Wrote '%s'\n", outputFilename);
checkCudaErrors(cudaFreeHost(h_dstImage));
#endif
}
static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj, NvSciBufObj& inputBufObj)
{
NvSciBufModule module = NULL;
NvSciBufAttrList attrlist = NULL;
NvSciBufAttrKeyValuePair pairArrayOut[10];
static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj,
NvSciBufObj &inputBufObj) {
NvSciBufModule module = NULL;
NvSciBufAttrList attrlist = NULL;
NvSciBufAttrKeyValuePair pairArrayOut[10];
checkNvSciErrors(NvSciBufModuleOpen(&module));
checkNvSciErrors(NvSciBufAttrListCreate(module, &attrlist));
checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &attrlist));
checkNvSciErrors(NvSciBufModuleOpen(&module));
checkNvSciErrors(NvSciBufAttrListCreate(module, &attrlist));
checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &attrlist));
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10);
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10);
int numAttrs = 0;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Size;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneChannelCount;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneCount;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneWidth;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneHeight;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Layout;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;
int numAttrs = 0;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Size;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneChannelCount;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneCount;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneWidth;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneHeight;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Layout;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneOffset;
checkNvSciErrors(NvSciBufAttrListGetAttrs(attrlist, pairArrayOut, numAttrs));
checkNvSciErrors(NvSciBufAttrListGetAttrs(attrlist, pairArrayOut, numAttrs));
uint64_t size = *(uint64_t *)pairArrayOut[0].value;
uint8_t channelCount = *(uint8_t *)pairArrayOut[1].value;
cudaExtResObj.planeCount = *(int32_t *)pairArrayOut[2].value;
cudaExtResObj.imageWidth = (int32_t*) malloc(sizeof(int32_t)*cudaExtResObj.planeCount);
cudaExtResObj.imageHeight = (int32_t*) malloc(sizeof(int32_t)*cudaExtResObj.planeCount);
uint64_t size = *(uint64_t *)pairArrayOut[0].value;
uint8_t channelCount = *(uint8_t *)pairArrayOut[1].value;
cudaExtResObj.planeCount = *(int32_t *)pairArrayOut[2].value;
cudaExtResObj.imageWidth =
(int32_t *)malloc(sizeof(int32_t) * cudaExtResObj.planeCount);
cudaExtResObj.imageHeight =
(int32_t *)malloc(sizeof(int32_t) * cudaExtResObj.planeCount);
cudaExtResObj.planeOffset =
(uint64_t *)malloc(sizeof(uint64_t) * cudaExtResObj.planeCount);
memcpy(cudaExtResObj.imageWidth, (int32_t *)pairArrayOut[3].value, cudaExtResObj.planeCount * sizeof(int32_t));
memcpy(cudaExtResObj.imageHeight, (int32_t *)pairArrayOut[4].value, cudaExtResObj.planeCount * sizeof(int32_t));
memcpy(cudaExtResObj.imageWidth, (int32_t *)pairArrayOut[3].value,
cudaExtResObj.planeCount * sizeof(int32_t));
memcpy(cudaExtResObj.imageHeight, (int32_t *)pairArrayOut[4].value,
cudaExtResObj.planeCount * sizeof(int32_t));
memcpy(cudaExtResObj.planeOffset, (uint64_t *)pairArrayOut[7].value,
cudaExtResObj.planeCount * sizeof(uint64_t));
NvSciBufAttrValImageLayoutType layout = *(NvSciBufAttrValImageLayoutType *)pairArrayOut[5].value;
uint32_t bitsPerPixel = *(uint32_t*)pairArrayOut[6].value;
NvSciBufAttrValImageLayoutType layout =
*(NvSciBufAttrValImageLayoutType *)pairArrayOut[5].value;
uint32_t bitsPerPixel = *(uint32_t *)pairArrayOut[6].value;
if (layout != NvSciBufImage_BlockLinearType) {
printf("Image layout is not block linear.. waiving execution\n");
exit(EXIT_WAIVED);
if (layout != NvSciBufImage_BlockLinearType) {
printf("Image layout is not block linear.. waiving execution\n");
exit(EXIT_WAIVED);
}
cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = inputBufObj;
memHandleDesc.size = size;
checkCudaErrors(
cudaImportExternalMemory(&cudaExtResObj.extMemImageBuf, &memHandleDesc));
cudaExtResObj.d_mipmapArray = (cudaMipmappedArray_t *)malloc(
sizeof(cudaMipmappedArray_t) * cudaExtResObj.planeCount);
for (int i = 0; i < cudaExtResObj.planeCount; i++) {
cudaExtent extent = {};
memset(&extent, 0, sizeof(extent));
extent.width = cudaExtResObj.imageWidth[i];
extent.height = cudaExtResObj.imageHeight[i];
extent.depth = 0;
cudaChannelFormatDesc desc;
switch (channelCount) {
case 1:
default:
desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0,
cudaChannelFormatKindUnsigned);
break;
case 2:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0,
cudaChannelFormatKindUnsigned);
break;
case 3:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel,
0, cudaChannelFormatKindUnsigned);
break;
case 4:
desc =
cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel,
bitsPerPixel, cudaChannelFormatKindUnsigned);
break;
}
cudaExternalMemoryHandleDesc memHandleDesc;
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = inputBufObj;
memHandleDesc.size = size;
checkCudaErrors(cudaImportExternalMemory(&cudaExtResObj.extMemImageBuf, &memHandleDesc));
cudaExtResObj.d_mipmapArray = (cudaMipmappedArray_t*) malloc(sizeof(cudaMipmappedArray_t) * cudaExtResObj.planeCount);
for (int i = 0; i < cudaExtResObj.planeCount; i++) {
cudaExtent extent = {};
memset(&extent, 0, sizeof(extent));
extent.width = cudaExtResObj.imageWidth[i];
extent.height = cudaExtResObj.imageHeight[i];
extent.depth = 0;
cudaChannelFormatDesc desc;
switch (channelCount) {
case 1:
default:
desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0, cudaChannelFormatKindUnsigned);
break;
case 2:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0, cudaChannelFormatKindUnsigned);
break;
case 3:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel, 0, cudaChannelFormatKindUnsigned);
break;
case 4:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel, bitsPerPixel, cudaChannelFormatKindUnsigned);
break;
}
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
mipmapDesc.offset = 0;
mipmapDesc.formatDesc = desc;
mipmapDesc.extent = extent;
mipmapDesc.flags = 0;
mipmapDesc.numLevels = 1;
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&cudaExtResObj.d_mipmapArray[i], cudaExtResObj.extMemImageBuf, &mipmapDesc));
}
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
mipmapDesc.offset = cudaExtResObj.planeOffset[i];
mipmapDesc.formatDesc = desc;
mipmapDesc.extent = extent;
mipmapDesc.flags = 0;
mipmapDesc.numLevels = 1;
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(
&cudaExtResObj.d_mipmapArray[i], cudaExtResObj.extMemImageBuf,
&mipmapDesc));
}
}
static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray)
{
cudaResourceDesc resourceDesc;
memset(&resourceDesc, 0, sizeof(resourceDesc));
resourceDesc.resType = cudaResourceTypeArray;
resourceDesc.res.array.array = d_mipLevelArray;
static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray) {
cudaResourceDesc resourceDesc;
memset(&resourceDesc, 0, sizeof(resourceDesc));
resourceDesc.resType = cudaResourceTypeArray;
resourceDesc.res.array.array = d_mipLevelArray;
cudaSurfaceObject_t surfaceObject;
checkCudaErrors(cudaCreateSurfaceObject(&surfaceObject, &resourceDesc));
return surfaceObject;
cudaSurfaceObject_t surfaceObject;
checkCudaErrors(cudaCreateSurfaceObject(&surfaceObject, &resourceDesc));
return surfaceObject;
}
static cudaStream_t createCudaStream(int deviceId)
{
checkCudaErrors(cudaSetDevice(deviceId));
cudaStream_t stream;
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
return stream;
static cudaStream_t createCudaStream(int deviceId) {
checkCudaErrors(cudaSetDevice(deviceId));
cudaStream_t stream;
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
return stream;
}
// CUDA setup buffers/synchronization objects for interop via NvSci API.
void setupCuda(cudaExternalResInterop& cudaExtResObj, NvSciBufObj& inputBufObj,
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj, int deviceId)
{
checkCudaErrors(cudaSetDevice(deviceId));
cudaImportNvSciSync(cudaExtResObj.waitSem, syncObj);
cudaImportNvSciSync(cudaExtResObj.signalSem, cudaSignalerSyncObj);
void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj,
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj,
int deviceId) {
checkCudaErrors(cudaSetDevice(deviceId));
cudaImportNvSciSync(cudaExtResObj.waitSem, syncObj);
cudaImportNvSciSync(cudaExtResObj.signalSem, cudaSignalerSyncObj);
cudaImportNvSciImage(cudaExtResObj, inputBufObj);
cudaExtResObj.d_mipLevelArray = (cudaArray_t *) malloc(sizeof(cudaArray_t) * cudaExtResObj.planeCount);
cudaExtResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *) malloc(sizeof(cudaSurfaceObject_t) * cudaExtResObj.planeCount);
cudaImportNvSciImage(cudaExtResObj, inputBufObj);
cudaExtResObj.d_mipLevelArray =
(cudaArray_t *)malloc(sizeof(cudaArray_t) * cudaExtResObj.planeCount);
cudaExtResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *)malloc(
sizeof(cudaSurfaceObject_t) * cudaExtResObj.planeCount);
for (int i = 0; i < cudaExtResObj.planeCount; ++i) {
uint32_t mipLevelId = 0;
checkCudaErrors(cudaGetMipmappedArrayLevel(&cudaExtResObj.d_mipLevelArray[i], cudaExtResObj.d_mipmapArray[i], mipLevelId));
cudaExtResObj.cudaSurfaceNvmediaBuf[i] = createCudaSurface(cudaExtResObj.d_mipLevelArray[i]);
}
for (int i = 0; i < cudaExtResObj.planeCount; ++i) {
uint32_t mipLevelId = 0;
checkCudaErrors(
cudaGetMipmappedArrayLevel(&cudaExtResObj.d_mipLevelArray[i],
cudaExtResObj.d_mipmapArray[i], mipLevelId));
cudaExtResObj.cudaSurfaceNvmediaBuf[i] =
createCudaSurface(cudaExtResObj.d_mipLevelArray[i]);
}
cudaExtResObj.stream = createCudaStream(deviceId);
checkCudaErrors(cudaMalloc(&cudaExtResObj.d_outputImage, sizeof(unsigned int) * cudaExtResObj.imageWidth[0] * cudaExtResObj.imageHeight[0]));
cudaExtResObj.stream = createCudaStream(deviceId);
checkCudaErrors(cudaMalloc(&cudaExtResObj.d_outputImage,
sizeof(unsigned int) *
cudaExtResObj.imageWidth[0] *
cudaExtResObj.imageHeight[0]));
}
// CUDA clean up buffers used **with** NvSci API.
void cleanupCuda(cudaExternalResInterop& cudaExtResObj)
{
for (int i=0; i < cudaExtResObj.planeCount; i++) {
checkCudaErrors(cudaDestroySurfaceObject(cudaExtResObj.cudaSurfaceNvmediaBuf[i]));
checkCudaErrors(cudaFreeMipmappedArray(cudaExtResObj.d_mipmapArray[i]));
}
free(cudaExtResObj.d_mipmapArray);
free(cudaExtResObj.d_mipLevelArray);
free(cudaExtResObj.cudaSurfaceNvmediaBuf);
free(cudaExtResObj.imageWidth);
free(cudaExtResObj.imageHeight);
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.waitSem));
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.signalSem));
checkCudaErrors(cudaDestroyExternalMemory(cudaExtResObj.extMemImageBuf));
checkCudaErrors(cudaStreamDestroy(cudaExtResObj.stream));
checkCudaErrors(cudaFree(cudaExtResObj.d_outputImage));
void cleanupCuda(cudaExternalResInterop &cudaExtResObj) {
for (int i = 0; i < cudaExtResObj.planeCount; i++) {
checkCudaErrors(
cudaDestroySurfaceObject(cudaExtResObj.cudaSurfaceNvmediaBuf[i]));
checkCudaErrors(cudaFreeMipmappedArray(cudaExtResObj.d_mipmapArray[i]));
}
free(cudaExtResObj.d_mipmapArray);
free(cudaExtResObj.d_mipLevelArray);
free(cudaExtResObj.cudaSurfaceNvmediaBuf);
free(cudaExtResObj.imageWidth);
free(cudaExtResObj.imageHeight);
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.waitSem));
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.signalSem));
checkCudaErrors(cudaDestroyExternalMemory(cudaExtResObj.extMemImageBuf));
checkCudaErrors(cudaStreamDestroy(cudaExtResObj.stream));
checkCudaErrors(cudaFree(cudaExtResObj.d_outputImage));
}
void runCudaOperation(cudaExternalResInterop& cudaExtResObj, NvSciSyncFence *cudaWaitFence,
NvSciSyncFence *cudaSignalFence, int deviceId, int iterations)
{
checkCudaErrors(cudaSetDevice(deviceId));
static int64_t launch = 0;
void runCudaOperation(cudaExternalResInterop &cudaExtResObj,
NvSciSyncFence *cudaWaitFence,
NvSciSyncFence *cudaSignalFence, int deviceId,
int iterations) {
checkCudaErrors(cudaSetDevice(deviceId));
static int64_t launch = 0;
waitExternalSemaphore(cudaExtResObj.waitSem, cudaWaitFence, cudaExtResObj.stream);
waitExternalSemaphore(cudaExtResObj.waitSem, cudaWaitFence,
cudaExtResObj.stream);
// run cuda kernel over surface object of the LUMA surface part to extract grayscale.
yuvToGrayscaleCudaKernel(cudaExtResObj, cudaExtResObj.imageWidth[0], cudaExtResObj.imageHeight[0]);
// run cuda kernel over surface object of the LUMA surface part to extract
// grayscale.
yuvToGrayscaleCudaKernel(cudaExtResObj, cudaExtResObj.imageWidth[0],
cudaExtResObj.imageHeight[0]);
// signal fence till the second last iterations for NvMedia2DBlit to wait for cuda signal
// and for final iteration as there is no corresponding NvMedia operation pending
// therefore we end with cudaStreamSynchronize()
if (launch < iterations-1) {
signalExternalSemaphore(cudaExtResObj.signalSem, cudaSignalFence, cudaExtResObj.stream);
}
else {
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
}
launch++;
// signal fence till the second last iterations for NvMedia2DBlit to wait for
// cuda signal and for final iteration as there is no corresponding NvMedia
// operation pending therefore we end with cudaStreamSynchronize()
if (launch < iterations - 1) {
signalExternalSemaphore(cudaExtResObj.signalSem, cudaSignalFence,
cudaExtResObj.stream);
} else {
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
}
launch++;
}
// CUDA imports and operates on NvSci buffer/synchronization objects
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId)
{
checkCudaErrors(cudaSetDevice(deviceId));
cudaResObj.d_yuvArray = (cudaArray_t *) malloc(sizeof(cudaArray_t) * ctx->numSurfaces);
cudaResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t*) malloc(sizeof(cudaSurfaceObject_t) * ctx->numSurfaces);
cudaChannelFormatDesc channelDesc;
switch (ctx->bytesPerPixel) {
case 1:
default:
channelDesc = cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
break;
}
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId) {
checkCudaErrors(cudaSetDevice(deviceId));
cudaResObj.d_yuvArray =
(cudaArray_t *)malloc(sizeof(cudaArray_t) * ctx->numSurfaces);
cudaResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *)malloc(
sizeof(cudaSurfaceObject_t) * ctx->numSurfaces);
cudaChannelFormatDesc channelDesc;
switch (ctx->bytesPerPixel) {
case 1:
default:
channelDesc =
cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
break;
}
for(int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(cudaMallocArray(&cudaResObj.d_yuvArray[k], &channelDesc, ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
ctx->heightSurface * ctx->yScalePtr[k]));
cudaResObj.cudaSurfaceNvmediaBuf[k] = createCudaSurface(cudaResObj.d_yuvArray[k]);
}
checkCudaErrors(cudaMalloc(&cudaResObj.d_outputImage, sizeof(unsigned int) * ctx->widthSurface * ctx->heightSurface));
for (int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(cudaMallocArray(
&cudaResObj.d_yuvArray[k], &channelDesc,
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
ctx->heightSurface * ctx->yScalePtr[k]));
cudaResObj.cudaSurfaceNvmediaBuf[k] =
createCudaSurface(cudaResObj.d_yuvArray[k]);
}
checkCudaErrors(cudaMalloc(
&cudaResObj.d_outputImage,
sizeof(unsigned int) * ctx->widthSurface * ctx->heightSurface));
cudaResObj.stream = createCudaStream(deviceId);
cudaResObj.stream = createCudaStream(deviceId);
}
// CUDA clean up buffers used **without** NvSci API.
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj)
{
for(int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(cudaDestroySurfaceObject(cudaResObj.cudaSurfaceNvmediaBuf[k]));
checkCudaErrors(cudaFreeArray(cudaResObj.d_yuvArray[k]));
}
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj) {
for (int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(
cudaDestroySurfaceObject(cudaResObj.cudaSurfaceNvmediaBuf[k]));
checkCudaErrors(cudaFreeArray(cudaResObj.d_yuvArray[k]));
}
free(cudaResObj.cudaSurfaceNvmediaBuf);
free(cudaResObj.cudaSurfaceNvmediaBuf);
checkCudaErrors(cudaStreamDestroy(cudaResObj.stream));
checkCudaErrors(cudaFree(cudaResObj.d_outputImage));
checkCudaErrors(cudaStreamDestroy(cudaResObj.stream));
checkCudaErrors(cudaFree(cudaResObj.d_outputImage));
}
static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj, int deviceId, int32_t imageWidth, int32_t imageHeight)
{
static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj,
int deviceId, int32_t imageWidth,
int32_t imageHeight) {
#if WRITE_OUTPUT_IMAGE
unsigned int *h_dstImage;
checkCudaErrors(cudaMallocHost(&h_dstImage, sizeof(unsigned int)*imageHeight*imageWidth));
unsigned int *h_dstImage;
checkCudaErrors(cudaMallocHost(
&h_dstImage, sizeof(unsigned int) * imageHeight * imageWidth));
#endif
dim3 block(16, 16, 1);
dim3 grid((imageWidth/block.x)+1, (imageHeight/block.y)+1, 1);
dim3 block(16, 16, 1);
dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1);
yuvToGrayscale<<<grid, block, 0, cudaResObj.stream>>>(cudaResObj.cudaSurfaceNvmediaBuf[0], cudaResObj.d_outputImage, imageWidth, imageHeight);
yuvToGrayscale<<<grid, block, 0, cudaResObj.stream>>>(
cudaResObj.cudaSurfaceNvmediaBuf[0], cudaResObj.d_outputImage, imageWidth,
imageHeight);
#if WRITE_OUTPUT_IMAGE
checkCudaErrors(cudaMemcpyAsync(h_dstImage, cudaResObj.d_outputImage, sizeof(unsigned int)*imageHeight*imageWidth, cudaMemcpyDeviceToHost, cudaResObj.stream));
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
char outputFilename[1024];
std::string image_filename = "Grayscale";
strcpy(outputFilename, image_filename.c_str());
strcpy(outputFilename + image_filename.length(), "_non-nvsci_out.ppm");
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth, imageHeight);
printf("Wrote '%s'\n", outputFilename);
checkCudaErrors(cudaFreeHost(h_dstImage));
checkCudaErrors(
cudaMemcpyAsync(h_dstImage, cudaResObj.d_outputImage,
sizeof(unsigned int) * imageHeight * imageWidth,
cudaMemcpyDeviceToHost, cudaResObj.stream));
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
char outputFilename[1024];
std::string image_filename = "Grayscale";
strcpy(outputFilename, image_filename.c_str());
strcpy(outputFilename + image_filename.length(), "_non-nvsci_out.ppm");
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth,
imageHeight);
printf("Wrote '%s'\n", outputFilename);
checkCudaErrors(cudaFreeHost(h_dstImage));
#else
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
#endif
}
// CUDA operates **without** NvSci APIs buffer/synchronization objects.
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId)
{
for(int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(cudaMemcpy2DToArray(cudaResObj.d_yuvArray[k], 0, 0, ctx->dstBuff[k],
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
ctx->heightSurface * ctx->yScalePtr[k], cudaMemcpyHostToDevice));
}
// run cuda kernel over surface object of the LUMA surface part to extract grayscale.
yuvToGrayscaleCudaKernelNonNvSci(cudaResObj, deviceId, ctx->widthSurface, ctx->heightSurface);
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj,
int deviceId) {
for (int k = 0; k < ctx->numSurfaces; k++) {
checkCudaErrors(cudaMemcpy2DToArray(
cudaResObj.d_yuvArray[k], 0, 0, ctx->dstBuff[k],
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
ctx->heightSurface * ctx->yScalePtr[k], cudaMemcpyHostToDevice));
}
// run cuda kernel over surface object of the LUMA surface part to extract
// grayscale.
yuvToGrayscaleCudaKernelNonNvSci(cudaResObj, deviceId, ctx->widthSurface,
ctx->heightSurface);
}

View File

@ -25,7 +25,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CUDA_BUFIMPORT_KERNEL_H__
#define __CUDA_BUFIMPORT_KERNEL_H__
@ -35,38 +34,39 @@
#include "nvscisync.h"
#include "nvmedia_utils/cmdline.h"
struct cudaExternalResInterop
{
cudaMipmappedArray_t *d_mipmapArray;
cudaArray_t *d_mipLevelArray;
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
cudaStream_t stream;
cudaExternalMemory_t extMemImageBuf;
cudaExternalSemaphore_t waitSem;
cudaExternalSemaphore_t signalSem;
struct cudaExternalResInterop {
cudaMipmappedArray_t *d_mipmapArray;
cudaArray_t *d_mipLevelArray;
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
cudaStream_t stream;
cudaExternalMemory_t extMemImageBuf;
cudaExternalSemaphore_t waitSem;
cudaExternalSemaphore_t signalSem;
int32_t planeCount;
int32_t *imageWidth;
int32_t *imageHeight;
unsigned int *d_outputImage;
int32_t planeCount;
uint64_t *planeOffset;
int32_t *imageWidth;
int32_t *imageHeight;
unsigned int *d_outputImage;
};
struct cudaResources
{
cudaArray_t *d_yuvArray;
cudaStream_t stream;
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
unsigned int *d_outputImage;
struct cudaResources {
cudaArray_t *d_yuvArray;
cudaStream_t stream;
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
unsigned int *d_outputImage;
};
void runCudaOperation(cudaExternalResInterop& cudaExtResObj, NvSciSyncFence *fence,
NvSciSyncFence *cudaSignalfence, int deviceId, int iterations);
void runCudaOperation(cudaExternalResInterop &cudaExtResObj,
NvSciSyncFence *fence, NvSciSyncFence *cudaSignalfence,
int deviceId, int iterations);
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId);
void setupCuda(cudaExternalResInterop& cudaExtResObj, NvSciBufObj& inputBufObj,
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj, int deviceId);
void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj,
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj,
int deviceId);
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId);
void cleanupCuda(cudaExternalResInterop& cudaObjs);
void cleanupCuda(cudaExternalResInterop &cudaObjs);
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj);
#endif

View File

@ -55,144 +55,156 @@
fflush(stdout); \
exit(EXIT_FAILURE); \
} \
} while (0)
} while (0)
static void cleanup(Blit2DTest* ctx, NvMediaStatus status)
{
if (ctx->i2d != NULL) {
NvMedia2DDestroy(ctx->i2d);
}
static void cleanup(Blit2DTest* ctx, NvMediaStatus status) {
if (ctx->i2d != NULL) {
NvMedia2DDestroy(ctx->i2d);
}
if (ctx->device != NULL) {
NvMediaDeviceDestroy(ctx->device);
}
if (status != NVMEDIA_STATUS_OK) {
exit(EXIT_FAILURE);
}
if (ctx->device != NULL) {
NvMediaDeviceDestroy(ctx->device);
}
if (status != NVMEDIA_STATUS_OK) {
exit(EXIT_FAILURE);
}
}
int main(int argc, char* argv[]) {
TestArgs args;
Blit2DTest ctx;
NvMediaStatus status = NVMEDIA_STATUS_ERROR;
NvSciSyncFence nvMediaSignalerFence = NvSciSyncFenceInitializer;
NvSciSyncFence cudaSignalerFence = NvSciSyncFenceInitializer;
int main (int argc, char *argv[])
{
TestArgs args;
Blit2DTest ctx;
NvMediaStatus status = NVMEDIA_STATUS_ERROR;
NvSciSyncFence nvMediaSignalerFence = NvSciSyncFenceInitializer;
NvSciSyncFence cudaSignalerFence = NvSciSyncFenceInitializer;
int cudaDeviceId;
uint64_t startTime, endTime;
uint64_t operationStartTime, operationEndTime;
double processingTime;
int cudaDeviceId;
uint64_t startTime, endTime;
uint64_t operationStartTime, operationEndTime;
double processingTime;
/* Read configuration from command line and config file */
memset(&args, 0, sizeof(TestArgs));
memset(&ctx, 0, sizeof(Blit2DTest));
/* Read configuration from command line and config file */
memset(&args, 0, sizeof(TestArgs));
memset(&ctx, 0, sizeof(Blit2DTest));
/* ParseArgs parses the command line and the 2D configuration file and populates all initParams
* and run time configuration in to appropriate structures within args
*/
if (ParseArgs(argc, argv, &args)) {
PrintUsage();
return -1;
}
/* Check version */
NvMediaVersion version;
status = NvMedia2DGetVersion(&version);
if (status == NVMEDIA_STATUS_OK) {
printf("Library version: %u.%u\n", version.major, version.minor);
printf("Header version: %u.%u\n", NVMEDIA_2D_VERSION_MAJOR, NVMEDIA_2D_VERSION_MINOR);
if ((version.major != NVMEDIA_2D_VERSION_MAJOR) ||
(version.minor != NVMEDIA_2D_VERSION_MINOR)) {
printf("Library and Header mismatch!\n");
cleanup(&ctx, status);
}
/* ParseArgs parses the command line and the 2D configuration file and
* populates all initParams and run time configuration in to appropriate
* structures within args
*/
if (ParseArgs(argc, argv, &args)) {
PrintUsage();
return -1;
}
/* Check version */
NvMediaVersion version;
status = NvMedia2DGetVersion(&version);
if (status == NVMEDIA_STATUS_OK) {
printf("Library version: %u.%u\n", version.major, version.minor);
printf("Header version: %u.%u\n", NVMEDIA_2D_VERSION_MAJOR,
NVMEDIA_2D_VERSION_MINOR);
if ((version.major != NVMEDIA_2D_VERSION_MAJOR) ||
(version.minor != NVMEDIA_2D_VERSION_MINOR)) {
printf("Library and Header mismatch!\n");
cleanup(&ctx, status);
}
}
// Create NvMedia device
ctx.device = NvMediaDeviceCreate();
if(!ctx.device) {
printf("%s: Failed to create NvMedia device\n", __func__);
cleanup(&ctx, status);
}
// Create NvMedia device
ctx.device = NvMediaDeviceCreate();
if (!ctx.device) {
printf("%s: Failed to create NvMedia device\n", __func__);
cleanup(&ctx, status);
}
// Create 2D blitter
ctx.i2d = NvMedia2DCreate(ctx.device);
if(!ctx.i2d) {
printf("%s: Failed to create NvMedia 2D i2d\n", __func__);
cleanup(&ctx, status);
}
// Create 2D blitter
ctx.i2d = NvMedia2DCreate(ctx.device);
if (!ctx.i2d) {
printf("%s: Failed to create NvMedia 2D i2d\n", __func__);
cleanup(&ctx, status);
}
cudaDeviceId = findCudaDevice(argc, (const char**)argv);
cudaDeviceId = findCudaDevice(argc, (const char**)argv);
// NvMedia-CUDA operations without NvSCI APIs starts
cudaResources cudaResObj;
GetTimeMicroSec(&startTime);
setupNvMedia(&args, &ctx);
setupCuda(&ctx, cudaResObj, cudaDeviceId);
// NvMedia-CUDA operations without NvSCI APIs starts
cudaResources cudaResObj;
GetTimeMicroSec(&startTime);
setupNvMedia(&args, &ctx);
setupCuda(&ctx, cudaResObj, cudaDeviceId);
GetTimeMicroSec(&operationStartTime);
for (int i = 0; i < args.iterations; i++)
{
runNvMediaBlit2D(&args, &ctx);
runCudaOperation(&ctx, cudaResObj, cudaDeviceId);
}
GetTimeMicroSec(&operationEndTime);
GetTimeMicroSec(&operationStartTime);
for (int i = 0; i < args.iterations; i++) {
runNvMediaBlit2D(&args, &ctx);
runCudaOperation(&ctx, cudaResObj, cudaDeviceId);
}
GetTimeMicroSec(&operationEndTime);
cleanupNvMedia(&ctx);
cleanupCuda(&ctx, cudaResObj);
GetTimeMicroSec(&endTime);
// NvMedia-CUDA operations without NvSCI APIs ends
cleanupNvMedia(&ctx);
cleanupCuda(&ctx, cudaResObj);
GetTimeMicroSec(&endTime);
// NvMedia-CUDA operations without NvSCI APIs ends
processingTime = (double)(operationEndTime - operationStartTime)/1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations without NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations);
processingTime = (double)(endTime - startTime)/1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup without NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations);
processingTime = (double)(operationEndTime - operationStartTime) / 1000.0;
printf(
"Overall Processing time of NvMedia-CUDA Operations without NvSCI APIs "
"%.4f ms with %zu iterations\n",
processingTime, args.iterations);
processingTime = (double)(endTime - startTime) / 1000.0;
printf(
"Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup "
"without NvSCI APIs %.4f ms with %zu iterations\n",
processingTime, args.iterations);
NvSciBufObj dstNvSciBufobj, srcNvSciBufobj;
NvSciSyncObj nvMediaSignalerSyncObj, cudaSignalerSyncObj;
cudaExternalResInterop cudaExtResObj;
// NvMedia-CUDA operations via interop with NvSCI APIs starts
GetTimeMicroSec(&startTime);
setupNvMediaSignalerNvSciSync(&ctx, nvMediaSignalerSyncObj, cudaDeviceId);
setupCudaSignalerNvSciSync(&ctx, cudaSignalerSyncObj, cudaDeviceId);
setupNvMedia(&args, &ctx, srcNvSciBufobj, dstNvSciBufobj, nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId);
setupCuda(cudaExtResObj, dstNvSciBufobj, nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId);
NvSciBufObj dstNvSciBufobj, srcNvSciBufobj;
NvSciSyncObj nvMediaSignalerSyncObj, cudaSignalerSyncObj;
cudaExternalResInterop cudaExtResObj;
// NvMedia-CUDA operations via interop with NvSCI APIs starts
GetTimeMicroSec(&startTime);
setupNvMediaSignalerNvSciSync(&ctx, nvMediaSignalerSyncObj, cudaDeviceId);
setupCudaSignalerNvSciSync(&ctx, cudaSignalerSyncObj, cudaDeviceId);
setupNvMedia(&args, &ctx, srcNvSciBufobj, dstNvSciBufobj,
nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId);
setupCuda(cudaExtResObj, dstNvSciBufobj, nvMediaSignalerSyncObj,
cudaSignalerSyncObj, cudaDeviceId);
GetTimeMicroSec(&operationStartTime);
for (int i = 0; i < args.iterations; i++)
{
runNvMediaBlit2D(&args, &ctx, nvMediaSignalerSyncObj, &cudaSignalerFence, &nvMediaSignalerFence);
runCudaOperation(cudaExtResObj, &nvMediaSignalerFence, &cudaSignalerFence, cudaDeviceId, args.iterations);
}
GetTimeMicroSec(&operationEndTime);
GetTimeMicroSec(&operationStartTime);
for (int i = 0; i < args.iterations; i++) {
runNvMediaBlit2D(&args, &ctx, nvMediaSignalerSyncObj, &cudaSignalerFence,
&nvMediaSignalerFence);
runCudaOperation(cudaExtResObj, &nvMediaSignalerFence, &cudaSignalerFence,
cudaDeviceId, args.iterations);
}
GetTimeMicroSec(&operationEndTime);
cleanupNvMedia(&ctx, nvMediaSignalerSyncObj, cudaSignalerSyncObj);
cleanupCuda(cudaExtResObj);
cleanupNvSciSync(nvMediaSignalerSyncObj);
cleanupNvSciSync(cudaSignalerSyncObj);
cleanupNvSciBuf(srcNvSciBufobj);
cleanupNvSciBuf(dstNvSciBufobj);
GetTimeMicroSec(&endTime);
// NvMedia-CUDA operations via interop with NvSCI APIs ends
cleanupNvMedia(&ctx, nvMediaSignalerSyncObj, cudaSignalerSyncObj);
cleanupCuda(cudaExtResObj);
cleanupNvSciSync(nvMediaSignalerSyncObj);
cleanupNvSciSync(cudaSignalerSyncObj);
cleanupNvSciBuf(srcNvSciBufobj);
cleanupNvSciBuf(dstNvSciBufobj);
GetTimeMicroSec(&endTime);
// NvMedia-CUDA operations via interop with NvSCI APIs ends
processingTime = (double)(operationEndTime - operationStartTime)/1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations with NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations);
processingTime = (double)(endTime - startTime)/1000.0;
printf("Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup with NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations);
processingTime = (double)(operationEndTime - operationStartTime) / 1000.0;
printf(
"Overall Processing time of NvMedia-CUDA Operations with NvSCI APIs %.4f "
"ms with %zu iterations\n",
processingTime, args.iterations);
processingTime = (double)(endTime - startTime) / 1000.0;
printf(
"Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup "
"with NvSCI APIs %.4f ms with %zu iterations\n",
processingTime, args.iterations);
if (ctx.i2d != NULL) {
NvMedia2DDestroy(ctx.i2d);
}
if (ctx.i2d != NULL) {
NvMedia2DDestroy(ctx.i2d);
}
if (ctx.device != NULL) {
NvMediaDeviceDestroy(ctx.device);
}
if (ctx.device != NULL) {
NvMediaDeviceDestroy(ctx.device);
}
if (status == NVMEDIA_STATUS_OK) {
return 0;
}
else {
return 1;
}
if (status == NVMEDIA_STATUS_OK) {
return 0;
} else {
return 1;
}
}

View File

@ -38,463 +38,434 @@
#include "nvmedia_2d_nvscisync.h"
#include "nvsci_setup.h"
NvMediaImage *
NvMediaImageCreateUsingNvScibuf(
NvMediaDevice *device,
NvMediaSurfaceType type,
const NvMediaSurfAllocAttr *attrs,
uint32_t numAttrs,
uint32_t flags,
NvSciBufObj &bufobj,
int cudaDeviceId)
{
NvSciBufModule module = NULL;
NvSciError err = NvSciError_Success;
NvMediaStatus status = NVMEDIA_STATUS_OK;
NvSciBufAttrList attrlist = NULL;
NvSciBufAttrList conflictlist = NULL;
NvSciBufAttrValAccessPerm access_perm = NvSciBufAccessPerm_ReadWrite;
NvSciBufAttrKeyValuePair attr_kvp = {NvSciBufGeneralAttrKey_RequiredPerm,
&access_perm,
sizeof(access_perm)};
NvSciBufAttrKeyValuePair pairArrayOut[10];
NvMediaImage *NvMediaImageCreateUsingNvScibuf(NvMediaDevice *device,
NvMediaSurfaceType type,
const NvMediaSurfAllocAttr *attrs,
uint32_t numAttrs, uint32_t flags,
NvSciBufObj &bufobj,
int cudaDeviceId) {
NvSciBufModule module = NULL;
NvSciError err = NvSciError_Success;
NvMediaStatus status = NVMEDIA_STATUS_OK;
NvSciBufAttrList attrlist = NULL;
NvSciBufAttrList conflictlist = NULL;
NvSciBufAttrValAccessPerm access_perm = NvSciBufAccessPerm_ReadWrite;
NvSciBufAttrKeyValuePair attr_kvp = {NvSciBufGeneralAttrKey_RequiredPerm,
&access_perm, sizeof(access_perm)};
NvSciBufAttrKeyValuePair pairArrayOut[10];
NvMediaImage *image = NULL;
NvMediaImage *image = NULL;
err = NvSciBufModuleOpen(&module);
if(err != NvSciError_Success) {
printf("%s: NvSciBuffModuleOpen failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
err = NvSciBufModuleOpen(&module);
if (err != NvSciError_Success) {
printf("%s: NvSciBuffModuleOpen failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
err = NvSciBufAttrListCreate(module, &attrlist);
if(err != NvSciError_Success) {
printf("%s: SciBufAttrListCreate failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
err = NvSciBufAttrListCreate(module, &attrlist);
if (err != NvSciError_Success) {
printf("%s: SciBufAttrListCreate failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
err = NvSciBufAttrListSetAttrs(attrlist, &attr_kvp, 1);
if(err != NvSciError_Success) {
printf("%s: AccessPermSetAttr failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
err = NvSciBufAttrListSetAttrs(attrlist, &attr_kvp, 1);
if (err != NvSciError_Success) {
printf("%s: AccessPermSetAttr failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
status = NvMediaImageFillNvSciBufAttrs(device,
type,
attrs,
numAttrs,
0,
attrlist);
status =
NvMediaImageFillNvSciBufAttrs(device, type, attrs, numAttrs, 0, attrlist);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: ImageFillSciBufAttrs failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
if(status != NVMEDIA_STATUS_OK) {
printf("%s: ImageFillSciBufAttrs failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
setupNvSciBuf(bufobj, attrlist, cudaDeviceId);
setupNvSciBuf(bufobj, attrlist, cudaDeviceId);
status = NvMediaImageCreateFromNvSciBuf(device, bufobj, &image);
status = NvMediaImageCreateFromNvSciBuf(device,
bufobj,
&image);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: ImageCreatefromSciBuf failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
if(status != NVMEDIA_STATUS_OK) {
printf("%s: ImageCreatefromSciBuf failed. Error: %d \n", __func__, err);
goto fail_cleanup;
}
NvSciBufAttrListFree(attrlist);
NvSciBufAttrListFree(attrlist);
if (module != NULL) {
NvSciBufModuleClose(module);
}
if(module != NULL) {
NvSciBufModuleClose(module);
}
return image;
return image;
fail_cleanup:
if(attrlist != NULL) {
NvSciBufAttrListFree(attrlist);
}
if(bufobj != NULL) {
NvSciBufObjFree(bufobj);
bufobj = NULL;
}
if (attrlist != NULL) {
NvSciBufAttrListFree(attrlist);
}
if (bufobj != NULL) {
NvSciBufObjFree(bufobj);
bufobj = NULL;
}
if(module != NULL) {
NvSciBufModuleClose(module);
}
NvMediaImageDestroy(image);
return NULL;
if (module != NULL) {
NvSciBufModuleClose(module);
}
NvMediaImageDestroy(image);
return NULL;
}
/* Create NvMediaImage surface based on the input attributes.
* Returns NVMEDIA_STATUS_OK on success
*/
static NvMediaStatus
createSurface(Blit2DTest *ctx,
NvMediaSurfFormatAttr *surfFormatAttrs,
NvMediaSurfAllocAttr *surfAllocAttrs,
uint32_t numSurfAllocAttrs,
NvMediaImage **image,
NvSciBufObj &bufObj,
int cudaDeviceId)
{
NvMediaSurfaceType surfType;
static NvMediaStatus createSurface(Blit2DTest *ctx,
NvMediaSurfFormatAttr *surfFormatAttrs,
NvMediaSurfAllocAttr *surfAllocAttrs,
uint32_t numSurfAllocAttrs,
NvMediaImage **image, NvSciBufObj &bufObj,
int cudaDeviceId) {
NvMediaSurfaceType surfType;
/* create source image */
surfType = NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
*image = NvMediaImageCreateUsingNvScibuf(ctx->device, /* device */
surfType, /* surface type */
surfAllocAttrs,
numSurfAllocAttrs,
0,
bufObj,
cudaDeviceId);
/* create source image */
surfType =
NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
*image = NvMediaImageCreateUsingNvScibuf(ctx->device, /* device */
surfType, /* surface type */
surfAllocAttrs, numSurfAllocAttrs, 0,
bufObj, cudaDeviceId);
if(*image == NULL) {
printf ("Unable to create image\n");
return NVMEDIA_STATUS_ERROR;
}
InitImage (*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
if (*image == NULL) {
printf("Unable to create image\n");
return NVMEDIA_STATUS_ERROR;
}
InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value, surfType);*/
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value,
surfType);*/
return NVMEDIA_STATUS_OK;
return NVMEDIA_STATUS_OK;
}
/* Create NvMediaImage surface based on the input attributes.
* Returns NVMEDIA_STATUS_OK on success
*/
static NvMediaStatus
createSurfaceNonNvSCI(Blit2DTest *ctx,
NvMediaSurfFormatAttr *surfFormatAttrs,
NvMediaSurfAllocAttr *surfAllocAttrs,
uint32_t numSurfAllocAttrs,
NvMediaImage **image)
{
NvMediaSurfaceType surfType;
static NvMediaStatus createSurfaceNonNvSCI(
Blit2DTest *ctx, NvMediaSurfFormatAttr *surfFormatAttrs,
NvMediaSurfAllocAttr *surfAllocAttrs, uint32_t numSurfAllocAttrs,
NvMediaImage **image) {
NvMediaSurfaceType surfType;
/* create source image */
surfType = NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
/* create source image */
surfType =
NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
*image = NvMediaImageCreateNew(ctx->device, surfType, surfAllocAttrs, numSurfAllocAttrs, 0);
*image = NvMediaImageCreateNew(ctx->device, surfType, surfAllocAttrs,
numSurfAllocAttrs, 0);
if(*image == NULL) {
printf ("Unable to create image\n");
return NVMEDIA_STATUS_ERROR;
}
InitImage (*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
if (*image == NULL) {
printf("Unable to create image\n");
return NVMEDIA_STATUS_ERROR;
}
InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value, surfType);*/
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value,
surfType);*/
return NVMEDIA_STATUS_OK;
return NVMEDIA_STATUS_OK;
}
static void destroySurface(NvMediaImage *image) { NvMediaImageDestroy(image); }
static void destroySurface(NvMediaImage *image)
{
NvMediaImageDestroy(image);
}
static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs *args,
NvSciSyncObj &nvMediaSignalerSyncObj,
NvSciSyncFence *preSyncFence,
NvSciSyncFence *fence) {
NvMediaStatus status;
NvMediaImageSurfaceMap surfaceMap;
status = ReadImage(args->inputFileName, /* fileName */
0, /* frameNum */
args->srcSurfAllocAttrs[0].value, /* source image width */
args->srcSurfAllocAttrs[1].value, /* source image height */
ctx->srcImage, /* srcImage */
NVMEDIA_TRUE, /* uvOrderFlag */
1, /* bytesPerPixel */
MSB_ALIGNED); /* pixelAlignment */
static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs* args, NvSciSyncObj &nvMediaSignalerSyncObj,
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence)
{
NvMediaStatus status;
NvMediaImageSurfaceMap surfaceMap;
if (status != NVMEDIA_STATUS_OK) {
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
return status;
}
status = ReadImage(args->inputFileName, /* fileName */
0, /* frameNum */
args->srcSurfAllocAttrs[0].value, /* source image width */
args->srcSurfAllocAttrs[1].value, /* source image height */
ctx->srcImage, /* srcImage */
NVMEDIA_TRUE, /* uvOrderFlag */
1, /* bytesPerPixel */
MSB_ALIGNED); /* pixelAlignment */
if ((args->srcRect.x1 <= args->srcRect.x0) ||
(args->srcRect.y1 <= args->srcRect.y0)) {
ctx->srcRect = NULL;
} else {
ctx->srcRect = &(args->srcRect);
}
if ((args->dstRect.x1 <= args->dstRect.x0) ||
(args->dstRect.y1 <= args->dstRect.y0)) {
ctx->dstRect = NULL;
} else {
ctx->dstRect = &(args->dstRect);
}
static int64_t launch = 0;
// Start inserting pre-fence from second launch inorder to for NvMedia2Blit to
// wait
// for cuda signal on fence.
if (launch) {
status = NvMedia2DInsertPreNvSciSyncFence(ctx->i2d, preSyncFence);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
return status;
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__,
status);
return status;
}
NvSciSyncFenceClear(preSyncFence);
}
launch++;
if ((args->srcRect.x1 <= args->srcRect.x0) || (args->srcRect.y1 <= args->srcRect.y0)) {
ctx->srcRect = NULL;
} else {
ctx->srcRect = &(args->srcRect);
}
status = NvMedia2DSetNvSciSyncObjforEOF(ctx->i2d, nvMediaSignalerSyncObj);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__,
status);
return status;
}
if ((args->dstRect.x1 <= args->dstRect.x0) || (args->dstRect.y1 <= args->dstRect.y0)) {
ctx->dstRect = NULL;
} else {
ctx->dstRect = &(args->dstRect);
}
/* 2DBlit processing on input image */
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
ctx->dstImage, /* dstSurface */
ctx->dstRect, /* dstRect */
ctx->srcImage, /* srcSurface */
ctx->srcRect, /* srcRect */
&args->blitParams, /* params */
NULL); /* paramsOut */
static int64_t launch = 0;
// Start inserting pre-fence from second launch inorder to for NvMedia2Blit to wait
// for cuda signal on fence.
if (launch)
{
status = NvMedia2DInsertPreNvSciSyncFence(ctx->i2d, preSyncFence);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__, status);
return status;
}
NvSciSyncFenceClear(preSyncFence);
}
launch++;
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
return status;
}
status = NvMedia2DSetNvSciSyncObjforEOF(ctx->i2d, nvMediaSignalerSyncObj);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__, status);
return status;
}
status =
NvMedia2DGetEOFNvSciSyncFence(ctx->i2d, nvMediaSignalerSyncObj, fence);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DGetEOFNvSciSyncFence failed: %d\n", __func__, status);
return status;
}
/* 2DBlit processing on input image */
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
ctx->dstImage, /* dstSurface */
ctx->dstRect, /* dstRect */
ctx->srcImage, /* srcSurface */
ctx->srcRect, /* srcRect */
&args->blitParams, /* params */
NULL); /* paramsOut */
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
return status;
}
status = NvMedia2DGetEOFNvSciSyncFence(ctx->i2d, nvMediaSignalerSyncObj, fence);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DGetEOFNvSciSyncFence failed: %d\n", __func__, status);
return status;
}
return NVMEDIA_STATUS_OK;
return NVMEDIA_STATUS_OK;
}
static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs* args)
{
NvMediaStatus status;
NvMediaImageSurfaceMap surfaceMap;
static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs *args) {
NvMediaStatus status;
NvMediaImageSurfaceMap surfaceMap;
status = ReadImage(args->inputFileName, /* fileName */
0, /* frameNum */
args->srcSurfAllocAttrs[0].value, /* source image width */
args->srcSurfAllocAttrs[1].value, /* source image height */
ctx->srcImage, /* srcImage */
NVMEDIA_TRUE, /* uvOrderFlag */
1, /* bytesPerPixel */
MSB_ALIGNED); /* pixelAlignment */
status = ReadImage(args->inputFileName, /* fileName */
0, /* frameNum */
args->srcSurfAllocAttrs[0].value, /* source image width */
args->srcSurfAllocAttrs[1].value, /* source image height */
ctx->srcImage, /* srcImage */
NVMEDIA_TRUE, /* uvOrderFlag */
1, /* bytesPerPixel */
MSB_ALIGNED); /* pixelAlignment */
if (status != NVMEDIA_STATUS_OK) {
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
return status;
}
if (status != NVMEDIA_STATUS_OK) {
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
return status;
}
if ((args->srcRect.x1 <= args->srcRect.x0) || (args->srcRect.y1 <= args->srcRect.y0)) {
ctx->srcRect = NULL;
} else {
ctx->srcRect = &(args->srcRect);
}
if ((args->srcRect.x1 <= args->srcRect.x0) ||
(args->srcRect.y1 <= args->srcRect.y0)) {
ctx->srcRect = NULL;
} else {
ctx->srcRect = &(args->srcRect);
}
if ((args->dstRect.x1 <= args->dstRect.x0) || (args->dstRect.y1 <= args->dstRect.y0)) {
ctx->dstRect = NULL;
} else {
ctx->dstRect = &(args->dstRect);
}
if ((args->dstRect.x1 <= args->dstRect.x0) ||
(args->dstRect.y1 <= args->dstRect.y0)) {
ctx->dstRect = NULL;
} else {
ctx->dstRect = &(args->dstRect);
}
/* 2DBlit processing on input image */
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
ctx->dstImage, /* dstSurface */
ctx->dstRect, /* dstRect */
ctx->srcImage, /* srcSurface */
ctx->srcRect, /* srcRect */
&args->blitParams, /* params */
NULL); /* paramsOut */
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
return status;
}
/* 2DBlit processing on input image */
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
ctx->dstImage, /* dstSurface */
ctx->dstRect, /* dstRect */
ctx->srcImage, /* srcSurface */
ctx->srcRect, /* srcRect */
&args->blitParams, /* params */
NULL); /* paramsOut */
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
return status;
}
/* Write output image into buffer */
ctx->bytesPerPixel = 1;
WriteImageToAllocatedBuffer(ctx, ctx->dstImage,
NVMEDIA_TRUE,
NVMEDIA_FALSE,
/* Write output image into buffer */
ctx->bytesPerPixel = 1;
WriteImageToAllocatedBuffer(ctx, ctx->dstImage, NVMEDIA_TRUE, NVMEDIA_FALSE,
ctx->bytesPerPixel);
return NVMEDIA_STATUS_OK;
return NVMEDIA_STATUS_OK;
}
static void cleanup(Blit2DTest* ctx, NvMediaStatus status = NVMEDIA_STATUS_OK)
{
if (ctx->srcImage != NULL) {
NvMedia2DImageUnRegister(ctx->i2d, ctx->srcImage);
destroySurface(ctx->srcImage);
}
if (ctx->dstImage != NULL) {
NvMedia2DImageUnRegister(ctx->i2d, ctx->dstImage);
destroySurface(ctx->dstImage);
}
if (status != NVMEDIA_STATUS_OK) {
exit(EXIT_FAILURE);
}
static void cleanup(Blit2DTest *ctx, NvMediaStatus status = NVMEDIA_STATUS_OK) {
if (ctx->srcImage != NULL) {
NvMedia2DImageUnRegister(ctx->i2d, ctx->srcImage);
destroySurface(ctx->srcImage);
}
if (ctx->dstImage != NULL) {
NvMedia2DImageUnRegister(ctx->i2d, ctx->dstImage);
destroySurface(ctx->dstImage);
}
if (status != NVMEDIA_STATUS_OK) {
exit(EXIT_FAILURE);
}
}
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj)
{
NvMediaStatus status;
cleanup(ctx);
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, syncObj);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMediaImageSciBufInit failed\n",__func__);
exit(EXIT_FAILURE);
}
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, preSyncObj);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMediaImageSciBufInit failed\n",__func__);
exit(EXIT_FAILURE);
}
NvMediaImageNvSciBufDeinit();
void cleanupNvMedia(Blit2DTest *ctx, NvSciSyncObj &syncObj,
NvSciSyncObj &preSyncObj) {
NvMediaStatus status;
cleanup(ctx);
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, syncObj);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMediaImageSciBufInit failed\n", __func__);
exit(EXIT_FAILURE);
}
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, preSyncObj);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMediaImageSciBufInit failed\n", __func__);
exit(EXIT_FAILURE);
}
NvMediaImageNvSciBufDeinit();
}
void cleanupNvMedia(Blit2DTest* ctx)
{
cleanup(ctx);
free(ctx->dstBuffPitches);
free(ctx->dstBuffer);
free(ctx->dstBuff);
void cleanupNvMedia(Blit2DTest *ctx) {
cleanup(ctx);
free(ctx->dstBuffPitches);
free(ctx->dstBuffer);
free(ctx->dstBuff);
}
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj &srcNvSciBufobj,
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj,
int cudaDeviceId)
{
NvMediaStatus status;
status = NvMediaImageNvSciBufInit();
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMediaImageSciBufInit failed\n",__func__);
cleanup(ctx, status);
}
void setupNvMedia(TestArgs *args, Blit2DTest *ctx, NvSciBufObj &srcNvSciBufobj,
NvSciBufObj &dstNvSciBufobj, NvSciSyncObj &syncObj,
NvSciSyncObj &preSyncObj, int cudaDeviceId) {
NvMediaStatus status;
status = NvMediaImageNvSciBufInit();
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMediaImageSciBufInit failed\n", __func__);
cleanup(ctx, status);
}
// Create source surface
status = createSurface(ctx,
args->srcSurfFormatAttrs,
args->srcSurfAllocAttrs,
args->numSurfAllocAttrs,
&ctx->srcImage,
srcNvSciBufobj,
cudaDeviceId);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
// Create source surface
status = createSurface(ctx, args->srcSurfFormatAttrs, args->srcSurfAllocAttrs,
args->numSurfAllocAttrs, &ctx->srcImage,
srcNvSciBufobj, cudaDeviceId);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
// Create destination surface
status = createSurface(ctx,
args->dstSurfFormatAttrs,
args->dstSurfAllocAttrs,
args->numSurfAllocAttrs,
&ctx->dstImage,
dstNvSciBufobj,
cudaDeviceId);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
// Create destination surface
status = createSurface(ctx, args->dstSurfFormatAttrs, args->dstSurfAllocAttrs,
args->numSurfAllocAttrs, &ctx->dstImage,
dstNvSciBufobj, cudaDeviceId);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
//Register source Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
if ( status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register source surface\n", __func__);
cleanup(ctx, status);
}
//Register destination Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage, NVMEDIA_ACCESS_MODE_READ_WRITE);
if ( status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register destination surface\n", __func__);
cleanup(ctx, status);
}
// Register source Surface
status =
NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register source surface\n", __func__);
cleanup(ctx, status);
}
// Register destination Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage,
NVMEDIA_ACCESS_MODE_READ_WRITE);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register destination surface\n", __func__);
cleanup(ctx, status);
}
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_EOFSYNCOBJ, syncObj);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
}
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_EOFSYNCOBJ, syncObj);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
}
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_PRESYNCOBJ, preSyncObj);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
}
status =
NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_PRESYNCOBJ, preSyncObj);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
}
}
// Create NvMedia src & dst image without NvSciBuf
void setupNvMedia(TestArgs* args, Blit2DTest* ctx)
{
NvMediaStatus status;
void setupNvMedia(TestArgs *args, Blit2DTest *ctx) {
NvMediaStatus status;
// Create source surface
status = createSurfaceNonNvSCI(ctx,
args->srcSurfFormatAttrs,
args->srcSurfAllocAttrs,
args->numSurfAllocAttrs,
&ctx->srcImage);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
// Create source surface
status = createSurfaceNonNvSCI(ctx, args->srcSurfFormatAttrs,
args->srcSurfAllocAttrs,
args->numSurfAllocAttrs, &ctx->srcImage);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
// Create destination surface
status = createSurfaceNonNvSCI(ctx,
args->dstSurfFormatAttrs,
args->dstSurfAllocAttrs,
args->numSurfAllocAttrs,
&ctx->dstImage);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
// Create destination surface
status = createSurfaceNonNvSCI(ctx, args->dstSurfFormatAttrs,
args->dstSurfAllocAttrs,
args->numSurfAllocAttrs, &ctx->dstImage);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to create buffer pools\n", __func__);
cleanup(ctx, status);
}
//Register source Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
if ( status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register source surface\n", __func__);
cleanup(ctx, status);
}
// Register source Surface
status =
NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register source surface\n", __func__);
cleanup(ctx, status);
}
//Register destination Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage, NVMEDIA_ACCESS_MODE_READ_WRITE);
if ( status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register destination surface\n", __func__);
cleanup(ctx, status);
}
// Register destination Surface
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage,
NVMEDIA_ACCESS_MODE_READ_WRITE);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Unable to register destination surface\n", __func__);
cleanup(ctx, status);
}
// Allocate buffer for writing image & set image parameters in Blit2DTest.
ctx->bytesPerPixel = 1;
AllocateBufferToWriteImage(ctx,
ctx->dstImage,
NVMEDIA_TRUE, /* uvOrderFlag */
NVMEDIA_FALSE); /* appendFlag */
// Allocate buffer for writing image & set image parameters in Blit2DTest.
ctx->bytesPerPixel = 1;
AllocateBufferToWriteImage(ctx, ctx->dstImage, NVMEDIA_TRUE, /* uvOrderFlag */
NVMEDIA_FALSE); /* appendFlag */
}
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx)
{
// Blit2D function
NvMediaStatus status = blit2DImageNonNvSCI(ctx, args);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: Blit2D failed\n", __func__);
cleanup(ctx, status);
}
void runNvMediaBlit2D(TestArgs *args, Blit2DTest *ctx) {
// Blit2D function
NvMediaStatus status = blit2DImageNonNvSCI(ctx, args);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Blit2D failed\n", __func__);
cleanup(ctx, status);
}
}
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj &nvMediaSignalerSyncObj,
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence)
{
// Blit2D function
NvMediaStatus status = blit2DImage(ctx, args, nvMediaSignalerSyncObj, preSyncFence, fence);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: Blit2D failed\n", __func__);
cleanup(ctx, status);
}
void runNvMediaBlit2D(TestArgs *args, Blit2DTest *ctx,
NvSciSyncObj &nvMediaSignalerSyncObj,
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence) {
// Blit2D function
NvMediaStatus status =
blit2DImage(ctx, args, nvMediaSignalerSyncObj, preSyncFence, fence);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: Blit2D failed\n", __func__);
cleanup(ctx, status);
}
}

View File

@ -25,7 +25,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __NVMEDIA_PRODUCER_H__
#define __NVMEDIA_PRODUCER_H__
#include "nvmedia_utils/cmdline.h"
@ -36,13 +35,14 @@
#include "nvmedia_image_nvscibuf.h"
#include "nvscisync.h"
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj &syncObj,
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence);
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj& syncObj,
NvSciSyncFence* preSyncFence, NvSciSyncFence* fence);
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx);
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj &srcNvSciBufobj,
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj,
int cudaDeviceId);
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj& srcNvSciBufobj,
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj& syncObj,
NvSciSyncObj& preSyncObj, int cudaDeviceId);
void setupNvMedia(TestArgs* args, Blit2DTest* ctx);
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj);
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj& syncObj,
NvSciSyncObj& preSyncObj);
void cleanupNvMedia(Blit2DTest* ctx);
#endif

View File

@ -32,7 +32,6 @@
#include "nvsci_setup.h"
#include "nvmedia_2d_nvscisync.h"
#define checkNvSciErrors(call) \
do { \
NvSciError _status = call; \
@ -44,111 +43,115 @@
fflush(stdout); \
exit(EXIT_FAILURE); \
} \
} while (0)
} while (0)
void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId)
{
NvSciSyncModule sciSyncModule;
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
NvSciSyncAttrList signalerAttrList, waiterAttrList;
NvSciSyncAttrList syncUnreconciledList[2];
NvSciSyncAttrList syncReconciledList, syncConflictList;
void setupNvMediaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
int cudaDeviceId) {
NvSciSyncModule sciSyncModule;
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
NvSciSyncAttrList signalerAttrList, waiterAttrList;
NvSciSyncAttrList syncUnreconciledList[2];
NvSciSyncAttrList syncReconciledList, syncConflictList;
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(ctx->i2d, signalerAttrList, NVMEDIA_SIGNALER);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n",__func__);
exit(EXIT_FAILURE);
}
NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(
ctx->i2d, signalerAttrList, NVMEDIA_SIGNALER);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__);
exit(EXIT_FAILURE);
}
checkCudaErrors(cudaSetDevice(cudaDeviceId));
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(waiterAttrList, cudaDeviceId, cudaNvSciSyncAttrWait));
checkCudaErrors(cudaSetDevice(cudaDeviceId));
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(waiterAttrList, cudaDeviceId,
cudaNvSciSyncAttrWait));
syncUnreconciledList[0] = signalerAttrList;
syncUnreconciledList[1] = waiterAttrList;
checkNvSciErrors(NvSciSyncAttrListReconcile(syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
syncUnreconciledList[0] = signalerAttrList;
syncUnreconciledList[1] = waiterAttrList;
checkNvSciErrors(NvSciSyncAttrListReconcile(
syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
NvSciSyncAttrListFree(signalerAttrList);
NvSciSyncAttrListFree(waiterAttrList);
if(syncConflictList != nullptr) {
NvSciSyncAttrListFree(syncConflictList);
}
NvSciSyncAttrListFree(signalerAttrList);
NvSciSyncAttrListFree(waiterAttrList);
if (syncConflictList != nullptr) {
NvSciSyncAttrListFree(syncConflictList);
}
}
void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId)
{
NvSciSyncModule sciSyncModule;
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
NvSciSyncAttrList signalerAttrList, waiterAttrList;
NvSciSyncAttrList syncUnreconciledList[2];
NvSciSyncAttrList syncReconciledList, syncConflictList;
void setupCudaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
int cudaDeviceId) {
NvSciSyncModule sciSyncModule;
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
NvSciSyncAttrList signalerAttrList, waiterAttrList;
NvSciSyncAttrList syncUnreconciledList[2];
NvSciSyncAttrList syncReconciledList, syncConflictList;
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(ctx->i2d, waiterAttrList, NVMEDIA_WAITER);
if(status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n",__func__);
exit(EXIT_FAILURE);
}
NvMediaStatus status =
NvMedia2DFillNvSciSyncAttrList(ctx->i2d, waiterAttrList, NVMEDIA_WAITER);
if (status != NVMEDIA_STATUS_OK) {
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__);
exit(EXIT_FAILURE);
}
checkCudaErrors(cudaSetDevice(cudaDeviceId));
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(signalerAttrList, cudaDeviceId, cudaNvSciSyncAttrSignal));
checkCudaErrors(cudaSetDevice(cudaDeviceId));
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(
signalerAttrList, cudaDeviceId, cudaNvSciSyncAttrSignal));
syncUnreconciledList[0] = signalerAttrList;
syncUnreconciledList[1] = waiterAttrList;
checkNvSciErrors(NvSciSyncAttrListReconcile(syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
syncUnreconciledList[0] = signalerAttrList;
syncUnreconciledList[1] = waiterAttrList;
checkNvSciErrors(NvSciSyncAttrListReconcile(
syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
NvSciSyncAttrListFree(signalerAttrList);
NvSciSyncAttrListFree(waiterAttrList);
if(syncConflictList != nullptr) {
NvSciSyncAttrListFree(syncConflictList);
}
NvSciSyncAttrListFree(signalerAttrList);
NvSciSyncAttrListFree(waiterAttrList);
if (syncConflictList != nullptr) {
NvSciSyncAttrListFree(syncConflictList);
}
}
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist,
int cudaDeviceId) {
CUuuid devUUID;
NvSciBufAttrList conflictlist;
NvSciBufAttrList bufUnreconciledAttrlist[1];
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist, int cudaDeviceId)
{
CUuuid devUUID;
NvSciBufAttrList conflictlist;
NvSciBufAttrList bufUnreconciledAttrlist[1];
CUresult res = cuDeviceGetUuid(&devUUID, cudaDeviceId);
if (res != CUDA_SUCCESS) {
fprintf(stderr, "Driver API error = %04d \n", res);
exit(EXIT_FAILURE);
}
CUresult res = cuDeviceGetUuid(&devUUID, cudaDeviceId);
if (res != CUDA_SUCCESS) {
fprintf(stderr, "Driver API error = %04d \n", res);
exit(EXIT_FAILURE);
}
NvSciBufAttrKeyValuePair attr_gpuid[] = {NvSciBufGeneralAttrKey_GpuId,
&devUUID, sizeof(devUUID)};
NvSciBufAttrKeyValuePair attr_gpuid[] = {NvSciBufGeneralAttrKey_GpuId, &devUUID, sizeof(devUUID)};
// set CUDA GPU ID to attribute list
checkNvSciErrors(NvSciBufAttrListSetAttrs(
nvmediaAttrlist, attr_gpuid,
sizeof(attr_gpuid) / sizeof(NvSciBufAttrKeyValuePair)));
// set CUDA GPU ID to attribute list
checkNvSciErrors(NvSciBufAttrListSetAttrs(nvmediaAttrlist, attr_gpuid, sizeof(attr_gpuid)/sizeof(NvSciBufAttrKeyValuePair)));
bufUnreconciledAttrlist[0] = nvmediaAttrlist;
bufUnreconciledAttrlist[0] = nvmediaAttrlist;
checkNvSciErrors(NvSciBufAttrListReconcileAndObjAlloc(bufUnreconciledAttrlist,
1,
&bufobj,
&conflictlist));
if (conflictlist != NULL) {
NvSciBufAttrListFree(conflictlist);
}
checkNvSciErrors(NvSciBufAttrListReconcileAndObjAlloc(
bufUnreconciledAttrlist, 1, &bufobj, &conflictlist));
if (conflictlist != NULL) {
NvSciBufAttrListFree(conflictlist);
}
}
void cleanupNvSciBuf(NvSciBufObj &Bufobj)
{
if (Bufobj != NULL) {
NvSciBufObjFree(Bufobj);
}
void cleanupNvSciBuf(NvSciBufObj &Bufobj) {
if (Bufobj != NULL) {
NvSciBufObjFree(Bufobj);
}
}
void cleanupNvSciSync(NvSciSyncObj &syncObj)
{
if (NvSciSyncObjFree != NULL) {
NvSciSyncObjFree(syncObj);
}
void cleanupNvSciSync(NvSciSyncObj &syncObj) {
if (NvSciSyncObjFree != NULL) {
NvSciSyncObjFree(syncObj);
}
}

View File

@ -25,16 +25,18 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __NVSCI_SETUP_H__
#define __NVSCI_SETUP_H__
#include "nvmedia_utils/cmdline.h"
#include <nvscibuf.h>
#include <nvscisync.h>
void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId);
void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId);
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist, int cudaDeviceId);
void setupNvMediaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
int cudaDeviceId);
void setupCudaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
int cudaDeviceId);
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist,
int cudaDeviceId);
void cleanupNvSciBuf(NvSciBufObj &Bufobj);
void cleanupNvSciSync(NvSciSyncObj &syncObj);
#endif