mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2024-11-24 15:59:15 +08:00
cudaNvSciNvMedia plane offset correction
This commit is contained in:
parent
ba5a483c6e
commit
2aeaf51b11
|
@ -36,7 +36,6 @@
|
|||
// Enable this to 1 if require cuda processed output to ppm file.
|
||||
#define WRITE_OUTPUT_IMAGE 0
|
||||
|
||||
|
||||
#define checkNvSciErrors(call) \
|
||||
do { \
|
||||
NvSciError _status = call; \
|
||||
|
@ -48,325 +47,382 @@
|
|||
fflush(stdout); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} while (0)
|
||||
} while (0)
|
||||
|
||||
__global__ static void yuvToGrayscale(cudaSurfaceObject_t surfaceObject, unsigned int *dstImage, int32_t imageWidth, int32_t imageHeight)
|
||||
{
|
||||
size_t x = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
size_t y = blockIdx.y*blockDim.y + threadIdx.y;
|
||||
__global__ static void yuvToGrayscale(cudaSurfaceObject_t surfaceObject,
|
||||
unsigned int *dstImage,
|
||||
int32_t imageWidth, int32_t imageHeight) {
|
||||
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
uchar4* dstImageUchar4 = (uchar4*)dstImage;
|
||||
for ( ; x < imageWidth && y < imageHeight; x += gridDim.x*blockDim.x, y += gridDim.y*blockDim.y)
|
||||
{
|
||||
int colInBytes = x * sizeof(unsigned char);
|
||||
unsigned char luma = surf2Dread<unsigned char>(surfaceObject, colInBytes, y);
|
||||
uchar4 grayscalePix = make_uchar4(luma, luma, luma, 0);
|
||||
uchar4 *dstImageUchar4 = (uchar4 *)dstImage;
|
||||
for (; x < imageWidth && y < imageHeight;
|
||||
x += gridDim.x * blockDim.x, y += gridDim.y * blockDim.y) {
|
||||
int colInBytes = x * sizeof(unsigned char);
|
||||
unsigned char luma =
|
||||
surf2Dread<unsigned char>(surfaceObject, colInBytes, y);
|
||||
uchar4 grayscalePix = make_uchar4(luma, luma, luma, 0);
|
||||
|
||||
dstImageUchar4[y*imageWidth + x] = grayscalePix;
|
||||
}
|
||||
dstImageUchar4[y * imageWidth + x] = grayscalePix;
|
||||
}
|
||||
}
|
||||
|
||||
static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem, NvSciSyncObj &syncObj)
|
||||
{
|
||||
cudaExternalSemaphoreHandleDesc extSemDesc;
|
||||
memset(&extSemDesc, 0, sizeof(extSemDesc));
|
||||
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
|
||||
extSemDesc.handle.nvSciSyncObj = (void *)syncObj;
|
||||
static void cudaImportNvSciSync(cudaExternalSemaphore_t &extSem,
|
||||
NvSciSyncObj &syncObj) {
|
||||
cudaExternalSemaphoreHandleDesc extSemDesc;
|
||||
memset(&extSemDesc, 0, sizeof(extSemDesc));
|
||||
extSemDesc.type = cudaExternalSemaphoreHandleTypeNvSciSync;
|
||||
extSemDesc.handle.nvSciSyncObj = (void *)syncObj;
|
||||
|
||||
checkCudaErrors(cudaImportExternalSemaphore(&extSem, &extSemDesc));
|
||||
checkCudaErrors(cudaImportExternalSemaphore(&extSem, &extSemDesc));
|
||||
}
|
||||
|
||||
static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem, NvSciSyncFence *fence,
|
||||
cudaStream_t stream) {
|
||||
cudaExternalSemaphoreWaitParams waitParams;
|
||||
memset(&waitParams, 0, sizeof(waitParams));
|
||||
// For cross-process signaler-waiter applications need to use NvSciIpc
|
||||
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
|
||||
// across process. This step is optional in single-process.
|
||||
waitParams.params.nvSciSync.fence = (void *)fence;
|
||||
waitParams.flags = 0;
|
||||
static void waitExternalSemaphore(cudaExternalSemaphore_t &waitSem,
|
||||
NvSciSyncFence *fence, cudaStream_t stream) {
|
||||
cudaExternalSemaphoreWaitParams waitParams;
|
||||
memset(&waitParams, 0, sizeof(waitParams));
|
||||
// For cross-process signaler-waiter applications need to use NvSciIpc
|
||||
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
|
||||
// across process. This step is optional in single-process.
|
||||
waitParams.params.nvSciSync.fence = (void *)fence;
|
||||
waitParams.flags = 0;
|
||||
|
||||
checkCudaErrors(cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, stream));
|
||||
checkCudaErrors(
|
||||
cudaWaitExternalSemaphoresAsync(&waitSem, &waitParams, 1, stream));
|
||||
}
|
||||
|
||||
static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem, NvSciSyncFence *fence,
|
||||
cudaStream_t stream) {
|
||||
cudaExternalSemaphoreSignalParams signalParams;
|
||||
memset(&signalParams, 0, sizeof(signalParams));
|
||||
// For cross-process signaler-waiter applications need to use NvSciIpc
|
||||
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
|
||||
// across process. This step is optional in single-process.
|
||||
signalParams.params.nvSciSync.fence = (void *)fence;
|
||||
signalParams.flags = 0;
|
||||
static void signalExternalSemaphore(cudaExternalSemaphore_t &signalSem,
|
||||
NvSciSyncFence *fence,
|
||||
cudaStream_t stream) {
|
||||
cudaExternalSemaphoreSignalParams signalParams;
|
||||
memset(&signalParams, 0, sizeof(signalParams));
|
||||
// For cross-process signaler-waiter applications need to use NvSciIpc
|
||||
// and NvSciSync[Export|Import] utilities to share the NvSciSyncFence
|
||||
// across process. This step is optional in single-process.
|
||||
signalParams.params.nvSciSync.fence = (void *)fence;
|
||||
signalParams.flags = 0;
|
||||
|
||||
checkCudaErrors(cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams,
|
||||
1, stream));
|
||||
checkCudaErrors(
|
||||
cudaSignalExternalSemaphoresAsync(&signalSem, &signalParams, 1, stream));
|
||||
}
|
||||
|
||||
|
||||
static void yuvToGrayscaleCudaKernel(cudaExternalResInterop& cudaExtResObj, int32_t imageWidth, int32_t imageHeight)
|
||||
{
|
||||
static void yuvToGrayscaleCudaKernel(cudaExternalResInterop &cudaExtResObj,
|
||||
int32_t imageWidth, int32_t imageHeight) {
|
||||
#if WRITE_OUTPUT_IMAGE
|
||||
unsigned int *h_dstImage;
|
||||
checkCudaErrors(cudaMallocHost(&h_dstImage, sizeof(unsigned int)*imageHeight*imageWidth));
|
||||
unsigned int *h_dstImage;
|
||||
checkCudaErrors(cudaMallocHost(
|
||||
&h_dstImage, sizeof(unsigned int) * imageHeight * imageWidth));
|
||||
#endif
|
||||
dim3 block(16, 16, 1);
|
||||
dim3 grid((imageWidth/block.x)+1, (imageHeight/block.y)+1, 1);
|
||||
dim3 block(16, 16, 1);
|
||||
dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1);
|
||||
|
||||
yuvToGrayscale<<<grid, block, 0, cudaExtResObj.stream>>>(cudaExtResObj.cudaSurfaceNvmediaBuf[0], cudaExtResObj.d_outputImage, imageWidth, imageHeight);
|
||||
yuvToGrayscale<<<grid, block, 0, cudaExtResObj.stream>>>(
|
||||
cudaExtResObj.cudaSurfaceNvmediaBuf[0], cudaExtResObj.d_outputImage,
|
||||
imageWidth, imageHeight);
|
||||
|
||||
#if WRITE_OUTPUT_IMAGE
|
||||
checkCudaErrors(cudaMemcpyAsync(h_dstImage, cudaExtResObj.d_outputImage, sizeof(unsigned int)*imageHeight*imageWidth, cudaMemcpyDeviceToHost, cudaExtResObj.stream));
|
||||
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
|
||||
char outputFilename[1024];
|
||||
std::string image_filename = "Grayscale";
|
||||
strcpy(outputFilename, image_filename.c_str());
|
||||
strcpy(outputFilename + image_filename.length(), "_nvsci_out.ppm");
|
||||
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth, imageHeight);
|
||||
printf("Wrote '%s'\n", outputFilename);
|
||||
checkCudaErrors(cudaFreeHost(h_dstImage));
|
||||
checkCudaErrors(
|
||||
cudaMemcpyAsync(h_dstImage, cudaExtResObj.d_outputImage,
|
||||
sizeof(unsigned int) * imageHeight * imageWidth,
|
||||
cudaMemcpyDeviceToHost, cudaExtResObj.stream));
|
||||
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
|
||||
char outputFilename[1024];
|
||||
std::string image_filename = "Grayscale";
|
||||
strcpy(outputFilename, image_filename.c_str());
|
||||
strcpy(outputFilename + image_filename.length(), "_nvsci_out.ppm");
|
||||
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth,
|
||||
imageHeight);
|
||||
printf("Wrote '%s'\n", outputFilename);
|
||||
checkCudaErrors(cudaFreeHost(h_dstImage));
|
||||
#endif
|
||||
}
|
||||
|
||||
static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj, NvSciBufObj& inputBufObj)
|
||||
{
|
||||
NvSciBufModule module = NULL;
|
||||
NvSciBufAttrList attrlist = NULL;
|
||||
NvSciBufAttrKeyValuePair pairArrayOut[10];
|
||||
static void cudaImportNvSciImage(cudaExternalResInterop &cudaExtResObj,
|
||||
NvSciBufObj &inputBufObj) {
|
||||
NvSciBufModule module = NULL;
|
||||
NvSciBufAttrList attrlist = NULL;
|
||||
NvSciBufAttrKeyValuePair pairArrayOut[10];
|
||||
|
||||
checkNvSciErrors(NvSciBufModuleOpen(&module));
|
||||
checkNvSciErrors(NvSciBufAttrListCreate(module, &attrlist));
|
||||
checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &attrlist));
|
||||
checkNvSciErrors(NvSciBufModuleOpen(&module));
|
||||
checkNvSciErrors(NvSciBufAttrListCreate(module, &attrlist));
|
||||
checkNvSciErrors(NvSciBufObjGetAttrList(inputBufObj, &attrlist));
|
||||
|
||||
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10);
|
||||
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * 10);
|
||||
|
||||
int numAttrs = 0;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Size;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneChannelCount;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneCount;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneWidth;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneHeight;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Layout;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;
|
||||
int numAttrs = 0;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Size;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneChannelCount;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneCount;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneWidth;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneHeight;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_Layout;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;
|
||||
pairArrayOut[numAttrs++].key = NvSciBufImageAttrKey_PlaneOffset;
|
||||
|
||||
checkNvSciErrors(NvSciBufAttrListGetAttrs(attrlist, pairArrayOut, numAttrs));
|
||||
checkNvSciErrors(NvSciBufAttrListGetAttrs(attrlist, pairArrayOut, numAttrs));
|
||||
|
||||
uint64_t size = *(uint64_t *)pairArrayOut[0].value;
|
||||
uint8_t channelCount = *(uint8_t *)pairArrayOut[1].value;
|
||||
cudaExtResObj.planeCount = *(int32_t *)pairArrayOut[2].value;
|
||||
cudaExtResObj.imageWidth = (int32_t*) malloc(sizeof(int32_t)*cudaExtResObj.planeCount);
|
||||
cudaExtResObj.imageHeight = (int32_t*) malloc(sizeof(int32_t)*cudaExtResObj.planeCount);
|
||||
uint64_t size = *(uint64_t *)pairArrayOut[0].value;
|
||||
uint8_t channelCount = *(uint8_t *)pairArrayOut[1].value;
|
||||
cudaExtResObj.planeCount = *(int32_t *)pairArrayOut[2].value;
|
||||
cudaExtResObj.imageWidth =
|
||||
(int32_t *)malloc(sizeof(int32_t) * cudaExtResObj.planeCount);
|
||||
cudaExtResObj.imageHeight =
|
||||
(int32_t *)malloc(sizeof(int32_t) * cudaExtResObj.planeCount);
|
||||
cudaExtResObj.planeOffset =
|
||||
(uint64_t *)malloc(sizeof(uint64_t) * cudaExtResObj.planeCount);
|
||||
|
||||
memcpy(cudaExtResObj.imageWidth, (int32_t *)pairArrayOut[3].value, cudaExtResObj.planeCount * sizeof(int32_t));
|
||||
memcpy(cudaExtResObj.imageHeight, (int32_t *)pairArrayOut[4].value, cudaExtResObj.planeCount * sizeof(int32_t));
|
||||
memcpy(cudaExtResObj.imageWidth, (int32_t *)pairArrayOut[3].value,
|
||||
cudaExtResObj.planeCount * sizeof(int32_t));
|
||||
memcpy(cudaExtResObj.imageHeight, (int32_t *)pairArrayOut[4].value,
|
||||
cudaExtResObj.planeCount * sizeof(int32_t));
|
||||
memcpy(cudaExtResObj.planeOffset, (uint64_t *)pairArrayOut[7].value,
|
||||
cudaExtResObj.planeCount * sizeof(uint64_t));
|
||||
|
||||
NvSciBufAttrValImageLayoutType layout = *(NvSciBufAttrValImageLayoutType *)pairArrayOut[5].value;
|
||||
uint32_t bitsPerPixel = *(uint32_t*)pairArrayOut[6].value;
|
||||
NvSciBufAttrValImageLayoutType layout =
|
||||
*(NvSciBufAttrValImageLayoutType *)pairArrayOut[5].value;
|
||||
uint32_t bitsPerPixel = *(uint32_t *)pairArrayOut[6].value;
|
||||
|
||||
if (layout != NvSciBufImage_BlockLinearType) {
|
||||
printf("Image layout is not block linear.. waiving execution\n");
|
||||
exit(EXIT_WAIVED);
|
||||
if (layout != NvSciBufImage_BlockLinearType) {
|
||||
printf("Image layout is not block linear.. waiving execution\n");
|
||||
exit(EXIT_WAIVED);
|
||||
}
|
||||
|
||||
cudaExternalMemoryHandleDesc memHandleDesc;
|
||||
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
|
||||
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
|
||||
memHandleDesc.handle.nvSciBufObject = inputBufObj;
|
||||
memHandleDesc.size = size;
|
||||
checkCudaErrors(
|
||||
cudaImportExternalMemory(&cudaExtResObj.extMemImageBuf, &memHandleDesc));
|
||||
|
||||
cudaExtResObj.d_mipmapArray = (cudaMipmappedArray_t *)malloc(
|
||||
sizeof(cudaMipmappedArray_t) * cudaExtResObj.planeCount);
|
||||
|
||||
for (int i = 0; i < cudaExtResObj.planeCount; i++) {
|
||||
cudaExtent extent = {};
|
||||
memset(&extent, 0, sizeof(extent));
|
||||
extent.width = cudaExtResObj.imageWidth[i];
|
||||
extent.height = cudaExtResObj.imageHeight[i];
|
||||
extent.depth = 0;
|
||||
cudaChannelFormatDesc desc;
|
||||
switch (channelCount) {
|
||||
case 1:
|
||||
default:
|
||||
desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0,
|
||||
cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
case 2:
|
||||
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0,
|
||||
cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
case 3:
|
||||
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel,
|
||||
0, cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
case 4:
|
||||
desc =
|
||||
cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel,
|
||||
bitsPerPixel, cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
}
|
||||
|
||||
cudaExternalMemoryHandleDesc memHandleDesc;
|
||||
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
|
||||
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
|
||||
memHandleDesc.handle.nvSciBufObject = inputBufObj;
|
||||
memHandleDesc.size = size;
|
||||
checkCudaErrors(cudaImportExternalMemory(&cudaExtResObj.extMemImageBuf, &memHandleDesc));
|
||||
|
||||
cudaExtResObj.d_mipmapArray = (cudaMipmappedArray_t*) malloc(sizeof(cudaMipmappedArray_t) * cudaExtResObj.planeCount);
|
||||
|
||||
for (int i = 0; i < cudaExtResObj.planeCount; i++) {
|
||||
cudaExtent extent = {};
|
||||
memset(&extent, 0, sizeof(extent));
|
||||
extent.width = cudaExtResObj.imageWidth[i];
|
||||
extent.height = cudaExtResObj.imageHeight[i];
|
||||
extent.depth = 0;
|
||||
cudaChannelFormatDesc desc;
|
||||
switch (channelCount) {
|
||||
case 1:
|
||||
default:
|
||||
desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
case 2:
|
||||
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0, cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
case 3:
|
||||
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel, 0, cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
case 4:
|
||||
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, bitsPerPixel, bitsPerPixel, cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
}
|
||||
|
||||
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
|
||||
mipmapDesc.offset = 0;
|
||||
mipmapDesc.formatDesc = desc;
|
||||
mipmapDesc.extent = extent;
|
||||
mipmapDesc.flags = 0;
|
||||
mipmapDesc.numLevels = 1;
|
||||
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&cudaExtResObj.d_mipmapArray[i], cudaExtResObj.extMemImageBuf, &mipmapDesc));
|
||||
}
|
||||
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
|
||||
mipmapDesc.offset = cudaExtResObj.planeOffset[i];
|
||||
mipmapDesc.formatDesc = desc;
|
||||
mipmapDesc.extent = extent;
|
||||
mipmapDesc.flags = 0;
|
||||
mipmapDesc.numLevels = 1;
|
||||
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(
|
||||
&cudaExtResObj.d_mipmapArray[i], cudaExtResObj.extMemImageBuf,
|
||||
&mipmapDesc));
|
||||
}
|
||||
}
|
||||
|
||||
static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray)
|
||||
{
|
||||
cudaResourceDesc resourceDesc;
|
||||
memset(&resourceDesc, 0, sizeof(resourceDesc));
|
||||
resourceDesc.resType = cudaResourceTypeArray;
|
||||
resourceDesc.res.array.array = d_mipLevelArray;
|
||||
static cudaSurfaceObject_t createCudaSurface(cudaArray_t &d_mipLevelArray) {
|
||||
cudaResourceDesc resourceDesc;
|
||||
memset(&resourceDesc, 0, sizeof(resourceDesc));
|
||||
resourceDesc.resType = cudaResourceTypeArray;
|
||||
resourceDesc.res.array.array = d_mipLevelArray;
|
||||
|
||||
cudaSurfaceObject_t surfaceObject;
|
||||
checkCudaErrors(cudaCreateSurfaceObject(&surfaceObject, &resourceDesc));
|
||||
return surfaceObject;
|
||||
cudaSurfaceObject_t surfaceObject;
|
||||
checkCudaErrors(cudaCreateSurfaceObject(&surfaceObject, &resourceDesc));
|
||||
return surfaceObject;
|
||||
}
|
||||
|
||||
static cudaStream_t createCudaStream(int deviceId)
|
||||
{
|
||||
checkCudaErrors(cudaSetDevice(deviceId));
|
||||
cudaStream_t stream;
|
||||
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
return stream;
|
||||
static cudaStream_t createCudaStream(int deviceId) {
|
||||
checkCudaErrors(cudaSetDevice(deviceId));
|
||||
cudaStream_t stream;
|
||||
checkCudaErrors(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
return stream;
|
||||
}
|
||||
|
||||
// CUDA setup buffers/synchronization objects for interop via NvSci API.
|
||||
void setupCuda(cudaExternalResInterop& cudaExtResObj, NvSciBufObj& inputBufObj,
|
||||
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj, int deviceId)
|
||||
{
|
||||
checkCudaErrors(cudaSetDevice(deviceId));
|
||||
cudaImportNvSciSync(cudaExtResObj.waitSem, syncObj);
|
||||
cudaImportNvSciSync(cudaExtResObj.signalSem, cudaSignalerSyncObj);
|
||||
void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj,
|
||||
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj,
|
||||
int deviceId) {
|
||||
checkCudaErrors(cudaSetDevice(deviceId));
|
||||
cudaImportNvSciSync(cudaExtResObj.waitSem, syncObj);
|
||||
cudaImportNvSciSync(cudaExtResObj.signalSem, cudaSignalerSyncObj);
|
||||
|
||||
cudaImportNvSciImage(cudaExtResObj, inputBufObj);
|
||||
cudaExtResObj.d_mipLevelArray = (cudaArray_t *) malloc(sizeof(cudaArray_t) * cudaExtResObj.planeCount);
|
||||
cudaExtResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *) malloc(sizeof(cudaSurfaceObject_t) * cudaExtResObj.planeCount);
|
||||
cudaImportNvSciImage(cudaExtResObj, inputBufObj);
|
||||
cudaExtResObj.d_mipLevelArray =
|
||||
(cudaArray_t *)malloc(sizeof(cudaArray_t) * cudaExtResObj.planeCount);
|
||||
cudaExtResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *)malloc(
|
||||
sizeof(cudaSurfaceObject_t) * cudaExtResObj.planeCount);
|
||||
|
||||
for (int i = 0; i < cudaExtResObj.planeCount; ++i) {
|
||||
uint32_t mipLevelId = 0;
|
||||
checkCudaErrors(cudaGetMipmappedArrayLevel(&cudaExtResObj.d_mipLevelArray[i], cudaExtResObj.d_mipmapArray[i], mipLevelId));
|
||||
cudaExtResObj.cudaSurfaceNvmediaBuf[i] = createCudaSurface(cudaExtResObj.d_mipLevelArray[i]);
|
||||
}
|
||||
for (int i = 0; i < cudaExtResObj.planeCount; ++i) {
|
||||
uint32_t mipLevelId = 0;
|
||||
checkCudaErrors(
|
||||
cudaGetMipmappedArrayLevel(&cudaExtResObj.d_mipLevelArray[i],
|
||||
cudaExtResObj.d_mipmapArray[i], mipLevelId));
|
||||
cudaExtResObj.cudaSurfaceNvmediaBuf[i] =
|
||||
createCudaSurface(cudaExtResObj.d_mipLevelArray[i]);
|
||||
}
|
||||
|
||||
cudaExtResObj.stream = createCudaStream(deviceId);
|
||||
checkCudaErrors(cudaMalloc(&cudaExtResObj.d_outputImage, sizeof(unsigned int) * cudaExtResObj.imageWidth[0] * cudaExtResObj.imageHeight[0]));
|
||||
cudaExtResObj.stream = createCudaStream(deviceId);
|
||||
checkCudaErrors(cudaMalloc(&cudaExtResObj.d_outputImage,
|
||||
sizeof(unsigned int) *
|
||||
cudaExtResObj.imageWidth[0] *
|
||||
cudaExtResObj.imageHeight[0]));
|
||||
}
|
||||
|
||||
// CUDA clean up buffers used **with** NvSci API.
|
||||
void cleanupCuda(cudaExternalResInterop& cudaExtResObj)
|
||||
{
|
||||
for (int i=0; i < cudaExtResObj.planeCount; i++) {
|
||||
checkCudaErrors(cudaDestroySurfaceObject(cudaExtResObj.cudaSurfaceNvmediaBuf[i]));
|
||||
checkCudaErrors(cudaFreeMipmappedArray(cudaExtResObj.d_mipmapArray[i]));
|
||||
}
|
||||
free(cudaExtResObj.d_mipmapArray);
|
||||
free(cudaExtResObj.d_mipLevelArray);
|
||||
free(cudaExtResObj.cudaSurfaceNvmediaBuf);
|
||||
free(cudaExtResObj.imageWidth);
|
||||
free(cudaExtResObj.imageHeight);
|
||||
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.waitSem));
|
||||
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.signalSem));
|
||||
checkCudaErrors(cudaDestroyExternalMemory(cudaExtResObj.extMemImageBuf));
|
||||
checkCudaErrors(cudaStreamDestroy(cudaExtResObj.stream));
|
||||
checkCudaErrors(cudaFree(cudaExtResObj.d_outputImage));
|
||||
void cleanupCuda(cudaExternalResInterop &cudaExtResObj) {
|
||||
for (int i = 0; i < cudaExtResObj.planeCount; i++) {
|
||||
checkCudaErrors(
|
||||
cudaDestroySurfaceObject(cudaExtResObj.cudaSurfaceNvmediaBuf[i]));
|
||||
checkCudaErrors(cudaFreeMipmappedArray(cudaExtResObj.d_mipmapArray[i]));
|
||||
}
|
||||
free(cudaExtResObj.d_mipmapArray);
|
||||
free(cudaExtResObj.d_mipLevelArray);
|
||||
free(cudaExtResObj.cudaSurfaceNvmediaBuf);
|
||||
free(cudaExtResObj.imageWidth);
|
||||
free(cudaExtResObj.imageHeight);
|
||||
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.waitSem));
|
||||
checkCudaErrors(cudaDestroyExternalSemaphore(cudaExtResObj.signalSem));
|
||||
checkCudaErrors(cudaDestroyExternalMemory(cudaExtResObj.extMemImageBuf));
|
||||
checkCudaErrors(cudaStreamDestroy(cudaExtResObj.stream));
|
||||
checkCudaErrors(cudaFree(cudaExtResObj.d_outputImage));
|
||||
}
|
||||
|
||||
void runCudaOperation(cudaExternalResInterop& cudaExtResObj, NvSciSyncFence *cudaWaitFence,
|
||||
NvSciSyncFence *cudaSignalFence, int deviceId, int iterations)
|
||||
{
|
||||
checkCudaErrors(cudaSetDevice(deviceId));
|
||||
static int64_t launch = 0;
|
||||
void runCudaOperation(cudaExternalResInterop &cudaExtResObj,
|
||||
NvSciSyncFence *cudaWaitFence,
|
||||
NvSciSyncFence *cudaSignalFence, int deviceId,
|
||||
int iterations) {
|
||||
checkCudaErrors(cudaSetDevice(deviceId));
|
||||
static int64_t launch = 0;
|
||||
|
||||
waitExternalSemaphore(cudaExtResObj.waitSem, cudaWaitFence, cudaExtResObj.stream);
|
||||
waitExternalSemaphore(cudaExtResObj.waitSem, cudaWaitFence,
|
||||
cudaExtResObj.stream);
|
||||
|
||||
// run cuda kernel over surface object of the LUMA surface part to extract grayscale.
|
||||
yuvToGrayscaleCudaKernel(cudaExtResObj, cudaExtResObj.imageWidth[0], cudaExtResObj.imageHeight[0]);
|
||||
// run cuda kernel over surface object of the LUMA surface part to extract
|
||||
// grayscale.
|
||||
yuvToGrayscaleCudaKernel(cudaExtResObj, cudaExtResObj.imageWidth[0],
|
||||
cudaExtResObj.imageHeight[0]);
|
||||
|
||||
// signal fence till the second last iterations for NvMedia2DBlit to wait for cuda signal
|
||||
// and for final iteration as there is no corresponding NvMedia operation pending
|
||||
// therefore we end with cudaStreamSynchronize()
|
||||
if (launch < iterations-1) {
|
||||
signalExternalSemaphore(cudaExtResObj.signalSem, cudaSignalFence, cudaExtResObj.stream);
|
||||
}
|
||||
else {
|
||||
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
|
||||
}
|
||||
launch++;
|
||||
// signal fence till the second last iterations for NvMedia2DBlit to wait for
|
||||
// cuda signal and for final iteration as there is no corresponding NvMedia
|
||||
// operation pending therefore we end with cudaStreamSynchronize()
|
||||
if (launch < iterations - 1) {
|
||||
signalExternalSemaphore(cudaExtResObj.signalSem, cudaSignalFence,
|
||||
cudaExtResObj.stream);
|
||||
} else {
|
||||
checkCudaErrors(cudaStreamSynchronize(cudaExtResObj.stream));
|
||||
}
|
||||
launch++;
|
||||
}
|
||||
|
||||
// CUDA imports and operates on NvSci buffer/synchronization objects
|
||||
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId)
|
||||
{
|
||||
checkCudaErrors(cudaSetDevice(deviceId));
|
||||
cudaResObj.d_yuvArray = (cudaArray_t *) malloc(sizeof(cudaArray_t) * ctx->numSurfaces);
|
||||
cudaResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t*) malloc(sizeof(cudaSurfaceObject_t) * ctx->numSurfaces);
|
||||
cudaChannelFormatDesc channelDesc;
|
||||
switch (ctx->bytesPerPixel) {
|
||||
case 1:
|
||||
default:
|
||||
channelDesc = cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
}
|
||||
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId) {
|
||||
checkCudaErrors(cudaSetDevice(deviceId));
|
||||
cudaResObj.d_yuvArray =
|
||||
(cudaArray_t *)malloc(sizeof(cudaArray_t) * ctx->numSurfaces);
|
||||
cudaResObj.cudaSurfaceNvmediaBuf = (cudaSurfaceObject_t *)malloc(
|
||||
sizeof(cudaSurfaceObject_t) * ctx->numSurfaces);
|
||||
cudaChannelFormatDesc channelDesc;
|
||||
switch (ctx->bytesPerPixel) {
|
||||
case 1:
|
||||
default:
|
||||
channelDesc =
|
||||
cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||
break;
|
||||
}
|
||||
|
||||
for(int k = 0; k < ctx->numSurfaces; k++) {
|
||||
checkCudaErrors(cudaMallocArray(&cudaResObj.d_yuvArray[k], &channelDesc, ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
|
||||
ctx->heightSurface * ctx->yScalePtr[k]));
|
||||
cudaResObj.cudaSurfaceNvmediaBuf[k] = createCudaSurface(cudaResObj.d_yuvArray[k]);
|
||||
}
|
||||
checkCudaErrors(cudaMalloc(&cudaResObj.d_outputImage, sizeof(unsigned int) * ctx->widthSurface * ctx->heightSurface));
|
||||
for (int k = 0; k < ctx->numSurfaces; k++) {
|
||||
checkCudaErrors(cudaMallocArray(
|
||||
&cudaResObj.d_yuvArray[k], &channelDesc,
|
||||
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
|
||||
ctx->heightSurface * ctx->yScalePtr[k]));
|
||||
cudaResObj.cudaSurfaceNvmediaBuf[k] =
|
||||
createCudaSurface(cudaResObj.d_yuvArray[k]);
|
||||
}
|
||||
checkCudaErrors(cudaMalloc(
|
||||
&cudaResObj.d_outputImage,
|
||||
sizeof(unsigned int) * ctx->widthSurface * ctx->heightSurface));
|
||||
|
||||
cudaResObj.stream = createCudaStream(deviceId);
|
||||
cudaResObj.stream = createCudaStream(deviceId);
|
||||
}
|
||||
|
||||
// CUDA clean up buffers used **without** NvSci API.
|
||||
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj)
|
||||
{
|
||||
for(int k = 0; k < ctx->numSurfaces; k++) {
|
||||
checkCudaErrors(cudaDestroySurfaceObject(cudaResObj.cudaSurfaceNvmediaBuf[k]));
|
||||
checkCudaErrors(cudaFreeArray(cudaResObj.d_yuvArray[k]));
|
||||
}
|
||||
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj) {
|
||||
for (int k = 0; k < ctx->numSurfaces; k++) {
|
||||
checkCudaErrors(
|
||||
cudaDestroySurfaceObject(cudaResObj.cudaSurfaceNvmediaBuf[k]));
|
||||
checkCudaErrors(cudaFreeArray(cudaResObj.d_yuvArray[k]));
|
||||
}
|
||||
|
||||
free(cudaResObj.cudaSurfaceNvmediaBuf);
|
||||
free(cudaResObj.cudaSurfaceNvmediaBuf);
|
||||
|
||||
checkCudaErrors(cudaStreamDestroy(cudaResObj.stream));
|
||||
checkCudaErrors(cudaFree(cudaResObj.d_outputImage));
|
||||
checkCudaErrors(cudaStreamDestroy(cudaResObj.stream));
|
||||
checkCudaErrors(cudaFree(cudaResObj.d_outputImage));
|
||||
}
|
||||
|
||||
static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj, int deviceId, int32_t imageWidth, int32_t imageHeight)
|
||||
{
|
||||
static void yuvToGrayscaleCudaKernelNonNvSci(cudaResources &cudaResObj,
|
||||
int deviceId, int32_t imageWidth,
|
||||
int32_t imageHeight) {
|
||||
#if WRITE_OUTPUT_IMAGE
|
||||
unsigned int *h_dstImage;
|
||||
checkCudaErrors(cudaMallocHost(&h_dstImage, sizeof(unsigned int)*imageHeight*imageWidth));
|
||||
unsigned int *h_dstImage;
|
||||
checkCudaErrors(cudaMallocHost(
|
||||
&h_dstImage, sizeof(unsigned int) * imageHeight * imageWidth));
|
||||
#endif
|
||||
dim3 block(16, 16, 1);
|
||||
dim3 grid((imageWidth/block.x)+1, (imageHeight/block.y)+1, 1);
|
||||
dim3 block(16, 16, 1);
|
||||
dim3 grid((imageWidth / block.x) + 1, (imageHeight / block.y) + 1, 1);
|
||||
|
||||
yuvToGrayscale<<<grid, block, 0, cudaResObj.stream>>>(cudaResObj.cudaSurfaceNvmediaBuf[0], cudaResObj.d_outputImage, imageWidth, imageHeight);
|
||||
yuvToGrayscale<<<grid, block, 0, cudaResObj.stream>>>(
|
||||
cudaResObj.cudaSurfaceNvmediaBuf[0], cudaResObj.d_outputImage, imageWidth,
|
||||
imageHeight);
|
||||
|
||||
#if WRITE_OUTPUT_IMAGE
|
||||
checkCudaErrors(cudaMemcpyAsync(h_dstImage, cudaResObj.d_outputImage, sizeof(unsigned int)*imageHeight*imageWidth, cudaMemcpyDeviceToHost, cudaResObj.stream));
|
||||
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
|
||||
char outputFilename[1024];
|
||||
std::string image_filename = "Grayscale";
|
||||
strcpy(outputFilename, image_filename.c_str());
|
||||
strcpy(outputFilename + image_filename.length(), "_non-nvsci_out.ppm");
|
||||
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth, imageHeight);
|
||||
printf("Wrote '%s'\n", outputFilename);
|
||||
checkCudaErrors(cudaFreeHost(h_dstImage));
|
||||
checkCudaErrors(
|
||||
cudaMemcpyAsync(h_dstImage, cudaResObj.d_outputImage,
|
||||
sizeof(unsigned int) * imageHeight * imageWidth,
|
||||
cudaMemcpyDeviceToHost, cudaResObj.stream));
|
||||
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
|
||||
char outputFilename[1024];
|
||||
std::string image_filename = "Grayscale";
|
||||
strcpy(outputFilename, image_filename.c_str());
|
||||
strcpy(outputFilename + image_filename.length(), "_non-nvsci_out.ppm");
|
||||
sdkSavePPM4ub(outputFilename, (unsigned char *)h_dstImage, imageWidth,
|
||||
imageHeight);
|
||||
printf("Wrote '%s'\n", outputFilename);
|
||||
checkCudaErrors(cudaFreeHost(h_dstImage));
|
||||
#else
|
||||
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
|
||||
checkCudaErrors(cudaStreamSynchronize(cudaResObj.stream));
|
||||
#endif
|
||||
}
|
||||
|
||||
// CUDA operates **without** NvSci APIs buffer/synchronization objects.
|
||||
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId)
|
||||
{
|
||||
for(int k = 0; k < ctx->numSurfaces; k++) {
|
||||
checkCudaErrors(cudaMemcpy2DToArray(cudaResObj.d_yuvArray[k], 0, 0, ctx->dstBuff[k],
|
||||
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
|
||||
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
|
||||
ctx->heightSurface * ctx->yScalePtr[k], cudaMemcpyHostToDevice));
|
||||
}
|
||||
// run cuda kernel over surface object of the LUMA surface part to extract grayscale.
|
||||
yuvToGrayscaleCudaKernelNonNvSci(cudaResObj, deviceId, ctx->widthSurface, ctx->heightSurface);
|
||||
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj,
|
||||
int deviceId) {
|
||||
for (int k = 0; k < ctx->numSurfaces; k++) {
|
||||
checkCudaErrors(cudaMemcpy2DToArray(
|
||||
cudaResObj.d_yuvArray[k], 0, 0, ctx->dstBuff[k],
|
||||
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
|
||||
ctx->widthSurface * ctx->xScalePtr[k] * ctx->bytesPerPixel,
|
||||
ctx->heightSurface * ctx->yScalePtr[k], cudaMemcpyHostToDevice));
|
||||
}
|
||||
// run cuda kernel over surface object of the LUMA surface part to extract
|
||||
// grayscale.
|
||||
yuvToGrayscaleCudaKernelNonNvSci(cudaResObj, deviceId, ctx->widthSurface,
|
||||
ctx->heightSurface);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __CUDA_BUFIMPORT_KERNEL_H__
|
||||
#define __CUDA_BUFIMPORT_KERNEL_H__
|
||||
|
||||
|
@ -35,38 +34,39 @@
|
|||
#include "nvscisync.h"
|
||||
#include "nvmedia_utils/cmdline.h"
|
||||
|
||||
struct cudaExternalResInterop
|
||||
{
|
||||
cudaMipmappedArray_t *d_mipmapArray;
|
||||
cudaArray_t *d_mipLevelArray;
|
||||
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
|
||||
cudaStream_t stream;
|
||||
cudaExternalMemory_t extMemImageBuf;
|
||||
cudaExternalSemaphore_t waitSem;
|
||||
cudaExternalSemaphore_t signalSem;
|
||||
struct cudaExternalResInterop {
|
||||
cudaMipmappedArray_t *d_mipmapArray;
|
||||
cudaArray_t *d_mipLevelArray;
|
||||
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
|
||||
cudaStream_t stream;
|
||||
cudaExternalMemory_t extMemImageBuf;
|
||||
cudaExternalSemaphore_t waitSem;
|
||||
cudaExternalSemaphore_t signalSem;
|
||||
|
||||
int32_t planeCount;
|
||||
int32_t *imageWidth;
|
||||
int32_t *imageHeight;
|
||||
unsigned int *d_outputImage;
|
||||
int32_t planeCount;
|
||||
uint64_t *planeOffset;
|
||||
int32_t *imageWidth;
|
||||
int32_t *imageHeight;
|
||||
unsigned int *d_outputImage;
|
||||
};
|
||||
|
||||
struct cudaResources
|
||||
{
|
||||
cudaArray_t *d_yuvArray;
|
||||
cudaStream_t stream;
|
||||
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
|
||||
unsigned int *d_outputImage;
|
||||
struct cudaResources {
|
||||
cudaArray_t *d_yuvArray;
|
||||
cudaStream_t stream;
|
||||
cudaSurfaceObject_t *cudaSurfaceNvmediaBuf;
|
||||
unsigned int *d_outputImage;
|
||||
};
|
||||
|
||||
void runCudaOperation(cudaExternalResInterop& cudaExtResObj, NvSciSyncFence *fence,
|
||||
NvSciSyncFence *cudaSignalfence, int deviceId, int iterations);
|
||||
void runCudaOperation(cudaExternalResInterop &cudaExtResObj,
|
||||
NvSciSyncFence *fence, NvSciSyncFence *cudaSignalfence,
|
||||
int deviceId, int iterations);
|
||||
void runCudaOperation(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId);
|
||||
|
||||
void setupCuda(cudaExternalResInterop& cudaExtResObj, NvSciBufObj& inputBufObj,
|
||||
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj, int deviceId);
|
||||
void setupCuda(cudaExternalResInterop &cudaExtResObj, NvSciBufObj &inputBufObj,
|
||||
NvSciSyncObj &syncObj, NvSciSyncObj &cudaSignalerSyncObj,
|
||||
int deviceId);
|
||||
void setupCuda(Blit2DTest *ctx, cudaResources &cudaResObj, int deviceId);
|
||||
void cleanupCuda(cudaExternalResInterop& cudaObjs);
|
||||
void cleanupCuda(cudaExternalResInterop &cudaObjs);
|
||||
void cleanupCuda(Blit2DTest *ctx, cudaResources &cudaResObj);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -55,144 +55,156 @@
|
|||
fflush(stdout); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} while (0)
|
||||
} while (0)
|
||||
|
||||
static void cleanup(Blit2DTest* ctx, NvMediaStatus status)
|
||||
{
|
||||
if (ctx->i2d != NULL) {
|
||||
NvMedia2DDestroy(ctx->i2d);
|
||||
}
|
||||
static void cleanup(Blit2DTest* ctx, NvMediaStatus status) {
|
||||
if (ctx->i2d != NULL) {
|
||||
NvMedia2DDestroy(ctx->i2d);
|
||||
}
|
||||
|
||||
if (ctx->device != NULL) {
|
||||
NvMediaDeviceDestroy(ctx->device);
|
||||
}
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (ctx->device != NULL) {
|
||||
NvMediaDeviceDestroy(ctx->device);
|
||||
}
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
TestArgs args;
|
||||
Blit2DTest ctx;
|
||||
NvMediaStatus status = NVMEDIA_STATUS_ERROR;
|
||||
NvSciSyncFence nvMediaSignalerFence = NvSciSyncFenceInitializer;
|
||||
NvSciSyncFence cudaSignalerFence = NvSciSyncFenceInitializer;
|
||||
|
||||
int main (int argc, char *argv[])
|
||||
{
|
||||
TestArgs args;
|
||||
Blit2DTest ctx;
|
||||
NvMediaStatus status = NVMEDIA_STATUS_ERROR;
|
||||
NvSciSyncFence nvMediaSignalerFence = NvSciSyncFenceInitializer;
|
||||
NvSciSyncFence cudaSignalerFence = NvSciSyncFenceInitializer;
|
||||
int cudaDeviceId;
|
||||
uint64_t startTime, endTime;
|
||||
uint64_t operationStartTime, operationEndTime;
|
||||
double processingTime;
|
||||
|
||||
int cudaDeviceId;
|
||||
uint64_t startTime, endTime;
|
||||
uint64_t operationStartTime, operationEndTime;
|
||||
double processingTime;
|
||||
/* Read configuration from command line and config file */
|
||||
memset(&args, 0, sizeof(TestArgs));
|
||||
memset(&ctx, 0, sizeof(Blit2DTest));
|
||||
|
||||
/* Read configuration from command line and config file */
|
||||
memset(&args, 0, sizeof(TestArgs));
|
||||
memset(&ctx, 0, sizeof(Blit2DTest));
|
||||
|
||||
/* ParseArgs parses the command line and the 2D configuration file and populates all initParams
|
||||
* and run time configuration in to appropriate structures within args
|
||||
*/
|
||||
if (ParseArgs(argc, argv, &args)) {
|
||||
PrintUsage();
|
||||
return -1;
|
||||
}
|
||||
/* Check version */
|
||||
NvMediaVersion version;
|
||||
status = NvMedia2DGetVersion(&version);
|
||||
if (status == NVMEDIA_STATUS_OK) {
|
||||
printf("Library version: %u.%u\n", version.major, version.minor);
|
||||
printf("Header version: %u.%u\n", NVMEDIA_2D_VERSION_MAJOR, NVMEDIA_2D_VERSION_MINOR);
|
||||
if ((version.major != NVMEDIA_2D_VERSION_MAJOR) ||
|
||||
(version.minor != NVMEDIA_2D_VERSION_MINOR)) {
|
||||
printf("Library and Header mismatch!\n");
|
||||
cleanup(&ctx, status);
|
||||
}
|
||||
/* ParseArgs parses the command line and the 2D configuration file and
|
||||
* populates all initParams and run time configuration in to appropriate
|
||||
* structures within args
|
||||
*/
|
||||
if (ParseArgs(argc, argv, &args)) {
|
||||
PrintUsage();
|
||||
return -1;
|
||||
}
|
||||
/* Check version */
|
||||
NvMediaVersion version;
|
||||
status = NvMedia2DGetVersion(&version);
|
||||
if (status == NVMEDIA_STATUS_OK) {
|
||||
printf("Library version: %u.%u\n", version.major, version.minor);
|
||||
printf("Header version: %u.%u\n", NVMEDIA_2D_VERSION_MAJOR,
|
||||
NVMEDIA_2D_VERSION_MINOR);
|
||||
if ((version.major != NVMEDIA_2D_VERSION_MAJOR) ||
|
||||
(version.minor != NVMEDIA_2D_VERSION_MINOR)) {
|
||||
printf("Library and Header mismatch!\n");
|
||||
cleanup(&ctx, status);
|
||||
}
|
||||
}
|
||||
|
||||
// Create NvMedia device
|
||||
ctx.device = NvMediaDeviceCreate();
|
||||
if(!ctx.device) {
|
||||
printf("%s: Failed to create NvMedia device\n", __func__);
|
||||
cleanup(&ctx, status);
|
||||
}
|
||||
// Create NvMedia device
|
||||
ctx.device = NvMediaDeviceCreate();
|
||||
if (!ctx.device) {
|
||||
printf("%s: Failed to create NvMedia device\n", __func__);
|
||||
cleanup(&ctx, status);
|
||||
}
|
||||
|
||||
// Create 2D blitter
|
||||
ctx.i2d = NvMedia2DCreate(ctx.device);
|
||||
if(!ctx.i2d) {
|
||||
printf("%s: Failed to create NvMedia 2D i2d\n", __func__);
|
||||
cleanup(&ctx, status);
|
||||
}
|
||||
// Create 2D blitter
|
||||
ctx.i2d = NvMedia2DCreate(ctx.device);
|
||||
if (!ctx.i2d) {
|
||||
printf("%s: Failed to create NvMedia 2D i2d\n", __func__);
|
||||
cleanup(&ctx, status);
|
||||
}
|
||||
|
||||
cudaDeviceId = findCudaDevice(argc, (const char**)argv);
|
||||
cudaDeviceId = findCudaDevice(argc, (const char**)argv);
|
||||
|
||||
// NvMedia-CUDA operations without NvSCI APIs starts
|
||||
cudaResources cudaResObj;
|
||||
GetTimeMicroSec(&startTime);
|
||||
setupNvMedia(&args, &ctx);
|
||||
setupCuda(&ctx, cudaResObj, cudaDeviceId);
|
||||
// NvMedia-CUDA operations without NvSCI APIs starts
|
||||
cudaResources cudaResObj;
|
||||
GetTimeMicroSec(&startTime);
|
||||
setupNvMedia(&args, &ctx);
|
||||
setupCuda(&ctx, cudaResObj, cudaDeviceId);
|
||||
|
||||
GetTimeMicroSec(&operationStartTime);
|
||||
for (int i = 0; i < args.iterations; i++)
|
||||
{
|
||||
runNvMediaBlit2D(&args, &ctx);
|
||||
runCudaOperation(&ctx, cudaResObj, cudaDeviceId);
|
||||
}
|
||||
GetTimeMicroSec(&operationEndTime);
|
||||
GetTimeMicroSec(&operationStartTime);
|
||||
for (int i = 0; i < args.iterations; i++) {
|
||||
runNvMediaBlit2D(&args, &ctx);
|
||||
runCudaOperation(&ctx, cudaResObj, cudaDeviceId);
|
||||
}
|
||||
GetTimeMicroSec(&operationEndTime);
|
||||
|
||||
cleanupNvMedia(&ctx);
|
||||
cleanupCuda(&ctx, cudaResObj);
|
||||
GetTimeMicroSec(&endTime);
|
||||
// NvMedia-CUDA operations without NvSCI APIs ends
|
||||
cleanupNvMedia(&ctx);
|
||||
cleanupCuda(&ctx, cudaResObj);
|
||||
GetTimeMicroSec(&endTime);
|
||||
// NvMedia-CUDA operations without NvSCI APIs ends
|
||||
|
||||
processingTime = (double)(operationEndTime - operationStartTime)/1000.0;
|
||||
printf("Overall Processing time of NvMedia-CUDA Operations without NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations);
|
||||
processingTime = (double)(endTime - startTime)/1000.0;
|
||||
printf("Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup without NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations);
|
||||
processingTime = (double)(operationEndTime - operationStartTime) / 1000.0;
|
||||
printf(
|
||||
"Overall Processing time of NvMedia-CUDA Operations without NvSCI APIs "
|
||||
"%.4f ms with %zu iterations\n",
|
||||
processingTime, args.iterations);
|
||||
processingTime = (double)(endTime - startTime) / 1000.0;
|
||||
printf(
|
||||
"Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup "
|
||||
"without NvSCI APIs %.4f ms with %zu iterations\n",
|
||||
processingTime, args.iterations);
|
||||
|
||||
NvSciBufObj dstNvSciBufobj, srcNvSciBufobj;
|
||||
NvSciSyncObj nvMediaSignalerSyncObj, cudaSignalerSyncObj;
|
||||
cudaExternalResInterop cudaExtResObj;
|
||||
// NvMedia-CUDA operations via interop with NvSCI APIs starts
|
||||
GetTimeMicroSec(&startTime);
|
||||
setupNvMediaSignalerNvSciSync(&ctx, nvMediaSignalerSyncObj, cudaDeviceId);
|
||||
setupCudaSignalerNvSciSync(&ctx, cudaSignalerSyncObj, cudaDeviceId);
|
||||
setupNvMedia(&args, &ctx, srcNvSciBufobj, dstNvSciBufobj, nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId);
|
||||
setupCuda(cudaExtResObj, dstNvSciBufobj, nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId);
|
||||
NvSciBufObj dstNvSciBufobj, srcNvSciBufobj;
|
||||
NvSciSyncObj nvMediaSignalerSyncObj, cudaSignalerSyncObj;
|
||||
cudaExternalResInterop cudaExtResObj;
|
||||
// NvMedia-CUDA operations via interop with NvSCI APIs starts
|
||||
GetTimeMicroSec(&startTime);
|
||||
setupNvMediaSignalerNvSciSync(&ctx, nvMediaSignalerSyncObj, cudaDeviceId);
|
||||
setupCudaSignalerNvSciSync(&ctx, cudaSignalerSyncObj, cudaDeviceId);
|
||||
setupNvMedia(&args, &ctx, srcNvSciBufobj, dstNvSciBufobj,
|
||||
nvMediaSignalerSyncObj, cudaSignalerSyncObj, cudaDeviceId);
|
||||
setupCuda(cudaExtResObj, dstNvSciBufobj, nvMediaSignalerSyncObj,
|
||||
cudaSignalerSyncObj, cudaDeviceId);
|
||||
|
||||
GetTimeMicroSec(&operationStartTime);
|
||||
for (int i = 0; i < args.iterations; i++)
|
||||
{
|
||||
runNvMediaBlit2D(&args, &ctx, nvMediaSignalerSyncObj, &cudaSignalerFence, &nvMediaSignalerFence);
|
||||
runCudaOperation(cudaExtResObj, &nvMediaSignalerFence, &cudaSignalerFence, cudaDeviceId, args.iterations);
|
||||
}
|
||||
GetTimeMicroSec(&operationEndTime);
|
||||
GetTimeMicroSec(&operationStartTime);
|
||||
for (int i = 0; i < args.iterations; i++) {
|
||||
runNvMediaBlit2D(&args, &ctx, nvMediaSignalerSyncObj, &cudaSignalerFence,
|
||||
&nvMediaSignalerFence);
|
||||
runCudaOperation(cudaExtResObj, &nvMediaSignalerFence, &cudaSignalerFence,
|
||||
cudaDeviceId, args.iterations);
|
||||
}
|
||||
GetTimeMicroSec(&operationEndTime);
|
||||
|
||||
cleanupNvMedia(&ctx, nvMediaSignalerSyncObj, cudaSignalerSyncObj);
|
||||
cleanupCuda(cudaExtResObj);
|
||||
cleanupNvSciSync(nvMediaSignalerSyncObj);
|
||||
cleanupNvSciSync(cudaSignalerSyncObj);
|
||||
cleanupNvSciBuf(srcNvSciBufobj);
|
||||
cleanupNvSciBuf(dstNvSciBufobj);
|
||||
GetTimeMicroSec(&endTime);
|
||||
// NvMedia-CUDA operations via interop with NvSCI APIs ends
|
||||
cleanupNvMedia(&ctx, nvMediaSignalerSyncObj, cudaSignalerSyncObj);
|
||||
cleanupCuda(cudaExtResObj);
|
||||
cleanupNvSciSync(nvMediaSignalerSyncObj);
|
||||
cleanupNvSciSync(cudaSignalerSyncObj);
|
||||
cleanupNvSciBuf(srcNvSciBufobj);
|
||||
cleanupNvSciBuf(dstNvSciBufobj);
|
||||
GetTimeMicroSec(&endTime);
|
||||
// NvMedia-CUDA operations via interop with NvSCI APIs ends
|
||||
|
||||
processingTime = (double)(operationEndTime - operationStartTime)/1000.0;
|
||||
printf("Overall Processing time of NvMedia-CUDA Operations with NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations);
|
||||
processingTime = (double)(endTime - startTime)/1000.0;
|
||||
printf("Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup with NvSCI APIs %.4f ms with %zu iterations\n", processingTime, args.iterations);
|
||||
processingTime = (double)(operationEndTime - operationStartTime) / 1000.0;
|
||||
printf(
|
||||
"Overall Processing time of NvMedia-CUDA Operations with NvSCI APIs %.4f "
|
||||
"ms with %zu iterations\n",
|
||||
processingTime, args.iterations);
|
||||
processingTime = (double)(endTime - startTime) / 1000.0;
|
||||
printf(
|
||||
"Overall Processing time of NvMedia-CUDA Operations + allocation/cleanup "
|
||||
"with NvSCI APIs %.4f ms with %zu iterations\n",
|
||||
processingTime, args.iterations);
|
||||
|
||||
if (ctx.i2d != NULL) {
|
||||
NvMedia2DDestroy(ctx.i2d);
|
||||
}
|
||||
if (ctx.i2d != NULL) {
|
||||
NvMedia2DDestroy(ctx.i2d);
|
||||
}
|
||||
|
||||
if (ctx.device != NULL) {
|
||||
NvMediaDeviceDestroy(ctx.device);
|
||||
}
|
||||
if (ctx.device != NULL) {
|
||||
NvMediaDeviceDestroy(ctx.device);
|
||||
}
|
||||
|
||||
if (status == NVMEDIA_STATUS_OK) {
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
return 1;
|
||||
}
|
||||
if (status == NVMEDIA_STATUS_OK) {
|
||||
return 0;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,463 +38,434 @@
|
|||
#include "nvmedia_2d_nvscisync.h"
|
||||
#include "nvsci_setup.h"
|
||||
|
||||
NvMediaImage *
|
||||
NvMediaImageCreateUsingNvScibuf(
|
||||
NvMediaDevice *device,
|
||||
NvMediaSurfaceType type,
|
||||
const NvMediaSurfAllocAttr *attrs,
|
||||
uint32_t numAttrs,
|
||||
uint32_t flags,
|
||||
NvSciBufObj &bufobj,
|
||||
int cudaDeviceId)
|
||||
{
|
||||
NvSciBufModule module = NULL;
|
||||
NvSciError err = NvSciError_Success;
|
||||
NvMediaStatus status = NVMEDIA_STATUS_OK;
|
||||
NvSciBufAttrList attrlist = NULL;
|
||||
NvSciBufAttrList conflictlist = NULL;
|
||||
NvSciBufAttrValAccessPerm access_perm = NvSciBufAccessPerm_ReadWrite;
|
||||
NvSciBufAttrKeyValuePair attr_kvp = {NvSciBufGeneralAttrKey_RequiredPerm,
|
||||
&access_perm,
|
||||
sizeof(access_perm)};
|
||||
NvSciBufAttrKeyValuePair pairArrayOut[10];
|
||||
NvMediaImage *NvMediaImageCreateUsingNvScibuf(NvMediaDevice *device,
|
||||
NvMediaSurfaceType type,
|
||||
const NvMediaSurfAllocAttr *attrs,
|
||||
uint32_t numAttrs, uint32_t flags,
|
||||
NvSciBufObj &bufobj,
|
||||
int cudaDeviceId) {
|
||||
NvSciBufModule module = NULL;
|
||||
NvSciError err = NvSciError_Success;
|
||||
NvMediaStatus status = NVMEDIA_STATUS_OK;
|
||||
NvSciBufAttrList attrlist = NULL;
|
||||
NvSciBufAttrList conflictlist = NULL;
|
||||
NvSciBufAttrValAccessPerm access_perm = NvSciBufAccessPerm_ReadWrite;
|
||||
NvSciBufAttrKeyValuePair attr_kvp = {NvSciBufGeneralAttrKey_RequiredPerm,
|
||||
&access_perm, sizeof(access_perm)};
|
||||
NvSciBufAttrKeyValuePair pairArrayOut[10];
|
||||
|
||||
NvMediaImage *image = NULL;
|
||||
NvMediaImage *image = NULL;
|
||||
|
||||
err = NvSciBufModuleOpen(&module);
|
||||
if(err != NvSciError_Success) {
|
||||
printf("%s: NvSciBuffModuleOpen failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
err = NvSciBufModuleOpen(&module);
|
||||
if (err != NvSciError_Success) {
|
||||
printf("%s: NvSciBuffModuleOpen failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
|
||||
err = NvSciBufAttrListCreate(module, &attrlist);
|
||||
if(err != NvSciError_Success) {
|
||||
printf("%s: SciBufAttrListCreate failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
err = NvSciBufAttrListCreate(module, &attrlist);
|
||||
if (err != NvSciError_Success) {
|
||||
printf("%s: SciBufAttrListCreate failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
|
||||
err = NvSciBufAttrListSetAttrs(attrlist, &attr_kvp, 1);
|
||||
if(err != NvSciError_Success) {
|
||||
printf("%s: AccessPermSetAttr failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
err = NvSciBufAttrListSetAttrs(attrlist, &attr_kvp, 1);
|
||||
if (err != NvSciError_Success) {
|
||||
printf("%s: AccessPermSetAttr failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
|
||||
status = NvMediaImageFillNvSciBufAttrs(device,
|
||||
type,
|
||||
attrs,
|
||||
numAttrs,
|
||||
0,
|
||||
attrlist);
|
||||
status =
|
||||
NvMediaImageFillNvSciBufAttrs(device, type, attrs, numAttrs, 0, attrlist);
|
||||
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: ImageFillSciBufAttrs failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: ImageFillSciBufAttrs failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
setupNvSciBuf(bufobj, attrlist, cudaDeviceId);
|
||||
|
||||
setupNvSciBuf(bufobj, attrlist, cudaDeviceId);
|
||||
status = NvMediaImageCreateFromNvSciBuf(device, bufobj, &image);
|
||||
|
||||
status = NvMediaImageCreateFromNvSciBuf(device,
|
||||
bufobj,
|
||||
&image);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: ImageCreatefromSciBuf failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: ImageCreatefromSciBuf failed. Error: %d \n", __func__, err);
|
||||
goto fail_cleanup;
|
||||
}
|
||||
NvSciBufAttrListFree(attrlist);
|
||||
|
||||
NvSciBufAttrListFree(attrlist);
|
||||
if (module != NULL) {
|
||||
NvSciBufModuleClose(module);
|
||||
}
|
||||
|
||||
if(module != NULL) {
|
||||
NvSciBufModuleClose(module);
|
||||
}
|
||||
|
||||
return image;
|
||||
return image;
|
||||
|
||||
fail_cleanup:
|
||||
if(attrlist != NULL) {
|
||||
NvSciBufAttrListFree(attrlist);
|
||||
}
|
||||
if(bufobj != NULL) {
|
||||
NvSciBufObjFree(bufobj);
|
||||
bufobj = NULL;
|
||||
}
|
||||
if (attrlist != NULL) {
|
||||
NvSciBufAttrListFree(attrlist);
|
||||
}
|
||||
if (bufobj != NULL) {
|
||||
NvSciBufObjFree(bufobj);
|
||||
bufobj = NULL;
|
||||
}
|
||||
|
||||
if(module != NULL) {
|
||||
NvSciBufModuleClose(module);
|
||||
}
|
||||
NvMediaImageDestroy(image);
|
||||
return NULL;
|
||||
if (module != NULL) {
|
||||
NvSciBufModuleClose(module);
|
||||
}
|
||||
NvMediaImageDestroy(image);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create NvMediaImage surface based on the input attributes.
|
||||
* Returns NVMEDIA_STATUS_OK on success
|
||||
*/
|
||||
static NvMediaStatus
|
||||
createSurface(Blit2DTest *ctx,
|
||||
NvMediaSurfFormatAttr *surfFormatAttrs,
|
||||
NvMediaSurfAllocAttr *surfAllocAttrs,
|
||||
uint32_t numSurfAllocAttrs,
|
||||
NvMediaImage **image,
|
||||
NvSciBufObj &bufObj,
|
||||
int cudaDeviceId)
|
||||
{
|
||||
NvMediaSurfaceType surfType;
|
||||
static NvMediaStatus createSurface(Blit2DTest *ctx,
|
||||
NvMediaSurfFormatAttr *surfFormatAttrs,
|
||||
NvMediaSurfAllocAttr *surfAllocAttrs,
|
||||
uint32_t numSurfAllocAttrs,
|
||||
NvMediaImage **image, NvSciBufObj &bufObj,
|
||||
int cudaDeviceId) {
|
||||
NvMediaSurfaceType surfType;
|
||||
|
||||
/* create source image */
|
||||
surfType = NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
|
||||
*image = NvMediaImageCreateUsingNvScibuf(ctx->device, /* device */
|
||||
surfType, /* surface type */
|
||||
surfAllocAttrs,
|
||||
numSurfAllocAttrs,
|
||||
0,
|
||||
bufObj,
|
||||
cudaDeviceId);
|
||||
/* create source image */
|
||||
surfType =
|
||||
NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
|
||||
*image = NvMediaImageCreateUsingNvScibuf(ctx->device, /* device */
|
||||
surfType, /* surface type */
|
||||
surfAllocAttrs, numSurfAllocAttrs, 0,
|
||||
bufObj, cudaDeviceId);
|
||||
|
||||
if(*image == NULL) {
|
||||
printf ("Unable to create image\n");
|
||||
return NVMEDIA_STATUS_ERROR;
|
||||
}
|
||||
InitImage (*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
|
||||
if (*image == NULL) {
|
||||
printf("Unable to create image\n");
|
||||
return NVMEDIA_STATUS_ERROR;
|
||||
}
|
||||
InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
|
||||
|
||||
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
|
||||
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value, surfType);*/
|
||||
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
|
||||
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value,
|
||||
surfType);*/
|
||||
|
||||
return NVMEDIA_STATUS_OK;
|
||||
return NVMEDIA_STATUS_OK;
|
||||
}
|
||||
|
||||
/* Create NvMediaImage surface based on the input attributes.
|
||||
* Returns NVMEDIA_STATUS_OK on success
|
||||
*/
|
||||
static NvMediaStatus
|
||||
createSurfaceNonNvSCI(Blit2DTest *ctx,
|
||||
NvMediaSurfFormatAttr *surfFormatAttrs,
|
||||
NvMediaSurfAllocAttr *surfAllocAttrs,
|
||||
uint32_t numSurfAllocAttrs,
|
||||
NvMediaImage **image)
|
||||
{
|
||||
NvMediaSurfaceType surfType;
|
||||
static NvMediaStatus createSurfaceNonNvSCI(
|
||||
Blit2DTest *ctx, NvMediaSurfFormatAttr *surfFormatAttrs,
|
||||
NvMediaSurfAllocAttr *surfAllocAttrs, uint32_t numSurfAllocAttrs,
|
||||
NvMediaImage **image) {
|
||||
NvMediaSurfaceType surfType;
|
||||
|
||||
/* create source image */
|
||||
surfType = NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
|
||||
/* create source image */
|
||||
surfType =
|
||||
NvMediaSurfaceFormatGetType(surfFormatAttrs, NVM_SURF_FMT_ATTR_MAX);
|
||||
|
||||
*image = NvMediaImageCreateNew(ctx->device, surfType, surfAllocAttrs, numSurfAllocAttrs, 0);
|
||||
*image = NvMediaImageCreateNew(ctx->device, surfType, surfAllocAttrs,
|
||||
numSurfAllocAttrs, 0);
|
||||
|
||||
if(*image == NULL) {
|
||||
printf ("Unable to create image\n");
|
||||
return NVMEDIA_STATUS_ERROR;
|
||||
}
|
||||
InitImage (*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
|
||||
if (*image == NULL) {
|
||||
printf("Unable to create image\n");
|
||||
return NVMEDIA_STATUS_ERROR;
|
||||
}
|
||||
InitImage(*image, surfAllocAttrs[0].value, surfAllocAttrs[1].value);
|
||||
|
||||
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
|
||||
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value, surfType);*/
|
||||
/* printf("%s: NvMediaImageCreate:: Image size: %ux%u Image type: %d\n",
|
||||
__func__, surfAllocAttrs[0].value, surfAllocAttrs[1].value,
|
||||
surfType);*/
|
||||
|
||||
return NVMEDIA_STATUS_OK;
|
||||
return NVMEDIA_STATUS_OK;
|
||||
}
|
||||
|
||||
static void destroySurface(NvMediaImage *image) { NvMediaImageDestroy(image); }
|
||||
|
||||
static void destroySurface(NvMediaImage *image)
|
||||
{
|
||||
NvMediaImageDestroy(image);
|
||||
}
|
||||
static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs *args,
|
||||
NvSciSyncObj &nvMediaSignalerSyncObj,
|
||||
NvSciSyncFence *preSyncFence,
|
||||
NvSciSyncFence *fence) {
|
||||
NvMediaStatus status;
|
||||
NvMediaImageSurfaceMap surfaceMap;
|
||||
|
||||
status = ReadImage(args->inputFileName, /* fileName */
|
||||
0, /* frameNum */
|
||||
args->srcSurfAllocAttrs[0].value, /* source image width */
|
||||
args->srcSurfAllocAttrs[1].value, /* source image height */
|
||||
ctx->srcImage, /* srcImage */
|
||||
NVMEDIA_TRUE, /* uvOrderFlag */
|
||||
1, /* bytesPerPixel */
|
||||
MSB_ALIGNED); /* pixelAlignment */
|
||||
|
||||
static NvMediaStatus blit2DImage(Blit2DTest *ctx, TestArgs* args, NvSciSyncObj &nvMediaSignalerSyncObj,
|
||||
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence)
|
||||
{
|
||||
NvMediaStatus status;
|
||||
NvMediaImageSurfaceMap surfaceMap;
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
status = ReadImage(args->inputFileName, /* fileName */
|
||||
0, /* frameNum */
|
||||
args->srcSurfAllocAttrs[0].value, /* source image width */
|
||||
args->srcSurfAllocAttrs[1].value, /* source image height */
|
||||
ctx->srcImage, /* srcImage */
|
||||
NVMEDIA_TRUE, /* uvOrderFlag */
|
||||
1, /* bytesPerPixel */
|
||||
MSB_ALIGNED); /* pixelAlignment */
|
||||
if ((args->srcRect.x1 <= args->srcRect.x0) ||
|
||||
(args->srcRect.y1 <= args->srcRect.y0)) {
|
||||
ctx->srcRect = NULL;
|
||||
} else {
|
||||
ctx->srcRect = &(args->srcRect);
|
||||
}
|
||||
|
||||
if ((args->dstRect.x1 <= args->dstRect.x0) ||
|
||||
(args->dstRect.y1 <= args->dstRect.y0)) {
|
||||
ctx->dstRect = NULL;
|
||||
} else {
|
||||
ctx->dstRect = &(args->dstRect);
|
||||
}
|
||||
|
||||
static int64_t launch = 0;
|
||||
// Start inserting pre-fence from second launch inorder to for NvMedia2Blit to
|
||||
// wait
|
||||
// for cuda signal on fence.
|
||||
if (launch) {
|
||||
status = NvMedia2DInsertPreNvSciSyncFence(ctx->i2d, preSyncFence);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
|
||||
return status;
|
||||
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__,
|
||||
status);
|
||||
return status;
|
||||
}
|
||||
NvSciSyncFenceClear(preSyncFence);
|
||||
}
|
||||
launch++;
|
||||
|
||||
if ((args->srcRect.x1 <= args->srcRect.x0) || (args->srcRect.y1 <= args->srcRect.y0)) {
|
||||
ctx->srcRect = NULL;
|
||||
} else {
|
||||
ctx->srcRect = &(args->srcRect);
|
||||
}
|
||||
status = NvMedia2DSetNvSciSyncObjforEOF(ctx->i2d, nvMediaSignalerSyncObj);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__,
|
||||
status);
|
||||
return status;
|
||||
}
|
||||
|
||||
if ((args->dstRect.x1 <= args->dstRect.x0) || (args->dstRect.y1 <= args->dstRect.y0)) {
|
||||
ctx->dstRect = NULL;
|
||||
} else {
|
||||
ctx->dstRect = &(args->dstRect);
|
||||
}
|
||||
/* 2DBlit processing on input image */
|
||||
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
|
||||
ctx->dstImage, /* dstSurface */
|
||||
ctx->dstRect, /* dstRect */
|
||||
ctx->srcImage, /* srcSurface */
|
||||
ctx->srcRect, /* srcRect */
|
||||
&args->blitParams, /* params */
|
||||
NULL); /* paramsOut */
|
||||
|
||||
static int64_t launch = 0;
|
||||
// Start inserting pre-fence from second launch inorder to for NvMedia2Blit to wait
|
||||
// for cuda signal on fence.
|
||||
if (launch)
|
||||
{
|
||||
status = NvMedia2DInsertPreNvSciSyncFence(ctx->i2d, preSyncFence);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
NvSciSyncFenceClear(preSyncFence);
|
||||
}
|
||||
launch++;
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
status = NvMedia2DSetNvSciSyncObjforEOF(ctx->i2d, nvMediaSignalerSyncObj);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DSetNvSciSyncObjforEOF failed: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
status =
|
||||
NvMedia2DGetEOFNvSciSyncFence(ctx->i2d, nvMediaSignalerSyncObj, fence);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DGetEOFNvSciSyncFence failed: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
/* 2DBlit processing on input image */
|
||||
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
|
||||
ctx->dstImage, /* dstSurface */
|
||||
ctx->dstRect, /* dstRect */
|
||||
ctx->srcImage, /* srcSurface */
|
||||
ctx->srcRect, /* srcRect */
|
||||
&args->blitParams, /* params */
|
||||
NULL); /* paramsOut */
|
||||
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
status = NvMedia2DGetEOFNvSciSyncFence(ctx->i2d, nvMediaSignalerSyncObj, fence);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DGetEOFNvSciSyncFence failed: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
return NVMEDIA_STATUS_OK;
|
||||
return NVMEDIA_STATUS_OK;
|
||||
}
|
||||
|
||||
static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs* args)
|
||||
{
|
||||
NvMediaStatus status;
|
||||
NvMediaImageSurfaceMap surfaceMap;
|
||||
static NvMediaStatus blit2DImageNonNvSCI(Blit2DTest *ctx, TestArgs *args) {
|
||||
NvMediaStatus status;
|
||||
NvMediaImageSurfaceMap surfaceMap;
|
||||
|
||||
status = ReadImage(args->inputFileName, /* fileName */
|
||||
0, /* frameNum */
|
||||
args->srcSurfAllocAttrs[0].value, /* source image width */
|
||||
args->srcSurfAllocAttrs[1].value, /* source image height */
|
||||
ctx->srcImage, /* srcImage */
|
||||
NVMEDIA_TRUE, /* uvOrderFlag */
|
||||
1, /* bytesPerPixel */
|
||||
MSB_ALIGNED); /* pixelAlignment */
|
||||
status = ReadImage(args->inputFileName, /* fileName */
|
||||
0, /* frameNum */
|
||||
args->srcSurfAllocAttrs[0].value, /* source image width */
|
||||
args->srcSurfAllocAttrs[1].value, /* source image height */
|
||||
ctx->srcImage, /* srcImage */
|
||||
NVMEDIA_TRUE, /* uvOrderFlag */
|
||||
1, /* bytesPerPixel */
|
||||
MSB_ALIGNED); /* pixelAlignment */
|
||||
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: ReadImage failed for input buffer: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
if ((args->srcRect.x1 <= args->srcRect.x0) || (args->srcRect.y1 <= args->srcRect.y0)) {
|
||||
ctx->srcRect = NULL;
|
||||
} else {
|
||||
ctx->srcRect = &(args->srcRect);
|
||||
}
|
||||
if ((args->srcRect.x1 <= args->srcRect.x0) ||
|
||||
(args->srcRect.y1 <= args->srcRect.y0)) {
|
||||
ctx->srcRect = NULL;
|
||||
} else {
|
||||
ctx->srcRect = &(args->srcRect);
|
||||
}
|
||||
|
||||
if ((args->dstRect.x1 <= args->dstRect.x0) || (args->dstRect.y1 <= args->dstRect.y0)) {
|
||||
ctx->dstRect = NULL;
|
||||
} else {
|
||||
ctx->dstRect = &(args->dstRect);
|
||||
}
|
||||
if ((args->dstRect.x1 <= args->dstRect.x0) ||
|
||||
(args->dstRect.y1 <= args->dstRect.y0)) {
|
||||
ctx->dstRect = NULL;
|
||||
} else {
|
||||
ctx->dstRect = &(args->dstRect);
|
||||
}
|
||||
|
||||
/* 2DBlit processing on input image */
|
||||
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
|
||||
ctx->dstImage, /* dstSurface */
|
||||
ctx->dstRect, /* dstRect */
|
||||
ctx->srcImage, /* srcSurface */
|
||||
ctx->srcRect, /* srcRect */
|
||||
&args->blitParams, /* params */
|
||||
NULL); /* paramsOut */
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
/* 2DBlit processing on input image */
|
||||
status = NvMedia2DBlitEx(ctx->i2d, /* i2d */
|
||||
ctx->dstImage, /* dstSurface */
|
||||
ctx->dstRect, /* dstRect */
|
||||
ctx->srcImage, /* srcSurface */
|
||||
ctx->srcRect, /* srcRect */
|
||||
&args->blitParams, /* params */
|
||||
NULL); /* paramsOut */
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DBlitEx failed: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
/* Write output image into buffer */
|
||||
ctx->bytesPerPixel = 1;
|
||||
WriteImageToAllocatedBuffer(ctx, ctx->dstImage,
|
||||
NVMEDIA_TRUE,
|
||||
NVMEDIA_FALSE,
|
||||
/* Write output image into buffer */
|
||||
ctx->bytesPerPixel = 1;
|
||||
WriteImageToAllocatedBuffer(ctx, ctx->dstImage, NVMEDIA_TRUE, NVMEDIA_FALSE,
|
||||
ctx->bytesPerPixel);
|
||||
|
||||
return NVMEDIA_STATUS_OK;
|
||||
return NVMEDIA_STATUS_OK;
|
||||
}
|
||||
|
||||
static void cleanup(Blit2DTest* ctx, NvMediaStatus status = NVMEDIA_STATUS_OK)
|
||||
{
|
||||
if (ctx->srcImage != NULL) {
|
||||
NvMedia2DImageUnRegister(ctx->i2d, ctx->srcImage);
|
||||
destroySurface(ctx->srcImage);
|
||||
}
|
||||
if (ctx->dstImage != NULL) {
|
||||
NvMedia2DImageUnRegister(ctx->i2d, ctx->dstImage);
|
||||
destroySurface(ctx->dstImage);
|
||||
}
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
static void cleanup(Blit2DTest *ctx, NvMediaStatus status = NVMEDIA_STATUS_OK) {
|
||||
if (ctx->srcImage != NULL) {
|
||||
NvMedia2DImageUnRegister(ctx->i2d, ctx->srcImage);
|
||||
destroySurface(ctx->srcImage);
|
||||
}
|
||||
if (ctx->dstImage != NULL) {
|
||||
NvMedia2DImageUnRegister(ctx->i2d, ctx->dstImage);
|
||||
destroySurface(ctx->dstImage);
|
||||
}
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj)
|
||||
{
|
||||
NvMediaStatus status;
|
||||
cleanup(ctx);
|
||||
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, syncObj);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMediaImageSciBufInit failed\n",__func__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, preSyncObj);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMediaImageSciBufInit failed\n",__func__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
NvMediaImageNvSciBufDeinit();
|
||||
void cleanupNvMedia(Blit2DTest *ctx, NvSciSyncObj &syncObj,
|
||||
NvSciSyncObj &preSyncObj) {
|
||||
NvMediaStatus status;
|
||||
cleanup(ctx);
|
||||
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, syncObj);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMediaImageSciBufInit failed\n", __func__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
status = NvMedia2DUnregisterNvSciSyncObj(ctx->i2d, preSyncObj);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMediaImageSciBufInit failed\n", __func__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
NvMediaImageNvSciBufDeinit();
|
||||
}
|
||||
|
||||
void cleanupNvMedia(Blit2DTest* ctx)
|
||||
{
|
||||
cleanup(ctx);
|
||||
free(ctx->dstBuffPitches);
|
||||
free(ctx->dstBuffer);
|
||||
free(ctx->dstBuff);
|
||||
void cleanupNvMedia(Blit2DTest *ctx) {
|
||||
cleanup(ctx);
|
||||
free(ctx->dstBuffPitches);
|
||||
free(ctx->dstBuffer);
|
||||
free(ctx->dstBuff);
|
||||
}
|
||||
|
||||
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj &srcNvSciBufobj,
|
||||
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj,
|
||||
int cudaDeviceId)
|
||||
{
|
||||
NvMediaStatus status;
|
||||
status = NvMediaImageNvSciBufInit();
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMediaImageSciBufInit failed\n",__func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
void setupNvMedia(TestArgs *args, Blit2DTest *ctx, NvSciBufObj &srcNvSciBufobj,
|
||||
NvSciBufObj &dstNvSciBufobj, NvSciSyncObj &syncObj,
|
||||
NvSciSyncObj &preSyncObj, int cudaDeviceId) {
|
||||
NvMediaStatus status;
|
||||
status = NvMediaImageNvSciBufInit();
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMediaImageSciBufInit failed\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
|
||||
// Create source surface
|
||||
status = createSurface(ctx,
|
||||
args->srcSurfFormatAttrs,
|
||||
args->srcSurfAllocAttrs,
|
||||
args->numSurfAllocAttrs,
|
||||
&ctx->srcImage,
|
||||
srcNvSciBufobj,
|
||||
cudaDeviceId);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to create buffer pools\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
// Create source surface
|
||||
status = createSurface(ctx, args->srcSurfFormatAttrs, args->srcSurfAllocAttrs,
|
||||
args->numSurfAllocAttrs, &ctx->srcImage,
|
||||
srcNvSciBufobj, cudaDeviceId);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to create buffer pools\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
|
||||
// Create destination surface
|
||||
status = createSurface(ctx,
|
||||
args->dstSurfFormatAttrs,
|
||||
args->dstSurfAllocAttrs,
|
||||
args->numSurfAllocAttrs,
|
||||
&ctx->dstImage,
|
||||
dstNvSciBufobj,
|
||||
cudaDeviceId);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to create buffer pools\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
// Create destination surface
|
||||
status = createSurface(ctx, args->dstSurfFormatAttrs, args->dstSurfAllocAttrs,
|
||||
args->numSurfAllocAttrs, &ctx->dstImage,
|
||||
dstNvSciBufobj, cudaDeviceId);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to create buffer pools\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
|
||||
//Register source Surface
|
||||
status = NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
|
||||
if ( status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to register source surface\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
//Register destination Surface
|
||||
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage, NVMEDIA_ACCESS_MODE_READ_WRITE);
|
||||
if ( status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to register destination surface\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
// Register source Surface
|
||||
status =
|
||||
NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to register source surface\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
// Register destination Surface
|
||||
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage,
|
||||
NVMEDIA_ACCESS_MODE_READ_WRITE);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to register destination surface\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
|
||||
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_EOFSYNCOBJ, syncObj);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
|
||||
}
|
||||
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_EOFSYNCOBJ, syncObj);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
|
||||
}
|
||||
|
||||
status = NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_PRESYNCOBJ, preSyncObj);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
|
||||
}
|
||||
status =
|
||||
NvMedia2DRegisterNvSciSyncObj(ctx->i2d, NVMEDIA_PRESYNCOBJ, preSyncObj);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to NvMedia2DRegisterNvSciSyncObj\n", __func__);
|
||||
}
|
||||
}
|
||||
|
||||
// Create NvMedia src & dst image without NvSciBuf
|
||||
void setupNvMedia(TestArgs* args, Blit2DTest* ctx)
|
||||
{
|
||||
NvMediaStatus status;
|
||||
void setupNvMedia(TestArgs *args, Blit2DTest *ctx) {
|
||||
NvMediaStatus status;
|
||||
|
||||
// Create source surface
|
||||
status = createSurfaceNonNvSCI(ctx,
|
||||
args->srcSurfFormatAttrs,
|
||||
args->srcSurfAllocAttrs,
|
||||
args->numSurfAllocAttrs,
|
||||
&ctx->srcImage);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to create buffer pools\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
// Create source surface
|
||||
status = createSurfaceNonNvSCI(ctx, args->srcSurfFormatAttrs,
|
||||
args->srcSurfAllocAttrs,
|
||||
args->numSurfAllocAttrs, &ctx->srcImage);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to create buffer pools\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
|
||||
// Create destination surface
|
||||
status = createSurfaceNonNvSCI(ctx,
|
||||
args->dstSurfFormatAttrs,
|
||||
args->dstSurfAllocAttrs,
|
||||
args->numSurfAllocAttrs,
|
||||
&ctx->dstImage);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to create buffer pools\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
// Create destination surface
|
||||
status = createSurfaceNonNvSCI(ctx, args->dstSurfFormatAttrs,
|
||||
args->dstSurfAllocAttrs,
|
||||
args->numSurfAllocAttrs, &ctx->dstImage);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to create buffer pools\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
|
||||
//Register source Surface
|
||||
status = NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
|
||||
if ( status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to register source surface\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
// Register source Surface
|
||||
status =
|
||||
NvMedia2DImageRegister(ctx->i2d, ctx->srcImage, NVMEDIA_ACCESS_MODE_READ);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to register source surface\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
|
||||
//Register destination Surface
|
||||
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage, NVMEDIA_ACCESS_MODE_READ_WRITE);
|
||||
if ( status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to register destination surface\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
// Register destination Surface
|
||||
status = NvMedia2DImageRegister(ctx->i2d, ctx->dstImage,
|
||||
NVMEDIA_ACCESS_MODE_READ_WRITE);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Unable to register destination surface\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
|
||||
// Allocate buffer for writing image & set image parameters in Blit2DTest.
|
||||
ctx->bytesPerPixel = 1;
|
||||
AllocateBufferToWriteImage(ctx,
|
||||
ctx->dstImage,
|
||||
NVMEDIA_TRUE, /* uvOrderFlag */
|
||||
NVMEDIA_FALSE); /* appendFlag */
|
||||
// Allocate buffer for writing image & set image parameters in Blit2DTest.
|
||||
ctx->bytesPerPixel = 1;
|
||||
AllocateBufferToWriteImage(ctx, ctx->dstImage, NVMEDIA_TRUE, /* uvOrderFlag */
|
||||
NVMEDIA_FALSE); /* appendFlag */
|
||||
}
|
||||
|
||||
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx)
|
||||
{
|
||||
// Blit2D function
|
||||
NvMediaStatus status = blit2DImageNonNvSCI(ctx, args);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Blit2D failed\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
void runNvMediaBlit2D(TestArgs *args, Blit2DTest *ctx) {
|
||||
// Blit2D function
|
||||
NvMediaStatus status = blit2DImageNonNvSCI(ctx, args);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Blit2D failed\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
}
|
||||
|
||||
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj &nvMediaSignalerSyncObj,
|
||||
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence)
|
||||
{
|
||||
// Blit2D function
|
||||
NvMediaStatus status = blit2DImage(ctx, args, nvMediaSignalerSyncObj, preSyncFence, fence);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Blit2D failed\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
void runNvMediaBlit2D(TestArgs *args, Blit2DTest *ctx,
|
||||
NvSciSyncObj &nvMediaSignalerSyncObj,
|
||||
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence) {
|
||||
// Blit2D function
|
||||
NvMediaStatus status =
|
||||
blit2DImage(ctx, args, nvMediaSignalerSyncObj, preSyncFence, fence);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: Blit2D failed\n", __func__);
|
||||
cleanup(ctx, status);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,6 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __NVMEDIA_PRODUCER_H__
|
||||
#define __NVMEDIA_PRODUCER_H__
|
||||
#include "nvmedia_utils/cmdline.h"
|
||||
|
@ -36,13 +35,14 @@
|
|||
#include "nvmedia_image_nvscibuf.h"
|
||||
#include "nvscisync.h"
|
||||
|
||||
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj &syncObj,
|
||||
NvSciSyncFence *preSyncFence, NvSciSyncFence *fence);
|
||||
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx, NvSciSyncObj& syncObj,
|
||||
NvSciSyncFence* preSyncFence, NvSciSyncFence* fence);
|
||||
void runNvMediaBlit2D(TestArgs* args, Blit2DTest* ctx);
|
||||
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj &srcNvSciBufobj,
|
||||
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj,
|
||||
int cudaDeviceId);
|
||||
void setupNvMedia(TestArgs* args, Blit2DTest* ctx, NvSciBufObj& srcNvSciBufobj,
|
||||
NvSciBufObj& dstNvSciBufobj, NvSciSyncObj& syncObj,
|
||||
NvSciSyncObj& preSyncObj, int cudaDeviceId);
|
||||
void setupNvMedia(TestArgs* args, Blit2DTest* ctx);
|
||||
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj &syncObj, NvSciSyncObj &preSyncObj);
|
||||
void cleanupNvMedia(Blit2DTest* ctx, NvSciSyncObj& syncObj,
|
||||
NvSciSyncObj& preSyncObj);
|
||||
void cleanupNvMedia(Blit2DTest* ctx);
|
||||
#endif
|
||||
|
|
|
@ -32,7 +32,6 @@
|
|||
#include "nvsci_setup.h"
|
||||
#include "nvmedia_2d_nvscisync.h"
|
||||
|
||||
|
||||
#define checkNvSciErrors(call) \
|
||||
do { \
|
||||
NvSciError _status = call; \
|
||||
|
@ -44,111 +43,115 @@
|
|||
fflush(stdout); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} while (0)
|
||||
} while (0)
|
||||
|
||||
void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId)
|
||||
{
|
||||
NvSciSyncModule sciSyncModule;
|
||||
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
|
||||
NvSciSyncAttrList signalerAttrList, waiterAttrList;
|
||||
NvSciSyncAttrList syncUnreconciledList[2];
|
||||
NvSciSyncAttrList syncReconciledList, syncConflictList;
|
||||
void setupNvMediaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
|
||||
int cudaDeviceId) {
|
||||
NvSciSyncModule sciSyncModule;
|
||||
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
|
||||
NvSciSyncAttrList signalerAttrList, waiterAttrList;
|
||||
NvSciSyncAttrList syncUnreconciledList[2];
|
||||
NvSciSyncAttrList syncReconciledList, syncConflictList;
|
||||
|
||||
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
|
||||
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
|
||||
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
|
||||
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
|
||||
|
||||
NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(ctx->i2d, signalerAttrList, NVMEDIA_SIGNALER);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n",__func__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(
|
||||
ctx->i2d, signalerAttrList, NVMEDIA_SIGNALER);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaSetDevice(cudaDeviceId));
|
||||
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(waiterAttrList, cudaDeviceId, cudaNvSciSyncAttrWait));
|
||||
checkCudaErrors(cudaSetDevice(cudaDeviceId));
|
||||
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(waiterAttrList, cudaDeviceId,
|
||||
cudaNvSciSyncAttrWait));
|
||||
|
||||
syncUnreconciledList[0] = signalerAttrList;
|
||||
syncUnreconciledList[1] = waiterAttrList;
|
||||
checkNvSciErrors(NvSciSyncAttrListReconcile(syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
|
||||
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
|
||||
syncUnreconciledList[0] = signalerAttrList;
|
||||
syncUnreconciledList[1] = waiterAttrList;
|
||||
checkNvSciErrors(NvSciSyncAttrListReconcile(
|
||||
syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
|
||||
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
|
||||
|
||||
NvSciSyncAttrListFree(signalerAttrList);
|
||||
NvSciSyncAttrListFree(waiterAttrList);
|
||||
if(syncConflictList != nullptr) {
|
||||
NvSciSyncAttrListFree(syncConflictList);
|
||||
}
|
||||
NvSciSyncAttrListFree(signalerAttrList);
|
||||
NvSciSyncAttrListFree(waiterAttrList);
|
||||
if (syncConflictList != nullptr) {
|
||||
NvSciSyncAttrListFree(syncConflictList);
|
||||
}
|
||||
}
|
||||
|
||||
void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId)
|
||||
{
|
||||
NvSciSyncModule sciSyncModule;
|
||||
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
|
||||
NvSciSyncAttrList signalerAttrList, waiterAttrList;
|
||||
NvSciSyncAttrList syncUnreconciledList[2];
|
||||
NvSciSyncAttrList syncReconciledList, syncConflictList;
|
||||
void setupCudaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
|
||||
int cudaDeviceId) {
|
||||
NvSciSyncModule sciSyncModule;
|
||||
checkNvSciErrors(NvSciSyncModuleOpen(&sciSyncModule));
|
||||
NvSciSyncAttrList signalerAttrList, waiterAttrList;
|
||||
NvSciSyncAttrList syncUnreconciledList[2];
|
||||
NvSciSyncAttrList syncReconciledList, syncConflictList;
|
||||
|
||||
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
|
||||
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
|
||||
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &signalerAttrList));
|
||||
checkNvSciErrors(NvSciSyncAttrListCreate(sciSyncModule, &waiterAttrList));
|
||||
|
||||
NvMediaStatus status = NvMedia2DFillNvSciSyncAttrList(ctx->i2d, waiterAttrList, NVMEDIA_WAITER);
|
||||
if(status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n",__func__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
NvMediaStatus status =
|
||||
NvMedia2DFillNvSciSyncAttrList(ctx->i2d, waiterAttrList, NVMEDIA_WAITER);
|
||||
if (status != NVMEDIA_STATUS_OK) {
|
||||
printf("%s: NvMedia2DFillNvSciSyncAttrList failed\n", __func__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaSetDevice(cudaDeviceId));
|
||||
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(signalerAttrList, cudaDeviceId, cudaNvSciSyncAttrSignal));
|
||||
checkCudaErrors(cudaSetDevice(cudaDeviceId));
|
||||
checkCudaErrors(cudaDeviceGetNvSciSyncAttributes(
|
||||
signalerAttrList, cudaDeviceId, cudaNvSciSyncAttrSignal));
|
||||
|
||||
syncUnreconciledList[0] = signalerAttrList;
|
||||
syncUnreconciledList[1] = waiterAttrList;
|
||||
checkNvSciErrors(NvSciSyncAttrListReconcile(syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
|
||||
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
|
||||
syncUnreconciledList[0] = signalerAttrList;
|
||||
syncUnreconciledList[1] = waiterAttrList;
|
||||
checkNvSciErrors(NvSciSyncAttrListReconcile(
|
||||
syncUnreconciledList, 2, &syncReconciledList, &syncConflictList));
|
||||
checkNvSciErrors(NvSciSyncObjAlloc(syncReconciledList, &syncObj));
|
||||
|
||||
NvSciSyncAttrListFree(signalerAttrList);
|
||||
NvSciSyncAttrListFree(waiterAttrList);
|
||||
if(syncConflictList != nullptr) {
|
||||
NvSciSyncAttrListFree(syncConflictList);
|
||||
}
|
||||
NvSciSyncAttrListFree(signalerAttrList);
|
||||
NvSciSyncAttrListFree(waiterAttrList);
|
||||
if (syncConflictList != nullptr) {
|
||||
NvSciSyncAttrListFree(syncConflictList);
|
||||
}
|
||||
}
|
||||
|
||||
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist,
|
||||
int cudaDeviceId) {
|
||||
CUuuid devUUID;
|
||||
NvSciBufAttrList conflictlist;
|
||||
NvSciBufAttrList bufUnreconciledAttrlist[1];
|
||||
|
||||
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist, int cudaDeviceId)
|
||||
{
|
||||
CUuuid devUUID;
|
||||
NvSciBufAttrList conflictlist;
|
||||
NvSciBufAttrList bufUnreconciledAttrlist[1];
|
||||
CUresult res = cuDeviceGetUuid(&devUUID, cudaDeviceId);
|
||||
if (res != CUDA_SUCCESS) {
|
||||
fprintf(stderr, "Driver API error = %04d \n", res);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
CUresult res = cuDeviceGetUuid(&devUUID, cudaDeviceId);
|
||||
if (res != CUDA_SUCCESS) {
|
||||
fprintf(stderr, "Driver API error = %04d \n", res);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
NvSciBufAttrKeyValuePair attr_gpuid[] = {NvSciBufGeneralAttrKey_GpuId,
|
||||
&devUUID, sizeof(devUUID)};
|
||||
|
||||
NvSciBufAttrKeyValuePair attr_gpuid[] = {NvSciBufGeneralAttrKey_GpuId, &devUUID, sizeof(devUUID)};
|
||||
// set CUDA GPU ID to attribute list
|
||||
checkNvSciErrors(NvSciBufAttrListSetAttrs(
|
||||
nvmediaAttrlist, attr_gpuid,
|
||||
sizeof(attr_gpuid) / sizeof(NvSciBufAttrKeyValuePair)));
|
||||
|
||||
// set CUDA GPU ID to attribute list
|
||||
checkNvSciErrors(NvSciBufAttrListSetAttrs(nvmediaAttrlist, attr_gpuid, sizeof(attr_gpuid)/sizeof(NvSciBufAttrKeyValuePair)));
|
||||
bufUnreconciledAttrlist[0] = nvmediaAttrlist;
|
||||
|
||||
bufUnreconciledAttrlist[0] = nvmediaAttrlist;
|
||||
|
||||
checkNvSciErrors(NvSciBufAttrListReconcileAndObjAlloc(bufUnreconciledAttrlist,
|
||||
1,
|
||||
&bufobj,
|
||||
&conflictlist));
|
||||
if (conflictlist != NULL) {
|
||||
NvSciBufAttrListFree(conflictlist);
|
||||
}
|
||||
checkNvSciErrors(NvSciBufAttrListReconcileAndObjAlloc(
|
||||
bufUnreconciledAttrlist, 1, &bufobj, &conflictlist));
|
||||
if (conflictlist != NULL) {
|
||||
NvSciBufAttrListFree(conflictlist);
|
||||
}
|
||||
}
|
||||
|
||||
void cleanupNvSciBuf(NvSciBufObj &Bufobj)
|
||||
{
|
||||
if (Bufobj != NULL) {
|
||||
NvSciBufObjFree(Bufobj);
|
||||
}
|
||||
void cleanupNvSciBuf(NvSciBufObj &Bufobj) {
|
||||
if (Bufobj != NULL) {
|
||||
NvSciBufObjFree(Bufobj);
|
||||
}
|
||||
}
|
||||
|
||||
void cleanupNvSciSync(NvSciSyncObj &syncObj)
|
||||
{
|
||||
if (NvSciSyncObjFree != NULL) {
|
||||
NvSciSyncObjFree(syncObj);
|
||||
}
|
||||
void cleanupNvSciSync(NvSciSyncObj &syncObj) {
|
||||
if (NvSciSyncObjFree != NULL) {
|
||||
NvSciSyncObjFree(syncObj);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,16 +25,18 @@
|
|||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __NVSCI_SETUP_H__
|
||||
#define __NVSCI_SETUP_H__
|
||||
#include "nvmedia_utils/cmdline.h"
|
||||
#include <nvscibuf.h>
|
||||
#include <nvscisync.h>
|
||||
|
||||
void setupNvMediaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId);
|
||||
void setupCudaSignalerNvSciSync(Blit2DTest* ctx, NvSciSyncObj &syncObj, int cudaDeviceId);
|
||||
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist, int cudaDeviceId);
|
||||
void setupNvMediaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
|
||||
int cudaDeviceId);
|
||||
void setupCudaSignalerNvSciSync(Blit2DTest *ctx, NvSciSyncObj &syncObj,
|
||||
int cudaDeviceId);
|
||||
void setupNvSciBuf(NvSciBufObj &bufobj, NvSciBufAttrList &nvmediaAttrlist,
|
||||
int cudaDeviceId);
|
||||
void cleanupNvSciBuf(NvSciBufObj &Bufobj);
|
||||
void cleanupNvSciSync(NvSciSyncObj &syncObj);
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user