diff --git a/Samples/8_Platform_Specific/Tegra/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/CMakeLists.txt index 0872a885..c1a65040 100644 --- a/Samples/8_Platform_Specific/Tegra/CMakeLists.txt +++ b/Samples/8_Platform_Specific/Tegra/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(cudaNvSciNvMedia) +add_subdirectory(cudaNvSciBufMultiplanar) add_subdirectory(cuDLAErrorReporting) add_subdirectory(cuDLAHybridMode) add_subdirectory(cuDLALayerwiseStatsHybrid) diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json new file mode 100644 index 00000000..f0066b0f --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json @@ -0,0 +1,18 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/**", + "${workspaceFolder}/../../../Common" + ], + "defines": [], + "compilerPath": "/usr/local/cuda/bin/nvcc", + "cStandard": "gnu17", + "cppStandard": "gnu++14", + "intelliSenseMode": "linux-gcc-x64", + "configurationProvider": "ms-vscode.makefile-tools" + } + ], + "version": 4 +} diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json new file mode 100644 index 00000000..c7eb54dc --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "nvidia.nsight-vscode-edition", + "ms-vscode.cpptools", + "ms-vscode.makefile-tools" + ] +} diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json new file mode 100644 index 00000000..df6af9c7 --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json @@ -0,0 +1,10 @@ +{ + "configurations": [ + { + "name": "CUDA C++: Launch", + "type": "cuda-gdb", + "request": "launch", + "program": "${workspaceFolder}/cudaNvSciBufMultiplanar" + } + ] +} diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json new file mode 100644 index 00000000..4509aeb1 --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json @@ -0,0 +1,15 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "sample", + "type": "shell", + "command": "make dbg=1", + "problemMatcher": ["$nvcc"], + "group": { + "kind": "build", + "isDefault": true + } + } + ] +} diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt new file mode 100644 index 00000000..34959a68 --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt @@ -0,0 +1,74 @@ +cmake_minimum_required(VERSION 3.20) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../../cmake/Modules") + +project(cudaNvSciBufMultiplanar LANGUAGES C CXX CUDA) + +find_package(CUDAToolkit REQUIRED) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +set(CMAKE_CUDA_ARCHITECTURES 53 61 70 72 75 80 86 87 90) +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive) +endif() + +# Include directories and libraries +include_directories(../../../../Common) + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + # Find the NVSCI libraries + # use CMAKE_LIBRARY_PATH so that users can also specify the NVSCI lib path in cmake command + set(CMAKE_LIBRARY_PATH "/usr/lib" ${CMAKE_LIBRARY_PATH}) + file(GLOB_RECURSE NVSCIBUF_LIB + ${CMAKE_LIBRARY_PATH}/*/libnvscibuf.so + ) + file(GLOB_RECURSE NVSCISYNC_LIB + ${CMAKE_LIBRARY_PATH}/*/libnvscisync.so + ) + + # Find the NVSCI header files + # use CMAKE_INCLUDE_PATH so that users can also specify the NVSCI include path in cmake command + set(CMAKE_INCLUDE_PATH "/usr/include" ${CMAKE_LIBRARY_PATH}) + find_path(NVSCIBUF_INCLUDE_DIR nvscibuf.h PATHS ${CMAKE_INCLUDE_PATH}) + find_path(NVSCISYNC_INCLUDE_DIR nvscisync.h PATHS ${CMAKE_INCLUDE_PATH}) + + if(NVSCIBUF_LIB AND NVSCISYNC_LIB AND NVSCIBUF_INCLUDE_DIR AND NVSCISYNC_INCLUDE_DIR) + message(STATUS "FOUND NVSCI libs: ${NVSCIBUF_LIB} ${NVSCISYNC_LIB}") + message(STATUS "Using NVSCI headers path: ${NVSCIBUF_INCLUDE_DIR} ${NVSCIBUF_INCLUDE_DIR}") + # Source file + # Add target for cudaNvSciBufMultiplanar + add_executable(cudaNvSciBufMultiplanar imageKernels.cu cudaNvSciBufMultiplanar.cpp main.cpp) + + target_compile_options(cudaNvSciBufMultiplanar PRIVATE $<$:--extended-lambda>) + + target_compile_features(cudaNvSciBufMultiplanar PRIVATE cxx_std_17 cuda_std_17) + + set_target_properties(cudaNvSciBufMultiplanar PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + + target_include_directories(cudaNvSciBufMultiplanar PUBLIC + ${CUDAToolkit_INCLUDE_DIRS} + ${NVSCIBUF_INCLUDE_DIR} + ${NVSCISYNC_INCLUDE_DIR} + ) + + target_link_libraries(cudaNvSciBufMultiplanar + CUDA::cuda_driver + ${NVSCIBUF_LIB} + ${NVSCISYNC_LIB} + ) + # Copy yuv_planar_img1.yuv to the output directory + add_custom_command(TARGET cudaNvSciBufMultiplanar POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_SOURCE_DIR}/yuv_planar_img1.yuv ${CMAKE_CURRENT_BINARY_DIR}/yuv_planar_img1.yuv + ) + # Specify additional clean files + set_target_properties(cudaNvSciBufMultiplanar PROPERTIES + ADDITIONAL_CLEAN_FILES "image_out.yuv" + ) + else() + message(STATUS "NvSCI not found - will not build sample 'cudaNvSciBufMultiplanar'") + endif() +else() + message(STATUS "Will not build sample cudaNvSciBufMultiplanar - requires Linux OS") +endif() diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md new file mode 100644 index 00000000..c704d6a8 --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md @@ -0,0 +1,64 @@ +# cudaNvSciBufMultiplanar - CUDA NvSciBufMultiplanar Image Samples + +## Description + +This sample demonstrates CUDA-NvSciBuf Interop for Multiplanar images. A YUV 420 multiplanar image is flipped and allocated using NvSciBuf APIs and imported into CUDA with CUDA External Resource Interoperability. A CUDA surface is created from the corresponding mapped CUDA array and again bit flipping is performed on the surface. The result is copied back to a YUV image which is compared against the input. + +## Key Concepts + +CUDA NvSci Interop, Data Parallel Algorithms, Image Processing + +## Supported SM Architectures + +[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 10.0 ](https://developer.nvidia.com/cuda-gpus) [SM 10.1 ](https://developer.nvidia.com/cuda-gpus) [SM 12.0 ](https://developer.nvidia.com/cuda-gpus) + +## Supported OSes + +Linux + +## Supported CPU Architecture + +aarch64 + +## CUDA APIs involved + +### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) +cudaDeviceGetAttribute, cudaNvSciBufMultiplanar, cudaDestroyExternalMemory, cuDriverGetVersion, cuDeviceGetUuid, cudaSetDevice, cudaGetMipmappedArrayLevel, cudaFreeMipmappedArray, cudaImportExternalMemory, cudaCreateChannelDesc, cudaExternalMemoryGetMappedMipmappedArray, cuCtxSynchronize, cudaMemcpy2DToArray, cudaMemcpy2DFromArray + +## Dependencies needed to build/run +[NVSCI](../../../README.md#nvsci) + +## Prerequisites + +Download and install the [CUDA Toolkit 12.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. +Make sure the dependencies mentioned in [Dependencies]() section above are installed. + +## Build and Run + +### Linux +The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make: +``` +$ cd +$ make +``` +The samples makefiles can take advantage of certain options: +* **TARGET_ARCH=** - cross-compile targeting a specific architecture. Allowed architectures are aarch64. + By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.
+`$ make TARGET_ARCH=aarch64`
+ See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details. +* **dbg=1** - build with debug symbols + ``` + $ make dbg=1 + ``` +* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`. + ``` + $ make SMS="50 60" + ``` + +* **HOST_COMPILER=** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers. +``` + $ make HOST_COMPILER=g++ +``` + +## References (for more details) + diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp new file mode 100644 index 00000000..57df5784 --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp @@ -0,0 +1,435 @@ +/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "cudaNvSciBufMultiplanar.h" + +NvSciBufModule module; +NvSciBufObj buffObj; +CUuuid uuid; + +void flipBits(uint8_t *pBuff, uint32_t size) { + for (uint32_t i = 0; i < size; i++) { + pBuff[i] = (~pBuff[i]); + } +} + +// Compare input and generated image files +void compareFiles(std::string &path1, std::string &path2) { + bool result = true; + FILE *fp1, *fp2; + int ch1, ch2; + + fp1 = fopen(path1.c_str(), "rb"); + fp2 = fopen(path2.c_str(), "rb"); + if (!fp1) { + result = false; + printf("File %s open failed in %s line %d\n", path1.c_str(), __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + if (!fp2) { + result = false; + printf("File %s open failed in %s line %d\n", path2.c_str(), __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + + do { + ch1 = getc(fp1); + ch2 = getc(fp2); + + if (ch1 != ch2) { + result = false; + break; + } + } while(ch1 != EOF && ch2 != EOF); + + if (result) { + printf("Input file : %s and output file : %s match SUCCESS\n", path1.c_str(), path2.c_str()); + } + else { + printf("Input file : %s and output file : %s match FAILURE\n", path1.c_str(), path2.c_str()); + } + + if (fp1) { + fclose(fp1); + } + if (fp2) { + fclose(fp2); + } +} + +void Caller::init() { + checkNvSciErrors(NvSciBufAttrListCreate(module, &attrList)); + attrListOut = NULL; +} + +void Caller::deinit() { + NvSciBufAttrListFree(attrList); + checkCudaErrors(cudaDestroyExternalMemory(extMem)); +} + +// Set NvSciBufImage attribute values in the attribute list +void Caller::setAttrListImageMultiPlanes(int imageWidth, int imageHeight) { + NvSciBufType bufType = NvSciBufType_Image; + NvSciBufAttrValImageLayoutType layout = NvSciBufImage_BlockLinearType; + bool cpuAccessFlag = false; + NvSciBufAttrValAccessPerm perm = NvSciBufAccessPerm_ReadWrite; + NvSciRmGpuId gpuid; + bool vpr = false; + int32_t planeCount = PLANAR_NUM_PLANES; + int drvVersion; + // Dimensions of the imported image in the YUV 420 planar format + int32_t planeWidths[] = {imageWidth, imageWidth/2, imageWidth/2}; + int32_t planeHeights[] = {imageHeight, imageHeight/2, imageHeight/2}; + NvSciBufAttrKeyValuePair keyPair; + NvSciBufAttrKeyValuePair pairArray[ATTR_SIZE]; + + NvSciBufAttrValColorFmt planeColorFmts[] = + { NvSciColor_Y8, NvSciColor_V8, NvSciColor_U8 }; + NvSciBufAttrValImageScanType planeScanType[] = + { NvSciBufScan_ProgressiveType }; + + memcpy(&gpuid.bytes, &uuid.bytes, sizeof(uuid.bytes)); + + NvSciBufAttrKeyValuePair imgBuffAttrsArr[] = { + { NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType) }, + { NvSciBufGeneralAttrKey_NeedCpuAccess, &cpuAccessFlag, + sizeof(cpuAccessFlag) }, + { NvSciBufGeneralAttrKey_RequiredPerm, &perm, sizeof(perm) }, + { NvSciBufGeneralAttrKey_GpuId, &gpuid, sizeof(gpuid) }, + { NvSciBufImageAttrKey_Layout, &layout, sizeof(layout) }, + { NvSciBufImageAttrKey_VprFlag, &vpr, sizeof(vpr) }, + { NvSciBufImageAttrKey_PlaneCount, &planeCount, sizeof(planeCount) }, + { NvSciBufImageAttrKey_PlaneColorFormat, planeColorFmts, + sizeof(planeColorFmts) }, + { NvSciBufImageAttrKey_PlaneWidth, planeWidths, sizeof(planeWidths) }, + { NvSciBufImageAttrKey_PlaneHeight, planeHeights, + sizeof(planeHeights) }, + { NvSciBufImageAttrKey_PlaneScanType, planeScanType, + sizeof(planeScanType) }, + }; + + std::vector imgBuffAttrsVec(imgBuffAttrsArr, + imgBuffAttrsArr+(sizeof(imgBuffAttrsArr)/sizeof(imgBuffAttrsArr[0]))); + + memset(pairArray, 0, sizeof(NvSciBufAttrKeyValuePair) * imgBuffAttrsVec.size()); + std::copy(imgBuffAttrsVec.begin(), imgBuffAttrsVec.end(), pairArray); + checkNvSciErrors(NvSciBufAttrListSetAttrs(attrList, pairArray, imgBuffAttrsVec.size())); +} + +cudaNvSciBufMultiplanar::cudaNvSciBufMultiplanar(size_t width, size_t height, std::vector &deviceIds) + : imageWidth(width), + imageHeight(height) { + mCudaDeviceId = deviceIds[0]; + attrListReconciled = NULL; + attrListConflict = NULL; + checkNvSciErrors(NvSciBufModuleOpen(&module)); + initCuda(mCudaDeviceId); + } + +void cudaNvSciBufMultiplanar::initCuda(int devId) { + int major = 0, minor = 0, drvVersion; + NvSciRmGpuId gpuid; + + checkCudaErrors(cudaSetDevice(mCudaDeviceId)); + checkCudaErrors(cudaDeviceGetAttribute( + &major, cudaDevAttrComputeCapabilityMajor, mCudaDeviceId)); + checkCudaErrors(cudaDeviceGetAttribute( + &minor, cudaDevAttrComputeCapabilityMinor, mCudaDeviceId)); + printf( + "[cudaNvSciBufMultiplanar] GPU Device %d: \"%s\" with compute capability " + "%d.%d\n\n", + mCudaDeviceId, _ConvertSMVer2ArchName(major, minor), major, minor); + + checkCudaDrvErrors(cuDriverGetVersion(&drvVersion)); + + if (drvVersion <= 11030) { + checkCudaDrvErrors(cuDeviceGetUuid(&uuid, devId)); + } else { + checkCudaDrvErrors(cuDeviceGetUuid_v2(&uuid, devId)); + } +} + +/* +Caller1 flips a YUV image which is allocated to nvscibuf APIs and copied into CUDA Array. +It is mapped to CUDA surface and bit flip is done. Caller2 in the same thread copies +CUDA Array to a YUV image file. The original image is compared with the double bit +flipped image. +*/ +void cudaNvSciBufMultiplanar::runCudaNvSciBufPlanar(std::string &imageFilename, std::string &imageFilenameOut) { + cudaArray_t levelArray1[PLANAR_NUM_PLANES]; + cudaArray_t levelArray2[PLANAR_NUM_PLANES]; + Caller caller1; + Caller caller2; + + int numPlanes = PLANAR_NUM_PLANES; + caller1.init(); + caller2.init(); + + // Set NvSciBufImage attribute values in the attribute list + caller1.setAttrListImageMultiPlanes(imageWidth, imageHeight); + caller2.setAttrListImageMultiPlanes(imageWidth, imageHeight); + + // Reconcile attribute lists and allocate NvSciBuf object + reconcileAttrList(&caller1.attrList, &caller2.attrList); + caller1.copyExtMemToMultiPlanarArrays(); + for (int i = 0; i < numPlanes; i++) { + checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray1[i], caller1.multiPlanarArray[i], 0)); + } + caller1.copyYUVToCudaArrayAndFlipBits(imageFilename, levelArray1); + + caller2.copyExtMemToMultiPlanarArrays(); + for (int i = 0; i < numPlanes; i++) { + checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray2[i], caller2.multiPlanarArray[i], 0)); + } + // Maps cudaArray to surface memory and launches a kernel to flip bits + launchFlipSurfaceBitsKernel(levelArray2, caller2.multiPlanarWidth, caller2.multiPlanarHeight, numPlanes); + + // Synchronization can be done using nvSciSync when non CUDA callers and cross-process signaler-waiter + // applications are involved. Please refer to the cudaNvSci sample library for more details. + checkCudaDrvErrors(cuCtxSynchronize()); + printf("Bit flip of the surface memory done\n"); + + caller2.copyCudaArrayToYUV(imageFilenameOut, levelArray2); + compareFiles(imageFilename, imageFilenameOut); + + // Release memory + printf("Releasing memory\n"); + for (int i = 0; i < numPlanes; i++) { + checkCudaErrors(cudaFreeMipmappedArray(caller1.multiPlanarArray[i])); + checkCudaErrors(cudaFreeMipmappedArray(caller2.multiPlanarArray[i])); + } + tearDown(&caller1, &caller2); +} + +// Map NvSciBufObj to cudaMipmappedArray +void Caller::copyExtMemToMultiPlanarArrays() { + checkNvSciErrors(NvSciBufObjGetAttrList(buffObj, &attrListOut)); + memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * PLANE_ATTR_SIZE); + cudaExternalMemoryHandleDesc memHandleDesc; + cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0}; + cudaChannelFormatDesc desc = {0}; + cudaExtent extent = {0}; + + pairArrayOut[PLANE_SIZE].key = NvSciBufImageAttrKey_Size; // Datatype: @c uint64_t + pairArrayOut[PLANE_ALIGNED_SIZE].key = NvSciBufImageAttrKey_PlaneAlignedSize; // Datatype: @c uint64_t[] + pairArrayOut[PLANE_OFFSET].key = NvSciBufImageAttrKey_PlaneOffset; // Datatype: @c uint64_t[] + pairArrayOut[PLANE_HEIGHT].key = NvSciBufImageAttrKey_PlaneHeight; // Datatype: @c uint32_t[] + pairArrayOut[PLANE_WIDTH].key = NvSciBufImageAttrKey_PlaneWidth; // Datatype: @c int32_t[] + pairArrayOut[PLANE_CHANNEL_COUNT].key = NvSciBufImageAttrKey_PlaneChannelCount; // Datatype: @c uint8_t + pairArrayOut[PLANE_BITS_PER_PIXEL].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;// Datatype: @c uint32_t[] + pairArrayOut[PLANE_COUNT].key = NvSciBufImageAttrKey_PlaneCount; // Datatype: @c uint32_t + checkNvSciErrors(NvSciBufAttrListGetAttrs(attrListOut, pairArrayOut, (PLANE_ATTR_SIZE))); + + uint64_t size = *(uint64_t*)pairArrayOut[PLANE_SIZE].value; + uint64_t *planeAlignedSize = (uint64_t*)pairArrayOut[PLANE_ALIGNED_SIZE].value; + int32_t *planeWidth = (int32_t*)pairArrayOut[PLANE_WIDTH].value; + int32_t *planeHeight = (int32_t*)pairArrayOut[PLANE_HEIGHT].value; + uint64_t *planeOffset = (uint64_t*)pairArrayOut[PLANE_OFFSET].value; + uint8_t planeChannelCount = *(uint8_t*)pairArrayOut[PLANE_CHANNEL_COUNT].value; + uint32_t *planeBitsPerPixel = (uint32_t*)pairArrayOut[PLANE_BITS_PER_PIXEL].value; + uint32_t planeCount = *(uint32_t*)pairArrayOut[PLANE_COUNT].value; + + numPlanes = planeCount; + + for (int i = 0; i < numPlanes; i++) { + multiPlanarWidth[i] = planeWidth[i]; + multiPlanarHeight[i] = planeHeight[i]; + } + + memset(&memHandleDesc, 0, sizeof(memHandleDesc)); + memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf; + memHandleDesc.handle.nvSciBufObject = buffObj; + memHandleDesc.size = size; + checkCudaErrors(cudaImportExternalMemory(&extMem, &memHandleDesc)); + + desc = cudaCreateChannelDesc(planeBitsPerPixel[0], 0, 0, 0, cudaChannelFormatKindUnsigned); + memset(&mipmapDesc, 0, sizeof(mipmapDesc)); + mipmapDesc.numLevels = 1; + + for (int i = 0; i < numPlanes; i++) { + memset(&extent, 0, sizeof(extent)); + extent.width = planeWidth[i]; + extent.height = planeHeight[i]; + extent.depth = 0; + mipmapDesc.offset = planeOffset[i]; + mipmapDesc.formatDesc = desc; + mipmapDesc.extent = extent; + mipmapDesc.flags = cudaArraySurfaceLoadStore;; + checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&multiPlanarArray[i], extMem, &mipmapDesc)); + } +} + +void cudaNvSciBufMultiplanar::reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2) { + attrList[0] = *attrList1; + attrList[1] = *attrList2; + bool isReconciled = false; + + checkNvSciErrors(NvSciBufAttrListReconcile(attrList, 2, &attrListReconciled, &attrListConflict)); + checkNvSciErrors(NvSciBufAttrListIsReconciled(attrListReconciled, &isReconciled)); + checkNvSciErrors(NvSciBufObjAlloc(attrListReconciled, &buffObj)); + printf("NvSciBufAttrList reconciled\n"); +} + +// YUV 420 image is flipped and copied to cuda Array which is mapped to nvsciBuf +void Caller::copyYUVToCudaArrayAndFlipBits(std::string &path, cudaArray_t *cudaArr) { + FILE *fp = NULL; + uint8_t *pYBuff, *pUBuff, *pVBuff, *pChroma; + uint8_t *pBuff = NULL; + uint32_t uvOffset[numPlanes] = {0}, copyWidthInBytes[numPlanes] = {0}, copyHeight[numPlanes] = {0}; + uint32_t width = multiPlanarWidth[0]; + uint32_t height = multiPlanarHeight[0]; + + fp = fopen(path.c_str(), "rb"); + if (!fp) { + printf("CudaProducer: Error opening file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + pBuff = (uint8_t*)malloc((width * height * PLANAR_CHROMA_WIDTH_ORDER * PLANAR_CHROMA_HEIGHT_ORDER) * sizeof(unsigned char)); + if (!pBuff) { + printf("CudaProducer: Failed to allocate image buffer in %s line %d\n", __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + // Y V U order in the buffer. Fully planar formats use + // three planes to store the Y, Cb and Cr components separately. + pYBuff = pBuff; + pVBuff = pYBuff + width * height; + pUBuff = pVBuff + (width / PLANAR_CHROMA_WIDTH_ORDER) * (height / PLANAR_CHROMA_HEIGHT_ORDER); + for (uint32_t i = 0; i < height; i++) { + if (fread(pYBuff, width, 1, fp) != 1) { + printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + flipBits(pYBuff, width); + pYBuff += width; + } + + pChroma = pVBuff; + for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) { + if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) { + printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + flipBits(pChroma, width); + pChroma += width / PLANAR_CHROMA_WIDTH_ORDER; + } + + pChroma = pUBuff; + for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) { + if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) { + printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + flipBits(pChroma, width); + pChroma += width / PLANAR_CHROMA_WIDTH_ORDER; + } + uvOffset[0] = 0; + copyHeight[0] = height; + copyHeight[1] = height / PLANAR_CHROMA_HEIGHT_ORDER; + copyHeight[2] = height / PLANAR_CHROMA_HEIGHT_ORDER; + copyWidthInBytes[0] = width; + // Width of the second and third planes is half of the first plane. + copyWidthInBytes[1] = width / PLANAR_CHROMA_WIDTH_ORDER; + copyWidthInBytes[2] = width / PLANAR_CHROMA_WIDTH_ORDER; + uvOffset[1] = width * height; + uvOffset[2] = uvOffset[1] + (width / PLANAR_CHROMA_WIDTH_ORDER) * (height / PLANAR_CHROMA_HEIGHT_ORDER); + for (int i = 0; i < numPlanes; i++) { + checkCudaDrvErrors(cuCtxSynchronize()); + checkCudaErrors(cudaMemcpy2DToArray( + cudaArr[i], 0, 0, (void *)(pBuff + uvOffset[i]), copyWidthInBytes[i], + copyWidthInBytes[i], copyHeight[i], + cudaMemcpyHostToDevice)); + } + + if (fp) { + fclose(fp); + fp = NULL; + } + if (pBuff) { + free(pBuff); + pBuff = NULL; + } + printf("Image %s copied to CUDA Array and bit flip done\n", path.c_str()); +} + +// Copy Cuda Array in YUV 420 format to a file +void Caller::copyCudaArrayToYUV(std::string &path, cudaArray_t *cudaArr) { + FILE *fp = NULL; + int bufferSize; + uint32_t width = multiPlanarWidth[0]; + uint32_t height = multiPlanarHeight[0]; + uint32_t copyWidthInBytes=0, copyHeight=0; + uint8_t *pCudaCopyMem = NULL; + + fp = fopen(path.c_str(), "wb+"); + if (!fp) { + printf("WriteFrame: file open failed %s in %s line %d\n", path.c_str(), __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < numPlanes; i++) { + if (i == 0) { + bufferSize = width * height; + copyWidthInBytes = width; + copyHeight = height; + + pCudaCopyMem = (uint8_t *)malloc(bufferSize); + if (pCudaCopyMem == NULL) { + printf("pCudaCopyMem malloc failed in %s line %d\n", __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + } + else { + bufferSize = ((height / PLANAR_CHROMA_HEIGHT_ORDER) * (width / PLANAR_CHROMA_WIDTH_ORDER)); + copyWidthInBytes = width / PLANAR_CHROMA_WIDTH_ORDER; + copyHeight = height / PLANAR_CHROMA_HEIGHT_ORDER; + } + memset(pCudaCopyMem, 0, bufferSize); + + checkCudaErrors(cudaMemcpy2DFromArray( + (void *)pCudaCopyMem, copyWidthInBytes, cudaArr[i], 0, 0, + copyWidthInBytes, copyHeight, + cudaMemcpyDeviceToHost)); + + checkCudaDrvErrors(cuCtxSynchronize()); + + if (fwrite(pCudaCopyMem, bufferSize, 1, fp) != 1) { + printf("Cuda consumer: output file write failed in %s line %d\n", __FILE__, __LINE__); + exit(EXIT_FAILURE); + } + } + printf("Output file : %s saved\n", path.c_str()); + + if (fp) { + fclose(fp); + fp = NULL; + } +} + +void cudaNvSciBufMultiplanar::tearDown(Caller *caller1, Caller *caller2) { + caller1->deinit(); + caller2->deinit(); + NvSciBufObjFree(buffObj); +} diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h new file mode 100644 index 00000000..25a1d756 --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef CUDA_NVSCIBUF_MULTIPLANAR_H +#define CUDA_NVSCIBUF_MULTIPLANAR_H + +#include +#include +#include +#include +#include + +#define PLANAR_NUM_PLANES 3 +#define PLANAR_CHROMA_WIDTH_ORDER 2 +#define PLANAR_CHROMA_HEIGHT_ORDER 2 + +#define ATTR_SIZE 20 +#define DEFAULT_GPU 0 + +#define checkNvSciErrors(call) \ + do { \ + NvSciError _status = call; \ + if (NvSciError_Success != _status) { \ + printf( \ + "NVSCI call in file '%s' in line %i returned" \ + " %d, expected %d\n", \ + __FILE__, __LINE__, _status, NvSciError_Success); \ + fflush(stdout); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#define checkCudaDrvErrors(call) \ + do { \ + CUresult err = call; \ + if (CUDA_SUCCESS != err) { \ + const char *errorStr = NULL; \ + cuGetErrorString(err, &errorStr); \ + printf( \ + "checkCudaDrvErrors() Driver API error" \ + " = %04d \"%s\" from file <%s>, " \ + "line %i.\n", \ + err, errorStr, __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +extern void launchFlipSurfaceBitsKernel(cudaArray_t *levelArray, int32_t *multiPlanarWidth, + int32_t *multiPlanarHeight, int numPlanes); + +class Caller { +private: + NvSciBufAttrList attrListOut; + NvSciBufAttrKeyValuePair pairArrayOut[ATTR_SIZE]; + cudaExternalMemory_t extMem; + int32_t numPlanes; +public: + NvSciBufAttrList attrList; + cudaMipmappedArray_t multiPlanarArray[PLANAR_NUM_PLANES]; + int32_t multiPlanarWidth[PLANAR_NUM_PLANES]; + int32_t multiPlanarHeight[PLANAR_NUM_PLANES]; + + void init(); + void deinit(); + void copyExtMemToMultiPlanarArrays(); + void copyYUVToCudaArrayAndFlipBits(std::string &image_filename, cudaArray_t *yuvPlanes); + void copyCudaArrayToYUV(std::string &image_filename, cudaArray_t *yuvPlanes); + void setAttrListImageMultiPlanes(int imageWidth, int imageHeight); +}; + + +class cudaNvSciBufMultiplanar { +private: + size_t imageWidth; + size_t imageHeight; + int mCudaDeviceId; + int deviceCnt; + NvSciBufAttrList attrList[2]; + NvSciBufAttrList attrListReconciled; + NvSciBufAttrList attrListConflict; +public: + cudaNvSciBufMultiplanar(size_t imageWidth, size_t imageHeight, std::vector &deviceIds); + void initCuda(int devId); + void reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2); + void runCudaNvSciBufPlanar(std::string &image_filename, std::string &image_filename_out); + void tearDown(Caller *caller1, Caller *caller2); +}; + +enum NvSciBufImageAttributes { + PLANE_SIZE, + PLANE_ALIGNED_SIZE, + PLANE_OFFSET, + PLANE_HEIGHT, + PLANE_WIDTH, + PLANE_CHANNEL_COUNT, + PLANE_BITS_PER_PIXEL, + PLANE_COUNT, + PLANE_ATTR_SIZE +}; + +#endif // CUDA_NVSCIBUF_MULTIPLANAR_H diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu new file mode 100644 index 00000000..eaaed39b --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu @@ -0,0 +1,64 @@ +/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +static __global__ void flipSurfaceBits(cudaSurfaceObject_t surfObj, int width, int height) { + char data; + unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; + unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + // Read from input surface + surf2Dread(&data, surfObj, x, y); + // Write to output surface + data = ~data; + surf2Dwrite(data, surfObj, x, y); + } +} + +// Copy cudaArray to surface memory and launch the CUDA kernel +void launchFlipSurfaceBitsKernel( + cudaArray_t *levelArray, + int32_t *multiPlanarWidth, + int32_t *multiPlanarHeight, + int numPlanes) { + + cudaSurfaceObject_t surfObject[numPlanes] = {0}; + cudaResourceDesc resDesc; + + for (int i = 0; i < numPlanes; i++) { + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = cudaResourceTypeArray; + resDesc.res.array.array = levelArray[i]; + checkCudaErrors(cudaCreateSurfaceObject(&surfObject[i], &resDesc)); + dim3 threadsperBlock(16, 16); + dim3 numBlocks((multiPlanarWidth[i] + threadsperBlock.x - 1) / threadsperBlock.x, + (multiPlanarHeight[i] + threadsperBlock.y - 1) / threadsperBlock.y); + flipSurfaceBits<<>>(surfObject[i], multiPlanarWidth[i], multiPlanarHeight[i]); + } +} + diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp new file mode 100644 index 00000000..d6ce1c2e --- /dev/null +++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp @@ -0,0 +1,72 @@ +/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include "cudaNvSciBufMultiplanar.h" +#include + +#define MAX_FILE_SIZE 100 + +int main(int argc, const char **argv) { + int numOfGPUs = 0; + std::vector deviceIds; + (cudaGetDeviceCount(&numOfGPUs)); + + printf("%d GPUs found\n", numOfGPUs); + if (!numOfGPUs) { + exit(EXIT_WAIVED); + } else { + for (int devID = 0; devID < numOfGPUs; devID++) { + int major = 0, minor = 0; + (cudaDeviceGetAttribute( + &major, cudaDevAttrComputeCapabilityMajor, devID)); + (cudaDeviceGetAttribute( + &minor, cudaDevAttrComputeCapabilityMinor, devID)); + if (major >= 6) { + deviceIds.push_back(devID); + } + } + if (deviceIds.size() == 0) { + printf( + "cudaNvSciBufMultiplanar requires one or more GPUs of Pascal(SM 6.0) or higher " + "archs\nWaiving..\n"); + exit(EXIT_WAIVED); + } + } + + std::string image_filename = sdkFindFilePath("yuv_planar_img1.yuv", argv[0]); + std::string image_filename_out = "image_out.yuv"; + uint32_t imageWidth = 720; + uint32_t imageHeight = 480; + + printf("input image %s , width = %d, height = %d\n", image_filename.c_str(), imageWidth, imageHeight); + + cudaNvSciBufMultiplanar cudaNvSciBufMultiplanarApp(imageWidth, imageHeight, deviceIds); + cudaNvSciBufMultiplanarApp.runCudaNvSciBufPlanar(image_filename, image_filename_out); + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv new file mode 100644 index 00000000..4e524775 Binary files /dev/null and b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv differ