mirror of
				https://github.com/NVIDIA/cuda-samples.git
				synced 2025-11-04 15:47:50 +08:00 
			
		
		
		
	Merge pull request #10 from XSShawnZeng/Tegra_Samples_Cmake_Transition
Add Tegra sample cudaNvSciBufMultiplanar
This commit is contained in:
		
						commit
						1a466282da
					
				@ -1,4 +1,5 @@
 | 
			
		||||
add_subdirectory(cudaNvSciNvMedia)
 | 
			
		||||
add_subdirectory(cudaNvSciBufMultiplanar)
 | 
			
		||||
add_subdirectory(cuDLAErrorReporting)
 | 
			
		||||
add_subdirectory(cuDLAHybridMode)
 | 
			
		||||
add_subdirectory(cuDLALayerwiseStatsHybrid)
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										18
									
								
								Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,18 @@
 | 
			
		||||
{
 | 
			
		||||
    "configurations": [
 | 
			
		||||
        {
 | 
			
		||||
            "name": "Linux",
 | 
			
		||||
            "includePath": [
 | 
			
		||||
                "${workspaceFolder}/**",
 | 
			
		||||
                "${workspaceFolder}/../../../../Common"
 | 
			
		||||
            ],
 | 
			
		||||
            "defines": [],
 | 
			
		||||
            "compilerPath": "/usr/local/cuda/bin/nvcc",
 | 
			
		||||
            "cStandard": "gnu17",
 | 
			
		||||
            "cppStandard": "gnu++14",
 | 
			
		||||
            "intelliSenseMode": "linux-gcc-x64",
 | 
			
		||||
            "configurationProvider": "ms-vscode.makefile-tools"
 | 
			
		||||
        }
 | 
			
		||||
    ],
 | 
			
		||||
    "version": 4
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										7
									
								
								Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,7 @@
 | 
			
		||||
{
 | 
			
		||||
    "recommendations": [
 | 
			
		||||
        "nvidia.nsight-vscode-edition",
 | 
			
		||||
        "ms-vscode.cpptools",
 | 
			
		||||
        "ms-vscode.makefile-tools"
 | 
			
		||||
    ]
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										10
									
								
								Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,10 @@
 | 
			
		||||
{
 | 
			
		||||
    "configurations": [
 | 
			
		||||
        {
 | 
			
		||||
            "name": "CUDA C++: Launch",
 | 
			
		||||
            "type": "cuda-gdb",
 | 
			
		||||
            "request": "launch",
 | 
			
		||||
            "program": "${workspaceFolder}/cudaNvSciBufMultiplanar"
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										15
									
								
								Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@ -0,0 +1,15 @@
 | 
			
		||||
{
 | 
			
		||||
    "version": "2.0.0",
 | 
			
		||||
    "tasks": [
 | 
			
		||||
        {
 | 
			
		||||
            "label": "sample",
 | 
			
		||||
            "type": "shell",
 | 
			
		||||
            "command": "make dbg=1",
 | 
			
		||||
            "problemMatcher": ["$nvcc"],
 | 
			
		||||
            "group": {
 | 
			
		||||
                "kind": "build",
 | 
			
		||||
                "isDefault": true
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
}
 | 
			
		||||
@ -0,0 +1,74 @@
 | 
			
		||||
# Build script for the cudaNvSciBufMultiplanar sample.
# The target is only created on Linux and only when both the NvSci libraries
# (libnvscibuf, libnvscisync) and their headers are found; otherwise a status
# message is printed and nothing is built.
cmake_minimum_required(VERSION 3.20)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../../cmake/Modules")

project(cudaNvSciBufMultiplanar LANGUAGES C CXX CUDA)

find_package(CUDAToolkit REQUIRED)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

set(CMAKE_CUDA_ARCHITECTURES 53 61 70 72 75 80 86 87 90)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G")  # enable cuda-gdb (expensive)
endif()

# Include directories and libraries
include_directories(../../../../Common)

if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    # Find the NVSCI libraries
    # use CMAKE_LIBRARY_PATH so that users can also specify the NVSCI lib path in cmake command
    set(CMAKE_LIBRARY_PATH "/usr/lib" ${CMAKE_LIBRARY_PATH})
    file(GLOB_RECURSE NVSCIBUF_LIB
         ${CMAKE_LIBRARY_PATH}/*/libnvscibuf.so
    )
    file(GLOB_RECURSE NVSCISYNC_LIB
         ${CMAKE_LIBRARY_PATH}/*/libnvscisync.so
    )

    # Find the NVSCI header files
    # use CMAKE_INCLUDE_PATH so that users can also specify the NVSCI include path in cmake command
    # (prepend the default to any user-provided CMAKE_INCLUDE_PATH, not to the library path)
    set(CMAKE_INCLUDE_PATH "/usr/include" ${CMAKE_INCLUDE_PATH})
    find_path(NVSCIBUF_INCLUDE_DIR nvscibuf.h PATHS ${CMAKE_INCLUDE_PATH})
    find_path(NVSCISYNC_INCLUDE_DIR nvscisync.h PATHS ${CMAKE_INCLUDE_PATH})

    if(NVSCIBUF_LIB AND NVSCISYNC_LIB AND NVSCIBUF_INCLUDE_DIR AND NVSCISYNC_INCLUDE_DIR)
        message(STATUS "FOUND NVSCI libs: ${NVSCIBUF_LIB} ${NVSCISYNC_LIB}")
        message(STATUS "Using NVSCI headers path: ${NVSCIBUF_INCLUDE_DIR} ${NVSCISYNC_INCLUDE_DIR}")
        # Source file
        # Add target for cudaNvSciBufMultiplanar
        add_executable(cudaNvSciBufMultiplanar imageKernels.cu cudaNvSciBufMultiplanar.cpp main.cpp)

        target_compile_options(cudaNvSciBufMultiplanar PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)

        target_compile_features(cudaNvSciBufMultiplanar PRIVATE cxx_std_17 cuda_std_17)

        set_target_properties(cudaNvSciBufMultiplanar PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

        target_include_directories(cudaNvSciBufMultiplanar PUBLIC
            ${CUDAToolkit_INCLUDE_DIRS}
            ${NVSCIBUF_INCLUDE_DIR}
            ${NVSCISYNC_INCLUDE_DIR}
        )

        target_link_libraries(cudaNvSciBufMultiplanar
            CUDA::cuda_driver
            ${NVSCIBUF_LIB}
            ${NVSCISYNC_LIB}
        )
        # Copy yuv_planar_img1.yuv to the output directory
        add_custom_command(TARGET cudaNvSciBufMultiplanar POST_BUILD
            COMMAND ${CMAKE_COMMAND} -E copy_if_different
            ${CMAKE_CURRENT_SOURCE_DIR}/yuv_planar_img1.yuv ${CMAKE_CURRENT_BINARY_DIR}/yuv_planar_img1.yuv
        )
        # Specify additional clean files
        set_target_properties(cudaNvSciBufMultiplanar PROPERTIES
            ADDITIONAL_CLEAN_FILES "image_out.yuv"
        )
    else()
        message(STATUS "NvSCI not found - will not build sample 'cudaNvSciBufMultiplanar'")
    endif()
else()
    message(STATUS "Will not build sample cudaNvSciBufMultiplanar - requires Linux OS")
endif()
 | 
			
		||||
@ -0,0 +1,64 @@
 | 
			
		||||
# cudaNvSciBufMultiplanar - CUDA NvSciBufMultiplanar Image Samples
 | 
			
		||||
 | 
			
		||||
## Description
 | 
			
		||||
 | 
			
		||||
This sample demonstrates CUDA-NvSciBuf interop for multiplanar images. The bits of a YUV 420 multiplanar image are flipped on the host, and the result is copied into a buffer that is allocated using NvSciBuf APIs and imported into CUDA with CUDA External Resource Interoperability. A CUDA surface is created from the corresponding mapped CUDA array and the bits are flipped again on the surface. The result is copied back to a YUV image file, which is compared against the input.
 | 
			
		||||
 | 
			
		||||
## Key Concepts
 | 
			
		||||
 | 
			
		||||
CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
 | 
			
		||||
 | 
			
		||||
## Supported SM Architectures
 | 
			
		||||
 | 
			
		||||
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 10.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 10.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 12.0 ](https://developer.nvidia.com/cuda-gpus)
 | 
			
		||||
 | 
			
		||||
## Supported OSes
 | 
			
		||||
 | 
			
		||||
Linux
 | 
			
		||||
 | 
			
		||||
## Supported CPU Architecture
 | 
			
		||||
 | 
			
		||||
aarch64
 | 
			
		||||
 | 
			
		||||
## CUDA APIs involved
 | 
			
		||||
 | 
			
		||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
 | 
			
		||||
cudaDeviceGetAttribute, cudaNvSciBufMultiplanar, cudaDestroyExternalMemory, cuDriverGetVersion, cuDeviceGetUuid, cudaSetDevice, cudaGetMipmappedArrayLevel, cudaFreeMipmappedArray, cudaImportExternalMemory, cudaCreateChannelDesc, cudaExternalMemoryGetMappedMipmappedArray, cuCtxSynchronize, cudaMemcpy2DToArray, cudaMemcpy2DFromArray
 | 
			
		||||
 | 
			
		||||
## Dependencies needed to build/run
 | 
			
		||||
[NVSCI](../../../README.md#nvsci)
 | 
			
		||||
 | 
			
		||||
## Prerequisites
 | 
			
		||||
 | 
			
		||||
Download and install the [CUDA Toolkit 12.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
 | 
			
		||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
 | 
			
		||||
 | 
			
		||||
## Build and Run
 | 
			
		||||
 | 
			
		||||
### Linux
 | 
			
		||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
 | 
			
		||||
```
 | 
			
		||||
$ cd <sample_dir>
 | 
			
		||||
$ make
 | 
			
		||||
```
 | 
			
		||||
The samples makefiles can take advantage of certain options:
 | 
			
		||||
*  **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are aarch64.
 | 
			
		||||
    By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
 | 
			
		||||
`$ make TARGET_ARCH=aarch64` <br/>
 | 
			
		||||
    See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
 | 
			
		||||
*   **dbg=1** - build with debug symbols
 | 
			
		||||
    ```
 | 
			
		||||
    $ make dbg=1
 | 
			
		||||
    ```
 | 
			
		||||
*   **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
 | 
			
		||||
    ```
 | 
			
		||||
    $ make SMS="50 60"
 | 
			
		||||
    ```
 | 
			
		||||
 | 
			
		||||
*  **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
 | 
			
		||||
```
 | 
			
		||||
    $ make HOST_COMPILER=g++
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## References (for more details)
 | 
			
		||||
 | 
			
		||||
@ -0,0 +1,435 @@
 | 
			
		||||
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
#include "cudaNvSciBufMultiplanar.h"
 | 
			
		||||
 | 
			
		||||
NvSciBufModule module;
 | 
			
		||||
NvSciBufObj buffObj;
 | 
			
		||||
CUuuid uuid;
 | 
			
		||||
 | 
			
		||||
// Inverts every bit of the first `size` bytes of `pBuff`, in place.
//   pBuff - buffer to modify; must hold at least `size` bytes
//   size  - number of bytes to invert (0 leaves the buffer untouched)
void flipBits(uint8_t *pBuff, uint32_t size) {
    uint8_t *const end = pBuff + size;
    for (uint8_t *cur = pBuff; cur != end; ++cur) {
        // ~ promotes to int; the cast keeps only the low byte, as before.
        *cur = static_cast<uint8_t>(~(*cur));
    }
}
 | 
			
		||||
 | 
			
		||||
// Compare input and generated image files byte by byte.
// Prints a "match SUCCESS" line when both files have the same length and
// contents, and a "match FAILURE" line otherwise. Exits the process with
// EXIT_FAILURE if either file cannot be opened.
void compareFiles(std::string &path1, std::string &path2) {
    FILE *firstFile = fopen(path1.c_str(), "rb");
    FILE *secondFile = fopen(path2.c_str(), "rb");

    if (firstFile == NULL) {
        printf("File %s open failed in %s line %d\n", path1.c_str(), __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }
    if (secondFile == NULL) {
        printf("File %s open failed in %s line %d\n", path2.c_str(), __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }

    bool identical = true;
    for (;;) {
        const int byte1 = getc(firstFile);
        const int byte2 = getc(secondFile);

        // Differing bytes, or one file ending before the other, is a mismatch.
        if (byte1 != byte2) {
            identical = false;
            break;
        }
        // Bytes are equal here, so EOF on one means EOF on both.
        if (byte1 == EOF) {
            break;
        }
    }

    if (!identical) {
        printf("Input file : %s and output file : %s match FAILURE\n", path1.c_str(), path2.c_str());
    }
    else {
        printf("Input file : %s and output file : %s match SUCCESS\n", path1.c_str(), path2.c_str());
    }

    fclose(firstFile);
    fclose(secondFile);
}
 | 
			
		||||
 | 
			
		||||
// Prepares the caller for attribute setup: clears the output attribute list
// and creates this caller's attribute list on the global NvSciBuf module.
void Caller::init() {
    attrListOut = NULL;
    checkNvSciErrors(NvSciBufAttrListCreate(module, &attrList));
}
 | 
			
		||||
 | 
			
		||||
// Frees the attribute list created in init() and destroys the external memory
// handle imported in copyExtMemToMultiPlanarArrays().
void Caller::deinit() {
    NvSciBufAttrListFree(attrList);

    // Destroy errors are routed through checkCudaErrors like every other
    // CUDA runtime call in this file.
    checkCudaErrors(cudaDestroyExternalMemory(extMem));
}
 | 
			
		||||
 | 
			
		||||
// Set NvSciBufImage attribute values in the attribute list  
 | 
			
		||||
void Caller::setAttrListImageMultiPlanes(int imageWidth, int imageHeight) {
 | 
			
		||||
    NvSciBufType bufType = NvSciBufType_Image;
 | 
			
		||||
    NvSciBufAttrValImageLayoutType layout = NvSciBufImage_BlockLinearType;
 | 
			
		||||
    bool cpuAccessFlag = false;
 | 
			
		||||
    NvSciBufAttrValAccessPerm perm = NvSciBufAccessPerm_ReadWrite;
 | 
			
		||||
    NvSciRmGpuId gpuid;
 | 
			
		||||
    bool vpr = false;
 | 
			
		||||
    int32_t planeCount = PLANAR_NUM_PLANES;
 | 
			
		||||
    int drvVersion;
 | 
			
		||||
    // Dimensions of the imported image in the YUV 420 planar format
 | 
			
		||||
    int32_t planeWidths[] = {imageWidth, imageWidth/2, imageWidth/2};
 | 
			
		||||
    int32_t planeHeights[] = {imageHeight, imageHeight/2, imageHeight/2};
 | 
			
		||||
    NvSciBufAttrKeyValuePair keyPair;
 | 
			
		||||
    NvSciBufAttrKeyValuePair pairArray[ATTR_SIZE];
 | 
			
		||||
 
 | 
			
		||||
    NvSciBufAttrValColorFmt planeColorFmts[] =
 | 
			
		||||
            { NvSciColor_Y8, NvSciColor_V8, NvSciColor_U8 };
 | 
			
		||||
    NvSciBufAttrValImageScanType planeScanType[] =
 | 
			
		||||
            { NvSciBufScan_ProgressiveType };
 | 
			
		||||
 
 | 
			
		||||
    memcpy(&gpuid.bytes, &uuid.bytes, sizeof(uuid.bytes));
 | 
			
		||||
 
 | 
			
		||||
    NvSciBufAttrKeyValuePair imgBuffAttrsArr[] = {
 | 
			
		||||
        { NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType) },
 | 
			
		||||
        { NvSciBufGeneralAttrKey_NeedCpuAccess, &cpuAccessFlag,
 | 
			
		||||
            sizeof(cpuAccessFlag) },
 | 
			
		||||
        { NvSciBufGeneralAttrKey_RequiredPerm, &perm, sizeof(perm) },
 | 
			
		||||
        { NvSciBufGeneralAttrKey_GpuId, &gpuid, sizeof(gpuid) },
 | 
			
		||||
        { NvSciBufImageAttrKey_Layout, &layout, sizeof(layout) },
 | 
			
		||||
        { NvSciBufImageAttrKey_VprFlag, &vpr, sizeof(vpr) },
 | 
			
		||||
        { NvSciBufImageAttrKey_PlaneCount, &planeCount, sizeof(planeCount) },
 | 
			
		||||
        { NvSciBufImageAttrKey_PlaneColorFormat, planeColorFmts,
 | 
			
		||||
            sizeof(planeColorFmts) },
 | 
			
		||||
        { NvSciBufImageAttrKey_PlaneWidth, planeWidths, sizeof(planeWidths) },
 | 
			
		||||
        { NvSciBufImageAttrKey_PlaneHeight, planeHeights,
 | 
			
		||||
            sizeof(planeHeights) },
 | 
			
		||||
        { NvSciBufImageAttrKey_PlaneScanType, planeScanType,
 | 
			
		||||
            sizeof(planeScanType) },
 | 
			
		||||
    };
 | 
			
		||||
 
 | 
			
		||||
    std::vector<NvSciBufAttrKeyValuePair> imgBuffAttrsVec(imgBuffAttrsArr,
 | 
			
		||||
                            imgBuffAttrsArr+(sizeof(imgBuffAttrsArr)/sizeof(imgBuffAttrsArr[0])));
 | 
			
		||||
  
 | 
			
		||||
    memset(pairArray, 0, sizeof(NvSciBufAttrKeyValuePair) * imgBuffAttrsVec.size());
 | 
			
		||||
    std::copy(imgBuffAttrsVec.begin(), imgBuffAttrsVec.end(), pairArray);
 | 
			
		||||
    checkNvSciErrors(NvSciBufAttrListSetAttrs(attrList, pairArray, imgBuffAttrsVec.size()));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Constructs the sample driver for an image of the given luma dimensions.
//   width, height - image dimensions stored in imageWidth / imageHeight
//   deviceIds     - candidate CUDA device ids; only the first entry is used
// Opens the global NvSciBuf module and initialises CUDA on the chosen device.
// Exits with EXIT_FAILURE if no device id is supplied.
cudaNvSciBufMultiplanar::cudaNvSciBufMultiplanar(size_t width, size_t height, std::vector<int> &deviceIds)
    : imageWidth(width),
      imageHeight(height) {
    // Reading deviceIds[0] from an empty vector is undefined behaviour, so
    // fail loudly in the same printf + exit style used elsewhere in this file.
    if (deviceIds.empty()) {
        printf("cudaNvSciBufMultiplanar: no CUDA device id provided in %s line %d\n", __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }

    mCudaDeviceId = deviceIds[0];
    attrListReconciled = NULL;
    attrListConflict = NULL;
    checkNvSciErrors(NvSciBufModuleOpen(&module));
    initCuda(mCudaDeviceId);
}
 | 
			
		||||
 | 
			
		||||
void cudaNvSciBufMultiplanar::initCuda(int devId) {
 | 
			
		||||
    int major = 0, minor = 0, drvVersion;
 | 
			
		||||
    NvSciRmGpuId gpuid;
 | 
			
		||||
 | 
			
		||||
    checkCudaErrors(cudaSetDevice(mCudaDeviceId));
 | 
			
		||||
    checkCudaErrors(cudaDeviceGetAttribute(
 | 
			
		||||
        &major, cudaDevAttrComputeCapabilityMajor, mCudaDeviceId));
 | 
			
		||||
    checkCudaErrors(cudaDeviceGetAttribute(
 | 
			
		||||
        &minor, cudaDevAttrComputeCapabilityMinor, mCudaDeviceId));
 | 
			
		||||
    printf(
 | 
			
		||||
        "[cudaNvSciBufMultiplanar] GPU Device %d: \"%s\" with compute capability "
 | 
			
		||||
        "%d.%d\n\n",
 | 
			
		||||
        mCudaDeviceId, _ConvertSMVer2ArchName(major, minor), major, minor);
 | 
			
		||||
 | 
			
		||||
    checkCudaDrvErrors(cuDriverGetVersion(&drvVersion));
 | 
			
		||||
 | 
			
		||||
    if (drvVersion <= 11030) {
 | 
			
		||||
        checkCudaDrvErrors(cuDeviceGetUuid(&uuid, devId));
 | 
			
		||||
    } else {
 | 
			
		||||
        checkCudaDrvErrors(cuDeviceGetUuid_v2(&uuid, devId));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* 
 | 
			
		||||
Caller1 flips a YUV image which is allocated to nvscibuf APIs and copied into CUDA Array.
 | 
			
		||||
It is mapped to CUDA surface and bit flip is done. Caller2 in the same thread copies 
 | 
			
		||||
CUDA Array to a YUV image file. The original image is compared with the double bit
 | 
			
		||||
flipped image.
 | 
			
		||||
*/
 | 
			
		||||
void cudaNvSciBufMultiplanar::runCudaNvSciBufPlanar(std::string &imageFilename, std::string &imageFilenameOut) {
 | 
			
		||||
    cudaArray_t levelArray1[PLANAR_NUM_PLANES];
 | 
			
		||||
    cudaArray_t levelArray2[PLANAR_NUM_PLANES];
 | 
			
		||||
    Caller caller1;
 | 
			
		||||
    Caller caller2;
 | 
			
		||||
 | 
			
		||||
    int numPlanes = PLANAR_NUM_PLANES;
 | 
			
		||||
    caller1.init();
 | 
			
		||||
    caller2.init();
 | 
			
		||||
 | 
			
		||||
    // Set NvSciBufImage attribute values in the attribute list
 | 
			
		||||
    caller1.setAttrListImageMultiPlanes(imageWidth, imageHeight);
 | 
			
		||||
    caller2.setAttrListImageMultiPlanes(imageWidth, imageHeight);
 | 
			
		||||
    
 | 
			
		||||
    // Reconcile attribute lists and allocate NvSciBuf object
 | 
			
		||||
    reconcileAttrList(&caller1.attrList, &caller2.attrList); 
 | 
			
		||||
    caller1.copyExtMemToMultiPlanarArrays();
 | 
			
		||||
    for (int i = 0; i < numPlanes; i++) {
 | 
			
		||||
        checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray1[i], caller1.multiPlanarArray[i], 0)); 
 | 
			
		||||
    }
 | 
			
		||||
    caller1.copyYUVToCudaArrayAndFlipBits(imageFilename, levelArray1);
 | 
			
		||||
    
 | 
			
		||||
    caller2.copyExtMemToMultiPlanarArrays();
 | 
			
		||||
    for (int i = 0; i < numPlanes; i++) {
 | 
			
		||||
        checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray2[i], caller2.multiPlanarArray[i], 0)); 
 | 
			
		||||
    }
 | 
			
		||||
    // Maps cudaArray to surface memory and launches a kernel to flip bits
 | 
			
		||||
    launchFlipSurfaceBitsKernel(levelArray2, caller2.multiPlanarWidth, caller2.multiPlanarHeight, numPlanes);
 | 
			
		||||
    
 | 
			
		||||
    // Synchronization can be done using nvSciSync when non CUDA callers and cross-process signaler-waiter 
 | 
			
		||||
    // applications are involved. Please refer to the cudaNvSci sample library for more details.
 | 
			
		||||
    checkCudaDrvErrors(cuCtxSynchronize());
 | 
			
		||||
    printf("Bit flip of the surface memory done\n");
 | 
			
		||||
 | 
			
		||||
    caller2.copyCudaArrayToYUV(imageFilenameOut, levelArray2);
 | 
			
		||||
    compareFiles(imageFilename, imageFilenameOut);
 | 
			
		||||
 | 
			
		||||
    // Release memory
 | 
			
		||||
    printf("Releasing memory\n");
 | 
			
		||||
    for (int i = 0; i < numPlanes; i++) {
 | 
			
		||||
        checkCudaErrors(cudaFreeMipmappedArray(caller1.multiPlanarArray[i]));
 | 
			
		||||
        checkCudaErrors(cudaFreeMipmappedArray(caller2.multiPlanarArray[i]));
 | 
			
		||||
    }
 | 
			
		||||
    tearDown(&caller1, &caller2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Map NvSciBufObj to cudaMipmappedArray
 | 
			
		||||
void Caller::copyExtMemToMultiPlanarArrays() {
 | 
			
		||||
    checkNvSciErrors(NvSciBufObjGetAttrList(buffObj, &attrListOut));
 | 
			
		||||
    memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * PLANE_ATTR_SIZE);
 | 
			
		||||
    cudaExternalMemoryHandleDesc memHandleDesc;
 | 
			
		||||
    cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
 | 
			
		||||
    cudaChannelFormatDesc desc = {0};
 | 
			
		||||
    cudaExtent extent = {0};
 | 
			
		||||
 
 | 
			
		||||
    pairArrayOut[PLANE_SIZE].key   = NvSciBufImageAttrKey_Size;                       // Datatype: @c uint64_t
 | 
			
		||||
    pairArrayOut[PLANE_ALIGNED_SIZE].key   = NvSciBufImageAttrKey_PlaneAlignedSize;   // Datatype: @c uint64_t[]
 | 
			
		||||
    pairArrayOut[PLANE_OFFSET].key   = NvSciBufImageAttrKey_PlaneOffset;              // Datatype: @c uint64_t[]
 | 
			
		||||
    pairArrayOut[PLANE_HEIGHT].key   = NvSciBufImageAttrKey_PlaneHeight;              // Datatype: @c uint32_t[]
 | 
			
		||||
    pairArrayOut[PLANE_WIDTH].key   = NvSciBufImageAttrKey_PlaneWidth;                // Datatype: @c int32_t[]
 | 
			
		||||
    pairArrayOut[PLANE_CHANNEL_COUNT].key   = NvSciBufImageAttrKey_PlaneChannelCount; // Datatype: @c uint8_t
 | 
			
		||||
    pairArrayOut[PLANE_BITS_PER_PIXEL].key   = NvSciBufImageAttrKey_PlaneBitsPerPixel;// Datatype: @c uint32_t[]
 | 
			
		||||
    pairArrayOut[PLANE_COUNT].key   = NvSciBufImageAttrKey_PlaneCount;                // Datatype: @c uint32_t
 | 
			
		||||
    checkNvSciErrors(NvSciBufAttrListGetAttrs(attrListOut, pairArrayOut, (PLANE_ATTR_SIZE)));  
 | 
			
		||||
 
 | 
			
		||||
    uint64_t size = *(uint64_t*)pairArrayOut[PLANE_SIZE].value;
 | 
			
		||||
    uint64_t *planeAlignedSize = (uint64_t*)pairArrayOut[PLANE_ALIGNED_SIZE].value;
 | 
			
		||||
    int32_t *planeWidth = (int32_t*)pairArrayOut[PLANE_WIDTH].value;
 | 
			
		||||
    int32_t *planeHeight = (int32_t*)pairArrayOut[PLANE_HEIGHT].value;
 | 
			
		||||
    uint64_t *planeOffset = (uint64_t*)pairArrayOut[PLANE_OFFSET].value;
 | 
			
		||||
    uint8_t planeChannelCount = *(uint8_t*)pairArrayOut[PLANE_CHANNEL_COUNT].value;
 | 
			
		||||
    uint32_t *planeBitsPerPixel = (uint32_t*)pairArrayOut[PLANE_BITS_PER_PIXEL].value;
 | 
			
		||||
    uint32_t planeCount = *(uint32_t*)pairArrayOut[PLANE_COUNT].value;
 | 
			
		||||
 
 | 
			
		||||
    numPlanes = planeCount;
 | 
			
		||||
 
 | 
			
		||||
    for (int i = 0; i < numPlanes; i++) {
 | 
			
		||||
        multiPlanarWidth[i] = planeWidth[i];
 | 
			
		||||
        multiPlanarHeight[i] = planeHeight[i];
 | 
			
		||||
    }
 | 
			
		||||
   
 | 
			
		||||
    memset(&memHandleDesc, 0, sizeof(memHandleDesc));
 | 
			
		||||
    memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
 | 
			
		||||
    memHandleDesc.handle.nvSciBufObject = buffObj;
 | 
			
		||||
    memHandleDesc.size = size;
 | 
			
		||||
    checkCudaErrors(cudaImportExternalMemory(&extMem, &memHandleDesc));
 | 
			
		||||
    
 | 
			
		||||
    desc = cudaCreateChannelDesc(planeBitsPerPixel[0], 0, 0, 0, cudaChannelFormatKindUnsigned);
 | 
			
		||||
    memset(&mipmapDesc, 0, sizeof(mipmapDesc));
 | 
			
		||||
    mipmapDesc.numLevels = 1; 
 | 
			
		||||
 | 
			
		||||
    for (int i = 0; i < numPlanes; i++) {    
 | 
			
		||||
        memset(&extent, 0, sizeof(extent));
 | 
			
		||||
        extent.width = planeWidth[i];
 | 
			
		||||
        extent.height = planeHeight[i];
 | 
			
		||||
        extent.depth = 0;
 | 
			
		||||
        mipmapDesc.offset = planeOffset[i]; 
 | 
			
		||||
        mipmapDesc.formatDesc = desc;
 | 
			
		||||
        mipmapDesc.extent = extent;
 | 
			
		||||
        mipmapDesc.flags = cudaArraySurfaceLoadStore;;
 | 
			
		||||
        checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&multiPlanarArray[i], extMem, &mipmapDesc));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Reconciles the attribute lists of the two callers and allocates the shared
// NvSciBuf object (global `buffObj`) from the reconciled list.
//   attrList1, attrList2 - attribute lists of the two callers
// Exits with EXIT_FAILURE if NvSciBuf reports that the resulting list is not
// reconciled.
void cudaNvSciBufMultiplanar::reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2) {
    attrList[0] = *attrList1;
    attrList[1] = *attrList2;
    bool isReconciled = false;

    checkNvSciErrors(NvSciBufAttrListReconcile(attrList, 2, &attrListReconciled, &attrListConflict));
    checkNvSciErrors(NvSciBufAttrListIsReconciled(attrListReconciled, &isReconciled));

    // The flag was previously queried and then ignored; do not allocate from a
    // list that NvSciBuf does not consider reconciled.
    if (!isReconciled) {
        printf("NvSciBufAttrList is not reconciled in %s line %d\n", __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }

    checkNvSciErrors(NvSciBufObjAlloc(attrListReconciled, &buffObj));
    printf("NvSciBufAttrList reconciled\n");
}
 | 
			
		||||
 | 
			
		||||
// YUV 420 image is flipped and copied to cuda Array which is mapped to nvsciBuf
 | 
			
		||||
void Caller::copyYUVToCudaArrayAndFlipBits(std::string &path, cudaArray_t *cudaArr) {
 | 
			
		||||
    FILE *fp = NULL;
 | 
			
		||||
    uint8_t *pYBuff, *pUBuff, *pVBuff, *pChroma;
 | 
			
		||||
    uint8_t *pBuff = NULL; 
 | 
			
		||||
    uint32_t uvOffset[numPlanes] = {0}, copyWidthInBytes[numPlanes] = {0}, copyHeight[numPlanes] = {0};
 | 
			
		||||
    uint32_t width = multiPlanarWidth[0];
 | 
			
		||||
    uint32_t height = multiPlanarHeight[0];
 | 
			
		||||
 | 
			
		||||
    fp = fopen(path.c_str(), "rb");
 | 
			
		||||
    if (!fp) {
 | 
			
		||||
        printf("CudaProducer: Error opening file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
 | 
			
		||||
        exit(EXIT_FAILURE); 
 | 
			
		||||
    }
 | 
			
		||||
    pBuff = (uint8_t*)malloc((width * height * PLANAR_CHROMA_WIDTH_ORDER * PLANAR_CHROMA_HEIGHT_ORDER) * sizeof(unsigned char));
 | 
			
		||||
    if (!pBuff) {
 | 
			
		||||
        printf("CudaProducer: Failed to allocate image buffer in %s line %d\n", __FILE__, __LINE__);
 | 
			
		||||
        exit(EXIT_FAILURE); 
 | 
			
		||||
    }
 | 
			
		||||
    // Y V U order in the buffer. Fully planar formats use 
 | 
			
		||||
    // three planes to store the Y, Cb and Cr components separately.
 | 
			
		||||
    pYBuff = pBuff; 
 | 
			
		||||
    pVBuff = pYBuff + width * height;
 | 
			
		||||
    pUBuff = pVBuff + (width / PLANAR_CHROMA_WIDTH_ORDER)  * (height / PLANAR_CHROMA_HEIGHT_ORDER);
 | 
			
		||||
    for (uint32_t i = 0; i < height; i++) {
 | 
			
		||||
        if (fread(pYBuff, width, 1, fp) != 1) {
 | 
			
		||||
            printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
 | 
			
		||||
            exit(EXIT_FAILURE); 
 | 
			
		||||
        }
 | 
			
		||||
        flipBits(pYBuff, width);
 | 
			
		||||
        pYBuff += width;
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
    pChroma = pVBuff;
 | 
			
		||||
    for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) {
 | 
			
		||||
        if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) {
 | 
			
		||||
            printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
 | 
			
		||||
            exit(EXIT_FAILURE);
 | 
			
		||||
        }
 | 
			
		||||
        flipBits(pChroma, width);
 | 
			
		||||
        pChroma += width / PLANAR_CHROMA_WIDTH_ORDER;
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
    pChroma = pUBuff;
 | 
			
		||||
    for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) {
 | 
			
		||||
        if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) {
 | 
			
		||||
            printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
 | 
			
		||||
            exit(EXIT_FAILURE);
 | 
			
		||||
        }
 | 
			
		||||
        flipBits(pChroma, width);
 | 
			
		||||
        pChroma += width / PLANAR_CHROMA_WIDTH_ORDER;
 | 
			
		||||
    }
 | 
			
		||||
    uvOffset[0] = 0;
 | 
			
		||||
    copyHeight[0] = height;
 | 
			
		||||
    copyHeight[1] = height / PLANAR_CHROMA_HEIGHT_ORDER;
 | 
			
		||||
    copyHeight[2] = height / PLANAR_CHROMA_HEIGHT_ORDER;
 | 
			
		||||
    copyWidthInBytes[0] = width;
 | 
			
		||||
    // Width of the second and third planes is half of the first plane.  
 | 
			
		||||
    copyWidthInBytes[1] = width / PLANAR_CHROMA_WIDTH_ORDER;        
 | 
			
		||||
    copyWidthInBytes[2] = width / PLANAR_CHROMA_WIDTH_ORDER;        
 | 
			
		||||
    uvOffset[1] = width * height;
 | 
			
		||||
    uvOffset[2] = uvOffset[1] + (width / PLANAR_CHROMA_WIDTH_ORDER) * (height / PLANAR_CHROMA_HEIGHT_ORDER);
 | 
			
		||||
    for (int i = 0; i < numPlanes; i++) {
 | 
			
		||||
        checkCudaDrvErrors(cuCtxSynchronize());
 | 
			
		||||
        checkCudaErrors(cudaMemcpy2DToArray(
 | 
			
		||||
        cudaArr[i], 0, 0, (void *)(pBuff + uvOffset[i]), copyWidthInBytes[i],
 | 
			
		||||
        copyWidthInBytes[i], copyHeight[i],
 | 
			
		||||
        cudaMemcpyHostToDevice));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (fp) {
 | 
			
		||||
        fclose(fp);
 | 
			
		||||
        fp = NULL;
 | 
			
		||||
    }
 | 
			
		||||
    if (pBuff) {
 | 
			
		||||
        free(pBuff);
 | 
			
		||||
        pBuff = NULL;
 | 
			
		||||
    }
 | 
			
		||||
    printf("Image %s copied to CUDA Array and bit flip done\n", path.c_str());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Copy Cuda Array in YUV 420 format to a file 
 | 
			
		||||
void Caller::copyCudaArrayToYUV(std::string &path, cudaArray_t *cudaArr) {
 | 
			
		||||
    FILE *fp = NULL;
 | 
			
		||||
    int bufferSize;
 | 
			
		||||
    uint32_t width = multiPlanarWidth[0];
 | 
			
		||||
    uint32_t height = multiPlanarHeight[0];
 | 
			
		||||
    uint32_t copyWidthInBytes=0, copyHeight=0;
 | 
			
		||||
    uint8_t *pCudaCopyMem = NULL;
 | 
			
		||||
 
 | 
			
		||||
    fp = fopen(path.c_str(), "wb+");
 | 
			
		||||
    if (!fp) {
 | 
			
		||||
        printf("WriteFrame: file open failed %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
 | 
			
		||||
        exit(EXIT_FAILURE);
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
    for (int i = 0; i < numPlanes; i++) {
 | 
			
		||||
        if (i == 0) {
 | 
			
		||||
            bufferSize = width * height;
 | 
			
		||||
            copyWidthInBytes = width;
 | 
			
		||||
            copyHeight = height;
 | 
			
		||||
 
 | 
			
		||||
            pCudaCopyMem = (uint8_t *)malloc(bufferSize);
 | 
			
		||||
            if (pCudaCopyMem == NULL) {
 | 
			
		||||
                printf("pCudaCopyMem malloc failed in %s line %d\n", __FILE__, __LINE__);
 | 
			
		||||
                exit(EXIT_FAILURE);
 | 
			
		||||
            }
 | 
			
		||||
        } 
 | 
			
		||||
        else {
 | 
			
		||||
            bufferSize = ((height / PLANAR_CHROMA_HEIGHT_ORDER) * (width / PLANAR_CHROMA_WIDTH_ORDER));
 | 
			
		||||
            copyWidthInBytes = width / PLANAR_CHROMA_WIDTH_ORDER;
 | 
			
		||||
            copyHeight = height / PLANAR_CHROMA_HEIGHT_ORDER;
 | 
			
		||||
        }
 | 
			
		||||
        memset(pCudaCopyMem, 0, bufferSize);
 | 
			
		||||
 
 | 
			
		||||
        checkCudaErrors(cudaMemcpy2DFromArray(
 | 
			
		||||
        (void *)pCudaCopyMem, copyWidthInBytes, cudaArr[i], 0, 0, 
 | 
			
		||||
        copyWidthInBytes, copyHeight,
 | 
			
		||||
        cudaMemcpyDeviceToHost));
 | 
			
		||||
 | 
			
		||||
        checkCudaDrvErrors(cuCtxSynchronize());
 | 
			
		||||
       
 | 
			
		||||
        if (fwrite(pCudaCopyMem, bufferSize, 1, fp) != 1) {
 | 
			
		||||
            printf("Cuda consumer: output file write failed in %s line %d\n", __FILE__, __LINE__);
 | 
			
		||||
             exit(EXIT_FAILURE);
 | 
			
		||||
        }  
 | 
			
		||||
    }
 | 
			
		||||
    printf("Output file : %s saved\n", path.c_str());
 | 
			
		||||
    
 | 
			
		||||
    if (fp) {
 | 
			
		||||
        fclose(fp);
 | 
			
		||||
        fp = NULL;
 | 
			
		||||
    }
 | 
			
		||||
} 
 | 
			
		||||
 | 
			
		||||
// Calls deinit() on caller1 and then caller2, and afterwards releases buffObj
// through NvSciBufObjFree().
void cudaNvSciBufMultiplanar::tearDown(Caller *caller1, Caller *caller2) {
    Caller *const participants[2] = {caller1, caller2};

    // Order matters only in that both callers are de-initialized before the
    // buffer object is freed.
    for (int idx = 0; idx < 2; idx++) {
        participants[idx]->deinit();
    }

    NvSciBufObjFree(buffObj);
}
 | 
			
		||||
@ -0,0 +1,124 @@
 | 
			
		||||
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
#ifndef CUDA_NVSCIBUF_MULTIPLANAR_H
 | 
			
		||||
#define CUDA_NVSCIBUF_MULTIPLANAR_H
 | 
			
		||||
 | 
			
		||||
#include <cuda_runtime.h>
 | 
			
		||||
#include <nvscibuf.h>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <cuda.h>
 | 
			
		||||
#include <helper_cuda.h>
 | 
			
		||||
 | 
			
		||||
// Number of planes; sizes the per-plane arrays in Caller.
#define PLANAR_NUM_PLANES 3
// Divisor applied to the first plane's width to get the width of the other planes.
#define PLANAR_CHROMA_WIDTH_ORDER 2
// Divisor applied to the first plane's height to get the height of the other planes.
#define PLANAR_CHROMA_HEIGHT_ORDER 2

// Capacity of Caller::pairArrayOut.
#define ATTR_SIZE 20
// NOTE(review): presumably the device index used when none is selected — confirm at the use site.
#define DEFAULT_GPU 0
 | 
			
		||||
 | 
			
		||||
// Evaluates an NvSci call exactly once. If the result is not
// NvSciError_Success, prints the file, line, returned code and expected code
// to stdout, flushes stdout and terminates the process with EXIT_FAILURE.
// The argument is parenthesized so that any expression can be passed safely,
// and the codes are converted to int to match the %d conversions.
#define checkNvSciErrors(call)                              \
  do {                                                      \
    NvSciError _status = (call);                            \
    if (NvSciError_Success != _status) {                    \
      printf(                                               \
          "NVSCI call in file '%s' in line %i returned"     \
          " %d, expected %d\n",                             \
          __FILE__, __LINE__, static_cast<int>(_status),    \
          static_cast<int>(NvSciError_Success));            \
      fflush(stdout);                                       \
      exit(EXIT_FAILURE);                                   \
    }                                                       \
  } while (0)
 | 
			
		||||
 | 
			
		||||
// Evaluates a CUDA driver API call exactly once. If the result is not
// CUDA_SUCCESS, prints the numeric code, its description, the file and the
// line to stdout and terminates the process with EXIT_FAILURE.
// errorStr starts out NULL and is only replaced if cuGetErrorString provides
// a description, so a fallback text is printed instead of passing NULL to %s.
#define checkCudaDrvErrors(call)                           \
  do {                                                     \
    CUresult err = (call);                                 \
    if (CUDA_SUCCESS != err) {                             \
      const char *errorStr = NULL;                         \
      cuGetErrorString(err, &errorStr);                    \
      printf(                                              \
              "checkCudaDrvErrors() Driver API error"      \
              " = %04d \"%s\" from file <%s>, "            \
              "line %i.\n",                                \
              static_cast<int>(err),                       \
              errorStr ? errorStr : "unknown error",       \
              __FILE__, __LINE__);                         \
      exit(EXIT_FAILURE);                                  \
    }                                                      \
  } while (0)
 | 
			
		||||
 | 
			
		||||
// Host-side launcher for the bit-flip kernel.
//   levelArray        - one cudaArray_t per plane
//   multiPlanarWidth  - per-plane width, indexed like levelArray
//   multiPlanarHeight - per-plane height, indexed like levelArray
//   numPlanes         - number of entries to process in each array
void launchFlipSurfaceBitsKernel(cudaArray_t *levelArray,
                                 int32_t *multiPlanarWidth,
                                 int32_t *multiPlanarHeight,
                                 int numPlanes);
 | 
			
		||||
 | 
			
		||||
class Caller {
 | 
			
		||||
private:
 | 
			
		||||
    NvSciBufAttrList attrListOut;
 | 
			
		||||
    NvSciBufAttrKeyValuePair pairArrayOut[ATTR_SIZE];
 | 
			
		||||
    cudaExternalMemory_t extMem;
 | 
			
		||||
    int32_t numPlanes;
 | 
			
		||||
public:
 | 
			
		||||
    NvSciBufAttrList attrList;
 | 
			
		||||
    cudaMipmappedArray_t multiPlanarArray[PLANAR_NUM_PLANES];
 | 
			
		||||
    int32_t multiPlanarWidth[PLANAR_NUM_PLANES];
 | 
			
		||||
    int32_t multiPlanarHeight[PLANAR_NUM_PLANES];
 | 
			
		||||
 | 
			
		||||
    void init();
 | 
			
		||||
    void deinit();
 | 
			
		||||
    void copyExtMemToMultiPlanarArrays();
 | 
			
		||||
    void copyYUVToCudaArrayAndFlipBits(std::string &image_filename, cudaArray_t *yuvPlanes);
 | 
			
		||||
    void copyCudaArrayToYUV(std::string &image_filename, cudaArray_t *yuvPlanes);
 | 
			
		||||
    void setAttrListImageMultiPlanes(int imageWidth, int imageHeight);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class cudaNvSciBufMultiplanar {
 | 
			
		||||
private:
 | 
			
		||||
    size_t imageWidth;
 | 
			
		||||
    size_t imageHeight;
 | 
			
		||||
    int mCudaDeviceId;
 | 
			
		||||
    int deviceCnt;
 | 
			
		||||
    NvSciBufAttrList attrList[2];
 | 
			
		||||
    NvSciBufAttrList attrListReconciled;
 | 
			
		||||
    NvSciBufAttrList attrListConflict;
 | 
			
		||||
public:
 | 
			
		||||
    cudaNvSciBufMultiplanar(size_t imageWidth, size_t imageHeight, std::vector<int> &deviceIds);
 | 
			
		||||
    void initCuda(int devId);
 | 
			
		||||
    void reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2);
 | 
			
		||||
    void runCudaNvSciBufPlanar(std::string &image_filename, std::string &image_filename_out);
 | 
			
		||||
    void tearDown(Caller *caller1, Caller *caller2);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Consecutive indices, one per image-plane attribute, with PLANE_ATTR_SIZE as
// the count of the entries before it.
// NOTE(review): presumably used to index the attribute key/value array that is
// queried from the reconciled list — confirm against the query code.
enum NvSciBufImageAttributes {
    PLANE_SIZE = 0,
    PLANE_ALIGNED_SIZE = 1,
    PLANE_OFFSET = 2,
    PLANE_HEIGHT = 3,
    PLANE_WIDTH = 4,
    PLANE_CHANNEL_COUNT = 5,
    PLANE_BITS_PER_PIXEL = 6,
    PLANE_COUNT = 7,
    PLANE_ATTR_SIZE = 8
};
 | 
			
		||||
 | 
			
		||||
#endif  // CUDA_NVSCIBUF_MULTIPLANAR_H
 | 
			
		||||
@ -0,0 +1,64 @@
 | 
			
		||||
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
#include <cuda.h>
 | 
			
		||||
#include <helper_cuda.h>
 | 
			
		||||
 | 
			
		||||
// Inverts, in place, one char per thread of a 2D surface.
//
// Launch layout: 2D grid of 2D blocks; each thread derives its (x, y) position
// from blockIdx/blockDim/threadIdx and handles that single position. Threads
// whose position lies outside width x height do nothing, so the grid may
// overshoot the surface.
static __global__ void flipSurfaceBits(cudaSurfaceObject_t surfObj, int width, int height) {
    const unsigned int col = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Nothing to do for threads past the right or bottom edge.
    if (col >= width || row >= height) {
        return;
    }

    char texel;
    surf2Dread(&texel, surfObj, col, row);
    // Bitwise complement, narrowed back to char before it is stored.
    texel = ~texel;
    surf2Dwrite(texel, surfObj, col, row);
}
 | 
			
		||||
 | 
			
		||||
// Binds each plane's cudaArray to a surface object and runs flipSurfaceBits
// over it.
//
// levelArray:        one cudaArray_t per plane.
// multiPlanarWidth:  per-plane width passed to the kernel and used for the grid size.
// multiPlanarHeight: per-plane height passed to the kernel and used for the grid size.
// numPlanes:         number of planes to process.
//
// Each plane is launched with 16x16 thread blocks and a grid rounded up to
// cover the plane. The function waits for every kernel to finish so that the
// surface object can be destroyed before returning; launch and execution
// errors terminate the process through checkCudaErrors.
void launchFlipSurfaceBitsKernel(
    cudaArray_t *levelArray,
    int32_t *multiPlanarWidth,
    int32_t *multiPlanarHeight,
    int numPlanes) {

    const dim3 threadsperBlock(16, 16);

    for (int i = 0; i < numPlanes; i++) {
        cudaResourceDesc resDesc;
        memset(&resDesc, 0, sizeof(resDesc));
        resDesc.resType = cudaResourceTypeArray;
        resDesc.res.array.array = levelArray[i];

        // A single handle per iteration is enough: the object is only used by
        // this plane's launch.
        cudaSurfaceObject_t surfObject = 0;
        checkCudaErrors(cudaCreateSurfaceObject(&surfObject, &resDesc));

        // Ceil-division so partially filled edge blocks are still launched.
        const dim3 numBlocks((multiPlanarWidth[i] + threadsperBlock.x - 1) / threadsperBlock.x,
                (multiPlanarHeight[i] + threadsperBlock.y - 1) / threadsperBlock.y);
        flipSurfaceBits<<<numBlocks, threadsperBlock>>>(surfObject, multiPlanarWidth[i], multiPlanarHeight[i]);

        // Launch-configuration errors are only visible through cudaGetLastError().
        checkCudaErrors(cudaGetLastError());
        // Wait for the kernel before tearing down the surface object it uses.
        checkCudaErrors(cudaDeviceSynchronize());
        checkCudaErrors(cudaDestroySurfaceObject(surfObject));
    }
}
 | 
			
		||||
 | 
			
		||||
@ -0,0 +1,72 @@
 | 
			
		||||
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 | 
			
		||||
 *
 | 
			
		||||
 * Redistribution and use in source and binary forms, with or without
 | 
			
		||||
 * modification, are permitted provided that the following conditions
 | 
			
		||||
 * are met:
 | 
			
		||||
 *  * Redistributions of source code must retain the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer.
 | 
			
		||||
 *  * Redistributions in binary form must reproduce the above copyright
 | 
			
		||||
 *    notice, this list of conditions and the following disclaimer in the
 | 
			
		||||
 *    documentation and/or other materials provided with the distribution.
 | 
			
		||||
 *  * Neither the name of NVIDIA CORPORATION nor the names of its
 | 
			
		||||
 *    contributors may be used to endorse or promote products derived
 | 
			
		||||
 *    from this software without specific prior written permission.
 | 
			
		||||
 *
 | 
			
		||||
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
 | 
			
		||||
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
			
		||||
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 | 
			
		||||
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 | 
			
		||||
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 | 
			
		||||
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 | 
			
		||||
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 | 
			
		||||
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 | 
			
		||||
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
			
		||||
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | 
			
		||||
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		||||
 */
 | 
			
		||||
#include <cuda.h>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include "cudaNvSciBufMultiplanar.h"
 | 
			
		||||
#include <helper_image.h>
 | 
			
		||||
 | 
			
		||||
#define MAX_FILE_SIZE 100
 | 
			
		||||
 | 
			
		||||
int main(int argc, const char **argv) {
 | 
			
		||||
  int numOfGPUs = 0;
 | 
			
		||||
  std::vector<int> deviceIds;
 | 
			
		||||
  (cudaGetDeviceCount(&numOfGPUs));
 | 
			
		||||
 | 
			
		||||
  printf("%d GPUs found\n", numOfGPUs);
 | 
			
		||||
  if (!numOfGPUs) {
 | 
			
		||||
    exit(EXIT_WAIVED);
 | 
			
		||||
  } else {
 | 
			
		||||
    for (int devID = 0; devID < numOfGPUs; devID++) {
 | 
			
		||||
      int major = 0, minor = 0;
 | 
			
		||||
      (cudaDeviceGetAttribute(
 | 
			
		||||
          &major, cudaDevAttrComputeCapabilityMajor, devID));
 | 
			
		||||
      (cudaDeviceGetAttribute(
 | 
			
		||||
          &minor, cudaDevAttrComputeCapabilityMinor, devID));
 | 
			
		||||
      if (major >= 6) {
 | 
			
		||||
        deviceIds.push_back(devID);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    if (deviceIds.size() == 0) {
 | 
			
		||||
      printf(
 | 
			
		||||
          "cudaNvSciBufMultiplanar requires one or more GPUs of Pascal(SM 6.0) or higher "
 | 
			
		||||
          "archs\nWaiving..\n");
 | 
			
		||||
      exit(EXIT_WAIVED);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::string image_filename = sdkFindFilePath("yuv_planar_img1.yuv", argv[0]);
 | 
			
		||||
  std::string image_filename_out = "image_out.yuv";
 | 
			
		||||
  uint32_t imageWidth = 720;
 | 
			
		||||
  uint32_t imageHeight = 480;
 | 
			
		||||
 | 
			
		||||
  printf("input image %s , width = %d, height = %d\n", image_filename.c_str(), imageWidth, imageHeight);
 | 
			
		||||
 | 
			
		||||
  cudaNvSciBufMultiplanar cudaNvSciBufMultiplanarApp(imageWidth, imageHeight, deviceIds);
 | 
			
		||||
  cudaNvSciBufMultiplanarApp.runCudaNvSciBufPlanar(image_filename, image_filename_out);
 | 
			
		||||
 | 
			
		||||
  return EXIT_SUCCESS;
 | 
			
		||||
}
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user