mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-07-02 13:20:30 +08:00
Merge pull request #10 from XSShawnZeng/Tegra_Samples_Cmake_Transition
Add Tegra sample cudaNvSciBufMultiplanar
This commit is contained in:
commit
1a466282da
@ -1,4 +1,5 @@
|
|||||||
add_subdirectory(cudaNvSciNvMedia)
|
add_subdirectory(cudaNvSciNvMedia)
|
||||||
|
add_subdirectory(cudaNvSciBufMultiplanar)
|
||||||
add_subdirectory(cuDLAErrorReporting)
|
add_subdirectory(cuDLAErrorReporting)
|
||||||
add_subdirectory(cuDLAHybridMode)
|
add_subdirectory(cuDLAHybridMode)
|
||||||
add_subdirectory(cuDLALayerwiseStatsHybrid)
|
add_subdirectory(cuDLALayerwiseStatsHybrid)
|
||||||
|
18
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json
vendored
Normal file
18
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json
vendored
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Linux",
|
||||||
|
"includePath": [
|
||||||
|
"${workspaceFolder}/**",
|
||||||
|
"${workspaceFolder}/../../../Common"
|
||||||
|
],
|
||||||
|
"defines": [],
|
||||||
|
"compilerPath": "/usr/local/cuda/bin/nvcc",
|
||||||
|
"cStandard": "gnu17",
|
||||||
|
"cppStandard": "gnu++14",
|
||||||
|
"intelliSenseMode": "linux-gcc-x64",
|
||||||
|
"configurationProvider": "ms-vscode.makefile-tools"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"version": 4
|
||||||
|
}
|
7
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json
vendored
Normal file
7
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"recommendations": [
|
||||||
|
"nvidia.nsight-vscode-edition",
|
||||||
|
"ms-vscode.cpptools",
|
||||||
|
"ms-vscode.makefile-tools"
|
||||||
|
]
|
||||||
|
}
|
10
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json
vendored
Normal file
10
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "CUDA C++: Launch",
|
||||||
|
"type": "cuda-gdb",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceFolder}/cudaNvSciBufMultiplanar"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
15
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json
vendored
Normal file
15
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json
vendored
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"version": "2.0.0",
|
||||||
|
"tasks": [
|
||||||
|
{
|
||||||
|
"label": "sample",
|
||||||
|
"type": "shell",
|
||||||
|
"command": "make dbg=1",
|
||||||
|
"problemMatcher": ["$nvcc"],
|
||||||
|
"group": {
|
||||||
|
"kind": "build",
|
||||||
|
"isDefault": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
@ -0,0 +1,74 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.20)
|
||||||
|
|
||||||
|
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../../cmake/Modules")
|
||||||
|
|
||||||
|
project(cudaNvSciBufMultiplanar LANGUAGES C CXX CUDA)
|
||||||
|
|
||||||
|
find_package(CUDAToolkit REQUIRED)
|
||||||
|
|
||||||
|
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
||||||
|
set(CMAKE_CUDA_ARCHITECTURES 53 61 70 72 75 80 86 87 90)
|
||||||
|
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||||
|
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Include directories and libraries
|
||||||
|
include_directories(../../../../Common)
|
||||||
|
|
||||||
|
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||||
|
# Find the NVSCI libraries
|
||||||
|
# use CMAKE_LIBRARY_PATH so that users can also specify the NVSCI lib path in cmake command
|
||||||
|
set(CMAKE_LIBRARY_PATH "/usr/lib" ${CMAKE_LIBRARY_PATH})
|
||||||
|
file(GLOB_RECURSE NVSCIBUF_LIB
|
||||||
|
${CMAKE_LIBRARY_PATH}/*/libnvscibuf.so
|
||||||
|
)
|
||||||
|
file(GLOB_RECURSE NVSCISYNC_LIB
|
||||||
|
${CMAKE_LIBRARY_PATH}/*/libnvscisync.so
|
||||||
|
)
|
||||||
|
|
||||||
|
# Find the NVSCI header files
|
||||||
|
# use CMAKE_INCLUDE_PATH so that users can also specify the NVSCI include path in cmake command
|
||||||
|
set(CMAKE_INCLUDE_PATH "/usr/include" ${CMAKE_LIBRARY_PATH})
|
||||||
|
find_path(NVSCIBUF_INCLUDE_DIR nvscibuf.h PATHS ${CMAKE_INCLUDE_PATH})
|
||||||
|
find_path(NVSCISYNC_INCLUDE_DIR nvscisync.h PATHS ${CMAKE_INCLUDE_PATH})
|
||||||
|
|
||||||
|
if(NVSCIBUF_LIB AND NVSCISYNC_LIB AND NVSCIBUF_INCLUDE_DIR AND NVSCISYNC_INCLUDE_DIR)
|
||||||
|
message(STATUS "FOUND NVSCI libs: ${NVSCIBUF_LIB} ${NVSCISYNC_LIB}")
|
||||||
|
message(STATUS "Using NVSCI headers path: ${NVSCIBUF_INCLUDE_DIR} ${NVSCIBUF_INCLUDE_DIR}")
|
||||||
|
# Source file
|
||||||
|
# Add target for cudaNvSciBufMultiplanar
|
||||||
|
add_executable(cudaNvSciBufMultiplanar imageKernels.cu cudaNvSciBufMultiplanar.cpp main.cpp)
|
||||||
|
|
||||||
|
target_compile_options(cudaNvSciBufMultiplanar PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
|
||||||
|
|
||||||
|
target_compile_features(cudaNvSciBufMultiplanar PRIVATE cxx_std_17 cuda_std_17)
|
||||||
|
|
||||||
|
set_target_properties(cudaNvSciBufMultiplanar PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||||
|
|
||||||
|
target_include_directories(cudaNvSciBufMultiplanar PUBLIC
|
||||||
|
${CUDAToolkit_INCLUDE_DIRS}
|
||||||
|
${NVSCIBUF_INCLUDE_DIR}
|
||||||
|
${NVSCISYNC_INCLUDE_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_libraries(cudaNvSciBufMultiplanar
|
||||||
|
CUDA::cuda_driver
|
||||||
|
${NVSCIBUF_LIB}
|
||||||
|
${NVSCISYNC_LIB}
|
||||||
|
)
|
||||||
|
# Copy yuv_planar_img1.yuv to the output directory
|
||||||
|
add_custom_command(TARGET cudaNvSciBufMultiplanar POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/yuv_planar_img1.yuv ${CMAKE_CURRENT_BINARY_DIR}/yuv_planar_img1.yuv
|
||||||
|
)
|
||||||
|
# Specify additional clean files
|
||||||
|
set_target_properties(cudaNvSciBufMultiplanar PROPERTIES
|
||||||
|
ADDITIONAL_CLEAN_FILES "image_out.yuv"
|
||||||
|
)
|
||||||
|
else()
|
||||||
|
message(STATUS "NvSCI not found - will not build sample 'cudaNvSciBufMultiplanar'")
|
||||||
|
endif()
|
||||||
|
else()
|
||||||
|
message(STATUS "Will not build sample cudaNvSciBufMultiplanar - requires Linux OS")
|
||||||
|
endif()
|
@ -0,0 +1,64 @@
|
|||||||
|
# cudaNvSciBufMultiplanar - CUDA NvSciBufMultiplanar Image Samples
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
This sample demonstrates CUDA-NvSciBuf Interop for Multiplanar images. A YUV 420 multiplanar image is flipped and allocated using NvSciBuf APIs and imported into CUDA with CUDA External Resource Interoperability. A CUDA surface is created from the corresponding mapped CUDA array and again bit flipping is performed on the surface. The result is copied back to a YUV image which is compared against the input.
|
||||||
|
|
||||||
|
## Key Concepts
|
||||||
|
|
||||||
|
CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
|
||||||
|
|
||||||
|
## Supported SM Architectures
|
||||||
|
|
||||||
|
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 10.0 ](https://developer.nvidia.com/cuda-gpus) [SM 10.1 ](https://developer.nvidia.com/cuda-gpus) [SM 12.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||||
|
|
||||||
|
## Supported OSes
|
||||||
|
|
||||||
|
Linux
|
||||||
|
|
||||||
|
## Supported CPU Architecture
|
||||||
|
|
||||||
|
aarch64
|
||||||
|
|
||||||
|
## CUDA APIs involved
|
||||||
|
|
||||||
|
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||||
|
cudaDeviceGetAttribute, cudaNvSciBufMultiplanar, cudaDestroyExternalMemory, cuDriverGetVersion, cuDeviceGetUuid, cudaSetDevice, cudaGetMipmappedArrayLevel, cudaFreeMipmappedArray, cudaImportExternalMemory, cudaCreateChannelDesc, cudaExternalMemoryGetMappedMipmappedArray, cuCtxSynchronize, cudaMemcpy2DToArray, cudaMemcpy2DFromArray
|
||||||
|
|
||||||
|
## Dependencies needed to build/run
|
||||||
|
[NVSCI](../../../README.md#nvsci)
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
Download and install the [CUDA Toolkit 12.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||||
|
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||||
|
|
||||||
|
## Build and Run
|
||||||
|
|
||||||
|
### Linux
|
||||||
|
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||||
|
```
|
||||||
|
$ cd <sample_dir>
|
||||||
|
$ make
|
||||||
|
```
|
||||||
|
The samples makefiles can take advantage of certain options:
|
||||||
|
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are aarch64.
|
||||||
|
By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||||
|
`$ make TARGET_ARCH=aarch64` <br/>
|
||||||
|
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||||
|
* **dbg=1** - build with debug symbols
|
||||||
|
```
|
||||||
|
$ make dbg=1
|
||||||
|
```
|
||||||
|
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||||
|
```
|
||||||
|
$ make SMS="50 60"
|
||||||
|
```
|
||||||
|
|
||||||
|
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||||
|
```
|
||||||
|
$ make HOST_COMPILER=g++
|
||||||
|
```
|
||||||
|
|
||||||
|
## References (for more details)
|
||||||
|
|
@ -0,0 +1,435 @@
|
|||||||
|
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||||
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#include "cudaNvSciBufMultiplanar.h"
|
||||||
|
|
||||||
|
NvSciBufModule module;
|
||||||
|
NvSciBufObj buffObj;
|
||||||
|
CUuuid uuid;
|
||||||
|
|
||||||
|
void flipBits(uint8_t *pBuff, uint32_t size) {
|
||||||
|
for (uint32_t i = 0; i < size; i++) {
|
||||||
|
pBuff[i] = (~pBuff[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compare input and generated image files
|
||||||
|
void compareFiles(std::string &path1, std::string &path2) {
|
||||||
|
bool result = true;
|
||||||
|
FILE *fp1, *fp2;
|
||||||
|
int ch1, ch2;
|
||||||
|
|
||||||
|
fp1 = fopen(path1.c_str(), "rb");
|
||||||
|
fp2 = fopen(path2.c_str(), "rb");
|
||||||
|
if (!fp1) {
|
||||||
|
result = false;
|
||||||
|
printf("File %s open failed in %s line %d\n", path1.c_str(), __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
if (!fp2) {
|
||||||
|
result = false;
|
||||||
|
printf("File %s open failed in %s line %d\n", path2.c_str(), __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
ch1 = getc(fp1);
|
||||||
|
ch2 = getc(fp2);
|
||||||
|
|
||||||
|
if (ch1 != ch2) {
|
||||||
|
result = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while(ch1 != EOF && ch2 != EOF);
|
||||||
|
|
||||||
|
if (result) {
|
||||||
|
printf("Input file : %s and output file : %s match SUCCESS\n", path1.c_str(), path2.c_str());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
printf("Input file : %s and output file : %s match FAILURE\n", path1.c_str(), path2.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fp1) {
|
||||||
|
fclose(fp1);
|
||||||
|
}
|
||||||
|
if (fp2) {
|
||||||
|
fclose(fp2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Caller::init() {
|
||||||
|
checkNvSciErrors(NvSciBufAttrListCreate(module, &attrList));
|
||||||
|
attrListOut = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Caller::deinit() {
|
||||||
|
NvSciBufAttrListFree(attrList);
|
||||||
|
checkCudaErrors(cudaDestroyExternalMemory(extMem));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set NvSciBufImage attribute values in the attribute list
|
||||||
|
void Caller::setAttrListImageMultiPlanes(int imageWidth, int imageHeight) {
|
||||||
|
NvSciBufType bufType = NvSciBufType_Image;
|
||||||
|
NvSciBufAttrValImageLayoutType layout = NvSciBufImage_BlockLinearType;
|
||||||
|
bool cpuAccessFlag = false;
|
||||||
|
NvSciBufAttrValAccessPerm perm = NvSciBufAccessPerm_ReadWrite;
|
||||||
|
NvSciRmGpuId gpuid;
|
||||||
|
bool vpr = false;
|
||||||
|
int32_t planeCount = PLANAR_NUM_PLANES;
|
||||||
|
int drvVersion;
|
||||||
|
// Dimensions of the imported image in the YUV 420 planar format
|
||||||
|
int32_t planeWidths[] = {imageWidth, imageWidth/2, imageWidth/2};
|
||||||
|
int32_t planeHeights[] = {imageHeight, imageHeight/2, imageHeight/2};
|
||||||
|
NvSciBufAttrKeyValuePair keyPair;
|
||||||
|
NvSciBufAttrKeyValuePair pairArray[ATTR_SIZE];
|
||||||
|
|
||||||
|
NvSciBufAttrValColorFmt planeColorFmts[] =
|
||||||
|
{ NvSciColor_Y8, NvSciColor_V8, NvSciColor_U8 };
|
||||||
|
NvSciBufAttrValImageScanType planeScanType[] =
|
||||||
|
{ NvSciBufScan_ProgressiveType };
|
||||||
|
|
||||||
|
memcpy(&gpuid.bytes, &uuid.bytes, sizeof(uuid.bytes));
|
||||||
|
|
||||||
|
NvSciBufAttrKeyValuePair imgBuffAttrsArr[] = {
|
||||||
|
{ NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType) },
|
||||||
|
{ NvSciBufGeneralAttrKey_NeedCpuAccess, &cpuAccessFlag,
|
||||||
|
sizeof(cpuAccessFlag) },
|
||||||
|
{ NvSciBufGeneralAttrKey_RequiredPerm, &perm, sizeof(perm) },
|
||||||
|
{ NvSciBufGeneralAttrKey_GpuId, &gpuid, sizeof(gpuid) },
|
||||||
|
{ NvSciBufImageAttrKey_Layout, &layout, sizeof(layout) },
|
||||||
|
{ NvSciBufImageAttrKey_VprFlag, &vpr, sizeof(vpr) },
|
||||||
|
{ NvSciBufImageAttrKey_PlaneCount, &planeCount, sizeof(planeCount) },
|
||||||
|
{ NvSciBufImageAttrKey_PlaneColorFormat, planeColorFmts,
|
||||||
|
sizeof(planeColorFmts) },
|
||||||
|
{ NvSciBufImageAttrKey_PlaneWidth, planeWidths, sizeof(planeWidths) },
|
||||||
|
{ NvSciBufImageAttrKey_PlaneHeight, planeHeights,
|
||||||
|
sizeof(planeHeights) },
|
||||||
|
{ NvSciBufImageAttrKey_PlaneScanType, planeScanType,
|
||||||
|
sizeof(planeScanType) },
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<NvSciBufAttrKeyValuePair> imgBuffAttrsVec(imgBuffAttrsArr,
|
||||||
|
imgBuffAttrsArr+(sizeof(imgBuffAttrsArr)/sizeof(imgBuffAttrsArr[0])));
|
||||||
|
|
||||||
|
memset(pairArray, 0, sizeof(NvSciBufAttrKeyValuePair) * imgBuffAttrsVec.size());
|
||||||
|
std::copy(imgBuffAttrsVec.begin(), imgBuffAttrsVec.end(), pairArray);
|
||||||
|
checkNvSciErrors(NvSciBufAttrListSetAttrs(attrList, pairArray, imgBuffAttrsVec.size()));
|
||||||
|
}
|
||||||
|
|
||||||
|
cudaNvSciBufMultiplanar::cudaNvSciBufMultiplanar(size_t width, size_t height, std::vector<int> &deviceIds)
|
||||||
|
: imageWidth(width),
|
||||||
|
imageHeight(height) {
|
||||||
|
mCudaDeviceId = deviceIds[0];
|
||||||
|
attrListReconciled = NULL;
|
||||||
|
attrListConflict = NULL;
|
||||||
|
checkNvSciErrors(NvSciBufModuleOpen(&module));
|
||||||
|
initCuda(mCudaDeviceId);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cudaNvSciBufMultiplanar::initCuda(int devId) {
|
||||||
|
int major = 0, minor = 0, drvVersion;
|
||||||
|
NvSciRmGpuId gpuid;
|
||||||
|
|
||||||
|
checkCudaErrors(cudaSetDevice(mCudaDeviceId));
|
||||||
|
checkCudaErrors(cudaDeviceGetAttribute(
|
||||||
|
&major, cudaDevAttrComputeCapabilityMajor, mCudaDeviceId));
|
||||||
|
checkCudaErrors(cudaDeviceGetAttribute(
|
||||||
|
&minor, cudaDevAttrComputeCapabilityMinor, mCudaDeviceId));
|
||||||
|
printf(
|
||||||
|
"[cudaNvSciBufMultiplanar] GPU Device %d: \"%s\" with compute capability "
|
||||||
|
"%d.%d\n\n",
|
||||||
|
mCudaDeviceId, _ConvertSMVer2ArchName(major, minor), major, minor);
|
||||||
|
|
||||||
|
checkCudaDrvErrors(cuDriverGetVersion(&drvVersion));
|
||||||
|
|
||||||
|
if (drvVersion <= 11030) {
|
||||||
|
checkCudaDrvErrors(cuDeviceGetUuid(&uuid, devId));
|
||||||
|
} else {
|
||||||
|
checkCudaDrvErrors(cuDeviceGetUuid_v2(&uuid, devId));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Caller1 flips a YUV image which is allocated to nvscibuf APIs and copied into CUDA Array.
|
||||||
|
It is mapped to CUDA surface and bit flip is done. Caller2 in the same thread copies
|
||||||
|
CUDA Array to a YUV image file. The original image is compared with the double bit
|
||||||
|
flipped image.
|
||||||
|
*/
|
||||||
|
void cudaNvSciBufMultiplanar::runCudaNvSciBufPlanar(std::string &imageFilename, std::string &imageFilenameOut) {
|
||||||
|
cudaArray_t levelArray1[PLANAR_NUM_PLANES];
|
||||||
|
cudaArray_t levelArray2[PLANAR_NUM_PLANES];
|
||||||
|
Caller caller1;
|
||||||
|
Caller caller2;
|
||||||
|
|
||||||
|
int numPlanes = PLANAR_NUM_PLANES;
|
||||||
|
caller1.init();
|
||||||
|
caller2.init();
|
||||||
|
|
||||||
|
// Set NvSciBufImage attribute values in the attribute list
|
||||||
|
caller1.setAttrListImageMultiPlanes(imageWidth, imageHeight);
|
||||||
|
caller2.setAttrListImageMultiPlanes(imageWidth, imageHeight);
|
||||||
|
|
||||||
|
// Reconcile attribute lists and allocate NvSciBuf object
|
||||||
|
reconcileAttrList(&caller1.attrList, &caller2.attrList);
|
||||||
|
caller1.copyExtMemToMultiPlanarArrays();
|
||||||
|
for (int i = 0; i < numPlanes; i++) {
|
||||||
|
checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray1[i], caller1.multiPlanarArray[i], 0));
|
||||||
|
}
|
||||||
|
caller1.copyYUVToCudaArrayAndFlipBits(imageFilename, levelArray1);
|
||||||
|
|
||||||
|
caller2.copyExtMemToMultiPlanarArrays();
|
||||||
|
for (int i = 0; i < numPlanes; i++) {
|
||||||
|
checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray2[i], caller2.multiPlanarArray[i], 0));
|
||||||
|
}
|
||||||
|
// Maps cudaArray to surface memory and launches a kernel to flip bits
|
||||||
|
launchFlipSurfaceBitsKernel(levelArray2, caller2.multiPlanarWidth, caller2.multiPlanarHeight, numPlanes);
|
||||||
|
|
||||||
|
// Synchronization can be done using nvSciSync when non CUDA callers and cross-process signaler-waiter
|
||||||
|
// applications are involved. Please refer to the cudaNvSci sample library for more details.
|
||||||
|
checkCudaDrvErrors(cuCtxSynchronize());
|
||||||
|
printf("Bit flip of the surface memory done\n");
|
||||||
|
|
||||||
|
caller2.copyCudaArrayToYUV(imageFilenameOut, levelArray2);
|
||||||
|
compareFiles(imageFilename, imageFilenameOut);
|
||||||
|
|
||||||
|
// Release memory
|
||||||
|
printf("Releasing memory\n");
|
||||||
|
for (int i = 0; i < numPlanes; i++) {
|
||||||
|
checkCudaErrors(cudaFreeMipmappedArray(caller1.multiPlanarArray[i]));
|
||||||
|
checkCudaErrors(cudaFreeMipmappedArray(caller2.multiPlanarArray[i]));
|
||||||
|
}
|
||||||
|
tearDown(&caller1, &caller2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map NvSciBufObj to cudaMipmappedArray
|
||||||
|
void Caller::copyExtMemToMultiPlanarArrays() {
|
||||||
|
checkNvSciErrors(NvSciBufObjGetAttrList(buffObj, &attrListOut));
|
||||||
|
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * PLANE_ATTR_SIZE);
|
||||||
|
cudaExternalMemoryHandleDesc memHandleDesc;
|
||||||
|
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
|
||||||
|
cudaChannelFormatDesc desc = {0};
|
||||||
|
cudaExtent extent = {0};
|
||||||
|
|
||||||
|
pairArrayOut[PLANE_SIZE].key = NvSciBufImageAttrKey_Size; // Datatype: @c uint64_t
|
||||||
|
pairArrayOut[PLANE_ALIGNED_SIZE].key = NvSciBufImageAttrKey_PlaneAlignedSize; // Datatype: @c uint64_t[]
|
||||||
|
pairArrayOut[PLANE_OFFSET].key = NvSciBufImageAttrKey_PlaneOffset; // Datatype: @c uint64_t[]
|
||||||
|
pairArrayOut[PLANE_HEIGHT].key = NvSciBufImageAttrKey_PlaneHeight; // Datatype: @c uint32_t[]
|
||||||
|
pairArrayOut[PLANE_WIDTH].key = NvSciBufImageAttrKey_PlaneWidth; // Datatype: @c int32_t[]
|
||||||
|
pairArrayOut[PLANE_CHANNEL_COUNT].key = NvSciBufImageAttrKey_PlaneChannelCount; // Datatype: @c uint8_t
|
||||||
|
pairArrayOut[PLANE_BITS_PER_PIXEL].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;// Datatype: @c uint32_t[]
|
||||||
|
pairArrayOut[PLANE_COUNT].key = NvSciBufImageAttrKey_PlaneCount; // Datatype: @c uint32_t
|
||||||
|
checkNvSciErrors(NvSciBufAttrListGetAttrs(attrListOut, pairArrayOut, (PLANE_ATTR_SIZE)));
|
||||||
|
|
||||||
|
uint64_t size = *(uint64_t*)pairArrayOut[PLANE_SIZE].value;
|
||||||
|
uint64_t *planeAlignedSize = (uint64_t*)pairArrayOut[PLANE_ALIGNED_SIZE].value;
|
||||||
|
int32_t *planeWidth = (int32_t*)pairArrayOut[PLANE_WIDTH].value;
|
||||||
|
int32_t *planeHeight = (int32_t*)pairArrayOut[PLANE_HEIGHT].value;
|
||||||
|
uint64_t *planeOffset = (uint64_t*)pairArrayOut[PLANE_OFFSET].value;
|
||||||
|
uint8_t planeChannelCount = *(uint8_t*)pairArrayOut[PLANE_CHANNEL_COUNT].value;
|
||||||
|
uint32_t *planeBitsPerPixel = (uint32_t*)pairArrayOut[PLANE_BITS_PER_PIXEL].value;
|
||||||
|
uint32_t planeCount = *(uint32_t*)pairArrayOut[PLANE_COUNT].value;
|
||||||
|
|
||||||
|
numPlanes = planeCount;
|
||||||
|
|
||||||
|
for (int i = 0; i < numPlanes; i++) {
|
||||||
|
multiPlanarWidth[i] = planeWidth[i];
|
||||||
|
multiPlanarHeight[i] = planeHeight[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
|
||||||
|
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
|
||||||
|
memHandleDesc.handle.nvSciBufObject = buffObj;
|
||||||
|
memHandleDesc.size = size;
|
||||||
|
checkCudaErrors(cudaImportExternalMemory(&extMem, &memHandleDesc));
|
||||||
|
|
||||||
|
desc = cudaCreateChannelDesc(planeBitsPerPixel[0], 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||||
|
memset(&mipmapDesc, 0, sizeof(mipmapDesc));
|
||||||
|
mipmapDesc.numLevels = 1;
|
||||||
|
|
||||||
|
for (int i = 0; i < numPlanes; i++) {
|
||||||
|
memset(&extent, 0, sizeof(extent));
|
||||||
|
extent.width = planeWidth[i];
|
||||||
|
extent.height = planeHeight[i];
|
||||||
|
extent.depth = 0;
|
||||||
|
mipmapDesc.offset = planeOffset[i];
|
||||||
|
mipmapDesc.formatDesc = desc;
|
||||||
|
mipmapDesc.extent = extent;
|
||||||
|
mipmapDesc.flags = cudaArraySurfaceLoadStore;;
|
||||||
|
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&multiPlanarArray[i], extMem, &mipmapDesc));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void cudaNvSciBufMultiplanar::reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2) {
|
||||||
|
attrList[0] = *attrList1;
|
||||||
|
attrList[1] = *attrList2;
|
||||||
|
bool isReconciled = false;
|
||||||
|
|
||||||
|
checkNvSciErrors(NvSciBufAttrListReconcile(attrList, 2, &attrListReconciled, &attrListConflict));
|
||||||
|
checkNvSciErrors(NvSciBufAttrListIsReconciled(attrListReconciled, &isReconciled));
|
||||||
|
checkNvSciErrors(NvSciBufObjAlloc(attrListReconciled, &buffObj));
|
||||||
|
printf("NvSciBufAttrList reconciled\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// YUV 420 image is flipped and copied to cuda Array which is mapped to nvsciBuf
|
||||||
|
void Caller::copyYUVToCudaArrayAndFlipBits(std::string &path, cudaArray_t *cudaArr) {
|
||||||
|
FILE *fp = NULL;
|
||||||
|
uint8_t *pYBuff, *pUBuff, *pVBuff, *pChroma;
|
||||||
|
uint8_t *pBuff = NULL;
|
||||||
|
uint32_t uvOffset[numPlanes] = {0}, copyWidthInBytes[numPlanes] = {0}, copyHeight[numPlanes] = {0};
|
||||||
|
uint32_t width = multiPlanarWidth[0];
|
||||||
|
uint32_t height = multiPlanarHeight[0];
|
||||||
|
|
||||||
|
fp = fopen(path.c_str(), "rb");
|
||||||
|
if (!fp) {
|
||||||
|
printf("CudaProducer: Error opening file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
pBuff = (uint8_t*)malloc((width * height * PLANAR_CHROMA_WIDTH_ORDER * PLANAR_CHROMA_HEIGHT_ORDER) * sizeof(unsigned char));
|
||||||
|
if (!pBuff) {
|
||||||
|
printf("CudaProducer: Failed to allocate image buffer in %s line %d\n", __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
// Y V U order in the buffer. Fully planar formats use
|
||||||
|
// three planes to store the Y, Cb and Cr components separately.
|
||||||
|
pYBuff = pBuff;
|
||||||
|
pVBuff = pYBuff + width * height;
|
||||||
|
pUBuff = pVBuff + (width / PLANAR_CHROMA_WIDTH_ORDER) * (height / PLANAR_CHROMA_HEIGHT_ORDER);
|
||||||
|
for (uint32_t i = 0; i < height; i++) {
|
||||||
|
if (fread(pYBuff, width, 1, fp) != 1) {
|
||||||
|
printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
flipBits(pYBuff, width);
|
||||||
|
pYBuff += width;
|
||||||
|
}
|
||||||
|
|
||||||
|
pChroma = pVBuff;
|
||||||
|
for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) {
|
||||||
|
if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) {
|
||||||
|
printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
flipBits(pChroma, width);
|
||||||
|
pChroma += width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||||
|
}
|
||||||
|
|
||||||
|
pChroma = pUBuff;
|
||||||
|
for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) {
|
||||||
|
if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) {
|
||||||
|
printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
flipBits(pChroma, width);
|
||||||
|
pChroma += width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||||
|
}
|
||||||
|
uvOffset[0] = 0;
|
||||||
|
copyHeight[0] = height;
|
||||||
|
copyHeight[1] = height / PLANAR_CHROMA_HEIGHT_ORDER;
|
||||||
|
copyHeight[2] = height / PLANAR_CHROMA_HEIGHT_ORDER;
|
||||||
|
copyWidthInBytes[0] = width;
|
||||||
|
// Width of the second and third planes is half of the first plane.
|
||||||
|
copyWidthInBytes[1] = width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||||
|
copyWidthInBytes[2] = width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||||
|
uvOffset[1] = width * height;
|
||||||
|
uvOffset[2] = uvOffset[1] + (width / PLANAR_CHROMA_WIDTH_ORDER) * (height / PLANAR_CHROMA_HEIGHT_ORDER);
|
||||||
|
for (int i = 0; i < numPlanes; i++) {
|
||||||
|
checkCudaDrvErrors(cuCtxSynchronize());
|
||||||
|
checkCudaErrors(cudaMemcpy2DToArray(
|
||||||
|
cudaArr[i], 0, 0, (void *)(pBuff + uvOffset[i]), copyWidthInBytes[i],
|
||||||
|
copyWidthInBytes[i], copyHeight[i],
|
||||||
|
cudaMemcpyHostToDevice));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fp) {
|
||||||
|
fclose(fp);
|
||||||
|
fp = NULL;
|
||||||
|
}
|
||||||
|
if (pBuff) {
|
||||||
|
free(pBuff);
|
||||||
|
pBuff = NULL;
|
||||||
|
}
|
||||||
|
printf("Image %s copied to CUDA Array and bit flip done\n", path.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy Cuda Array in YUV 420 format to a file
|
||||||
|
void Caller::copyCudaArrayToYUV(std::string &path, cudaArray_t *cudaArr) {
|
||||||
|
FILE *fp = NULL;
|
||||||
|
int bufferSize;
|
||||||
|
uint32_t width = multiPlanarWidth[0];
|
||||||
|
uint32_t height = multiPlanarHeight[0];
|
||||||
|
uint32_t copyWidthInBytes=0, copyHeight=0;
|
||||||
|
uint8_t *pCudaCopyMem = NULL;
|
||||||
|
|
||||||
|
fp = fopen(path.c_str(), "wb+");
|
||||||
|
if (!fp) {
|
||||||
|
printf("WriteFrame: file open failed %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < numPlanes; i++) {
|
||||||
|
if (i == 0) {
|
||||||
|
bufferSize = width * height;
|
||||||
|
copyWidthInBytes = width;
|
||||||
|
copyHeight = height;
|
||||||
|
|
||||||
|
pCudaCopyMem = (uint8_t *)malloc(bufferSize);
|
||||||
|
if (pCudaCopyMem == NULL) {
|
||||||
|
printf("pCudaCopyMem malloc failed in %s line %d\n", __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bufferSize = ((height / PLANAR_CHROMA_HEIGHT_ORDER) * (width / PLANAR_CHROMA_WIDTH_ORDER));
|
||||||
|
copyWidthInBytes = width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||||
|
copyHeight = height / PLANAR_CHROMA_HEIGHT_ORDER;
|
||||||
|
}
|
||||||
|
memset(pCudaCopyMem, 0, bufferSize);
|
||||||
|
|
||||||
|
checkCudaErrors(cudaMemcpy2DFromArray(
|
||||||
|
(void *)pCudaCopyMem, copyWidthInBytes, cudaArr[i], 0, 0,
|
||||||
|
copyWidthInBytes, copyHeight,
|
||||||
|
cudaMemcpyDeviceToHost));
|
||||||
|
|
||||||
|
checkCudaDrvErrors(cuCtxSynchronize());
|
||||||
|
|
||||||
|
if (fwrite(pCudaCopyMem, bufferSize, 1, fp) != 1) {
|
||||||
|
printf("Cuda consumer: output file write failed in %s line %d\n", __FILE__, __LINE__);
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("Output file : %s saved\n", path.c_str());
|
||||||
|
|
||||||
|
if (fp) {
|
||||||
|
fclose(fp);
|
||||||
|
fp = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void cudaNvSciBufMultiplanar::tearDown(Caller *caller1, Caller *caller2) {
|
||||||
|
caller1->deinit();
|
||||||
|
caller2->deinit();
|
||||||
|
NvSciBufObjFree(buffObj);
|
||||||
|
}
|
@ -0,0 +1,124 @@
|
|||||||
|
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||||
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#ifndef CUDA_NVSCIBUF_MULTIPLANAR_H
|
||||||
|
#define CUDA_NVSCIBUF_MULTIPLANAR_H
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <nvscibuf.h>
|
||||||
|
#include <vector>
|
||||||
|
#include <cuda.h>
|
||||||
|
#include <helper_cuda.h>
|
||||||
|
|
||||||
|
#define PLANAR_NUM_PLANES 3
|
||||||
|
#define PLANAR_CHROMA_WIDTH_ORDER 2
|
||||||
|
#define PLANAR_CHROMA_HEIGHT_ORDER 2
|
||||||
|
|
||||||
|
#define ATTR_SIZE 20
|
||||||
|
#define DEFAULT_GPU 0
|
||||||
|
|
||||||
|
#define checkNvSciErrors(call) \
|
||||||
|
do { \
|
||||||
|
NvSciError _status = call; \
|
||||||
|
if (NvSciError_Success != _status) { \
|
||||||
|
printf( \
|
||||||
|
"NVSCI call in file '%s' in line %i returned" \
|
||||||
|
" %d, expected %d\n", \
|
||||||
|
__FILE__, __LINE__, _status, NvSciError_Success); \
|
||||||
|
fflush(stdout); \
|
||||||
|
exit(EXIT_FAILURE); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define checkCudaDrvErrors(call) \
|
||||||
|
do { \
|
||||||
|
CUresult err = call; \
|
||||||
|
if (CUDA_SUCCESS != err) { \
|
||||||
|
const char *errorStr = NULL; \
|
||||||
|
cuGetErrorString(err, &errorStr); \
|
||||||
|
printf( \
|
||||||
|
"checkCudaDrvErrors() Driver API error" \
|
||||||
|
" = %04d \"%s\" from file <%s>, " \
|
||||||
|
"line %i.\n", \
|
||||||
|
err, errorStr, __FILE__, __LINE__); \
|
||||||
|
exit(EXIT_FAILURE); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
extern void launchFlipSurfaceBitsKernel(cudaArray_t *levelArray, int32_t *multiPlanarWidth,
|
||||||
|
int32_t *multiPlanarHeight, int numPlanes);
|
||||||
|
|
||||||
|
class Caller {
|
||||||
|
private:
|
||||||
|
NvSciBufAttrList attrListOut;
|
||||||
|
NvSciBufAttrKeyValuePair pairArrayOut[ATTR_SIZE];
|
||||||
|
cudaExternalMemory_t extMem;
|
||||||
|
int32_t numPlanes;
|
||||||
|
public:
|
||||||
|
NvSciBufAttrList attrList;
|
||||||
|
cudaMipmappedArray_t multiPlanarArray[PLANAR_NUM_PLANES];
|
||||||
|
int32_t multiPlanarWidth[PLANAR_NUM_PLANES];
|
||||||
|
int32_t multiPlanarHeight[PLANAR_NUM_PLANES];
|
||||||
|
|
||||||
|
void init();
|
||||||
|
void deinit();
|
||||||
|
void copyExtMemToMultiPlanarArrays();
|
||||||
|
void copyYUVToCudaArrayAndFlipBits(std::string &image_filename, cudaArray_t *yuvPlanes);
|
||||||
|
void copyCudaArrayToYUV(std::string &image_filename, cudaArray_t *yuvPlanes);
|
||||||
|
void setAttrListImageMultiPlanes(int imageWidth, int imageHeight);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class cudaNvSciBufMultiplanar {
|
||||||
|
private:
|
||||||
|
size_t imageWidth;
|
||||||
|
size_t imageHeight;
|
||||||
|
int mCudaDeviceId;
|
||||||
|
int deviceCnt;
|
||||||
|
NvSciBufAttrList attrList[2];
|
||||||
|
NvSciBufAttrList attrListReconciled;
|
||||||
|
NvSciBufAttrList attrListConflict;
|
||||||
|
public:
|
||||||
|
cudaNvSciBufMultiplanar(size_t imageWidth, size_t imageHeight, std::vector<int> &deviceIds);
|
||||||
|
void initCuda(int devId);
|
||||||
|
void reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2);
|
||||||
|
void runCudaNvSciBufPlanar(std::string &image_filename, std::string &image_filename_out);
|
||||||
|
void tearDown(Caller *caller1, Caller *caller2);
|
||||||
|
};
|
||||||
|
|
||||||
|
enum NvSciBufImageAttributes {
|
||||||
|
PLANE_SIZE,
|
||||||
|
PLANE_ALIGNED_SIZE,
|
||||||
|
PLANE_OFFSET,
|
||||||
|
PLANE_HEIGHT,
|
||||||
|
PLANE_WIDTH,
|
||||||
|
PLANE_CHANNEL_COUNT,
|
||||||
|
PLANE_BITS_PER_PIXEL,
|
||||||
|
PLANE_COUNT,
|
||||||
|
PLANE_ATTR_SIZE
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // CUDA_NVSCIBUF_MULTIPLANAR_H
|
@ -0,0 +1,64 @@
|
|||||||
|
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||||
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#include <cuda.h>
|
||||||
|
#include <helper_cuda.h>
|
||||||
|
|
||||||
|
static __global__ void flipSurfaceBits(cudaSurfaceObject_t surfObj, int width, int height) {
|
||||||
|
char data;
|
||||||
|
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
if (x < width && y < height) {
|
||||||
|
// Read from input surface
|
||||||
|
surf2Dread(&data, surfObj, x, y);
|
||||||
|
// Write to output surface
|
||||||
|
data = ~data;
|
||||||
|
surf2Dwrite(data, surfObj, x, y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy cudaArray to surface memory and launch the CUDA kernel
|
||||||
|
void launchFlipSurfaceBitsKernel(
|
||||||
|
cudaArray_t *levelArray,
|
||||||
|
int32_t *multiPlanarWidth,
|
||||||
|
int32_t *multiPlanarHeight,
|
||||||
|
int numPlanes) {
|
||||||
|
|
||||||
|
cudaSurfaceObject_t surfObject[numPlanes] = {0};
|
||||||
|
cudaResourceDesc resDesc;
|
||||||
|
|
||||||
|
for (int i = 0; i < numPlanes; i++) {
|
||||||
|
memset(&resDesc, 0, sizeof(resDesc));
|
||||||
|
resDesc.resType = cudaResourceTypeArray;
|
||||||
|
resDesc.res.array.array = levelArray[i];
|
||||||
|
checkCudaErrors(cudaCreateSurfaceObject(&surfObject[i], &resDesc));
|
||||||
|
dim3 threadsperBlock(16, 16);
|
||||||
|
dim3 numBlocks((multiPlanarWidth[i] + threadsperBlock.x - 1) / threadsperBlock.x,
|
||||||
|
(multiPlanarHeight[i] + threadsperBlock.y - 1) / threadsperBlock.y);
|
||||||
|
flipSurfaceBits<<<numBlocks, threadsperBlock>>>(surfObject[i], multiPlanarWidth[i], multiPlanarHeight[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,72 @@
|
|||||||
|
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||||
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||||
|
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#include <cuda.h>
|
||||||
|
#include <vector>
|
||||||
|
#include "cudaNvSciBufMultiplanar.h"
|
||||||
|
#include <helper_image.h>
|
||||||
|
|
||||||
|
#define MAX_FILE_SIZE 100
|
||||||
|
|
||||||
|
int main(int argc, const char **argv) {
|
||||||
|
int numOfGPUs = 0;
|
||||||
|
std::vector<int> deviceIds;
|
||||||
|
(cudaGetDeviceCount(&numOfGPUs));
|
||||||
|
|
||||||
|
printf("%d GPUs found\n", numOfGPUs);
|
||||||
|
if (!numOfGPUs) {
|
||||||
|
exit(EXIT_WAIVED);
|
||||||
|
} else {
|
||||||
|
for (int devID = 0; devID < numOfGPUs; devID++) {
|
||||||
|
int major = 0, minor = 0;
|
||||||
|
(cudaDeviceGetAttribute(
|
||||||
|
&major, cudaDevAttrComputeCapabilityMajor, devID));
|
||||||
|
(cudaDeviceGetAttribute(
|
||||||
|
&minor, cudaDevAttrComputeCapabilityMinor, devID));
|
||||||
|
if (major >= 6) {
|
||||||
|
deviceIds.push_back(devID);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (deviceIds.size() == 0) {
|
||||||
|
printf(
|
||||||
|
"cudaNvSciBufMultiplanar requires one or more GPUs of Pascal(SM 6.0) or higher "
|
||||||
|
"archs\nWaiving..\n");
|
||||||
|
exit(EXIT_WAIVED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string image_filename = sdkFindFilePath("yuv_planar_img1.yuv", argv[0]);
|
||||||
|
std::string image_filename_out = "image_out.yuv";
|
||||||
|
uint32_t imageWidth = 720;
|
||||||
|
uint32_t imageHeight = 480;
|
||||||
|
|
||||||
|
printf("input image %s , width = %d, height = %d\n", image_filename.c_str(), imageWidth, imageHeight);
|
||||||
|
|
||||||
|
cudaNvSciBufMultiplanar cudaNvSciBufMultiplanarApp(imageWidth, imageHeight, deviceIds);
|
||||||
|
cudaNvSciBufMultiplanarApp.runCudaNvSciBufPlanar(image_filename, image_filename_out);
|
||||||
|
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user