Samples - Add a sample for decoding a video and converting the raw decoded YUV frames to RGB format using HIP kernels (#108)
* WIP: class implementation * add more definitions * rocvideodecode implementation * formatting fixes * address review comments * rocvideodecode class update * videodec sample app-enable all apis * video_dec_rgb_sample implementation * fix issue with release mode execution and other clean_up * add copyright block * address review comments * sync with tot and addressed review comments * convert to snake_case
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
e001d78859
Коммит
15fb3e0b91
@@ -0,0 +1,82 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required (VERSION 3.5)
|
||||
project(videodecodergb)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
# ROCM Path
|
||||
if(DEFINED ENV{ROCM_PATH})
|
||||
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
|
||||
elseif(ROCM_PATH)
|
||||
message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
|
||||
else()
|
||||
set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
|
||||
endif()
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake)
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
|
||||
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++)
|
||||
|
||||
set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1030;gfx1031;gfx1032;gfx1100")
|
||||
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
|
||||
find_package(HIP QUIET)
|
||||
find_package(FFmpeg QUIET)
|
||||
# find rocDecode
|
||||
find_library(ROCDECODE_LIBRARY NAMES rocdecode HINTS {ROCM_PATH}/lib)
|
||||
find_path(ROCDECODE_INCLUDE_DIR NAMES rocdecode.h PATHS /opt/rocm/include/rocdecode {ROCM_PATH}/include/rocdecode)
|
||||
|
||||
if(ROCDECODE_LIBRARY AND ROCDECODE_INCLUDE_DIR)
|
||||
set(ROCDECODE_FOUND TRUE)
|
||||
message("-- ${White}Using rocDecode -- \n\tLibraries:${ROCDECODE_LIBRARY} \n\tIncludes:${ROCDECODE_INCLUDE_DIR}${ColourReset}")
|
||||
endif()
|
||||
|
||||
if(HIP_FOUND AND FFMPEG_FOUND AND ROCDECODE_FOUND)
|
||||
# HIP
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device)
|
||||
# FFMPEG
|
||||
include_directories(${AVUTIL_INCLUDE_DIR} ${AVCODEC_INCLUDE_DIR})
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES})
|
||||
# rocDecode and utils
|
||||
include_directories (${CMAKE_CURRENT_SOURCE_DIR}/../../utils ${ROCDECODE_INCLUDE_DIR})
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCDECODE_LIBRARY})
|
||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR}
|
||||
videodecrgb.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../utils/colorspace_kernels.cpp)
|
||||
|
||||
add_executable(${PROJECT_NAME} ${SOURCES})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
|
||||
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
|
||||
else()
|
||||
message("-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!")
|
||||
if (NOT HIP_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!")
|
||||
endif()
|
||||
if (NOT FFMPEG_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: FFMPEG Not Found! - please install FFMPEG!")
|
||||
endif()
|
||||
if (NOT ROCDECODE_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
|
||||
endif()
|
||||
endif()
|
||||
@@ -0,0 +1,11 @@
|
||||
# Video Decode Sample
|
||||
This sample illustrates the FFMPEG demuxer to get the individual frames which are then decoded using rocDecode API and optionally color-converted using custom HIP kernels on AMD hardware. This sample converts decoded YUV output to one of the RGB or BGR formats(24bit, 32bit, 464bit)
|
||||
|
||||
## Build and run the sample:
|
||||
```
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
make -j
|
||||
./videodecodergb -i <input video file - required> -o <optional; output path to save decoded YUV frames> -d <GPU device ID, 0 for the first device, 1 for the second device, etc> -of <optional: output format bgr, bgra, bgr48, bgr64 etc>
|
||||
```
|
||||
@@ -0,0 +1,325 @@
|
||||
/*
|
||||
Copyright (c) 2023 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
#include <sys/stat.h>
|
||||
#include <libgen.h>
|
||||
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
||||
#include <filesystem>
|
||||
#else
|
||||
#include <experimental/filesystem>
|
||||
#endif
|
||||
#include "video_demuxer.h"
|
||||
#include "roc_video_dec.h"
|
||||
#include "colorspace_kernels.h"
|
||||
|
||||
FILE *fpOut = nullptr;
|
||||
enum OutputFormatEnum {
|
||||
native = 0, bgr, bgr48, rgb, rgb48, bgra, bgra64, rgba, rgba64
|
||||
};
|
||||
std::vector<std::string> st_output_format_name = {"native", "bgr", "bgr48", "rgb", "rgb48", "bgra", "bgra64", "rgba", "rgba64"};
|
||||
|
||||
void ShowHelpAndExit(const char *option = NULL) {
|
||||
std::cout << "Options:" << std::endl
|
||||
<< "-i Input File Path - required" << std::endl
|
||||
<< "-o Output File Path - dumps output if requested; optional" << std::endl
|
||||
<< "-d GPU device ID (0 for the first device, 1 for the second, etc.); optional; default: 0" << std::endl
|
||||
<< "-of Output Format name - (native, bgr, bgr48, rgb, rgb48, bgra, bgra64, rgba, rgba64; converts native YUV frame to RGB image format; optional; default: 0" << std::endl
|
||||
<< "-crop crop rectangle for output (not used when using interopped decoded frame); optional; default: 0" << std::endl
|
||||
<< "-m output_surface_memory_type - decoded surface memory; optional; default - 0" << std::endl;
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void DumpRGBImage(std::string outputfileName, void* pdevMem, OutputSurfaceInfo *surf_info, int rgb_image_size) {
|
||||
if (fpOut == nullptr) {
|
||||
fpOut = fopen(outputfileName.c_str(), "wb");
|
||||
}
|
||||
uint8_t *hstPtr = nullptr;
|
||||
hstPtr = new uint8_t [rgb_image_size];
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hip_status = hipMemcpyDtoH((void *)hstPtr, pdevMem, rgb_image_size);
|
||||
if (hip_status != hipSuccess) {
|
||||
std::cout << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl;
|
||||
delete [] hstPtr;
|
||||
return;
|
||||
}
|
||||
if (fpOut) {
|
||||
fwrite(hstPtr, 1, rgb_image_size, fpOut);
|
||||
}
|
||||
|
||||
if (hstPtr != nullptr) {
|
||||
delete [] hstPtr;
|
||||
hstPtr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void ColorConvertYUV2RGB(uint8_t *p_src, OutputSurfaceInfo *surf_info, uint8_t *rgb_dev_mem_ptr, OutputFormatEnum e_output_format) {
|
||||
|
||||
int rgb_width = (surf_info->output_width + 1) & ~1; // has to be a multiple of 2 for hip colorconvert kernels
|
||||
// todo:: get color standard from the decoder
|
||||
if (surf_info->surface_format == rocDecVideoSurfaceFormat_YUV444) {
|
||||
if (e_output_format == bgr)
|
||||
YUV444ToColor24<BGR24>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 3 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == bgra)
|
||||
YUV444ToColor32<BGRA32>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 4 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgb)
|
||||
YUV444ToColor24<RGB24>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 3 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgba)
|
||||
YUV444ToColor32<RGBA32>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 4 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
} else if (surf_info->surface_format == rocDecVideoSurfaceFormat_NV12) {
|
||||
if (e_output_format == bgr)
|
||||
Nv12ToColor24<BGR24>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 3 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == bgra)
|
||||
Nv12ToColor32<BGRA32>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 4 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgb)
|
||||
Nv12ToColor24<RGB24>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 3 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgba)
|
||||
Nv12ToColor32<RGBA32>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 4 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
}
|
||||
if (surf_info->surface_format == rocDecVideoSurfaceFormat_YUV444_16Bit) {
|
||||
if (e_output_format == bgr)
|
||||
YUV444P16ToColor24<BGR24>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 3 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgb)
|
||||
YUV444P16ToColor24<RGB24>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 3 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == bgr48)
|
||||
YUV444P16ToColor48<BGR48>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 6 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgb48)
|
||||
YUV444P16ToColor48<RGB48>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 6 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == bgra64)
|
||||
YUV444P16ToColor64<BGRA64>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 8 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgba64)
|
||||
YUV444P16ToColor64<RGBA64>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 8 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
} else if (surf_info->surface_format == rocDecVideoSurfaceFormat_P016) {
|
||||
if (e_output_format == bgr)
|
||||
P016ToColor24<BGR24>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 3 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgb)
|
||||
P016ToColor24<RGB24>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 3 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == bgr48)
|
||||
P016ToColor48<BGR48>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 6 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgb48)
|
||||
P016ToColor48<RGB48>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 6 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == bgra64)
|
||||
P016ToColor64<BGRA64>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 8 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
else if (e_output_format == rgba64)
|
||||
P016ToColor64<RGBA64>(p_src, surf_info->output_pitch, static_cast<uint8_t *>(rgb_dev_mem_ptr), 8 * rgb_width, surf_info->output_width,
|
||||
surf_info->output_height, surf_info->output_vstride, 0);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
std::string input_file_path, output_file_path;
|
||||
int dump_output_frames = 0;
|
||||
int convert_to_rgb = 0;
|
||||
int device_id = 0;
|
||||
Rect crop_rect = {};
|
||||
Rect *p_crop_rect = nullptr;
|
||||
size_t rgb_image_size;
|
||||
uint32_t rgb_image_stride;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
uint8_t *p_rgb_dev_mem= nullptr;
|
||||
OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; // set to internal
|
||||
OutputFormatEnum e_output_format = native;
|
||||
int rgb_width;
|
||||
|
||||
// Parse command-line arguments
|
||||
if(argc < 1) {
|
||||
ShowHelpAndExit();
|
||||
}
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (!strcmp(argv[i], "-h")) {
|
||||
ShowHelpAndExit();
|
||||
}
|
||||
if (!strcmp(argv[i], "-i")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-i");
|
||||
}
|
||||
input_file_path = argv[i];
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-o")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-o");
|
||||
}
|
||||
output_file_path = argv[i];
|
||||
dump_output_frames = 1;
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-d")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-d");
|
||||
}
|
||||
device_id = atoi(argv[i]);
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-m")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-m");
|
||||
}
|
||||
mem_type = static_cast<OutputSurfaceMemoryType>(atoi(argv[i]));
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-crop")) {
|
||||
if (++i == argc || 4 != sscanf(argv[i], "%d,%d,%d,%d", &crop_rect.l, &crop_rect.t, &crop_rect.r, &crop_rect.b)) {
|
||||
ShowHelpAndExit("-crop");
|
||||
}
|
||||
if ((crop_rect.r - crop_rect.l) % 2 == 1 || (crop_rect.b - crop_rect.t) % 2 == 1) {
|
||||
std::cout << "output crop rectangle must have width and height of even numbers" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
p_crop_rect = &crop_rect;
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-of")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-of");
|
||||
}
|
||||
auto it = find(st_output_format_name.begin(), st_output_format_name.end(), argv[i]);
|
||||
if (it == st_output_format_name.end()) {
|
||||
ShowHelpAndExit("-of");
|
||||
}
|
||||
e_output_format = (OutputFormatEnum)(it-st_output_format_name.begin());
|
||||
continue;
|
||||
}
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
|
||||
try {
|
||||
VideoDemuxer demuxer(input_file_path.c_str());
|
||||
rocDecVideoCodec rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer.GetCodecID());
|
||||
RocVideoDecoder viddec(device_id, mem_type, rocdec_codec_id, false, p_crop_rect);
|
||||
|
||||
std::string device_name, gcn_arch_name;
|
||||
int pci_bus_id, pci_domain_id, pci_device_id;
|
||||
hipStream_t stream = viddec.GetStream();
|
||||
|
||||
viddec.GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id);
|
||||
std::cout << "info: Using GPU device " << device_id << " " << device_name << "[" << gcn_arch_name << "] on PCI bus " <<
|
||||
std::setfill('0') << std::setw(2) << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') << std::setw(2) <<
|
||||
std::right << std::hex << pci_domain_id << "." << pci_device_id << std::dec << std::endl;
|
||||
std::cout << "info: decoding started, please wait!" << std::endl;
|
||||
|
||||
int n_video_bytes = 0, n_frames_returned = 0, n_frame = 0;
|
||||
uint8_t *p_video = nullptr;
|
||||
uint8_t *p_frame = nullptr;
|
||||
int64_t pts = 0;
|
||||
OutputSurfaceInfo *surf_info;
|
||||
uint32_t width, height;
|
||||
double total_dec_time = 0;
|
||||
convert_to_rgb = e_output_format != native;
|
||||
|
||||
do {
|
||||
auto startTime = std::chrono::high_resolution_clock::now();
|
||||
demuxer.Demux(&p_video, &n_video_bytes, &pts);
|
||||
n_frames_returned = viddec.DecodeFrame(p_video, n_video_bytes, 0, pts);
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto time_per_frame = std::chrono::duration<double, std::milli>(end_time - startTime).count();
|
||||
total_dec_time += time_per_frame;
|
||||
if (!n_frame && !viddec.GetOutputSurfaceInfo(&surf_info)){
|
||||
std::cerr << "Error: Failed to get Output Image Info!" << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_frames_returned; i++) {
|
||||
p_frame = viddec.GetFrame(&pts);
|
||||
if (convert_to_rgb) {
|
||||
if (surf_info->bit_depth == 8) {
|
||||
rgb_width = (surf_info->output_width + 1) & ~1; // has to be a multiple of 2 for hip colorconvert kernels
|
||||
rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * surf_info->output_height * 3 : rgb_width * surf_info->output_height * 4;
|
||||
} else { // 16bit
|
||||
rgb_width = (surf_info->output_width + 1) & ~1; // has to be a multiple of 2 for hip colorconvert kernels
|
||||
rgb_image_size = ((e_output_format == bgr) || (e_output_format == rgb)) ? rgb_width * surf_info->output_height * 3 : ((e_output_format == bgr48) || (e_output_format == rgb48)) ?
|
||||
rgb_width * surf_info->output_height * 6 : rgb_width * surf_info->output_height * 8;
|
||||
}
|
||||
if (p_rgb_dev_mem == nullptr) {
|
||||
hip_status = hipMalloc(&p_rgb_dev_mem, rgb_image_size);
|
||||
if (hip_status != hipSuccess) {
|
||||
std::cerr << "ERROR: hipMalloc failed to allocate the device memory for the output!" << hip_status << std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
ColorConvertYUV2RGB(p_frame, surf_info, p_rgb_dev_mem, e_output_format);
|
||||
}
|
||||
if (dump_output_frames) {
|
||||
if (convert_to_rgb)
|
||||
DumpRGBImage(output_file_path, p_rgb_dev_mem, surf_info, rgb_image_size);
|
||||
else
|
||||
viddec.SaveFrameToFile(output_file_path, p_frame, surf_info);
|
||||
}
|
||||
// release frame
|
||||
viddec.ReleaseFrame(pts);
|
||||
}
|
||||
n_frame += n_frames_returned;
|
||||
} while (n_video_bytes);
|
||||
|
||||
if (p_rgb_dev_mem != nullptr) {
|
||||
hip_status = hipFree(p_rgb_dev_mem);
|
||||
if (hip_status != hipSuccess) {
|
||||
std::cout << "ERROR: hipFree failed! (" << hip_status << ")" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (fpOut) {
|
||||
fclose(fpOut);
|
||||
fpOut = nullptr;
|
||||
}
|
||||
|
||||
std::cout << "info: Video codec format: " << viddec.GetCodecFmtName(viddec.GetCodecId()) << std::endl;
|
||||
std::cout << "info: Video size: [ " << surf_info->output_width << ", " << surf_info->output_height << " ]" << std::endl;
|
||||
std::cout << "info: Video surface format: " << viddec.GetSurfaceFmtName(surf_info->surface_format) << std::endl;
|
||||
std::cout << "info: Video Bit depth: " << surf_info->bit_depth << std::endl;
|
||||
std::cout << "info: Total frame decoded: " << n_frame << std::endl;
|
||||
if (!dump_output_frames) {
|
||||
std::cout << "info: avg decoding time per frame (ms): " << total_dec_time / n_frame << std::endl;
|
||||
std::cout << "info: avg FPS: " << (n_frame / total_dec_time) * 1000 << std::endl;
|
||||
}
|
||||
} catch (const std::exception &ex) {
|
||||
std::cout << ex.what() << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,593 @@
|
||||
/*
|
||||
Copyright (c) 2023 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "colorspace_kernels.h"
|
||||
#include "roc_video_dec.h"
|
||||
|
||||
__constant__ float yuv_to_rgb_mat[3][3];
|
||||
__constant__ float rgb_to_yuv_mat[3][3];
|
||||
|
||||
|
||||
void inline GetColMatCoefficients(int col_standard, float &wr, float &wb, int &black, int &white, int &max) {
|
||||
black = 16; white = 235;
|
||||
max = 255;
|
||||
|
||||
switch (col_standard)
|
||||
{
|
||||
case ColorSpaceStandard_BT709:
|
||||
default:
|
||||
wr = 0.2126f; wb = 0.0722f;
|
||||
break;
|
||||
|
||||
case ColorSpaceStandard_FCC:
|
||||
wr = 0.30f; wb = 0.11f;
|
||||
break;
|
||||
|
||||
case ColorSpaceStandard_BT470:
|
||||
case ColorSpaceStandard_BT601:
|
||||
wr = 0.2990f; wb = 0.1140f;
|
||||
break;
|
||||
|
||||
case ColorSpaceStandard_SMPTE240M:
|
||||
wr = 0.212f; wb = 0.087f;
|
||||
break;
|
||||
|
||||
case ColorSpaceStandard_BT2020:
|
||||
case ColorSpaceStandard_BT2020C:
|
||||
wr = 0.2627f; wb = 0.0593f;
|
||||
// 10-bit only
|
||||
black = 64 << 6; white = 940 << 6;
|
||||
max = (1 << 16) - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void SetMatYuv2Rgb(int col_standard) {
|
||||
float wr, wb;
|
||||
int black, white, max;
|
||||
GetColMatCoefficients(col_standard, wr, wb, black, white, max);
|
||||
float mat[3][3] = {
|
||||
1.0f, 0.0f, (1.0f - wr) / 0.5f,
|
||||
1.0f, -wb * (1.0f - wb) / 0.5f / (1 - wb - wr), -wr * (1 - wr) / 0.5f / (1 - wb - wr),
|
||||
1.0f, (1.0f - wb) / 0.5f, 0.0f,
|
||||
};
|
||||
for (int i = 0; i < 3; i++) {
|
||||
for (int j = 0; j < 3; j++) {
|
||||
mat[i][j] = (float)(1.0 * max / (white - black) * mat[i][j]);
|
||||
}
|
||||
}
|
||||
HIP_API_CALL(hipMemcpyToSymbol(yuv_to_rgb_mat, mat, sizeof(mat)));
|
||||
}
|
||||
|
||||
void SetMatRgb2Yuv(int col_standard) {
|
||||
float wr, wb;
|
||||
int black, white, max;
|
||||
GetColMatCoefficients(col_standard, wr, wb, black, white, max);
|
||||
float mat[3][3] = {
|
||||
wr, 1.0f - wb - wr, wb,
|
||||
-0.5f * wr / (1.0f - wb), -0.5f * (1 - wb - wr) / (1.0f - wb), 0.5f,
|
||||
0.5f, -0.5f * (1.0f - wb - wr) / (1.0f - wr), -0.5f * wb / (1.0f - wr),
|
||||
};
|
||||
for (int i = 0; i < 3; i++) {
|
||||
for (int j = 0; j < 3; j++) {
|
||||
mat[i][j] = (float)(1.0 * (white - black) / max * mat[i][j]);
|
||||
}
|
||||
}
|
||||
HIP_API_CALL(hipMemcpyToSymbol(rgb_to_yuv_mat, mat, sizeof(mat)));
|
||||
}
|
||||
|
||||
template<class T>
|
||||
__device__ static T Clamp(T x, T lower, T upper) {
|
||||
return x < lower ? lower : (x > upper ? upper : x);
|
||||
}
|
||||
|
||||
template<class Rgb, class YuvUnit>
|
||||
__device__ inline Rgb YuvToRgbForPixel(YuvUnit y, YuvUnit u, YuvUnit v) {
|
||||
const int
|
||||
low = 1 << (sizeof(YuvUnit) * 8 - 4),
|
||||
mid = 1 << (sizeof(YuvUnit) * 8 - 1);
|
||||
float fy = (int)y - low, fu = (int)u - mid, fv = (int)v - mid;
|
||||
const float maxf = (1 << sizeof(YuvUnit) * 8) - 1.0f;
|
||||
YuvUnit
|
||||
r = (YuvUnit)Clamp(yuv_to_rgb_mat[0][0] * fy + yuv_to_rgb_mat[0][1] * fu + yuv_to_rgb_mat[0][2] * fv, 0.0f, maxf),
|
||||
g = (YuvUnit)Clamp(yuv_to_rgb_mat[1][0] * fy + yuv_to_rgb_mat[1][1] * fu + yuv_to_rgb_mat[1][2] * fv, 0.0f, maxf),
|
||||
b = (YuvUnit)Clamp(yuv_to_rgb_mat[2][0] * fy + yuv_to_rgb_mat[2][1] * fu + yuv_to_rgb_mat[2][2] * fv, 0.0f, maxf);
|
||||
|
||||
Rgb rgb{};
|
||||
const int nShift = abs((int)sizeof(YuvUnit) - (int)sizeof(rgb.c.r)) * 8;
|
||||
if (sizeof(YuvUnit) >= sizeof(rgb.c.r)) {
|
||||
rgb.c.r = r >> nShift;
|
||||
rgb.c.g = g >> nShift;
|
||||
rgb.c.b = b >> nShift;
|
||||
} else {
|
||||
rgb.c.r = r << nShift;
|
||||
rgb.c.g = g << nShift;
|
||||
rgb.c.b = b << nShift;
|
||||
}
|
||||
return rgb;
|
||||
}
|
||||
|
||||
// yuv to RGBA (32/64 bit)
|
||||
template<class YuvUnitx2, class Rgb, class RgbIntx2>
|
||||
__global__ static void YuvToRgbaKernel(uint8_t *dp_yuv, int yuv_pitch, uint8_t *dp_rgb, int rgb_pitch, int width, int height, int v_pitch) {
|
||||
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
|
||||
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
|
||||
if (x + 1 >= width || y + 1 >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t *p_src = dp_yuv + x * sizeof(YuvUnitx2) / 2 + y * yuv_pitch;
|
||||
uint8_t *p_dst = dp_rgb + x * sizeof(Rgb) + y * rgb_pitch;
|
||||
|
||||
YuvUnitx2 l0 = *(YuvUnitx2 *)p_src;
|
||||
YuvUnitx2 l1 = *(YuvUnitx2 *)(p_src + yuv_pitch);
|
||||
YuvUnitx2 ch = *(YuvUnitx2 *)(p_src + (v_pitch - y / 2) * yuv_pitch);
|
||||
|
||||
*(RgbIntx2 *)p_dst = RgbIntx2 {
|
||||
YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y).d,
|
||||
YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y).d,
|
||||
};
|
||||
*(RgbIntx2 *)(p_dst + rgb_pitch) = RgbIntx2 {
|
||||
YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y).d,
|
||||
YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y).d,
|
||||
};
|
||||
}
|
||||
|
||||
// yuv to RGB (24/48 bit)
|
||||
template<class YuvUnitx2, class Rgb, class RgbInt1, class RgbInt2>
|
||||
__global__ static void YuvToRgbKernel(uint8_t *dp_yuv, int yuv_pitch, uint8_t *dp_rgb, int rgb_pitch, int width, int height, int v_pitch) {
|
||||
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
|
||||
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
|
||||
if (x + 1 >= width || y + 1 >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t *p_src = dp_yuv + x * sizeof(YuvUnitx2) / 2 + y * yuv_pitch;
|
||||
uint8_t *p_dst = dp_rgb + x * sizeof(Rgb) + y * rgb_pitch;
|
||||
uint8_t *p_dst1 = p_dst + rgb_pitch;
|
||||
|
||||
YuvUnitx2 l0 = *(YuvUnitx2 *)p_src;
|
||||
YuvUnitx2 l1 = *(YuvUnitx2 *)(p_src + yuv_pitch);
|
||||
YuvUnitx2 ch = *(YuvUnitx2 *)(p_src + (v_pitch - y / 2) * yuv_pitch);
|
||||
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y),
|
||||
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y),
|
||||
rgb2 = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y),
|
||||
rgb3 = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y);
|
||||
|
||||
*(RgbInt1 *)p_dst = RgbInt1 { rgb0.v.x, rgb0.v.y, rgb0.v.z, rgb1.v.x };
|
||||
*(RgbInt2 *)(p_dst + sizeof(RgbInt1)) = RgbInt2 { rgb1.v.y, rgb1.v.z };
|
||||
*(RgbInt1 *)(p_dst1) = RgbInt1 { rgb2.v.x, rgb2.v.y, rgb2.v.z, rgb3.v.x };
|
||||
*(RgbInt2 *)(p_dst1 + sizeof(RgbInt1)) = RgbInt2 { rgb3.v.y, rgb3.v.z };
|
||||
}
|
||||
|
||||
// yuv444 to RGBA (32/64 bit)
|
||||
template<class YuvUnitx2, class Rgb, class RgbIntx2>
|
||||
__global__ static void Yuv444ToRgbaKernel(uint8_t *dp_yuv, int yuv_pitch, uint8_t *dp_rgb, int rgb_pitch, int width, int height, int v_pitch) {
|
||||
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
|
||||
int y = (threadIdx.y + blockIdx.y * blockDim.y);
|
||||
if (x + 1 >= width || y >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t *p_src = dp_yuv + x * sizeof(YuvUnitx2) / 2 + y * yuv_pitch;
|
||||
uint8_t *p_dst = dp_rgb + x * sizeof(Rgb) + y * rgb_pitch;
|
||||
|
||||
YuvUnitx2 l0 = *(YuvUnitx2 *)p_src;
|
||||
YuvUnitx2 ch1 = *(YuvUnitx2 *)(p_src + (v_pitch * yuv_pitch));
|
||||
YuvUnitx2 ch2 = *(YuvUnitx2 *)(p_src + (2 * v_pitch * yuv_pitch));
|
||||
|
||||
*(RgbIntx2 *)p_dst = RgbIntx2{
|
||||
YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x).d,
|
||||
YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y).d,
|
||||
};
|
||||
}
|
||||
|
||||
// yuv444 to RGB (24/48 bit)
|
||||
template<class YuvUnitx2, class Rgb, class RgbInt1, class RgbInt2>
|
||||
__global__ static void Yuv444ToRgbKernel(uint8_t *dp_yuv, int yuv_pitch, uint8_t *dp_rgb, int rgb_pitch, int width, int height, int v_pitch) {
|
||||
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
|
||||
int y = (threadIdx.y + blockIdx.y * blockDim.y);
|
||||
if (x + 1 >= width || y >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t *p_src = dp_yuv + x * sizeof(YuvUnitx2) / 2 + y * yuv_pitch;
|
||||
uint8_t *p_dst = dp_rgb + x * sizeof(Rgb) + y * rgb_pitch;
|
||||
|
||||
YuvUnitx2 l0 = *(YuvUnitx2 *)p_src;
|
||||
YuvUnitx2 ch1 = *(YuvUnitx2 *)(p_src + (v_pitch * yuv_pitch));
|
||||
YuvUnitx2 ch2 = *(YuvUnitx2 *)(p_src + (2 * v_pitch * yuv_pitch));
|
||||
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x),
|
||||
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y);
|
||||
|
||||
*(RgbInt1 *)p_dst = RgbInt1 { rgb0.v.x, rgb0.v.y, rgb0.v.z, rgb1.v.x };
|
||||
*(RgbInt2 *)(p_dst + sizeof(RgbInt1)) = RgbInt2 { rgb1.v.y, rgb1.v.z };
|
||||
}
|
||||
|
||||
|
||||
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
|
||||
__global__ static void YuvToRgbPlanarKernel(uint8_t *dp_yuv, int yuv_pitch, uint8_t *dp_rgbp, int nRgbpPitch, int width, int height, int v_pitch) {
|
||||
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
|
||||
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
|
||||
if (x + 1 >= width || y + 1 >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t *p_src = dp_yuv + x * sizeof(YuvUnitx2) / 2 + y * yuv_pitch;
|
||||
|
||||
YuvUnitx2 l0 = *(YuvUnitx2 *)p_src;
|
||||
YuvUnitx2 l1 = *(YuvUnitx2 *)(p_src + yuv_pitch);
|
||||
YuvUnitx2 ch = *(YuvUnitx2 *)(p_src + (v_pitch - y / 2) * yuv_pitch);
|
||||
|
||||
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch.x, ch.y),
|
||||
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch.x, ch.y),
|
||||
rgb2 = YuvToRgbForPixel<Rgb>(l1.x, ch.x, ch.y),
|
||||
rgb3 = YuvToRgbForPixel<Rgb>(l1.y, ch.x, ch.y);
|
||||
|
||||
uint8_t *p_dst = dp_rgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
|
||||
*(RgbUnitx2 *)p_dst = RgbUnitx2 {rgb0.v.x, rgb1.v.x};
|
||||
*(RgbUnitx2 *)(p_dst + nRgbpPitch) = RgbUnitx2 {rgb2.v.x, rgb3.v.x};
|
||||
p_dst += nRgbpPitch * height;
|
||||
*(RgbUnitx2 *)p_dst = RgbUnitx2 {rgb0.v.y, rgb1.v.y};
|
||||
*(RgbUnitx2 *)(p_dst + nRgbpPitch) = RgbUnitx2 {rgb2.v.y, rgb3.v.y};
|
||||
p_dst += nRgbpPitch * height;
|
||||
*(RgbUnitx2 *)p_dst = RgbUnitx2 {rgb0.v.z, rgb1.v.z};
|
||||
*(RgbUnitx2 *)(p_dst + nRgbpPitch) = RgbUnitx2 {rgb2.v.z, rgb3.v.z};
|
||||
}
|
||||
|
||||
template<class YuvUnitx2, class Rgb, class RgbUnitx2>
|
||||
__global__ static void Yuv444ToRgbPlanarKernel(uint8_t *dp_yuv, int yuv_pitch, uint8_t *dp_rgbp, int nRgbpPitch, int width, int height, int v_pitch) {
|
||||
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
|
||||
int y = (threadIdx.y + blockIdx.y * blockDim.y);
|
||||
if (x + 1 >= width || y >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t *p_src = dp_yuv + x * sizeof(YuvUnitx2) / 2 + y * yuv_pitch;
|
||||
|
||||
YuvUnitx2 l0 = *(YuvUnitx2 *)p_src;
|
||||
YuvUnitx2 ch1 = *(YuvUnitx2 *)(p_src + (v_pitch * yuv_pitch));
|
||||
YuvUnitx2 ch2 = *(YuvUnitx2 *)(p_src + (2 * v_pitch * yuv_pitch));
|
||||
|
||||
Rgb rgb0 = YuvToRgbForPixel<Rgb>(l0.x, ch1.x, ch2.x),
|
||||
rgb1 = YuvToRgbForPixel<Rgb>(l0.y, ch1.y, ch2.y);
|
||||
|
||||
|
||||
uint8_t *p_dst = dp_rgbp + x * sizeof(RgbUnitx2) / 2 + y * nRgbpPitch;
|
||||
*(RgbUnitx2 *)p_dst = RgbUnitx2{ rgb0.v.x, rgb1.v.x };
|
||||
|
||||
p_dst += nRgbpPitch * height;
|
||||
*(RgbUnitx2 *)p_dst = RgbUnitx2{ rgb0.v.y, rgb1.v.y };
|
||||
|
||||
p_dst += nRgbpPitch * height;
|
||||
*(RgbUnitx2 *)p_dst = RgbUnitx2{ rgb0.v.z, rgb1.v.z };
|
||||
}
|
||||
|
||||
template <class COLOR32>
|
||||
void Nv12ToColor32(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbaKernel<uchar2, COLOR32, uint2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_nv12, nv12_pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR64>
|
||||
void Nv12ToColor64(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbaKernel<uchar2, COLOR64, ulonglong2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_nv12, nv12_pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR32>
|
||||
void YUV444ToColor32(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbaKernel<uchar2, COLOR32, uint2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR64>
|
||||
void YUV444ToColor64(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbaKernel<uchar2, COLOR64, ulonglong2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR32>
|
||||
void P016ToColor32(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbaKernel<ushort2, COLOR32, uint2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_p016, p016_pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR64>
|
||||
void P016ToColor64(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbaKernel<ushort2, COLOR64, ulonglong2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_p016, p016_pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR32>
|
||||
void YUV444P16ToColor32(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbaKernel<ushort2, COLOR32, uint2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR64>
|
||||
void YUV444P16ToColor64(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbaKernel<ushort2, COLOR64, ulonglong2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR32>
|
||||
void Nv12ToColorPlanar(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbPlanarKernel<uchar2, COLOR32, uchar2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_nv12, nv12_pitch, dp_bgrp, nBgrpPitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR32>
|
||||
void P016ToColorPlanar(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbPlanarKernel<ushort2, COLOR32, uchar2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_p016, p016_pitch, dp_bgrp, nBgrpPitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR32>
|
||||
void YUV444ToColorPlanar(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbPlanarKernel<uchar2, COLOR32, uchar2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgrp, nBgrpPitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR32>
|
||||
void YUV444P16ToColorPlanar(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbPlanarKernel<ushort2, COLOR32, uchar2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgrp, nBgrpPitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
// Explicit Instantiation: for RGB32/BGR32 and RGB64/BGR64 formats
|
||||
template void Nv12ToColor32<BGRA32>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void Nv12ToColor32<RGBA32>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void Nv12ToColor64<BGRA64>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void Nv12ToColor64<RGBA64>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColor32<BGRA32>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColor32<RGBA32>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColor64<BGRA64>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColor64<RGBA64>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColor32<BGRA32>(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColor32<RGBA32>(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColor64<BGRA64>(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColor64<RGBA64>(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColor32<BGRA32>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColor32<RGBA32>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColor64<BGRA64>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColor64<RGBA64>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void Nv12ToColorPlanar<BGRA32>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void Nv12ToColorPlanar<RGBA32>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColorPlanar<BGRA32>(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColorPlanar<RGBA32>(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColorPlanar<BGRA32>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColorPlanar<RGBA32>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColorPlanar<BGRA32>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColorPlanar<RGBA32>(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgrp, int nBgrpPitch, int width, int height, int v_pitch, int col_standard);
|
||||
|
||||
template <class COLOR24>
|
||||
void Nv12ToColor24(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbKernel<uchar2, COLOR24, uchar4, uchar2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_nv12, nv12_pitch, dp_bgr, bgr_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR48>
|
||||
void Nv12ToColor48(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbKernel<uchar2, COLOR48, ushort4, ushort2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_nv12, nv12_pitch, dp_bgr, bgr_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR24>
|
||||
void YUV444ToColor24(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbKernel<uchar2, COLOR24, uchar4, uchar2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgr, bgr_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR48>
|
||||
void YUV444ToColor48(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbKernel<uchar2, COLOR48, ushort4, ushort2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgr, bgr_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR24>
|
||||
void P016ToColor24(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbKernel<ushort2, COLOR24, uchar4, uchar2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_p016, p016_pitch, dp_bgr, bgr_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR48>
|
||||
void P016ToColor48(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
YuvToRgbKernel<ushort2, COLOR48, ushort4, ushort2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_p016, p016_pitch, dp_bgr, bgr_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR24>
|
||||
void YUV444P16ToColor24(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbKernel<ushort2, COLOR24, uchar4, uchar2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgra, bgra_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
template <class COLOR48>
|
||||
void YUV444P16ToColor48(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard) {
|
||||
SetMatYuv2Rgb(col_standard);
|
||||
Yuv444ToRgbKernel<ushort2, COLOR48, ushort4, ushort2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2), dim3(32, 2) >>>
|
||||
(dp_yuv_444, pitch, dp_bgr, bgr_pitch, width, height, v_pitch);
|
||||
}
|
||||
|
||||
|
||||
// Explicit Instantiation: for RGB24/BGR24 and RGB48/BGR48 formats
|
||||
template void Nv12ToColor24<BGR24>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void Nv12ToColor24<RGB24>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void Nv12ToColor48<BGR48>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void Nv12ToColor48<RGB48>(uint8_t *dp_nv12, int nv12_pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColor24<BGR24>(uint8_t *dp_yuv_444, int pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColor24<RGB24>(uint8_t *dp_yuv_444, int pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColor48<BGR48>(uint8_t *dp_yuv_444, int pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444ToColor48<RGB48>(uint8_t *dp_yuv_444, int pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColor24<BGR24>(uint8_t *dp_p016, int p016_pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColor24<RGB24>(uint8_t *dp_p016, int p016_pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColor48<BGR48>(uint8_t *dp_p016, int p016_pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void P016ToColor48<RGB48>(uint8_t *dp_p016, int p016_pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColor24<BGR24>(uint8_t *dp_yuv_444, int pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColor24<RGB24>(uint8_t *dp_yuv_444, int pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColor48<BGR48>(uint8_t *dp_yuv_444, int pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
template void YUV444P16ToColor48<RGB48>(uint8_t *dp_yuv_444, int pitch, uint8_t *p_bgr, int p_bgrPitch, int width, int height, int v_pitch, int col_standard);
|
||||
|
||||
|
||||
template<class YuvUnit, class RgbUnit>
|
||||
__device__ inline YuvUnit RgbToY(RgbUnit r, RgbUnit g, RgbUnit b) {
|
||||
const YuvUnit low = 1 << (sizeof(YuvUnit) * 8 - 4);
|
||||
return rgb_to_yuv_mat[0][0] * r + rgb_to_yuv_mat[0][1] * g + rgb_to_yuv_mat[0][2] * b + low;
|
||||
}
|
||||
|
||||
template<class YuvUnit, class RgbUnit>
|
||||
__device__ inline YuvUnit RgbToU(RgbUnit r, RgbUnit g, RgbUnit b) {
|
||||
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
|
||||
return rgb_to_yuv_mat[1][0] * r + rgb_to_yuv_mat[1][1] * g + rgb_to_yuv_mat[1][2] * b + mid;
|
||||
}
|
||||
|
||||
template<class YuvUnit, class RgbUnit>
|
||||
__device__ inline YuvUnit RgbToV(RgbUnit r, RgbUnit g, RgbUnit b) {
|
||||
const YuvUnit mid = 1 << (sizeof(YuvUnit) * 8 - 1);
|
||||
return rgb_to_yuv_mat[2][0] * r + rgb_to_yuv_mat[2][1] * g + rgb_to_yuv_mat[2][2] * b + mid;
|
||||
}
|
||||
|
||||
template<class YuvUnitx2, class Rgb, class RgbIntx2>
|
||||
__global__ static void RgbaToYuvKernel(uint8_t *dp_rgb, int rgba_pitch, uint8_t *dp_yuv, int yuv_pitch, int width, int height) {
|
||||
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
|
||||
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
|
||||
if (x + 1 >= width || y + 1 >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t *p_src = dp_rgb + x * sizeof(Rgb) + y * rgba_pitch;
|
||||
RgbIntx2 int2a = *(RgbIntx2 *)p_src;
|
||||
RgbIntx2 int2b = *(RgbIntx2 *)(p_src + rgba_pitch);
|
||||
|
||||
Rgb rgb[4] = {int2a.x, int2a.y, int2b.x, int2b.y};
|
||||
decltype(Rgb::c.r)
|
||||
r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4,
|
||||
g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4,
|
||||
b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4;
|
||||
|
||||
uint8_t *p_dst = dp_yuv + x * sizeof(YuvUnitx2) / 2 + y * yuv_pitch;
|
||||
*(YuvUnitx2 *)p_dst = YuvUnitx2 {
|
||||
RgbToY<decltype(YuvUnitx2::x)>(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b),
|
||||
RgbToY<decltype(YuvUnitx2::x)>(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b),
|
||||
};
|
||||
*(YuvUnitx2 *)(p_dst + yuv_pitch) = YuvUnitx2 {
|
||||
RgbToY<decltype(YuvUnitx2::x)>(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b),
|
||||
RgbToY<decltype(YuvUnitx2::x)>(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b),
|
||||
};
|
||||
*(YuvUnitx2 *)(p_dst + (height - y / 2) * yuv_pitch) = YuvUnitx2 {
|
||||
RgbToU<decltype(YuvUnitx2::x)>(r, g, b),
|
||||
RgbToV<decltype(YuvUnitx2::x)>(r, g, b),
|
||||
};
|
||||
}
|
||||
|
||||
void Bgra64ToP016(uint8_t *dp_bgra, int bgra_pitch, uint8_t *dp_p016, int p016_pitch, int width, int height, int col_standard) {
|
||||
SetMatRgb2Yuv(col_standard);
|
||||
RgbaToYuvKernel<ushort2, BGRA64, ulonglong2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(dp_bgra, bgra_pitch, dp_p016, p016_pitch, width, height);
|
||||
}
|
||||
|
||||
template<class YuvUnitx2, class Rgb, class RgbInt1, class RgbInt2>
|
||||
__global__ static void RgbToYuvKernel(uint8_t *dp_rgb, int rgb_pitch, uint8_t *dp_yuv, int yuv_pitch, int width, int height) {
|
||||
int x = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
|
||||
int y = (threadIdx.y + blockIdx.y * blockDim.y) * 2;
|
||||
if (x + 1 >= width || y + 1 >= height) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t *p_src = dp_rgb + x * sizeof(Rgb) + y * rgb_pitch;
|
||||
RgbInt1 int1a = *(RgbInt1 *)p_src;
|
||||
RgbInt2 int2a = *(RgbInt2 *)(p_src + sizeof(RgbInt1));
|
||||
RgbInt1 int1b = *(RgbInt1 *)(p_src + rgb_pitch);
|
||||
RgbInt2 int2b = *(RgbInt2 *)(p_src + rgb_pitch + sizeof(RgbInt1));
|
||||
|
||||
Rgb rgb[4];
|
||||
rgb[0].v = {int1a.x, int1a.y, int1a.z},
|
||||
rgb[1].v = {int1a.w, int2a.x, int2a.y},
|
||||
rgb[2].v = {int1b.x, int1b.y, int1b.z},
|
||||
rgb[3].v = {int1b.w, int2b.x, int2b.y};
|
||||
decltype(Rgb::c.r)
|
||||
r = (rgb[0].c.r + rgb[1].c.r + rgb[2].c.r + rgb[3].c.r) / 4,
|
||||
g = (rgb[0].c.g + rgb[1].c.g + rgb[2].c.g + rgb[3].c.g) / 4,
|
||||
b = (rgb[0].c.b + rgb[1].c.b + rgb[2].c.b + rgb[3].c.b) / 4;
|
||||
|
||||
uint8_t *p_dst = dp_yuv + x * sizeof(YuvUnitx2) / 2 + y * yuv_pitch;
|
||||
*(YuvUnitx2 *)p_dst = YuvUnitx2 {
|
||||
RgbToY<decltype(YuvUnitx2::x)>(rgb[0].c.r, rgb[0].c.g, rgb[0].c.b),
|
||||
RgbToY<decltype(YuvUnitx2::x)>(rgb[1].c.r, rgb[1].c.g, rgb[1].c.b),
|
||||
};
|
||||
*(YuvUnitx2 *)(p_dst + yuv_pitch) = YuvUnitx2 {
|
||||
RgbToY<decltype(YuvUnitx2::x)>(rgb[2].c.r, rgb[2].c.g, rgb[2].c.b),
|
||||
RgbToY<decltype(YuvUnitx2::x)>(rgb[3].c.r, rgb[3].c.g, rgb[3].c.b),
|
||||
};
|
||||
*(YuvUnitx2 *)(p_dst + (height - y / 2) * yuv_pitch) = YuvUnitx2 {
|
||||
RgbToU<decltype(YuvUnitx2::x)>(r, g, b),
|
||||
RgbToV<decltype(YuvUnitx2::x)>(r, g, b),
|
||||
};
|
||||
}
|
||||
|
||||
void Bgr48ToP016(uint8_t *p_bgr, int bgr_pitch, uint8_t *dp_p016, int p016_pitch, int width, int height, int col_standard) {
|
||||
SetMatRgb2Yuv(col_standard);
|
||||
RgbToYuvKernel<ushort2, BGR48, ushort4, ushort2>
|
||||
<<<dim3((width + 63) / 32 / 2, (height + 3) / 2 / 2), dim3(32, 2)>>>
|
||||
(p_bgr, bgr_pitch, dp_p016, p016_pitch, width, height);
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2023 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
/*!
|
||||
* \file
|
||||
* \brief The AMD Color Space Standards for VCN Decode Library.
|
||||
*
|
||||
* \defgroup group_amd_vcn_colorspace colorSpace: AMD VCN Color Space API
|
||||
* \brief AMD The vcnDECODE Color Space API.
|
||||
*/
|
||||
|
||||
typedef enum ColorSpaceStandard_ {
|
||||
ColorSpaceStandard_BT709 = 1,
|
||||
ColorSpaceStandard_Unspecified = 2,
|
||||
ColorSpaceStandard_Reserved = 3,
|
||||
ColorSpaceStandard_FCC = 4,
|
||||
ColorSpaceStandard_BT470 = 5,
|
||||
ColorSpaceStandard_BT601 = 6,
|
||||
ColorSpaceStandard_SMPTE240M = 7,
|
||||
ColorSpaceStandard_YCgCo = 8,
|
||||
ColorSpaceStandard_BT2020 = 9,
|
||||
ColorSpaceStandard_BT2020C = 10
|
||||
} ColorSpaceStandard;
|
||||
|
||||
union BGR24 {
|
||||
uchar3 v;
|
||||
struct {
|
||||
uint8_t b, g, r;
|
||||
} c;
|
||||
};
|
||||
|
||||
union RGB24 {
|
||||
uchar3 v;
|
||||
struct {
|
||||
uint8_t r, g, b;
|
||||
} c;
|
||||
};
|
||||
|
||||
union BGR48 {
|
||||
ushort3 v;
|
||||
struct {
|
||||
uint16_t b, g, r;
|
||||
} c;
|
||||
};
|
||||
|
||||
union RGB48 {
|
||||
ushort3 v;
|
||||
struct {
|
||||
uint16_t r, g, b;
|
||||
} c;
|
||||
};
|
||||
|
||||
union BGRA32 {
|
||||
uint32_t d;
|
||||
uchar4 v;
|
||||
struct {
|
||||
uint8_t b, g, r, a;
|
||||
} c;
|
||||
};
|
||||
|
||||
union RGBA32 {
|
||||
uint32_t d;
|
||||
uchar4 v;
|
||||
struct {
|
||||
uint8_t r, g, b, a;
|
||||
} c;
|
||||
};
|
||||
|
||||
union BGRA64 {
|
||||
uint64_t d;
|
||||
ushort4 v;
|
||||
struct {
|
||||
uint16_t b, g, r, a;
|
||||
} c;
|
||||
};
|
||||
|
||||
union RGBA64 {
|
||||
uint64_t d;
|
||||
ushort4 v;
|
||||
struct {
|
||||
uint16_t r, g, b, a;
|
||||
} c;
|
||||
};
|
||||
|
||||
// color-convert hip kernel function definitions
|
||||
template <class COLOR32>
|
||||
void YUV444ToColor32(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR64>
|
||||
void YUV444ToColor64(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR24>
|
||||
void YUV444ToColor24(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR48>
|
||||
void YUV444ToColor48(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
|
||||
template <class COLOR24>
|
||||
void Nv12ToColor24(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR32>
|
||||
void Nv12ToColor32(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR48>
|
||||
void Nv12ToColor48(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR64>
|
||||
void Nv12ToColor64(uint8_t *dp_nv12, int nv12_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR24>
|
||||
void YUV444P16ToColor24(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR48>
|
||||
void YUV444P16ToColor48(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR32>
|
||||
void YUV444P16ToColor32(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR64>
|
||||
void YUV444P16ToColor64(uint8_t *dp_yuv_444, int pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR32>
|
||||
void P016ToColor32(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR64>
|
||||
void P016ToColor64(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgra, int bgra_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR24>
|
||||
void P016ToColor24(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
template <class COLOR48>
|
||||
void P016ToColor48(uint8_t *dp_p016, int p016_pitch, uint8_t *dp_bgr, int bgr_pitch, int width, int height, int v_pitch, int col_standard);
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2023 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <exception>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#define TOSTR(X) std::to_string(static_cast<int>(X))
|
||||
#define STR(X) std::string(X)
|
||||
|
||||
#if DBGINFO
|
||||
#define INFO(X) std::clog << "[INF] " << " {" << __func__ <<"} " << " " << X << std::endl;
|
||||
#else
|
||||
#define INFO(X) ;
|
||||
#endif
|
||||
#define ERR(X) std::cerr << "[ERR] " << " {" << __func__ <<"} " << " " << X << std::endl;
|
||||
|
||||
|
||||
class rocVideoDecodeException : public std::exception {
|
||||
public:
|
||||
|
||||
explicit rocVideoDecodeException(const std::string& message):_message(message){}
|
||||
virtual const char* what() const throw() override {
|
||||
return _message.c_str();
|
||||
}
|
||||
private:
|
||||
std::string _message;
|
||||
};
|
||||
|
||||
#define THROW(X) throw rocVideoDecodeException(" { "+std::string(__func__)+" } " + X);
|
||||
@@ -158,6 +158,9 @@ class RocVideoDecoder {
|
||||
~RocVideoDecoder();
|
||||
|
||||
rocDecVideoCodec GetCodecId() { return codec_id_; }
|
||||
|
||||
hipStream_t GetStream() {return hip_stream_;}
|
||||
|
||||
/**
|
||||
* @brief Get the output frame width
|
||||
*/
|
||||
|
||||
Ссылка в новой задаче
Block a user