From e62aa3e09bb1b3e2d0729603463fd98f9c4a76b1 Mon Sep 17 00:00:00 2001 From: jeffqjiangNew <142832361+jeffqjiangNew@users.noreply.github.com> Date: Thu, 5 Dec 2024 09:46:24 -0500 Subject: [PATCH] Added the bit stream reader feature. (#433) * * rocDecode/ES parser: Added elementary stream file parser for HEVC and AVC. * * rocDecode/ES parser: Added elementary stream file parser for AV1. Also cleaned up the bitstream ring buffer code. * * rocDecode/ES parser: Added the IVF container file parser for AV1. Also fixed a bug in fill ring buffer function. * * rocDecode/ES file parser: Added supported stream type detection. - The stream type detection checks the unique syntax patterns of the stream type and calculates the likelihood score. Based on the score, the most likely type is determined. - The current supported stream types are: AVC/HEVC/AV1 elementary streams, IVF AV1 streams. * * rocDecode/ES file parser: Fixed an AVC decode regression due to a copy and paste error. * * rocDecode/ES file parser: Added bit depth parsing for codec support check; Added stronger AV1 detection for IVF AV1 stream type. * * rocDecode/ES file parser: Removed debugging logs. * * rocDecode/ES file parser: Added example code to use the built-in file parser. * * rocDecode/Bitstream reader: Renamed the elementary parser feature to bitstream reader and re-organized the code. - Moved the bitstream reader code to rocDecode core lib from utility. - Added bitstream reader interface in parallel with rocDecode parser and decoder interfaces. * * rocDecode/Bitstream reader: Added sample to use bitstream reader, instead of FFMPEG demuxer, to get picture data. Also reverted the original sample app back to using FFMPEG demuxer only. * * rocDecode/Bitstream reader: Renamed the new sample app. * * rocDecode/Bitstream reader: FFMPEG dependency reduction. - Moved MD5 functions out of RocVideoDecoder utility class. This removed RocVideoDecoder's dependency on FFMPEG. 
- Added the new MD5 utility, which depends on FFMPEG lib. MD5 message digest generation is now performed in the MD5 utility. - Modified decode samples that use the MD5 generation function. - Removed FFMPEG dependency from video decoder basic sample. * * rocDecode/Bitstream reader: Added option to use bitstream reader to video decode sample and conformance test script. Added the missing destroy bitstream reader call in video decode basic sample. * * rocDecode/Bitstream reader: Minor format change. No functional changes. * * rocDecode/Bitstream reader: Added handling of unsupported stream file type by the bitstream reader to decode sample apps. * * rocDecode/Bitstream reader: Fixed build errors of several samples. * * rocDecode/Bitstream reader: Added changes based on review comments. * * rocDecode/Bitstream reader: File name changes based on review comments. * * rocDecode/Bitstream reader: Moved MD5 code into a single header file. Added changes based on review comments. * * rocDecode/Bitstream reader: Removed redundant path. * * rocDecode/Bitstream reader: Changed rocDecode version to 0.10.0. Added minor changes based on review comments. 
--------- Co-authored-by: Kiriti Gowda --- CHANGELOG.md | 31 + CMakeLists.txt | 5 +- api/roc_bitstream_reader.h | 85 ++ api/rocdecode_version.h | 2 +- samples/common.h | 7 +- samples/videoDecode/README.md | 1 + samples/videoDecode/videodecode.cpp | 112 +- samples/videoDecodeMem/videodecodemem.cpp | 10 +- samples/videoDecodeRGB/videodecrgb.cpp | 14 +- samples/videoDecodeRaw/CMakeLists.txt | 86 ++ samples/videoDecodeRaw/README.md | 29 + samples/videoDecodeRaw/videodecoderaw.cpp | 322 +++++ src/bit_stream_reader/bs_reader_handle.h | 45 + src/bit_stream_reader/es_reader.cpp | 1145 +++++++++++++++++ src/bit_stream_reader/es_reader.h | 236 ++++ src/bit_stream_reader/roc_bs_reader_api.cpp | 100 ++ test/testScripts/run_rocDecode_Conformance.py | 10 +- utils/md5.h | 161 +++ utils/rocvideodecode/roc_video_dec.cpp | 111 +- utils/rocvideodecode/roc_video_dec.h | 49 +- 20 files changed, 2374 insertions(+), 187 deletions(-) create mode 100644 api/roc_bitstream_reader.h create mode 100644 samples/videoDecodeRaw/CMakeLists.txt create mode 100644 samples/videoDecodeRaw/README.md create mode 100644 samples/videoDecodeRaw/videodecoderaw.cpp create mode 100644 src/bit_stream_reader/bs_reader_handle.h create mode 100644 src/bit_stream_reader/es_reader.cpp create mode 100644 src/bit_stream_reader/es_reader.h create mode 100644 src/bit_stream_reader/roc_bs_reader_api.cpp create mode 100644 utils/md5.h diff --git a/CHANGELOG.md b/CHANGELOG.md index 64ef5a8185..c888ecf428 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,37 @@ Full documentation for rocDecode is available at [https://rocm.docs.amd.com/projects/rocDecode/en/latest/](https://rocm.docs.amd.com/projects/rocDecode/en/latest/) +## (Unreleased) rocDecode 0.10.0 + +### Added + +* The new bitstream reader feature. The bitstream reader contains a few built-in stream file parsers, including elementary stream file parser and IVF container file parser. 
Currently the reader can parse AVC, HEVC and AV1 elementary stream files and AV1 IVF container files. More format support will be added in the future. +* A new sample app, called videodecoderaw which uses the bitstream reader instead of FFMPEG demuxer to get picture data. + +### Changed + +* Moved MD5 code out of roc video decode utility. + +### Removed + +* + +### Resolved issues + +* + +### Tested configurations + +* Linux + * Ubuntu - `22.04` / `24.04` + * RHEL - `8` / `9` + * SLES - `15 SP5` +* ROCm: `6.3.0` +* libva-amdgpu-dev - `2.16.0` +* mesa-amdgpu-va-drivers - `1:24.3.0` +* FFmpeg - `4.4.2` / `6.1.1` +* rocDecode Setup Script - `V2.4.0` + ## (Unreleased) rocDecode 0.9.0 ### Changed diff --git a/CMakeLists.txt b/CMakeLists.txt index 80eb085ed7..4a9ddee7bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,7 +38,7 @@ if (NOT DEFINED CMAKE_CXX_COMPILER) endif() # NOTE: Match version with api/rocdecode_version.h -set(VERSION "0.9.0") +set(VERSION "0.10.0") set(CMAKE_CXX_STANDARD 17) # Set Project Version and Language @@ -162,7 +162,7 @@ if(HIP_FOUND AND Libva_FOUND) install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT dev NAMELINK_ONLY) install(TARGETS ${PROJECT_NAME} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan) # install rocDecode include files -- {ROCM_PATH}/include/rocdecode - install(FILES api/rocdecode.h api/rocparser.h api/rocdecode_version.h + install(FILES api/rocdecode.h api/rocparser.h api/roc_bitstream_reader.h api/rocdecode_version.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} COMPONENT dev) # install rocDecode api trace include file -- {ROCM_PATH}/include/rocdecode/amd_detail install(FILES api/amd_detail/rocdecode_api_trace.h @@ -178,6 +178,7 @@ if(HIP_FOUND AND Libva_FOUND) install(FILES samples/videoDecodeRGB/CMakeLists.txt samples/videoDecodeRGB/README.md samples/videoDecodeRGB/videodecrgb.cpp DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/videoDecodeRGB COMPONENT 
dev) install(FILES samples/videoDecodeBatch/CMakeLists.txt samples/videoDecodeBatch/README.md samples/videoDecodeBatch/videodecodebatch.cpp DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/videoDecodeBatch COMPONENT dev) install(FILES samples/common.h DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples COMPONENT dev) + install(FILES utils/md5.h DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/utils COMPONENT dev) install(FILES utils/video_demuxer.h DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/utils COMPONENT dev) install(FILES utils/colorspace_kernels.cpp DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/utils COMPONENT dev) install(FILES utils/colorspace_kernels.h DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/utils COMPONENT dev) diff --git a/api/roc_bitstream_reader.h b/api/roc_bitstream_reader.h new file mode 100644 index 0000000000..8ed42aa496 --- /dev/null +++ b/api/roc_bitstream_reader.h @@ -0,0 +1,85 @@ +/* +Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "rocdecode.h" + +/*! + * \file + * \brief The AMD rocBitstreamReader Library. + * + * \defgroup group_roc_bitstream_reader rocDecode Parser: AMD ROCm Video Bitstream Reader API + * \brief AMD The rocBitstreamReader is a toolkit to read picture data from bitstream files for decoding on AMD’s GPUs. + */ + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +/*********************************************************************************/ +//! HANDLE of rocBitstreamReader +//! Used in subsequent API calls after rocDecCreateBitstreamReader +/*********************************************************************************/ +typedef void *RocdecBitstreamReader; + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecCreateBitstreamReader(RocdecBitstreamReader *bs_reader_handle, const char *input_file_path) +//! Create video bitstream reader object and initialize +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI rocDecCreateBitstreamReader(RocdecBitstreamReader *bs_reader_handle, const char *input_file_path); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle, rocDecVideoCodec *codec_type) +//! 
Get the codec type of the bitstream +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle, rocDecVideoCodec *codec_type); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle, int *bit_depth) +//! Get the bit depth of the bitstream +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle, int *bit_depth); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle, uint8_t **pic_data, int *pic_size, int64_t *pts) +//! Read one unit of picture data from the bitstream. The unit can be a frame or field for AVC/HEVC, +//! a temporal unit for AV1, or a frame (including superframe) for VP9. The picture data unit is pointed +//! by pic_data. The size of the unit is specified by pic_size. The presentation time stamp, if available, +//! is given by pts. +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle, uint8_t **pic_data, int *pic_size, int64_t *pts); + +/************************************************************************************************/ +//! \ingroup group_roc_bitstream_reader +//! \fn rocDecStatus ROCDECAPI rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle) +//! 
Destroy the video parser object +/************************************************************************************************/ +extern rocDecStatus ROCDECAPI rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle); + +#if defined(__cplusplus) +} +#endif /* __cplusplus */ diff --git a/api/rocdecode_version.h b/api/rocdecode_version.h index 403fc555e5..716e40a246 100644 --- a/api/rocdecode_version.h +++ b/api/rocdecode_version.h @@ -35,7 +35,7 @@ extern "C" { #endif /* NOTE: Match version with CMakeLists.txt */ #define ROCDECODE_MAJOR_VERSION 0 -#define ROCDECODE_MINOR_VERSION 9 +#define ROCDECODE_MINOR_VERSION 10 #define ROCDECODE_MICRO_VERSION 0 diff --git a/samples/common.h b/samples/common.h index 20be0db9c7..1fe16691fc 100644 --- a/samples/common.h +++ b/samples/common.h @@ -23,6 +23,7 @@ THE SOFTWARE. #pragma once #include "roc_video_dec.h" +#include "md5.h" typedef enum ReconfigFlushMode_enum { RECONFIG_FLUSH_MODE_NONE = 0, /**< Just flush to get the frame count */ @@ -34,6 +35,7 @@ typedef enum ReconfigFlushMode_enum { typedef struct ReconfigDumpFileStruct_t { bool b_dump_frames_to_file; std::string output_file_name; + void *md5_generator_handle; } ReconfigDumpFileStruct; @@ -53,13 +55,14 @@ int ReconfigureFlushCallback(void *p_viddec_obj, uint32_t flush_mode, void *p_us int64_t pts; while ((pframe = viddec->GetFrame(&pts))) { if (flush_mode != RECONFIG_FLUSH_MODE_NONE) { + ReconfigDumpFileStruct *p_dump_file_struct = static_cast(p_user_struct); if (flush_mode == ReconfigFlushMode::RECONFIG_FLUSH_MODE_DUMP_TO_FILE) { - ReconfigDumpFileStruct *p_dump_file_struct = static_cast(p_user_struct); if (p_dump_file_struct->b_dump_frames_to_file) { viddec->SaveFrameToFile(p_dump_file_struct->output_file_name, pframe, surf_info); } } else if (flush_mode == ReconfigFlushMode::RECONFIG_FLUSH_MODE_CALCULATE_MD5) { - viddec->UpdateMd5ForFrame(pframe, surf_info); + MD5Generator *md5_generator = static_cast(p_dump_file_struct->md5_generator_handle); + 
md5_generator->UpdateMd5ForFrame(pframe, surf_info); } } // release and flush frame diff --git a/samples/videoDecode/README.md b/samples/videoDecode/README.md index ef80b48ab3..6eace3c4ce 100644 --- a/samples/videoDecode/README.md +++ b/samples/videoDecode/README.md @@ -38,4 +38,5 @@ make -j -md5_check MD5_File_Path -crop -m + -no_ffmpeg_demux ``` \ No newline at end of file diff --git a/samples/videoDecode/videodecode.cpp b/samples/videoDecode/videodecode.cpp index eb7d67cfc2..69ae40a30f 100644 --- a/samples/videoDecode/videodecode.cpp +++ b/samples/videoDecode/videodecode.cpp @@ -37,6 +37,7 @@ THE SOFTWARE. #include #endif #include "video_demuxer.h" +#include "roc_bitstream_reader.h" #include "roc_video_dec.h" #include "ffmpeg_video_dec.h" #include "common.h" @@ -59,7 +60,8 @@ void ShowHelpAndExit(const char *option = NULL) { << "-seek_criteria - Demux seek criteria & value - optional; default - 0,0; " << "[0: no seek; 1: SEEK_CRITERIA_FRAME_NUM, frame number; 2: SEEK_CRITERIA_TIME_STAMP, frame number (time calculated internally)]" << std::endl << "-seek_mode - Seek to previous key frame or exact - optional; default - 0" - << "[0: SEEK_MODE_PREV_KEY_FRAME; 1: SEEK_MODE_EXACT_FRAME]" << std::endl; + << "[0: SEEK_MODE_PREV_KEY_FRAME; 1: SEEK_MODE_EXACT_FRAME]" << std::endl + << "-no_ffmpeg_demux - use the built-in bitstream reader instead of FFMPEG demuxer to obtain picture data; optional." << std::endl; exit(0); } @@ -85,6 +87,7 @@ int main(int argc, char **argv) { // seek options uint64_t seek_to_frame = 0; int seek_criteria = 0, seek_mode = 0; + bool b_use_ffmpeg_demuxer = true; // true by default to use FFMPEG demuxer. set to false to use the built-in bitstream reader. 
// Parse command-line arguments if(argc <= 1) { @@ -207,6 +210,13 @@ int main(int argc, char **argv) { ShowHelpAndExit("-seek_mode"); continue; } + if (!strcmp(argv[i], "-no_ffmpeg_demux")) { + if (i == argc) { + ShowHelpAndExit("-no_ffmpeg_demux"); + } + b_use_ffmpeg_demuxer = false; + continue; + } ShowHelpAndExit(argv[i]); } @@ -214,10 +224,38 @@ int main(int argc, char **argv) { try { std::size_t found_file = input_file_path.find_last_of('/'); std::cout << "info: Input file: " << input_file_path.substr(found_file + 1) << std::endl; + VideoDemuxer *demuxer; + RocdecBitstreamReader bs_reader = nullptr; + rocDecVideoCodec rocdec_codec_id; + int bit_depth; + + if (b_use_ffmpeg_demuxer) { + std::cout << "info: Using FFMPEG demuxer" << std::endl; + demuxer = new VideoDemuxer(input_file_path.c_str()); + rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer->GetCodecID()); + bit_depth = demuxer->GetBitDepth(); + } else { + std::cout << "info: Using built-in bitstream reader" << std::endl; + if (rocDecCreateBitstreamReader(&bs_reader, input_file_path.c_str()) != ROCDEC_SUCCESS) { + std::cerr << "Failed to create the bitstream reader." << std::endl; + return 1; + } + if (rocDecGetBitstreamCodecType(bs_reader, &rocdec_codec_id) != ROCDEC_SUCCESS) { + std::cerr << "Failed to get stream codec type." << std::endl; + return 1; + } + if (rocdec_codec_id >= rocDecVideoCodec_NumCodecs) { + std::cerr << "Unsupported stream file type or codec type by the bitstream reader. Exiting." << std::endl; + return 1; + } + if (rocDecGetBitstreamBitDepth(bs_reader, &bit_depth) != ROCDEC_SUCCESS) { + std::cerr << "Failed to get stream bit depth." 
<< std::endl; + return 1; + } + } + RocVideoDecoder *viddec; - VideoDemuxer demuxer(input_file_path.c_str()); VideoSeekContext video_seek_ctx; - rocDecVideoCodec rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer.GetCodecID()); if (!backend) // gpu backend viddec = new RocVideoDecoder(device_id, mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, b_extract_sei_messages, disp_delay); else { @@ -230,10 +268,10 @@ int main(int argc, char **argv) { viddec = new FFMpegVideoDecoder(device_id, mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, b_extract_sei_messages, disp_delay, true); } - if(!viddec->CodecSupported(device_id, rocdec_codec_id, demuxer.GetBitDepth())) { + if(!viddec->CodecSupported(device_id, rocdec_codec_id, bit_depth)) { std::cerr << "rocDecode doesn't support codec!" << std::endl; return 0; - } + } std::string device_name, gcn_arch_name; int pci_bus_id, pci_domain_id, pci_device_id; @@ -253,6 +291,8 @@ int main(int argc, char **argv) { uint32_t width, height; double total_dec_time = 0; bool first_frame = true; + MD5Generator *md5_generator = nullptr; + // initialize reconfigure params: the following is configured to dump to output which is relevant for this sample reconfig_params.p_fn_reconfigure_flush = ReconfigureFlushCallback; reconfig_user_struct.b_dump_frames_to_file = dump_output_frames; @@ -267,32 +307,41 @@ int main(int argc, char **argv) { reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; if (b_generate_md5) { - viddec->InitMd5(); + md5_generator = new MD5Generator(); + md5_generator->InitMd5(); + reconfig_user_struct.md5_generator_handle = static_cast(md5_generator); } viddec->SetReconfigParams(&reconfig_params); do { auto start_time = std::chrono::high_resolution_clock::now(); - if (seek_criteria == 1 && first_frame) { - // use VideoSeekContext class to seek to given frame number - video_seek_ctx.seek_frame_ = seek_to_frame; - video_seek_ctx.seek_crit_ = SEEK_CRITERIA_FRAME_NUM; - video_seek_ctx.seek_mode_ = 
(seek_mode ? SEEK_MODE_EXACT_FRAME : SEEK_MODE_PREV_KEY_FRAME); - demuxer.Seek(video_seek_ctx, &pvideo, &n_video_bytes); - pts = video_seek_ctx.out_frame_pts_; - std::cout << "info: Number of frames that were decoded during seek - " << video_seek_ctx.num_frames_decoded_ << std::endl; - first_frame = false; - } else if (seek_criteria == 2 && first_frame) { - // use VideoSeekContext class to seek to given timestamp - video_seek_ctx.seek_frame_ = seek_to_frame; - video_seek_ctx.seek_crit_ = SEEK_CRITERIA_TIME_STAMP; - video_seek_ctx.seek_mode_ = (seek_mode ? SEEK_MODE_EXACT_FRAME : SEEK_MODE_PREV_KEY_FRAME); - demuxer.Seek(video_seek_ctx, &pvideo, &n_video_bytes); - pts = video_seek_ctx.out_frame_pts_; - std::cout << "info: Duration of frame found after seek - " << video_seek_ctx.out_frame_duration_ << " ms" << std::endl; - first_frame = false; + if (b_use_ffmpeg_demuxer) { + if (seek_criteria == 1 && first_frame) { + // use VideoSeekContext class to seek to given frame number + video_seek_ctx.seek_frame_ = seek_to_frame; + video_seek_ctx.seek_crit_ = SEEK_CRITERIA_FRAME_NUM; + video_seek_ctx.seek_mode_ = (seek_mode ? SEEK_MODE_EXACT_FRAME : SEEK_MODE_PREV_KEY_FRAME); + demuxer->Seek(video_seek_ctx, &pvideo, &n_video_bytes); + pts = video_seek_ctx.out_frame_pts_; + std::cout << "info: Number of frames that were decoded during seek - " << video_seek_ctx.num_frames_decoded_ << std::endl; + first_frame = false; + } else if (seek_criteria == 2 && first_frame) { + // use VideoSeekContext class to seek to given timestamp + video_seek_ctx.seek_frame_ = seek_to_frame; + video_seek_ctx.seek_crit_ = SEEK_CRITERIA_TIME_STAMP; + video_seek_ctx.seek_mode_ = (seek_mode ? 
SEEK_MODE_EXACT_FRAME : SEEK_MODE_PREV_KEY_FRAME); + demuxer->Seek(video_seek_ctx, &pvideo, &n_video_bytes); + pts = video_seek_ctx.out_frame_pts_; + std::cout << "info: Duration of frame found after seek - " << video_seek_ctx.out_frame_duration_ << " ms" << std::endl; + first_frame = false; + } else { + demuxer->Demux(&pvideo, &n_video_bytes, &pts); + } } else { - demuxer.Demux(&pvideo, &n_video_bytes, &pts); + if (rocDecGetBitstreamPicData(bs_reader, &pvideo, &n_video_bytes, &pts) != ROCDEC_SUCCESS) { + std::cerr << "Failed to get picture data." << std::endl; + return 1; + } } // Treat 0 bitstream size as end of stream indicator if (n_video_bytes == 0) { @@ -307,7 +356,7 @@ int main(int argc, char **argv) { for (int i = 0; i < n_frame_returned; i++) { pframe = viddec->GetFrame(&pts); if (b_generate_md5) { - viddec->UpdateMd5ForFrame(pframe, surf_info); + md5_generator->UpdateMd5ForFrame(pframe, surf_info); } if (dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) { viddec->SaveFrameToFile(output_file_path, pframe, surf_info); @@ -323,7 +372,6 @@ int main(int argc, char **argv) { if (num_decoded_frames && num_decoded_frames <= n_frame) { break; } - } while (n_video_bytes); n_frame += viddec->GetNumOfFlushedFrames(); @@ -343,7 +391,7 @@ int main(int argc, char **argv) { } if (b_generate_md5) { uint8_t *digest; - viddec->FinalizeMd5(&digest); + md5_generator->FinalizeMd5(&digest); std::cout << "MD5 message digest: "; for (int i = 0; i < 16; i++) { std::cout << std::setfill('0') << std::setw(2) << std::hex << static_cast(digest[i]); @@ -371,9 +419,15 @@ int main(int argc, char **argv) { } else { std::cout << "MD5 digest does not match the reference MD5 digest: "; } - std::cout << ref_md5_string << std::endl; + std::cout << ref_md5_string.c_str() << std::endl; ref_md5_file.close(); } + delete md5_generator; + } + if (b_use_ffmpeg_demuxer && demuxer) { + delete demuxer; + } else if (bs_reader) { + rocDecDestroyBitstreamReader(bs_reader); } } catch (const 
std::exception &ex) { std::cout << ex.what() << std::endl; diff --git a/samples/videoDecodeMem/videodecodemem.cpp b/samples/videoDecodeMem/videodecodemem.cpp index 8df099b958..0c3491c9d8 100644 --- a/samples/videoDecodeMem/videodecodemem.cpp +++ b/samples/videoDecodeMem/videodecodemem.cpp @@ -37,6 +37,7 @@ THE SOFTWARE. #endif #include "video_demuxer.h" #include "roc_video_dec.h" +#include "md5.h" class FileStreamProvider : public VideoDemuxer::StreamProvider { public: @@ -210,9 +211,11 @@ int main(int argc, char **argv) { OutputSurfaceInfo *surf_info; uint32_t width, height; double total_dec_time = 0; + MD5Generator *md5_generator = nullptr; if (b_generate_md5) { - viddec.InitMd5(); + md5_generator = new MD5Generator(); + md5_generator->InitMd5(); } do { @@ -233,7 +236,7 @@ int main(int argc, char **argv) { for (int i = 0; i < n_frame_returned; i++) { pframe = viddec.GetFrame(&pts); if (b_generate_md5) { - viddec.UpdateMd5ForFrame(pframe, surf_info); + md5_generator->UpdateMd5ForFrame(pframe, surf_info); } if (dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) { viddec.SaveFrameToFile(output_file_path, pframe, surf_info); @@ -258,7 +261,7 @@ int main(int argc, char **argv) { } if (b_generate_md5) { uint8_t *digest; - viddec.FinalizeMd5(&digest); + md5_generator->FinalizeMd5(&digest); std::cout << "MD5 message digest: "; for (int i = 0; i < 16; i++) { std::cout << std::setfill('0') << std::setw(2) << std::hex << static_cast(digest[i]); @@ -289,6 +292,7 @@ int main(int argc, char **argv) { std::cout << ref_md5_string << std::endl; ref_md5_file.close(); } + delete md5_generator; } } catch (const std::exception &ex) { std::cout << ex.what() << std::endl; diff --git a/samples/videoDecodeRGB/videodecrgb.cpp b/samples/videoDecodeRGB/videodecrgb.cpp index c58137e9c1..fc0d0c1925 100644 --- a/samples/videoDecodeRGB/videodecrgb.cpp +++ b/samples/videoDecodeRGB/videodecrgb.cpp @@ -42,6 +42,7 @@ THE SOFTWARE. 
#include "video_demuxer.h" #include "roc_video_dec.h" #include "video_post_process.h" +#include "md5.h" std::vector st_output_format_name = {"native", "bgr", "bgr48", "rgb", "rgb48", "bgra", "bgra64", "rgba", "rgba64"}; @@ -65,7 +66,7 @@ std::condition_variable cv[frame_buffers_size]; void ColorSpaceConversionThread(std::atomic& continue_processing, bool convert_to_rgb, Dim *p_resize_dim, OutputSurfaceInfo **surf_info, OutputSurfaceInfo **res_surf_info, OutputFormatEnum e_output_format, uint8_t *p_rgb_dev_mem, uint8_t *p_resize_dev_mem, bool dump_output_frames, - std::string &output_file_path, RocVideoDecoder &viddec, VideoPostProcess &post_proc, bool b_generate_md5) { + std::string &output_file_path, RocVideoDecoder &viddec, VideoPostProcess &post_proc, MD5Generator *md5_gen_handle, bool b_generate_md5) { size_t rgb_image_size, resize_image_size; hipError_t hip_status = hipSuccess; @@ -138,7 +139,7 @@ void ColorSpaceConversionThread(std::atomic& continue_processing, bool con viddec.SaveFrameToFile(output_file_path, out_frame, p_surf_info); } if(b_generate_md5 && convert_to_rgb){ - viddec.UpdateMd5ForDataBuffer(p_rgb_dev_mem, rgb_image_size); + md5_gen_handle->UpdateMd5ForDataBuffer(p_rgb_dev_mem, rgb_image_size); } @@ -270,6 +271,7 @@ int main(int argc, char **argv) { return 0; } VideoPostProcess post_process; + MD5Generator *md5_generator = nullptr; std::string device_name, gcn_arch_name; int pci_bus_id, pci_domain_id, pci_device_id; @@ -282,7 +284,8 @@ int main(int argc, char **argv) { std::cout << "info: decoding started, please wait!" 
<< std::endl; if (b_generate_md5) { - viddec.InitMd5(); + md5_generator = new MD5Generator(); + md5_generator->InitMd5(); } int n_video_bytes = 0, n_frames_returned = 0, n_frame = 0; @@ -296,7 +299,7 @@ int main(int argc, char **argv) { convert_to_rgb = e_output_format != native; std::atomic continue_processing(true); std::thread color_space_conversion_thread(ColorSpaceConversionThread, std::ref(continue_processing), std::ref(convert_to_rgb), &resize_dim, &surf_info, &resize_surf_info, std::ref(e_output_format), - std::ref(p_rgb_dev_mem), std::ref(p_resize_dev_mem), std::ref(dump_output_frames), std::ref(output_file_path), std::ref(viddec), std::ref(post_process), b_generate_md5); + std::ref(p_rgb_dev_mem), std::ref(p_resize_dev_mem), std::ref(dump_output_frames), std::ref(output_file_path), std::ref(viddec), std::ref(post_process), md5_generator, b_generate_md5); auto startTime = std::chrono::high_resolution_clock::now(); do { @@ -379,7 +382,7 @@ int main(int argc, char **argv) { } if (b_generate_md5) { uint8_t *digest; - viddec.FinalizeMd5(&digest); + md5_generator->FinalizeMd5(&digest); std::cout << "MD5 message digest: "; for (int i = 0; i < 16; i++) { std::cout << std::setfill('0') << std::setw(2) << std::hex << static_cast(digest[i]); @@ -410,6 +413,7 @@ int main(int argc, char **argv) { std::cout << ref_md5_string << std::endl; ref_md5_file.close(); } + delete md5_generator; } } catch (const std::exception &ex) { std::cout << ex.what() << std::endl; diff --git a/samples/videoDecodeRaw/CMakeLists.txt b/samples/videoDecodeRaw/CMakeLists.txt new file mode 100644 index 0000000000..01b78879b7 --- /dev/null +++ b/samples/videoDecodeRaw/CMakeLists.txt @@ -0,0 +1,86 @@ +################################################################################ +# Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +################################################################################ + +cmake_minimum_required(VERSION 3.10) + + +# ROCM Path +if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}") +elseif(ROCM_PATH) + message("-- ${White}${PROJECT_NAME} :ROCM_PATH Set -- ${ROCM_PATH}${ColourReset}") +else() + set(ROCM_PATH /opt/rocm CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}") +endif() +# Set AMD Clang as default compiler +if (NOT DEFINED CMAKE_CXX_COMPILER) + set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/amdclang++) +endif() + +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake) +list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH}) + +project(videodecoderaw) +set(CMAKE_CXX_STANDARD 17) + +# rocDecode sample build type +set(DEFAULT_BUILD_TYPE "Release") +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "rocDecode Default Build Type" FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release") +endif() +if(CMAKE_BUILD_TYPE MATCHES Debug) + # -O0 -- Don't Optimize output file + # -gdwarf-4 -- generate debugging information, dwarf-4 for making valgrind work + # -Og -- Optimize for debugging experience rather than speed or size + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -gdwarf-4 -Og") +else() + # -O3 -- Optimize output file + # -DNDEBUG -- turn off asserts + # -fPIC -- Generate position-independent code if possible + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -fPIC") +endif() + +find_package(HIP QUIET) +find_package(rocDecode QUIET) + +if(HIP_FOUND AND ROCDECODE_FOUND) + # HIP + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::host) + # rocDecode and utils + include_directories (${ROCDECODE_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/../../utils ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCDECODE_LIBRARY}) + # sample app exe + list(APPEND SOURCES ${PROJECT_SOURCE_DIR} videodecoderaw.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp) + add_executable(${PROJECT_NAME} ${SOURCES}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17") + target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST}) +else() + message("-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!") + if (NOT HIP_FOUND) + message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!") + endif() + if (NOT ROCDECODE_FOUND) + message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!") + endif() +endif() diff --git a/samples/videoDecodeRaw/README.md b/samples/videoDecodeRaw/README.md new file mode 100644 index 0000000000..a948f20d77 --- /dev/null +++ b/samples/videoDecodeRaw/README.md @@ -0,0 +1,29 @@ +# Video decode sample + +The video decode raw sample illustrates decoding a single packetized video stream using the built-in bitstream reader, video parser, and rocDecoder to get the individual decoded frames in YUV format. This sample can be configured with a device ID and optionally able to dump the output to a file. This sample uses the high-level RocVideoDecoder class which connects both the video parser and Rocdecoder. This process repeats in a loop until all frames have been decoded. 
+ +## Prerequisites: + +* Install [rocDecode](../../README.md#build-and-install-instructions) + +## Build + +```shell +mkdir video_decode_raw_sample && cd video_decode_raw_sample +cmake ../ +make -j +``` + +## Run + +```shell +./videodecoderaw -i + -o + -d + -f + -z + -disp_delay + -sei + -crop + -m +``` \ No newline at end of file diff --git a/samples/videoDecodeRaw/videodecoderaw.cpp b/samples/videoDecodeRaw/videodecoderaw.cpp new file mode 100644 index 0000000000..18329eb7e7 --- /dev/null +++ b/samples/videoDecodeRaw/videodecoderaw.cpp @@ -0,0 +1,322 @@ +/* +Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() + #include +#else + #include +#endif + +#include "roc_bitstream_reader.h" +#include "roc_video_dec.h" + +typedef enum ReconfigFlushMode_enum { + RECONFIG_FLUSH_MODE_NONE = 0, /**< Just flush to get the frame count */ + RECONFIG_FLUSH_MODE_DUMP_TO_FILE = 1, /**< The remaining frames will be dumped to file in this mode */ + RECONFIG_FLUSH_MODE_CALCULATE_MD5 = 2, /**< Calculate the MD5 of the flushed frames */ +} ReconfigFlushMode; + +// this struct is used by videodecode and videodecodeMultiFiles to dump last frames to file +typedef struct ReconfigDumpFileStruct_t { + bool b_dump_frames_to_file; + std::string output_file_name; + void *md5_generator_handle; +} ReconfigDumpFileStruct; + + +// callback function to flush last frames and save it to file when reconfigure happens +int ReconfigureFlushCallback(void *p_viddec_obj, uint32_t flush_mode, void *p_user_struct) { + int n_frames_flushed = 0; + if ((p_viddec_obj == nullptr) || (p_user_struct == nullptr)) return n_frames_flushed; + + RocVideoDecoder *viddec = static_cast (p_viddec_obj); + OutputSurfaceInfo *surf_info; + if (!viddec->GetOutputSurfaceInfo(&surf_info)) { + std::cerr << "Error: Failed to get Output Surface Info!" 
<< std::endl; + return n_frames_flushed; + } + + uint8_t *pframe = nullptr; + int64_t pts; + while ((pframe = viddec->GetFrame(&pts))) { + if (flush_mode != RECONFIG_FLUSH_MODE_NONE) { + ReconfigDumpFileStruct *p_dump_file_struct = static_cast(p_user_struct); + if (flush_mode == ReconfigFlushMode::RECONFIG_FLUSH_MODE_DUMP_TO_FILE) { + if (p_dump_file_struct->b_dump_frames_to_file) { + viddec->SaveFrameToFile(p_dump_file_struct->output_file_name, pframe, surf_info); + } + } + } + // release and flush frame + viddec->ReleaseFrame(pts, true); + n_frames_flushed ++; + } + + return n_frames_flushed; +} + +void ShowHelpAndExit(const char *option = NULL) { + std::cout << "Options:" << std::endl + << "-i Input File Path - required" << std::endl + << "-o Output File Path - dumps output if requested; optional" << std::endl + << "-d GPU device ID (0 for the first device, 1 for the second, etc.); optional; default: 0" << std::endl + << "-f Number of decoded frames - specify the number of pictures to be decoded; optional" << std::endl + << "-z force_zero_latency (force_zero_latency, Decoded frames will be flushed out for display immediately); optional;" << std::endl + << "-disp_delay -specify the number of frames to be delayed for display; optional; default: 1" << std::endl + << "-sei extract SEI messages; optional;" << std::endl + << "-crop crop rectangle for output (not used when using interopped decoded frame); optional; default: 0" << std::endl + << "-m output_surface_memory_type - decoded surface memory; optional; default - 0" + << " [0 : OUT_SURFACE_MEM_DEV_INTERNAL/ 1 : OUT_SURFACE_MEM_DEV_COPIED/ 2 : OUT_SURFACE_MEM_HOST_COPIED/ 3 : OUT_SURFACE_MEM_NOT_MAPPED]" << std::endl; + exit(0); +} + +int main(int argc, char **argv) { + std::string input_file_path, output_file_path; + int dump_output_frames = 0; + int device_id = 0; + int disp_delay = 1; + bool b_force_zero_latency = false; // false by default: enabling this option might affect decoding performance + bool 
b_extract_sei_messages = false; + bool b_flush_frames_during_reconfig = true; + Rect crop_rect = {}; + Rect *p_crop_rect = nullptr; + OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; // set to internal + ReconfigParams reconfig_params = { 0 }; + ReconfigDumpFileStruct reconfig_user_struct = { 0 }; + uint32_t num_decoded_frames = 0; // default value is 0, meaning decode the entire stream + + // Parse command-line arguments + if(argc <= 1) { + ShowHelpAndExit(); + } + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-h")) { + ShowHelpAndExit(); + } + if (!strcmp(argv[i], "-i")) { + if (++i == argc) { + ShowHelpAndExit("-i"); + } + input_file_path = argv[i]; + continue; + } + if (!strcmp(argv[i], "-o")) { + if (++i == argc) { + ShowHelpAndExit("-o"); + } + output_file_path = argv[i]; + dump_output_frames = 1; + continue; + } + if (!strcmp(argv[i], "-d")) { + if (++i == argc) { + ShowHelpAndExit("-d"); + } + device_id = atoi(argv[i]); + continue; + } + if (!strcmp(argv[i], "-disp_delay")) { + if (++i == argc) { + ShowHelpAndExit("-disp_delay"); + } + disp_delay = atoi(argv[i]); + continue; + } + if (!strcmp(argv[i], "-f")) { + if (++i == argc) { + ShowHelpAndExit("-d"); + } + num_decoded_frames = atoi(argv[i]); + continue; + } + if (!strcmp(argv[i], "-z")) { + if (i == argc) { + ShowHelpAndExit("-z"); + } + b_force_zero_latency = true; + continue; + } + if (!strcmp(argv[i], "-sei")) { + if (i == argc) { + ShowHelpAndExit("-sei"); + } + b_extract_sei_messages = true; + continue; + } + if (!strcmp(argv[i], "-crop")) { + if (++i == argc || 4 != sscanf(argv[i], "%d,%d,%d,%d", &crop_rect.left, &crop_rect.top, &crop_rect.right, &crop_rect.bottom)) { + ShowHelpAndExit("-crop"); + } + if ((crop_rect.right - crop_rect.left) % 2 == 1 || (crop_rect.bottom - crop_rect.top) % 2 == 1) { + std::cout << "output crop rectangle must have width and height of even numbers" << std::endl; + exit(1); + } + p_crop_rect = &crop_rect; + continue; + } + if 
(!strcmp(argv[i], "-m")) { + if (++i == argc) { + ShowHelpAndExit("-m"); + } + mem_type = static_cast(atoi(argv[i])); + continue; + } + if (!strcmp(argv[i], "flush")) { + b_flush_frames_during_reconfig = atoi(argv[i]) ? true : false; + continue; + } + + ShowHelpAndExit(argv[i]); + } + + try { + std::size_t found_file = input_file_path.find_last_of('/'); + std::cout << "info: Input file: " << input_file_path.substr(found_file + 1) << std::endl; + std::cout << "info: Using built-in bitstream reader" << std::endl; + RocdecBitstreamReader bs_reader = nullptr; + rocDecVideoCodec rocdec_codec_id; + int bit_depth; + if (rocDecCreateBitstreamReader(&bs_reader, input_file_path.c_str()) != ROCDEC_SUCCESS) { + std::cerr << "Failed to create the bitstream reader." << std::endl; + return 1; + } + if (rocDecGetBitstreamCodecType(bs_reader, &rocdec_codec_id) != ROCDEC_SUCCESS) { + std::cerr << "Failed to get stream codec type." << std::endl; + return 1; + } + if (rocdec_codec_id >= rocDecVideoCodec_NumCodecs) { + std::cerr << "Unsupported stream file type or codec type by the bitstream reader. Exiting." << std::endl; + return 1; + } + if (rocDecGetBitstreamBitDepth(bs_reader, &bit_depth) != ROCDEC_SUCCESS) { + std::cerr << "Failed to get stream bit depth." << std::endl; + return 1; + } + + RocVideoDecoder viddec(device_id, mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, b_extract_sei_messages, disp_delay); + if(!viddec.CodecSupported(device_id, rocdec_codec_id, bit_depth)) { + std::cerr << "GPU doesn't support codec!" 
<< std::endl; + return 0; + } + std::string device_name, gcn_arch_name; + int pci_bus_id, pci_domain_id, pci_device_id; + + viddec.GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id); + std::cout << "info: Using GPU device " << device_id << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " << + std::setfill('0') << std::setw(2) << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') << std::setw(2) << + std::right << std::hex << pci_domain_id << "." << pci_device_id << std::dec << std::endl; + std::cout << "info: decoding started, please wait!" << std::endl; + + int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0; + int n_pic_decoded = 0, decoded_pics = 0; + uint8_t *pvideo = nullptr; + int pkg_flags = 0; + uint8_t *pframe = nullptr; + int64_t pts = 0; + OutputSurfaceInfo *surf_info; + uint32_t width, height; + double total_dec_time = 0; + bool first_frame = true; + // initialize reconfigure params: the following is configured to dump to output which is relevant for this sample + reconfig_params.p_fn_reconfigure_flush = ReconfigureFlushCallback; + reconfig_user_struct.b_dump_frames_to_file = dump_output_frames; + reconfig_user_struct.output_file_name = output_file_path; + if (dump_output_frames) { + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_DUMP_TO_FILE; + } else { + reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_NONE; + } + reconfig_params.p_reconfig_user_struct = &reconfig_user_struct; + + viddec.SetReconfigParams(&reconfig_params); + + do { + auto start_time = std::chrono::high_resolution_clock::now(); + if (rocDecGetBitstreamPicData(bs_reader, &pvideo, &n_video_bytes, &pts) != ROCDEC_SUCCESS) { + std::cerr << "Failed to get picture data." 
<< std::endl; + return 1; + } + // Treat 0 bitstream size as end of stream indicator + if (n_video_bytes == 0) { + pkg_flags |= ROCDEC_PKT_ENDOFSTREAM; + } + n_frame_returned = viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts, &decoded_pics); + + if (!n_frame && !viddec.GetOutputSurfaceInfo(&surf_info)) { + std::cerr << "Error: Failed to get Output Surface Info!" << std::endl; + break; + } + for (int i = 0; i < n_frame_returned; i++) { + pframe = viddec.GetFrame(&pts); + if (dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) { + viddec.SaveFrameToFile(output_file_path, pframe, surf_info); + } + // release frame + viddec.ReleaseFrame(pts); + } + auto end_time = std::chrono::high_resolution_clock::now(); + auto time_per_decode = std::chrono::duration(end_time - start_time).count(); + total_dec_time += time_per_decode; + n_frame += n_frame_returned; + n_pic_decoded += decoded_pics; + if (num_decoded_frames && num_decoded_frames <= n_frame) { + break; + } + + } while (n_video_bytes); + + n_frame += viddec.GetNumOfFlushedFrames(); + std::cout << "info: Total pictures decoded: " << n_pic_decoded << std::endl; + std::cout << "info: Total frames output/displayed: " << n_frame << std::endl; + if (!dump_output_frames) { + std::cout << "info: avg decoding time per picture: " << total_dec_time / n_pic_decoded << " ms" < +#include +#include "roc_bitstream_reader.h" +#include "es_reader.h" + +class RocBitstreamReaderHandle { +public: + explicit RocBitstreamReaderHandle(const char *input_file_path) : bs_reader_(std::make_shared(input_file_path)) {}; + ~RocBitstreamReaderHandle() { ClearErrors(); } + bool NoError() { return error_.empty(); } + const char* ErrorMsg() { return error_.c_str(); } + void CaptureError(const std::string& err_msg) { error_ = err_msg; } + rocDecStatus GetBitstreamCodecType(rocDecVideoCodec *codec_type) { *codec_type = bs_reader_->GetCodecId(); return ROCDEC_SUCCESS; } + rocDecStatus GetBitstreamBitDepth(int *bit_depth) { *bit_depth = 
bs_reader_->GetBitDepth(); return ROCDEC_SUCCESS; } + rocDecStatus GetBitstreamPicData(uint8_t **pic_data, int *pic_size, int64_t *pts) { return static_cast(bs_reader_->GetPicData(pic_data, pic_size, pts)); } + +private: + std::shared_ptr bs_reader_ = nullptr; + void ClearErrors() { error_ = ""; } + + std::string error_; +}; \ No newline at end of file diff --git a/src/bit_stream_reader/es_reader.cpp b/src/bit_stream_reader/es_reader.cpp new file mode 100644 index 0000000000..11d4aa2b7f --- /dev/null +++ b/src/bit_stream_reader/es_reader.cpp @@ -0,0 +1,1145 @@ +/* +Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include "es_reader.h" +#include "hevc_defines.h" +#include "avc_defines.h" +#include "av1_defines.h" +#include "roc_video_parser.h" + +RocVideoESParser::RocVideoESParser(const char *input_file_path) { + p_stream_file_.open(input_file_path, std::ifstream::in | std::ifstream::binary); + if (!p_stream_file_) { + ERR("Failed to open the bitstream file."); + } + end_of_file_ = false; + end_of_stream_ = false; + read_ptr_ = 0; + write_ptr_ = 0; + curr_byte_offset_ = read_ptr_; + pic_data_.assign(INIT_PIC_DATA_SIZE, 0); + pic_data_size_ = 0; + curr_pic_end_ = 0; + next_pic_start_ = 0; + num_pictures_ = 0; + num_start_code_ = 0; + curr_start_code_offset_ = 0; + next_start_code_offset_ = 0; + obu_byte_offset_ = 0; + obu_size_ = 0; + num_td_obus_ = 0; + num_temp_units_ = 0; + ivf_file_header_read_ = false; + + stream_type_ = ProbeStreamType(); + bit_depth_ = 8; +} + +RocVideoESParser::~RocVideoESParser() { + if (p_stream_file_) { + p_stream_file_.close(); + } +} + +int RocVideoESParser::GetDataSizeInRB() { + if (read_ptr_ == write_ptr_) { + return 0; + } else if (read_ptr_ < write_ptr_) { + return write_ptr_ - read_ptr_; + } else { + return BS_RING_SIZE - read_ptr_ + write_ptr_; + } +} + +int RocVideoESParser::FetchBitStream() +{ + int free_space; + int read_size; + int total_read_size = 0; + + // A full ring has BS_RING_SIZE - 1 bytes + free_space = BS_RING_SIZE - 1 - GetDataSizeInRB(); + if (free_space == 0) { + return 0; + } + + // First fill the ending part of the ring + if (write_ptr_ >= read_ptr_) { + int fill_space = BS_RING_SIZE - (write_ptr_ == 0 ? 1 : write_ptr_); + read_size = p_stream_file_.read(reinterpret_cast(&bs_ring_[write_ptr_]), fill_space).gcount(); + if (read_size > 0) { + write_ptr_ = (write_ptr_ + read_size) % BS_RING_SIZE; // when we still have more bytes to fill, write_ptr_ becomes 0 to continue to the next step. 
+ } + if (read_size < fill_space) { + end_of_file_ = true; + } + total_read_size += read_size; + if (end_of_file_) { + return total_read_size; + } + free_space -= read_size; + if (free_space == 0) { + return total_read_size; + } + } + + // Continue filling the beginning part of the ring + if (read_ptr_ > 0) { + read_size = p_stream_file_.read(reinterpret_cast(&bs_ring_[write_ptr_]), free_space).gcount(); + if (read_size > 0) { + write_ptr_ = (write_ptr_ + read_size) % BS_RING_SIZE; + } + if (read_size < free_space) { + end_of_file_ = true; + } + total_read_size += read_size; + } + return total_read_size; +} + +bool RocVideoESParser::GetByte(int offset, uint8_t *data) { + offset = offset % BS_RING_SIZE; + if (offset == write_ptr_) { + if (FetchBitStream() == 0) { + end_of_stream_ = true; + return false; + } + } + *data = bs_ring_[offset]; + return true; +} + +bool RocVideoESParser::ReadBytes(int offset, int size, uint8_t *data) { + offset = offset % BS_RING_SIZE; + if (size > GetDataSizeInRB()) { + if (FetchBitStream() == 0) { + end_of_stream_ = true; + return false; + } + if (size > GetDataSizeInRB()) { + ERR("Could not read the requested bytes from ring buffer. 
Either ring buffer size is too small or not enough bytes left."); + return false; + } + } + if (offset + size > BS_RING_SIZE) { + int part = BS_RING_SIZE - offset; + memcpy(data, &bs_ring_[offset], part); + memcpy(&data[part], &bs_ring_[0], size - part); + } else { + memcpy(data, &bs_ring_[offset], size); + } + return true; +} + +void RocVideoESParser::SetReadPointer(int value) { + read_ptr_ = value % BS_RING_SIZE; +} + +bool RocVideoESParser::FindStartCode() { + uint8_t three_bytes[3]; + int i; + + curr_start_code_offset_ = next_start_code_offset_; + + // Search for the next start code + while (!end_of_stream_) { + for (i = 0; i < 3; i++) { + if (GetByte(curr_byte_offset_ + i, three_bytes + i) == false) { + break; + } + } + if (i < 3) { + break; + } + + if (three_bytes[0] == 0 && three_bytes[1] == 0 && three_bytes[2] == 0x01) { + num_start_code_++; + next_start_code_offset_ = curr_byte_offset_; + // Move the pointer 3 bytes forward + curr_byte_offset_ = (curr_byte_offset_ + 3) % BS_RING_SIZE; + + // For the very first NAL unit, search for the next start code (or reach the end of frame) + if (num_start_code_ == 1) { + curr_start_code_offset_ = next_start_code_offset_; + continue; + } else { + break; + } + } + curr_byte_offset_ = (curr_byte_offset_ + 1) % BS_RING_SIZE; + } + return num_start_code_ ? true : false; +} + +void RocVideoESParser::CopyNalUnitFromRing() { + int nal_start, nal_end_plus_1; + int nal_size; + nal_start = curr_start_code_offset_; + nal_end_plus_1 = curr_start_code_offset_ != next_start_code_offset_ ? 
next_start_code_offset_ : write_ptr_; + if (nal_end_plus_1 >= nal_start) { + nal_size = nal_end_plus_1 - nal_start; + if ((pic_data_size_ + nal_size) > pic_data_.size()) { + pic_data_.resize(pic_data_.size() + nal_size); + } + memcpy(&pic_data_[pic_data_size_], &bs_ring_[nal_start], nal_size); + } else { // wrap around + nal_size = BS_RING_SIZE - nal_start + nal_end_plus_1; + if ((pic_data_size_ + nal_size) > pic_data_.size()) { + pic_data_.resize(pic_data_.size() + nal_size); + } + memcpy(&pic_data_[pic_data_size_], &bs_ring_[nal_start], BS_RING_SIZE - nal_start); + memcpy(&pic_data_[pic_data_size_ + BS_RING_SIZE - nal_start], &bs_ring_[0], nal_end_plus_1); + } + pic_data_size_ += nal_size; + SetReadPointer(nal_end_plus_1); +} + +void RocVideoESParser::CheckHevcNalForSlice(int start_code_offset, int *slice_flag, int *first_slice_flag) { + uint8_t nal_header_byte; + GetByte(start_code_offset + 3, &nal_header_byte); + uint8_t nal_unit_type = (nal_header_byte >> 1) & 0x3F; + switch (nal_unit_type) { + case NAL_UNIT_CODED_SLICE_TRAIL_R: + case NAL_UNIT_CODED_SLICE_TRAIL_N: + case NAL_UNIT_CODED_SLICE_TLA_R: + case NAL_UNIT_CODED_SLICE_TSA_N: + case NAL_UNIT_CODED_SLICE_STSA_R: + case NAL_UNIT_CODED_SLICE_STSA_N: + case NAL_UNIT_CODED_SLICE_BLA_W_LP: + case NAL_UNIT_CODED_SLICE_BLA_W_RADL: + case NAL_UNIT_CODED_SLICE_BLA_N_LP: + case NAL_UNIT_CODED_SLICE_IDR_W_RADL: + case NAL_UNIT_CODED_SLICE_IDR_N_LP: + case NAL_UNIT_CODED_SLICE_CRA_NUT: + case NAL_UNIT_CODED_SLICE_RADL_N: + case NAL_UNIT_CODED_SLICE_RADL_R: + case NAL_UNIT_CODED_SLICE_RASL_N: + case NAL_UNIT_CODED_SLICE_RASL_R: { + *slice_flag = 1; + uint8_t slice_byte; + GetByte(start_code_offset + 5, &slice_byte); + *first_slice_flag = slice_byte >> 7; // first_slice_segment_in_pic_flag + break; + } + + default: + *slice_flag = 0; + *first_slice_flag = 0; + break; + } +} + +void RocVideoESParser::CheckAvcNalForSlice(int start_code_offset, int *slice_flag, int *first_slice_flag) { + uint8_t nal_header_byte; + 
GetByte(start_code_offset + 3, &nal_header_byte); + uint8_t nal_unit_type = nal_header_byte & 0x1F; + switch (nal_unit_type) { + case kAvcNalTypeSlice_IDR: + case kAvcNalTypeSlice_Non_IDR: + case kAvcNalTypeSlice_Data_Partition_A: + case kAvcNalTypeSlice_Data_Partition_B: + case kAvcNalTypeSlice_Data_Partition_C: { + *slice_flag = 1; + uint8_t slice_bytes[4]; // 4 bytes is enough to parse the Exp-Golomb codes for first_mb_in_slice + for (int i = 0; i < 4; i++) { + GetByte(start_code_offset + 4 + i, &slice_bytes[i]); + } + size_t offset = 0; + int first_mb_in_slice = Parser::ExpGolomb::ReadUe(slice_bytes, offset); + *first_slice_flag = first_mb_in_slice == 0; + break; + } + + default: + *slice_flag = 0; + *first_slice_flag = 0; + break; + } +} + +int RocVideoESParser::GetPicDataAvcHevc(uint8_t **p_pic_data, int *pic_size) { + int slice_nal_flag; + int first_slice_flag = 0; + int num_slices = 0; + + curr_pic_end_ = 0; + // Check if we have already got some NAL units for the current picture from processing of the last picture + if (next_pic_start_ > 0 && next_pic_start_ < pic_data_size_) { + memcpy(&pic_data_[0], &pic_data_[next_pic_start_], pic_data_size_ - next_pic_start_); + pic_data_size_ = pic_data_size_ - next_pic_start_; + curr_pic_end_ = pic_data_size_; + next_pic_start_ = 0; + } else { + pic_data_size_ = 0; + next_pic_start_ = 0; + } + + while (!end_of_stream_) { + if (!FindStartCode()) { + ERR("No start code in the bitstream."); + break; + } + CopyNalUnitFromRing(); + if ( stream_type_ == kStreamTypeAvcElementary) { + CheckAvcNalForSlice(curr_start_code_offset_, &slice_nal_flag, &first_slice_flag); + } else { + CheckHevcNalForSlice(curr_start_code_offset_, &slice_nal_flag, &first_slice_flag); + } + if (slice_nal_flag) { + num_slices++; + curr_pic_end_ = pic_data_size_; // update the current picture data end + } + + if (curr_start_code_offset_ == next_start_code_offset_) { + break; // end of stream + } else if (num_slices) { + if ( stream_type_ == 
kStreamTypeAvcElementary) { + CheckAvcNalForSlice(next_start_code_offset_, &slice_nal_flag, &first_slice_flag); // peek the next NAL + } else { + CheckHevcNalForSlice(next_start_code_offset_, &slice_nal_flag, &first_slice_flag); // peek the next NAL + } + if (slice_nal_flag && first_slice_flag) { + // Between two pictures, we can have non-slice NAL units which are associated with the next picutre + if (curr_pic_end_ < pic_data_size_) { + next_pic_start_ = curr_pic_end_; + } + break; // hit the first slice of the next picture + } + } + } + + *p_pic_data = pic_data_.data(); + if (num_slices) { + num_pictures_++; + *pic_size = curr_pic_end_; + } else { + *pic_size = 0; + } + return 0; +} + +bool RocVideoESParser::ReadObuHeaderAndSize(int *obu_type) { + uint8_t header_byte; + int obu_extension_flag; + + obu_size_ = 0; + obu_byte_offset_ = curr_byte_offset_; + // Parser header + if (GetByte(curr_byte_offset_, &header_byte) == false) { + return false; + } + *obu_type = (header_byte >> 3) & 0x0F; + obu_extension_flag = (header_byte >> 2) & 0x01; + curr_byte_offset_ = (curr_byte_offset_ + 1) % BS_RING_SIZE; + obu_size_++; + if (obu_extension_flag) { + curr_byte_offset_ = (curr_byte_offset_ + 1) % BS_RING_SIZE; + obu_size_++; + } + // Parse size + int len; + uint32_t value = 0; + uint8_t data_byte; + for (len = 0; len < 8; ++len) { + if (GetByte(curr_byte_offset_ + len, &data_byte) == false) { + return false; + } + value |= (data_byte & 0x7F) << (len * 7); + if ((data_byte & 0x80) == 0) { + ++len; + break; + } + } + obu_size_ += len + value; + curr_byte_offset_ = (curr_byte_offset_ + len + value) % BS_RING_SIZE; + + return true; +} + +bool RocVideoESParser::CopyObuFromRing() { + if (obu_size_ > GetDataSizeInRB()) { + if (FetchBitStream() == 0) { + end_of_stream_ = true; + return false; + } + if (obu_size_ > GetDataSizeInRB()) { + return false; + } + } + if ((pic_data_size_ + obu_size_) > pic_data_.size()) { + pic_data_.resize(pic_data_.size() + obu_size_); + } + int 
obu_end_offset = (obu_byte_offset_ + obu_size_) % BS_RING_SIZE; + if (obu_end_offset >= obu_byte_offset_) { + memcpy(&pic_data_[pic_data_size_], &bs_ring_[obu_byte_offset_], obu_size_); + } else { + memcpy(&pic_data_[pic_data_size_], &bs_ring_[obu_byte_offset_], BS_RING_SIZE - obu_byte_offset_); + memcpy(&pic_data_[pic_data_size_ + BS_RING_SIZE - obu_byte_offset_], &bs_ring_[0], obu_end_offset); + } + pic_data_size_ += obu_size_; + SetReadPointer(obu_end_offset); + return true; +} + +int RocVideoESParser::GetPicDataAv1(uint8_t **p_pic_data, int *pic_size) { + int obu_type; + pic_data_size_ = 0; + + while (!end_of_stream_) { + if (!ReadObuHeaderAndSize(&obu_type)) { + break; + } + CopyObuFromRing(); + if (obu_type == kObuTemporalDelimiter) { + num_td_obus_++; + if (num_td_obus_ > 1) { + break; + } + } + } + + *p_pic_data = pic_data_.data(); + *pic_size = pic_data_size_; + num_temp_units_++; + return 0; +} + +bool RocVideoESParser::CheckIvfFileHeader(uint8_t *stream) { + static const char *IVF_SIGNATURE = "DKIF"; + uint8_t *ptr = stream; + + // bytes 0-3: signature + if (memcmp(IVF_SIGNATURE, ptr, 4) == 0) { + ptr += 4; + // bytes 4-5: version (should be 0). Little Endian. + int ivf_version = ptr[0] | (ptr[1] << 8); + if (ivf_version != 0) { + ERR("Stream file error: Incorrect IVF version (" + TOSTR(ivf_version) + "). 
Should be 0."); + } + // bytes 6-7: length of header in bytes + ptr += 4; + // bytes 8-11: codec FourCC (e.g., 'AV01') + uint32_t codec_fourcc = ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24); + ptr += 4; + // bytes 12-13: width in pixels + uint32_t width = ptr[0] | (ptr[1] << 8); + ptr += 2; + // bytes 14-15: height in pixels + uint32_t height = ptr[0] | (ptr[1] << 8); + ptr += 2; + // bytes 16-23: time base denominator + uint32_t denominator = ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24); + ptr += 4; + // bytes 20-23: time base numerator + uint32_t numerator = ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24); + ptr += 4; + // bytes 24-27: number of frames in file + uint32_t num_frames = ptr[0] | (ptr[1] << 8); + // bytes 28-31: unused + return true; + } else { + return false; + } +} + +int RocVideoESParser::GetPicDataIvfAv1(uint8_t **p_pic_data, int *pic_size) { + uint8_t frame_header[12]; + pic_data_size_ = 0; + if (ReadBytes(curr_byte_offset_, 12, frame_header)) { + curr_byte_offset_ = (curr_byte_offset_ + 12) % BS_RING_SIZE; + SetReadPointer(curr_byte_offset_); + int frame_size = frame_header[0] | (frame_header[1] << 8) | (frame_header[2] << 16) | (frame_header[3] << 24); + if (frame_size > pic_data_.size()) { + pic_data_.resize(frame_size); + } + if (ReadBytes(curr_byte_offset_, frame_size, pic_data_.data())) { + pic_data_size_ = frame_size; + curr_byte_offset_ = (curr_byte_offset_ + frame_size) % BS_RING_SIZE; + SetReadPointer(curr_byte_offset_); + } + } + *p_pic_data = pic_data_.data(); + *pic_size = pic_data_size_; + return 0; +} + +int RocVideoESParser::GetPicData(uint8_t **p_pic_data, int *pic_size, int64_t *pts) { + *pts = 0; + switch (stream_type_) { + case kStreamTypeAvcElementary: + case kStreamTypeHevcElementary: + return GetPicDataAvcHevc(p_pic_data, pic_size); + case kStreamTypeAv1Elementary: + return GetPicDataAv1(p_pic_data, pic_size); + case kStreamTypeAv1Ivf: { + if (!ivf_file_header_read_) { + uint8_t 
file_header[32]; + ReadBytes(curr_byte_offset_, 32, file_header); + curr_byte_offset_ = (curr_byte_offset_ + 32) % BS_RING_SIZE; + SetReadPointer(curr_byte_offset_); + ivf_file_header_read_ = true; + } + return GetPicDataIvfAv1(p_pic_data, pic_size); + } + default: { + *p_pic_data = pic_data_.data(); + *pic_size = 0; + return 0; + } + } +} + +rocDecVideoCodec RocVideoESParser::GetCodecId() { + switch (stream_type_) { + case kStreamTypeAvcElementary: + return rocDecVideoCodec_AVC; + case kStreamTypeHevcElementary: + return rocDecVideoCodec_HEVC; + case kStreamTypeAv1Elementary: + case kStreamTypeAv1Ivf: + return rocDecVideoCodec_AV1; + default: + return rocDecVideoCodec_NumCodecs; + } +} + +int RocVideoESParser::ProbeStreamType() { + int stream_type = kStreamTypeUnsupported; + int stream_type_score = 0; + uint8_t *stream_buf; + int stream_size; + + stream_buf = static_cast(malloc(STREAM_PROBE_SIZE)); + p_stream_file_.seekg (0, p_stream_file_.beg); + stream_size = p_stream_file_.read(reinterpret_cast(stream_buf), STREAM_PROBE_SIZE).gcount(); + // When the file size is smaller than STREAM_PROBE_SIZE, the fail bit is set. If we don't clear the state, further operations will fail. 
+ if (p_stream_file_.fail()) { + p_stream_file_.clear(); + } + + for (int i = kStreamTypeAvcElementary; i < kStreamTypeNumSupported; i++) { + int curr_score = 0; + switch (i) { + case kStreamTypeAvcElementary: + curr_score = CheckAvcEStream(stream_buf, stream_size); + if (curr_score > STREAM_TYPE_SCORE_THRESHOLD && curr_score > stream_type_score) { + stream_type = kStreamTypeAvcElementary; + stream_type_score = curr_score; + } + break; + case kStreamTypeHevcElementary: + curr_score = CheckHevcEStream(stream_buf, stream_size); + if (curr_score > STREAM_TYPE_SCORE_THRESHOLD && curr_score > stream_type_score) { + stream_type = kStreamTypeHevcElementary; + stream_type_score = curr_score; + } + break; + case kStreamTypeAv1Elementary: + curr_score = CheckAv1EStream(stream_buf, stream_size); + if (curr_score > STREAM_TYPE_SCORE_THRESHOLD && curr_score > stream_type_score) { + stream_type = kStreamTypeAv1Elementary; + stream_type_score = curr_score; + } + break; + case kStreamTypeAv1Ivf: + curr_score = CheckIvfAv1Stream(stream_buf, stream_size); + if (curr_score > STREAM_TYPE_SCORE_THRESHOLD && curr_score > stream_type_score) { + stream_type = kStreamTypeAv1Ivf; + stream_type_score = curr_score; + } + break; + } + } + + if (stream_buf) { + free(stream_buf); + } + p_stream_file_.seekg (0, std::ios::beg); + return stream_type; +} + +int RocVideoESParser::CheckAvcEStream(uint8_t *p_stream, int stream_size) { + int score = 0; + int curr_offset = 0; + int num_start_codes = 0; + int sps_present = 0; + int pps_present = 0; + int slice_present = 0; + int idr_slice_present = 0; + int first_slice_present = 0; + size_t offset = 0; + + while (curr_offset < stream_size - 2) { + if (p_stream[curr_offset] == 0 && p_stream[curr_offset + 1] == 0 && p_stream[curr_offset + 2] == 1) { + num_start_codes++; + uint8_t nal_header_byte = p_stream[curr_offset + 3]; + uint8_t nal_unit_type = nal_header_byte & 0x1F; + uint8_t nal_rbsp[256]; + memcpy(nal_rbsp, p_stream + curr_offset + 4, 256); + 
EbspToRbsp(nal_rbsp, 0, 256); + switch (nal_unit_type) { + case kAvcNalTypeSeq_Parameter_Set: { + offset = 0; + uint32_t profile_idc = Parser::ReadBits(nal_rbsp, offset, 8); + Parser::ReadBits(nal_rbsp, offset, 8); + uint32_t level_idc = Parser::ReadBits(nal_rbsp, offset, 8); + uint32_t seq_parameter_set_id = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + uint32_t chroma_format_idc; + if (profile_idc == 100 || + profile_idc == 110 || + profile_idc == 122 || + profile_idc == 244 || + profile_idc == 44 || + profile_idc == 83 || + profile_idc == 86 || + profile_idc == 118 || + profile_idc == 128 || + profile_idc == 138 || + profile_idc == 139 || + profile_idc == 134 || + profile_idc == 135) { + chroma_format_idc = Parser::ExpGolomb::ReadUe(p_stream, offset); + if (chroma_format_idc == 3) { + Parser::GetBit(p_stream, offset); // separate_colour_plane_flag + } + uint32_t bit_depth_luma = Parser::ExpGolomb::ReadUe(p_stream, offset) + 8; + uint32_t bit_depth_chroma = Parser::ExpGolomb::ReadUe(p_stream, offset) + 8; + bit_depth_ = bit_depth_luma > bit_depth_chroma ? 
bit_depth_luma : bit_depth_chroma; + } else { + chroma_format_idc = 1; + bit_depth_ = 8; + } + + if (profile_idc > 0 && level_idc > 0 && seq_parameter_set_id >= 0 && seq_parameter_set_id <= 31 && chroma_format_idc >= 0 && chroma_format_idc <= 3 && bit_depth_ >= 8 && bit_depth_ <= 14) { + sps_present = 1; + } + break; + } + + case kAvcNalTypePic_Parameter_Set: { + offset = 0; + uint32_t pic_parameter_set_id = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + uint32_t seq_parameter_set_id = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + if ( pic_parameter_set_id >= 0 && pic_parameter_set_id <= 255 && seq_parameter_set_id >= 0 && seq_parameter_set_id <= 31) { + pps_present = 1; + } + break; + } + + case kAvcNalTypeSlice_IDR: + idr_slice_present = 1; + case kAvcNalTypeSlice_Non_IDR: + case kAvcNalTypeSlice_Data_Partition_A: + case kAvcNalTypeSlice_Data_Partition_B: + case kAvcNalTypeSlice_Data_Partition_C: { + slice_present = 1; + offset = 0; + uint32_t first_mb_in_slice = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + if ( first_mb_in_slice == 0) { + first_slice_present = 1; + } + break; + } + + default: + break; + } + curr_offset += 4; + } else { + curr_offset++; + } + } + if (num_start_codes == 0) { + score = 0; + } else { + score = sps_present * 25 + pps_present * 25 + idr_slice_present * 15 + slice_present * 15 + first_slice_present * 15; + } + return score; +} + +int RocVideoESParser::CheckHevcEStream(uint8_t *p_stream, int stream_size) { + int score = 0; + int curr_offset = 0; + int num_start_codes = 0; + int vps_present = 0; + int sps_present = 0; + int pps_present = 0; + int slice_present = 0; + int rap_slice_present = 0; + int first_slice_present = 0; + size_t offset = 0; + + while (curr_offset < stream_size - 2) { + if (p_stream[curr_offset] == 0 && p_stream[curr_offset + 1] == 0 && p_stream[curr_offset + 2] == 1) { + num_start_codes++; + uint8_t nal_header_byte = p_stream[curr_offset + 3]; + uint8_t nal_unit_type = (nal_header_byte >> 1) & 0x3F; + uint8_t 
nal_rbsp[256]; + memcpy(nal_rbsp, p_stream + curr_offset + 5, 256); + EbspToRbsp(nal_rbsp, 0, 256); + switch (nal_unit_type) { + case NAL_UNIT_VPS: { + offset = 16; + int vps_reserved_0xffff_16bits = Parser::ReadBits(nal_rbsp, offset, 16); + if (vps_reserved_0xffff_16bits == 0xFFFF) { + vps_present = 1; + } + break; + } + + case NAL_UNIT_SPS: { + offset = 0; + Parser::ReadBits(nal_rbsp, offset, 4); // sps_video_parameter_set_id + uint32_t max_sub_layer_minus1 = Parser::ReadBits(nal_rbsp, offset, 3); + Parser::GetBit(nal_rbsp, offset); // sps_temporal_id_nesting_flag + // profile_tier_level() + int sub_layer_profile_present_flag[6]; + int sub_layer_level_present_flag[6]; + offset += 96; + for (int i = 0; i < max_sub_layer_minus1; i++) { + sub_layer_profile_present_flag[i] = Parser::GetBit(nal_rbsp, offset); + sub_layer_level_present_flag[i] = Parser::GetBit(nal_rbsp, offset); + } + if (max_sub_layer_minus1 > 0) { + for (int i = max_sub_layer_minus1; i < 8; i++) { + offset += 2; + } + } + for (int i = 0; i < max_sub_layer_minus1; i++) { + if (sub_layer_profile_present_flag[i]) { + offset += 88; + } + if (sub_layer_level_present_flag[i]) { + offset += 8; + } + } + uint32_t sps_seq_parameter_set_id = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + uint32_t chroma_format_idc = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + if (chroma_format_idc == 3) { + Parser::GetBit(nal_rbsp, offset); // separate_colour_plane_flag + } + Parser::ExpGolomb::ReadUe(nal_rbsp, offset); // pic_width_in_luma_samples + Parser::ExpGolomb::ReadUe(nal_rbsp, offset); // pic_height_in_luma_samples + int conformance_window_flag = Parser::GetBit(nal_rbsp, offset); + if (conformance_window_flag) { + Parser::ExpGolomb::ReadUe(nal_rbsp, offset); // conf_win_left_offset + Parser::ExpGolomb::ReadUe(nal_rbsp, offset); // conf_win_right_offset + Parser::ExpGolomb::ReadUe(nal_rbsp, offset); // conf_win_top_offset + Parser::ExpGolomb::ReadUe(nal_rbsp, offset); // conf_win_bottom_offset + } + uint32_t 
bit_depth_luma = Parser::ExpGolomb::ReadUe(nal_rbsp, offset) + 8; + uint32_t bit_depth_chroma = Parser::ExpGolomb::ReadUe(nal_rbsp, offset) + 8; + bit_depth_ = bit_depth_luma > bit_depth_chroma ? bit_depth_luma : bit_depth_chroma; + if (sps_seq_parameter_set_id >= 0 && sps_seq_parameter_set_id <= 15 && chroma_format_idc >= 0 && chroma_format_idc <= 3 && bit_depth_ >= 8 && bit_depth_ <= 16) { + sps_present = 1; + } + + break; + } + + case NAL_UNIT_PPS: { + offset = 0; + uint32_t pps_pic_parameter_set_id = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + uint32_t pps_seq_parameter_set_id = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + if ( pps_pic_parameter_set_id >= 0 && pps_pic_parameter_set_id <= 63 && pps_seq_parameter_set_id >= 0 && pps_seq_parameter_set_id <= 15) { + pps_present = 1; + } + break; + } + + case NAL_UNIT_CODED_SLICE_BLA_W_LP: + case NAL_UNIT_CODED_SLICE_BLA_W_RADL: + case NAL_UNIT_CODED_SLICE_BLA_N_LP: + case NAL_UNIT_CODED_SLICE_IDR_W_RADL: + case NAL_UNIT_CODED_SLICE_IDR_N_LP: + case NAL_UNIT_CODED_SLICE_CRA_NUT: + rap_slice_present = 1; + case NAL_UNIT_CODED_SLICE_TRAIL_R: + case NAL_UNIT_CODED_SLICE_TRAIL_N: + case NAL_UNIT_CODED_SLICE_TLA_R: + case NAL_UNIT_CODED_SLICE_TSA_N: + case NAL_UNIT_CODED_SLICE_STSA_R: + case NAL_UNIT_CODED_SLICE_STSA_N: + case NAL_UNIT_CODED_SLICE_RADL_N: + case NAL_UNIT_CODED_SLICE_RADL_R: + case NAL_UNIT_CODED_SLICE_RASL_N: + case NAL_UNIT_CODED_SLICE_RASL_R: { + offset = 0; + int first_slice_segment_in_pic_flag = Parser::GetBit(nal_rbsp, offset); + if (first_slice_segment_in_pic_flag) { + first_slice_present = 1; + } + if (nal_unit_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nal_unit_type <= NAL_UNIT_RESERVED_IRAP_VCL23) { + offset++; + } + uint32_t slice_pic_parameter_set_id = Parser::ExpGolomb::ReadUe(nal_rbsp, offset); + if ( slice_pic_parameter_set_id >= 0 && slice_pic_parameter_set_id <= 63) { + slice_present = 1; + } else { + slice_present = 0; + } + if (!slice_present) { + rap_slice_present = 0; + 
first_slice_present = 0; + } + break; + } + + default: + break; + } + curr_offset += 5; + } else { + curr_offset++; + } + } + if (num_start_codes == 0) { + score = 0; + } else { + score = vps_present * 20 + sps_present * 20 + pps_present * 20 + rap_slice_present * 15 + slice_present * 15 + first_slice_present * 15; + } + return score; +} + +int RocVideoESParser::EbspToRbsp(uint8_t *streamBuffer, int begin_bytepos, int end_bytepos) { + int count = 0; + if (end_bytepos < begin_bytepos) { + return end_bytepos; + } + uint8_t *streamBuffer_i = streamBuffer + begin_bytepos; + uint8_t *streamBuffer_end = streamBuffer + end_bytepos; + int reduce_count = 0; + for (; streamBuffer_i != streamBuffer_end; ) { + //starting from begin_bytepos to avoid header information + //in NAL unit, 0x000000, 0x000001 or 0x000002 shall not occur at any uint8_t-aligned position + uint8_t tmp =* streamBuffer_i; + if (count == ZEROBYTES_SHORTSTARTCODE) { + if (tmp == 0x03) { + //check the 4th uint8_t after 0x000003, except when cabac_zero_word is used, in which case the last three bytes of this NAL unit must be 0x000003 + if ((streamBuffer_i + 1 != streamBuffer_end) && (streamBuffer_i[1] > 0x03)) { + return -1; + } + //if cabac_zero_word is used, the final uint8_t of this NAL unit(0x03) is discarded, and the last two bytes of RBSP must be 0x0000 + if (streamBuffer_i + 1 == streamBuffer_end) { + break; + } + memmove(streamBuffer_i, streamBuffer_i + 1, streamBuffer_end-streamBuffer_i - 1); + streamBuffer_end--; + reduce_count++; + count = 0; + tmp = *streamBuffer_i; + } else if (tmp < 0x03) { + } + } + if (tmp == 0x00) { + count++; + } else { + count = 0; + } + streamBuffer_i++; + } + return end_bytepos - begin_bytepos + reduce_count; +} + +uint32_t RocVideoESParser::ReadUVLC(const uint8_t *p_stream, size_t &bit_offset) { + int leading_zeros = 0; + while (!Parser::GetBit(p_stream, bit_offset)) { + ++leading_zeros; + } + // Maximum 32 bits. 
+ if (leading_zeros >= 32) { + return 0xFFFFFFFF; + } + uint32_t base = (1u << leading_zeros) - 1; + uint32_t value = Parser::ReadBits(p_stream, bit_offset, leading_zeros); + return base + value; +} + +int RocVideoESParser::CheckAv1EStream(uint8_t *p_stream, int stream_size) { + int score = 0; + uint8_t *obu_stream = p_stream; + int curr_offset = 0; + int temporal_delimiter_obu_present = 0; + int seq_header_obu_present = 0; + int frame_header_obu_present = 0; + int frame_obu_present = 0; + int tile_group_obu_present = 0; + bool syntax_error = false; + size_t offset = 0; + + while (curr_offset < stream_size) { + // OBU header + Av1ObuHeader obu_header; + offset = 0; + obu_stream = p_stream + curr_offset; + obu_header.size = 1; + if (Parser::GetBit(obu_stream, offset) != 0) { + syntax_error = true; + break; + } + obu_header.obu_type = Parser::ReadBits(obu_stream, offset, 4); + obu_header.obu_extension_flag = Parser::GetBit(obu_stream, offset); + obu_header.obu_has_size_field = Parser::GetBit(obu_stream, offset); + if (!obu_header.obu_has_size_field) { + syntax_error = true; + break; + } + if (Parser::GetBit(obu_stream, offset) != 0) { + syntax_error = true; + break; + } + if (obu_header.obu_extension_flag) { + obu_header.size += 1; + obu_header.temporal_id = Parser::ReadBits(obu_stream, offset, 3); + obu_header.spatial_id = Parser::ReadBits(obu_stream, offset, 2); + if (Parser::ReadBits(obu_stream, offset, 3) != 0) { + syntax_error = true; + break; + } + } + curr_offset += obu_header.size; + obu_stream += obu_header.size; + // OBU size + int len; + uint32_t obu_size = 0; + for (len = 0; len < 8; ++len) { + obu_size |= (obu_stream[len] & 0x7F) << (len * 7); + if ((obu_stream[len] & 0x80) == 0) { + ++len; + break; + } + } + curr_offset += len; + obu_stream += len; + + switch (obu_header.obu_type) { + case kObuTemporalDelimiter: + temporal_delimiter_obu_present = 1; + break; + + case kObuSequenceHeader: { + Av1SequenceHeader seq_header = {0}; + offset = 0; + 
seq_header.seq_profile = Parser::ReadBits(obu_stream, offset, 3); + seq_header.still_picture = Parser::GetBit(obu_stream, offset); + seq_header.reduced_still_picture_header = Parser::GetBit(obu_stream, offset); + + if (seq_header.reduced_still_picture_header) { + seq_header.timing_info_present_flag = 0; + seq_header.decoder_model_info_present_flag = 0; + seq_header.initial_display_delay_present_flag = 0; + seq_header.operating_points_cnt_minus_1 = 0; + seq_header.operating_point_idc[0] = 0; + seq_header.seq_level_idx[0] = Parser::ReadBits(obu_stream, offset, 5); + seq_header.seq_tier[0] = 0; + seq_header.decoder_model_present_for_this_op[0] = 0; + seq_header.initial_display_delay_present_for_this_op[0] = 0; + } else { + seq_header.timing_info_present_flag = Parser::GetBit(obu_stream, offset); + if (seq_header.timing_info_present_flag) { + // timing_info() + seq_header.timing_info.num_units_in_display_tick = Parser::ReadBits(obu_stream, offset, 32); + seq_header.timing_info.time_scale = Parser::ReadBits(obu_stream, offset, 32); + seq_header.timing_info.equal_picture_interval = Parser::GetBit(obu_stream, offset); + if (seq_header.timing_info.equal_picture_interval) { + seq_header.timing_info.num_ticks_per_picture_minus_1 = ReadUVLC(obu_stream, offset); + } + seq_header.decoder_model_info_present_flag = Parser::GetBit(obu_stream, offset); + if (seq_header.decoder_model_info_present_flag) { + seq_header.decoder_model_info.buffer_delay_length_minus_1 = Parser::ReadBits(obu_stream, offset, 5); + seq_header.decoder_model_info.num_units_in_decoding_tick = Parser::ReadBits(obu_stream, offset, 32); + seq_header.decoder_model_info.buffer_removal_time_length_minus_1 = Parser::ReadBits(obu_stream, offset, 5); + seq_header.decoder_model_info.frame_presentation_time_length_minus_1 = Parser::ReadBits(obu_stream, offset, 5); + } + } else { + seq_header.decoder_model_info_present_flag = 0; + } + seq_header.initial_display_delay_present_flag = Parser::GetBit(obu_stream, offset); + 
seq_header.operating_points_cnt_minus_1 = Parser::ReadBits(obu_stream, offset, 5); + for (int i = 0; i < seq_header.operating_points_cnt_minus_1 + 1; i++) { + seq_header.operating_point_idc[i] = Parser::ReadBits(obu_stream, offset, 12); + seq_header.seq_level_idx[i] = Parser::ReadBits(obu_stream, offset, 5); + if (seq_header.seq_level_idx[i] > 7) { + seq_header.seq_tier[i] = Parser::GetBit(obu_stream, offset); + } else { + seq_header.seq_tier[i] = 0; + } + if (seq_header.decoder_model_info_present_flag) { + seq_header.decoder_model_present_for_this_op[i] = Parser::GetBit(obu_stream, offset); + if (seq_header.decoder_model_present_for_this_op[i]) { + seq_header.operating_parameters_info[i].decoder_buffer_delay = Parser::ReadBits(obu_stream, offset, seq_header.decoder_model_info.buffer_delay_length_minus_1 + 1); + seq_header.operating_parameters_info[i].encoder_buffer_delay = Parser::ReadBits(obu_stream, offset, seq_header.decoder_model_info.buffer_delay_length_minus_1 + 1); + seq_header.operating_parameters_info[i].low_delay_mode_flag = Parser::GetBit(obu_stream, offset); + } + } else { + seq_header.decoder_model_present_for_this_op[i] = 0; + } + + if (seq_header.initial_display_delay_present_flag) { + seq_header.initial_display_delay_present_for_this_op[i] = Parser::GetBit(obu_stream, offset); + if (seq_header.initial_display_delay_present_for_this_op[i]) { + seq_header.initial_display_delay_minus_1[i] = Parser::ReadBits(obu_stream, offset, 4); + } + } + } + } + seq_header.frame_width_bits_minus_1 = Parser::ReadBits(obu_stream, offset, 4); + seq_header.frame_height_bits_minus_1 = Parser::ReadBits(obu_stream, offset, 4); + seq_header.max_frame_width_minus_1 = Parser::ReadBits(obu_stream, offset, seq_header.frame_width_bits_minus_1 + 1); + seq_header.max_frame_height_minus_1 = Parser::ReadBits(obu_stream, offset, seq_header.frame_height_bits_minus_1 + 1); + if (seq_header.reduced_still_picture_header) { + seq_header.frame_id_numbers_present_flag = 0; + } else { + 
seq_header.frame_id_numbers_present_flag = Parser::GetBit(obu_stream, offset); + } + if (seq_header.frame_id_numbers_present_flag) { + seq_header.delta_frame_id_length_minus_2 = Parser::ReadBits(obu_stream, offset, 4); + seq_header.additional_frame_id_length_minus_1 = Parser::ReadBits(obu_stream, offset, 3); + } + seq_header.use_128x128_superblock = Parser::GetBit(obu_stream, offset); + seq_header.enable_filter_intra = Parser::GetBit(obu_stream, offset); + seq_header.enable_intra_edge_filter = Parser::GetBit(obu_stream, offset); + + if (seq_header.reduced_still_picture_header) { + seq_header.enable_interintra_compound = 0; + seq_header.enable_masked_compound = 0; + seq_header.enable_warped_motion = 0; + seq_header.enable_dual_filter = 0; + seq_header.enable_order_hint = 0; + seq_header.enable_jnt_comp = 0; + seq_header.enable_ref_frame_mvs = 0; + seq_header.seq_force_screen_content_tools = SELECT_SCREEN_CONTENT_TOOLS; + seq_header.seq_force_integer_mv = SELECT_INTEGER_MV; + seq_header.order_hint_bits = 0; + } else { + seq_header.enable_interintra_compound = Parser::GetBit(obu_stream, offset); + seq_header.enable_masked_compound = Parser::GetBit(obu_stream, offset); + seq_header.enable_warped_motion = Parser::GetBit(obu_stream, offset); + seq_header.enable_dual_filter = Parser::GetBit(obu_stream, offset); + seq_header.enable_order_hint = Parser::GetBit(obu_stream, offset); + if (seq_header.enable_order_hint) { + seq_header.enable_jnt_comp = Parser::GetBit(obu_stream, offset); + seq_header.enable_ref_frame_mvs = Parser::GetBit(obu_stream, offset); + } else { + seq_header.enable_jnt_comp = 0; + seq_header.enable_ref_frame_mvs = 0; + } + seq_header.seq_choose_screen_content_tools = Parser::GetBit(obu_stream, offset); + if (seq_header.seq_choose_screen_content_tools) { + seq_header.seq_force_screen_content_tools = SELECT_SCREEN_CONTENT_TOOLS; + } else { + seq_header.seq_force_screen_content_tools = Parser::GetBit(obu_stream, offset); + } + if 
(seq_header.seq_force_screen_content_tools > 0) { + seq_header.seq_choose_integer_mv = Parser::GetBit(obu_stream, offset); + if (seq_header.seq_choose_integer_mv) { + seq_header.seq_force_integer_mv = SELECT_INTEGER_MV; + } else { + seq_header.seq_force_integer_mv = Parser::GetBit(obu_stream, offset); + } + } else { + seq_header.seq_force_integer_mv = SELECT_INTEGER_MV; + } + + if (seq_header.enable_order_hint) { + seq_header.order_hint_bits_minus_1 = Parser::ReadBits(obu_stream, offset, 3); + seq_header.order_hint_bits = seq_header.order_hint_bits_minus_1 + 1; + } else { + seq_header.order_hint_bits = 0; + } + } + seq_header.enable_superres = Parser::GetBit(obu_stream, offset); + seq_header.enable_cdef = Parser::GetBit(obu_stream, offset); + seq_header.enable_restoration = Parser::GetBit(obu_stream, offset); + seq_header.color_config.bit_depth = 8; + seq_header.color_config.high_bitdepth = Parser::GetBit(obu_stream, offset); + if (seq_header.seq_profile == 2 && seq_header.color_config.high_bitdepth) { + seq_header.color_config.twelve_bit = Parser::GetBit(obu_stream, offset); + seq_header.color_config.bit_depth = seq_header.color_config.twelve_bit ? 12 : 10; + } else if (seq_header.seq_profile <= 2) { + seq_header.color_config.bit_depth = seq_header.color_config.high_bitdepth ? 
10 : 8; + } + bit_depth_ = seq_header.color_config.bit_depth; + if (seq_header.seq_profile >= 0 && seq_header.seq_profile <= 2) { + seq_header_obu_present = 1; + } + break; + } + + case kObuFrameHeader: + frame_header_obu_present = 1; + break; + + case kObuFrame: + frame_obu_present = 1; + break; + + case kObuTileGroup: + tile_group_obu_present = 1; + break; + } + + curr_offset += obu_size; + } + if (syntax_error) { + score = 0; + } else { + score = temporal_delimiter_obu_present * 25 + seq_header_obu_present * 25 + frame_obu_present * 50 + (frame_header_obu_present & tile_group_obu_present) * 50; + } + return score; +} + +int RocVideoESParser::CheckIvfAv1Stream(uint8_t *p_stream, int stream_size) { + static const char *IVF_SIGNATURE = "DKIF"; + static const char *AV1_FourCC = "AV01"; + static const int IvfFileHeaderSize = 32; + static const int IvfFrameHeaderSize = 12; + uint8_t *ptr = p_stream; + int score = 0; + + // bytes 0-3: signature + if (memcmp(IVF_SIGNATURE, ptr, 4) == 0) { + ptr += 4; + // bytes 4-5: version (should be 0). Little Endian. + int ivf_version = ptr[0] | (ptr[1] << 8); + if (ivf_version != 0) { + score = 0; + } else { + ptr += 4; + // bytes 8-11: codec FourCC (e.g., 'AV01') + if (memcmp(AV1_FourCC, ptr, 4)) { + score = 0; + } else { + ptr = p_stream + IvfFileHeaderSize; + int frame_size = ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24); + ptr += IvfFrameHeaderSize; + int size = stream_size - IvfFileHeaderSize - IvfFrameHeaderSize; + size = frame_size < size ? frame_size : size; + score = CheckAv1EStream(ptr, size); + } + } + } else { + score = 0; + } + return score; +} \ No newline at end of file diff --git a/src/bit_stream_reader/es_reader.h b/src/bit_stream_reader/es_reader.h new file mode 100644 index 0000000000..ac576db13f --- /dev/null +++ b/src/bit_stream_reader/es_reader.h @@ -0,0 +1,236 @@ +/* +Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include "rocdecode.h" + +#define BS_RING_SIZE (16 * 1024 * 1024) +#define INIT_PIC_DATA_SIZE (2 * 1024 * 1024) + +enum { + kStreamTypeUnsupported = -1, + kStreamTypeAvcElementary = 0, + kStreamTypeHevcElementary, + kStreamTypeAv1Elementary, + kStreamTypeAv1Ivf, + kStreamTypeNumSupported +} StreamFileType; + +#define STREAM_PROBE_SIZE 2 * 1024 +#define STREAM_TYPE_SCORE_THRESHOLD 50 + +class RocVideoESParser { + public: + RocVideoESParser(const char *input_file_path); + RocVideoESParser(); + ~RocVideoESParser(); + + /*! \brief Function to probe the bitstream file and try to get the codec id + * \retrun Codec id + */ + rocDecVideoCodec GetCodecId(); + + /*! 
\brief Function to retrieve the bitstream of a picture + * \param [out] p_pic_data Pointer to the picture data + * \param [out] pic_size Size of the picture in bytes + * \param [out] pts Presentation time stamp + */ + int GetPicData(uint8_t **p_pic_data, int *pic_size, int64_t *pts); + + /*! \brief Function to return the bit depth of the stream + */ + int GetBitDepth() {return bit_depth_;}; + + private: + std::ifstream p_stream_file_; + int stream_type_; + int bit_depth_; + + // Bitstream ring buffer + uint8_t bs_ring_[BS_RING_SIZE]; + uint32_t read_ptr_; /// start position of unprocessed stream in the ring + uint32_t write_ptr_; /// end position of unprocessed stream in the ring + bool end_of_file_; + bool end_of_stream_; + int curr_byte_offset_; + // AVC/HEVC + int num_start_code_; + int curr_start_code_offset_; + int next_start_code_offset_; + //int nal_unit_size_; + // AV1 + int obu_byte_offset_; // header offset + int obu_size_; // including header + int num_td_obus_; // number of temporal delimiter OBUs + + // Picture data (linear buffer) + std::vector pic_data_; + int pic_data_size_; + // AVC/HEVC + int curr_pic_end_; + int next_pic_start_; + int num_pictures_; + // AV1 + int num_temp_units_; // number of temporal units + + bool ivf_file_header_read_; // indicator if IVF file header has been checked + + /*! \brief Function to retrieve the bitstream of a picture for AVC/HEVC + * \param [out] p_pic_data Pointer to the picture data + * \param [out] pic_size Size of the picture in bytes + */ + int GetPicDataAvcHevc(uint8_t **p_pic_data, int *pic_size); + + /*! \brief Function to retrieve the bitstream of a temporal unit for AV1 + * \param [out] p_pic_data Pointer to the picture data + * \param [out] pic_size Size of the picture in bytes + */ + int GetPicDataAv1(uint8_t **p_pic_data, int *pic_size); + + /*! 
\brief Function to retrieve the bitstream of a temporal unit for AV1 from IVF container + * \param [out] p_pic_data Pointer to the picture data + * \param [out] pic_size Size of the picture in bytes + */ + int GetPicDataIvfAv1(uint8_t **p_pic_data, int *pic_size); + + /*! \brief Function to read bitstream from file and fill into the ring buffer. + * \return Number of bytes read from file. + */ + int FetchBitStream(); + + /*! \brief Function to check the remaining data size in the ring buffer + * \return Number of bytes still available in the ring + */ + int GetDataSizeInRB(); + + /*! \brief Function to read one byte from the ring buffer without advancing the read pointer + * \param [in] offset The byte offset to read + * \param [out] data The byte read + * \return True: success; False: no more byte available. + */ + bool GetByte(int offset, uint8_t *data); + + /*! \brief Function to read the specified bytes from the ring buffer without advancing the read pointer + * \param [in] offset The starting byte offset to read + * \param [in] size The numbers of bytes to read + * \param [out] data The bytes read + * \return True: success; False: can not read the set bytes + */ + bool ReadBytes(int offset, int size, uint8_t *data); + + /*! \brief Function to update the read pointer by the set bytes + * \param [in] value The new read pointer value + */ + void SetReadPointer(int value); + + /*! \brief Function to find the start codes from the ring buffer to locate the NAL units + * \return Returns: true: a new start code is found or end of stream reached; false: no start code found. + */ + bool FindStartCode(); + + /*! \brief Function to check if an HEVC NAL is the (first) slice of a picture + * \param [in] start_code_offset Start code location of the NAL unit + * \param [out] slice_flag Slice NAL unit indicator + * \param [out] first_slice_flag First slice indicator + */ + void CheckHevcNalForSlice(int start_code_offset, int *slice_flag, int *first_slice_flag); + + /*! 
\brief Function to check if an AVC NAL is the (first) slice of a picture + * \param [in] start_code_offset Start code location of the NAL unit + * \param [out] slice_flag Slice NAL unit indicator + * \param [out] first_slice_flag First slice indicator + */ + void CheckAvcNalForSlice(int start_code_offset, int *slice_flag, int *first_slice_flag); + + /*! \brief Function to copy a NAL unit from the bitstream ring buffer to the linear picture data buffer + */ + void CopyNalUnitFromRing(); + + /*! \brief Function to parse an OBU header and size + * \param [out] obu_type Pointer to the returned OBU type + * \return true if success + */ + bool ReadObuHeaderAndSize(int *obu_type); + + /*! \brief Function to copy an OBU from the bitstream ring buffer to the linear picture data buffer + * \return true if success + */ + bool CopyObuFromRing(); + + /*! \brief Function to check the 32 byte stream for IVF file header identity + * \return true if IVF file header is identified; false: otherwise + */ + bool CheckIvfFileHeader(uint8_t *stream); + + /*! \brief Function to probe the bitstream file and try to find if it is one of types supported. + * \return Elementary stream file type + */ + int ProbeStreamType(); + + /*! \brief Function to check the likelihood of a stream to be an AVC elementary stream. + * \param [in] p_stream Pointer to the stream + * \param [in] stream_size Size of the stream in bytes + * \return The likelihood score + */ + int CheckAvcEStream(uint8_t *p_stream, int stream_size); + + /*! \brief Function to check the likelihood of a stream to be an HEVC elementary stream. + * \param [in] p_stream Pointer to the stream + * \param [in] stream_size Size of the stream in bytes + * \return The likelihood score + */ + int CheckHevcEStream(uint8_t *p_stream, int stream_size); + + /*! \brief Function to convert from Encapsulated Byte Sequence Packets to Raw Byte Sequence Payload + * \param [inout] stream_buffer A pointer of uint8_t for the converted RBSP buffer. 
+ * \param [in] begin_bytepos Start position in the EBSP buffer to convert + * \param [in] end_bytepos End position in the EBSP buffer to convert, generally it's size. + * \return Returns the size of the converted buffer + */ + int EbspToRbsp(uint8_t *stream_buffer, int begin_bytepos, int end_bytepos); + + /*! \brief Function to check the likelihood of a stream to be an AV1 elementary stream. + * \param [in] p_stream Pointer to the stream + * \param [in] stream_size Size of the stream in bytes + * \return The likelihood score + */ + int CheckAv1EStream(uint8_t *p_stream, int stream_size); + + /*! \brief Function to check the likelihood of a stream to be an IVF container of AV1 elementary stream. + * \param [in] p_stream Pointer to the stream + * \param [in] stream_size Size of the stream in bytes + * \return The likelihood score + */ + int CheckIvfAv1Stream(uint8_t *p_stream, int stream_size); + + /*! \brief Function to read variable length unsigned n-bit number appearing directly in the bitstream. 4.10.3. uvlc(). + * \param [in] p_stream Bit stream pointer + * \param [in] bit_offset Starting bit offset + * \param [out] bit_offset Updated bit offset + * \return The unsigned value + */ + uint32_t ReadUVLC(const uint8_t *p_stream, size_t &bit_offset); +}; \ No newline at end of file diff --git a/src/bit_stream_reader/roc_bs_reader_api.cpp b/src/bit_stream_reader/roc_bs_reader_api.cpp new file mode 100644 index 0000000000..9855f99534 --- /dev/null +++ b/src/bit_stream_reader/roc_bs_reader_api.cpp @@ -0,0 +1,100 @@ +/* +Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "../commons.h" +#include "bs_reader_handle.h" + +rocDecStatus ROCDECAPI rocDecCreateBitstreamReader(RocdecBitstreamReader *bs_reader_handle, const char *input_file_path) { + if (bs_reader_handle == nullptr || input_file_path == nullptr) { + return ROCDEC_INVALID_PARAMETER; + } + RocdecBitstreamReader handle = nullptr; + try { + handle = new RocBitstreamReaderHandle(input_file_path); + } + catch (const std::exception& e) { + ERR( STR("Failed to create RocBitstreamReader handle, ") + STR(e.what())) + return ROCDEC_RUNTIME_ERROR; + } + *bs_reader_handle = handle; + return ROCDEC_SUCCESS; +} + +rocDecStatus ROCDECAPI rocDecGetBitstreamCodecType(RocdecBitstreamReader bs_reader_handle, rocDecVideoCodec *codec_type) { + if (bs_reader_handle == nullptr || codec_type == nullptr) { + return ROCDEC_INVALID_PARAMETER; + } + auto roc_bs_reader_handle = static_cast(bs_reader_handle); + rocDecStatus ret; + try { + ret = roc_bs_reader_handle->GetBitstreamCodecType(codec_type); + } + catch (const std::exception& e) { + roc_bs_reader_handle->CaptureError(e.what()); + ERR(e.what()) + return ROCDEC_RUNTIME_ERROR; + } + return ret; +} + +rocDecStatus ROCDECAPI rocDecGetBitstreamBitDepth(RocdecBitstreamReader bs_reader_handle, int *bit_depth) { + if (bs_reader_handle == nullptr || bit_depth == nullptr) { + return ROCDEC_INVALID_PARAMETER; + } + auto roc_bs_reader_handle = static_cast(bs_reader_handle); + rocDecStatus ret; + try { + ret = roc_bs_reader_handle->GetBitstreamBitDepth(bit_depth); + } + catch (const std::exception& e) { + roc_bs_reader_handle->CaptureError(e.what()); + ERR(e.what()) + return ROCDEC_RUNTIME_ERROR; + } + return ret; +} + +rocDecStatus ROCDECAPI rocDecGetBitstreamPicData(RocdecBitstreamReader bs_reader_handle, uint8_t **pic_data, int *pic_size, int64_t *pts) { + if (bs_reader_handle == nullptr || pic_data == nullptr || pic_size == nullptr || pts == nullptr) { + return ROCDEC_INVALID_PARAMETER; + } + auto roc_bs_reader_handle = 
static_cast(bs_reader_handle); + rocDecStatus ret; + try { + ret = roc_bs_reader_handle->GetBitstreamPicData(pic_data, pic_size, pts); + } + catch (const std::exception& e) { + roc_bs_reader_handle->CaptureError(e.what()); + ERR(e.what()) + return ROCDEC_RUNTIME_ERROR; + } + return ret; +} + +rocDecStatus ROCDECAPI rocDecDestroyBitstreamReader(RocdecBitstreamReader bs_reader_handle) { + if (bs_reader_handle == nullptr) { + return ROCDEC_INVALID_PARAMETER; + } + auto roc_bs_reader_handle = static_cast(bs_reader_handle); + delete roc_bs_reader_handle; + return ROCDEC_SUCCESS; +} diff --git a/test/testScripts/run_rocDecode_Conformance.py b/test/testScripts/run_rocDecode_Conformance.py index 84fed65e5d..dc172ed284 100644 --- a/test/testScripts/run_rocDecode_Conformance.py +++ b/test/testScripts/run_rocDecode_Conformance.py @@ -45,6 +45,8 @@ parser.add_argument('--files_directory', type=str, default='', help='The path to a dirctory containing one or more supported files for decoding (e.g., mp4, mov, etc.) and their corresponding reference MD5 digests - required') parser.add_argument('--results_directory', type=str, default='', help='The path to a dirctory to store results - optional') +parser.add_argument('--use_ffmpeg_demuxer', type=int, default=1, + help='Indicator to use FFMPEG demuxer - optional (default:1). 
If set to 0, built-in bitstream reader is used.') args = parser.parse_args() @@ -53,6 +55,12 @@ gpuDeviceID = args.gpu_device_id filesDir = args.files_directory videoDecodeEXE = args.videodecode_exe resultsDir = args.results_directory +useFFDemuxer = args.use_ffmpeg_demuxer + +if useFFDemuxer == 1: + bsReaderOption = '' +else: + bsReaderOption = '-no_ffmpeg_demux' print("\nrunrocDecodeTests V"+__version__+"\n") @@ -108,7 +116,7 @@ if streamListSize != md5ListSize: for i in range(streamListSize): streamFilePath = streamFileDir + streamFileList[i] md5FilePath = md5FileDir + md5FileList[i] - os.system(run_rocDecode_app +' -i ' + streamFilePath + ' -md5_check ' + md5FilePath + ' -d ' + str(gpuDeviceID) + ' | tee -a ' + resultsPath + '/rocDecode_output.log') + os.system(run_rocDecode_app +' -i ' + streamFilePath + ' ' + bsReaderOption + ' -md5_check ' + md5FilePath + ' -d ' + str(gpuDeviceID) + ' | tee -a ' + resultsPath + '/rocDecode_output.log') print("======================================================================================\n") fileString = 'Input file' diff --git a/utils/md5.h b/utils/md5.h new file mode 100644 index 0000000000..9bb6886246 --- /dev/null +++ b/utils/md5.h @@ -0,0 +1,161 @@ +/* +Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +extern "C" { +#include "libavutil/md5.h" +#include "libavutil/mem.h" +} +#include "roc_video_dec.h" + +/*! + * \file + * \brief The MD5 message digest generation utility. + */ + +class MD5Generator { +public: + MD5Generator() {}; + ~MD5Generator() {}; + + /*! \brief Function to start MD5 calculation + */ + void InitMd5() { + md5_ctx_ = av_md5_alloc(); + av_md5_init(md5_ctx_); + } + + /*! \brief Function to update MD5 digest for a device data buffer + * \param [in] data_buf Pointer to the device data buffer + * \param [in] buf_size Size of the data buffer in bytes + */ + void UpdateMd5ForDataBuffer(void *data_buf, int buf_size) { + uint8_t *hstPtr = nullptr; + hstPtr = new uint8_t[buf_size]; + hipError_t hip_status = hipSuccess; + hip_status = hipMemcpyDtoH((void *)hstPtr, data_buf, buf_size); + if (hip_status != hipSuccess) { + std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl; + delete [] hstPtr; + return; + } + av_md5_update(md5_ctx_, hstPtr, buf_size); + if (hstPtr) { + delete [] hstPtr; + } + } + + /*! 
\brief Function to update MD5 digest for a decoded frame + * \param [in] surf_mem Pointer to surface memory + * \param [in] surf_info Surface info + */ + void UpdateMd5ForFrame(void *surf_mem, OutputSurfaceInfo *surf_info) { + int i; + uint8_t *hst_ptr = nullptr; + uint64_t output_image_size = surf_info->output_surface_size_in_bytes; + if (surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL || surf_info->mem_type == OUT_SURFACE_MEM_DEV_COPIED) { + if (hst_ptr == nullptr) { + hst_ptr = new uint8_t [output_image_size]; + } + hipError_t hip_status = hipSuccess; + hip_status = hipMemcpyDtoH((void *)hst_ptr, surf_mem, output_image_size); + if (hip_status != hipSuccess) { + std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl; + delete [] hst_ptr; + return; + } + } else + hst_ptr = static_cast<uint8_t *> (surf_mem); + + // Need to convert interleaved planar to stacked planar, assuming 4:2:0 chroma sampling. + uint8_t *stacked_ptr = new uint8_t [output_image_size]; + uint8_t *tmp_hst_ptr = hst_ptr; + int output_stride = surf_info->output_pitch; + tmp_hst_ptr += (surf_info->disp_rect.top * output_stride) + surf_info->disp_rect.left * surf_info->bytes_per_pixel; + uint8_t *tmp_stacked_ptr = stacked_ptr; + int img_width = surf_info->output_width; + int img_height = surf_info->output_height; + // Luma + if (img_width * surf_info->bytes_per_pixel == output_stride && img_height == surf_info->output_vstride) { + memcpy(stacked_ptr, hst_ptr, img_width * surf_info->bytes_per_pixel * img_height); + } else { + for (i = 0; i < img_height; i++) { + memcpy(tmp_stacked_ptr, tmp_hst_ptr, img_width * surf_info->bytes_per_pixel); + tmp_hst_ptr += output_stride; + tmp_stacked_ptr += img_width * surf_info->bytes_per_pixel; + } + } + // Chroma + int img_width_chroma = img_width >> 1; + tmp_hst_ptr = hst_ptr + output_stride * surf_info->output_vstride; + if (surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) { + tmp_hst_ptr += ((surf_info->disp_rect.top >> 1) * output_stride) 
+ (surf_info->disp_rect.left * surf_info->bytes_per_pixel); + } + tmp_stacked_ptr = stacked_ptr + img_width * surf_info->bytes_per_pixel * img_height; // Cb + uint8_t *tmp_stacked_ptr_v = tmp_stacked_ptr + img_width_chroma * surf_info->bytes_per_pixel * surf_info->chroma_height; // Cr + for (i = 0; i < surf_info->chroma_height; i++) { + for ( int j = 0; j < img_width_chroma; j++) { + uint8_t *src_ptr, *dst_ptr; + // Cb + src_ptr = &tmp_hst_ptr[j * surf_info->bytes_per_pixel * 2]; + dst_ptr = &tmp_stacked_ptr[j * surf_info->bytes_per_pixel]; + memcpy(dst_ptr, src_ptr, surf_info->bytes_per_pixel); + // Cr + src_ptr += surf_info->bytes_per_pixel; + dst_ptr = &tmp_stacked_ptr_v[j * surf_info->bytes_per_pixel]; + memcpy(dst_ptr, src_ptr, surf_info->bytes_per_pixel); + } + tmp_hst_ptr += output_stride; + tmp_stacked_ptr += img_width_chroma * surf_info->bytes_per_pixel; + tmp_stacked_ptr_v += img_width_chroma * surf_info->bytes_per_pixel; + } + + int img_size = img_width * surf_info->bytes_per_pixel * (img_height + surf_info->chroma_height); + // For 10 bit, convert from P010 to LSB to match reference decoder output + if (surf_info->bytes_per_pixel == 2) { + uint16_t *ptr = reinterpret_cast<uint16_t *> (stacked_ptr); + for (i = 0; i < img_size / 2; i++) { + ptr[i] = ptr[i] >> 6; + } + } + + av_md5_update(md5_ctx_, stacked_ptr, img_size); + if (hst_ptr && (surf_info->mem_type != OUT_SURFACE_MEM_HOST_COPIED)) { + delete [] hst_ptr; + } + delete [] stacked_ptr; + } + + /*! 
\brief Function to complete MD5 calculation + * \param [out] digest Pointer to the 16 byte message digest + */ + void FinalizeMd5(uint8_t **digest) { + av_md5_final(md5_ctx_, md5_digest_); + av_freep(&md5_ctx_); + *digest = md5_digest_; + } + +private: + struct AVMD5 *md5_ctx_; + uint8_t md5_digest_[16]; +}; \ No newline at end of file diff --git a/utils/rocvideodecode/roc_video_dec.cpp b/utils/rocvideodecode/roc_video_dec.cpp index 066558b52e..9fd45b799d 100644 --- a/utils/rocvideodecode/roc_video_dec.cpp +++ b/utils/rocvideodecode/roc_video_dec.cpp @@ -381,6 +381,8 @@ int RocVideoDecoder::HandleVideoSequence(RocdecVideoFormat *p_video_format) { output_surface_info_.output_height = target_height_; output_surface_info_.output_pitch = surface_stride_; output_surface_info_.output_vstride = (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) ? surface_vstride_ : videoDecodeCreateInfo.target_height; + output_surface_info_.disp_rect = disp_rect_; + output_surface_info_.chroma_height = chroma_height_; output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; output_surface_info_.bytes_per_pixel = byte_per_pixel_; output_surface_info_.surface_format = video_surface_format_; @@ -541,6 +543,8 @@ int RocVideoDecoder::ReconfigureDecoder(RocdecVideoFormat *p_video_format) { output_surface_info_.output_height = target_height_; output_surface_info_.output_pitch = surface_stride_; output_surface_info_.output_vstride = (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) ? 
surface_vstride_ : target_height_; + output_surface_info_.disp_rect = disp_rect_; + output_surface_info_.chroma_height = chroma_height_; output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; output_surface_info_.bytes_per_pixel = byte_per_pixel_; output_surface_info_.surface_format = video_surface_format_; @@ -1011,113 +1015,6 @@ void RocVideoDecoder::ResetSaveFrameToFile() { } } -void RocVideoDecoder::InitMd5() { - md5_ctx_ = av_md5_alloc(); - av_md5_init(md5_ctx_); -} - -void RocVideoDecoder::UpdateMd5ForDataBuffer(void *pDevMem, int rgb_image_size){ - uint8_t *hstPtr = nullptr; - hstPtr = new uint8_t [rgb_image_size]; - hipError_t hip_status = hipSuccess; - hip_status = hipMemcpyDtoH((void *)hstPtr, pDevMem, rgb_image_size); - if (hip_status != hipSuccess) { - std::cout << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl; - delete [] hstPtr; - return; - } - av_md5_update(md5_ctx_, hstPtr, rgb_image_size); - if(hstPtr){ - delete [] hstPtr; - } -} - -void RocVideoDecoder::UpdateMd5ForFrame(void *surf_mem, OutputSurfaceInfo *surf_info) { - int i; - uint8_t *hst_ptr = nullptr; - uint64_t output_image_size = surf_info->output_surface_size_in_bytes; - if (surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL || surf_info->mem_type == OUT_SURFACE_MEM_DEV_COPIED) { - if (hst_ptr == nullptr) { - hst_ptr = new uint8_t [output_image_size]; - } - hipError_t hip_status = hipSuccess; - hip_status = hipMemcpyDtoH((void *)hst_ptr, surf_mem, output_image_size); - if (hip_status != hipSuccess) { - std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl; - delete [] hst_ptr; - return; - } - } else - hst_ptr = static_cast (surf_mem); - - // Need to covert interleaved planar to stacked planar, assuming 4:2:0 chroma sampling. 
- uint8_t *stacked_ptr = new uint8_t [output_image_size]; - - uint8_t *tmp_hst_ptr = hst_ptr; - int output_stride = surf_info->output_pitch; - tmp_hst_ptr += (disp_rect_.top * output_stride) + disp_rect_.left * surf_info->bytes_per_pixel; - uint8_t *tmp_stacked_ptr = stacked_ptr; - int img_width = surf_info->output_width; - int img_height = surf_info->output_height; - // Luma - if (img_width * surf_info->bytes_per_pixel == output_stride && img_height == surf_info->output_vstride) { - memcpy(stacked_ptr, hst_ptr, img_width * surf_info->bytes_per_pixel * img_height); - } else { - for (i = 0; i < img_height; i++) { - memcpy(tmp_stacked_ptr, tmp_hst_ptr, img_width * surf_info->bytes_per_pixel); - tmp_hst_ptr += output_stride; - tmp_stacked_ptr += img_width * surf_info->bytes_per_pixel; - } - } - // Chroma - int img_width_chroma = img_width >> 1; - tmp_hst_ptr = hst_ptr + output_stride * surf_info->output_vstride; - if (surf_info->mem_type == OUT_SURFACE_MEM_DEV_INTERNAL) { - tmp_hst_ptr += ((disp_rect_.top >> 1) * output_stride) + (disp_rect_.left * surf_info->bytes_per_pixel); - } - tmp_stacked_ptr = stacked_ptr + img_width * surf_info->bytes_per_pixel * img_height; // Cb - uint8_t *tmp_stacked_ptr_v = tmp_stacked_ptr + img_width_chroma * surf_info->bytes_per_pixel * chroma_height_; // Cr - for (i = 0; i < chroma_height_; i++) { - for ( int j = 0; j < img_width_chroma; j++) { - uint8_t *src_ptr, *dst_ptr; - // Cb - src_ptr = &tmp_hst_ptr[j * surf_info->bytes_per_pixel * 2]; - dst_ptr = &tmp_stacked_ptr[j * surf_info->bytes_per_pixel]; - memcpy(dst_ptr, src_ptr, surf_info->bytes_per_pixel); - // Cr - src_ptr += surf_info->bytes_per_pixel; - dst_ptr = &tmp_stacked_ptr_v[j * surf_info->bytes_per_pixel]; - memcpy(dst_ptr, src_ptr, surf_info->bytes_per_pixel); - } - tmp_hst_ptr += output_stride; - tmp_stacked_ptr += img_width_chroma * surf_info->bytes_per_pixel; - tmp_stacked_ptr_v += img_width_chroma * surf_info->bytes_per_pixel; - } - - int img_size = img_width * 
surf_info->bytes_per_pixel * (img_height + chroma_height_); - - // For 10 bit, convert from P010 to little endian to match reference decoder output - if (surf_info->bytes_per_pixel == 2) { - uint16_t *ptr = reinterpret_cast (stacked_ptr); - for (i = 0; i < img_size / 2; i++) { - ptr[i] = ptr[i] >> 6; - } - } - - av_md5_update(md5_ctx_, stacked_ptr, img_size); - - if (hst_ptr && (surf_info->mem_type != OUT_SURFACE_MEM_HOST_COPIED)) { - delete [] hst_ptr; - } - delete [] stacked_ptr; -} - -void RocVideoDecoder::FinalizeMd5(uint8_t **digest) { - av_md5_final(md5_ctx_, md5_digest_); - av_freep(&md5_ctx_); - *digest = md5_digest_; -} - void RocVideoDecoder::GetDeviceinfo(std::string &device_name, std::string &gcn_arch_name, int &pci_bus_id, int &pci_domain_id, int &pci_device_id) { device_name = hip_dev_prop_.name; gcn_arch_name = hip_dev_prop_.gcnArchName; diff --git a/utils/rocvideodecode/roc_video_dec.h b/utils/rocvideodecode/roc_video_dec.h index 21546d3ce2..f33791c8e4 100644 --- a/utils/rocvideodecode/roc_video_dec.h +++ b/utils/rocvideodecode/roc_video_dec.h @@ -37,10 +37,6 @@ THE SOFTWARE. 
#include #include #include -extern "C" { -#include "libavutil/md5.h" -#include "libavutil/mem.h" -} #include "rocdecode.h" #include "rocparser.h" @@ -181,16 +177,18 @@ typedef struct DecFrameBuffer_ { typedef struct OutputSurfaceInfoType { - uint32_t output_width; /**< Output width of decoded surface*/ - uint32_t output_height; /**< Output height of decoded surface*/ - uint32_t output_pitch; /**< Output pitch in bytes of luma plane, chroma pitch can be inferred based on chromaFormat*/ - uint32_t output_vstride; /**< Output vertical stride in case of using internal mem pointer **/ - uint32_t bytes_per_pixel; /**< Output BytesPerPixel of decoded image*/ - uint32_t bit_depth; /**< Output BitDepth of the image*/ - uint32_t num_chroma_planes; /**< Output Chroma number of planes*/ - uint64_t output_surface_size_in_bytes; /**< Output Image Size in Bytes; including both luma and chroma planes*/ - rocDecVideoSurfaceFormat surface_format; /**< Chroma format of the decoded image*/ - OutputSurfaceMemoryType mem_type; /**< Output mem_type of the surface*/ + uint32_t output_width; /**< Output width of decoded surface*/ + uint32_t output_height; /**< Output height of decoded surface*/ + uint32_t output_pitch; /**< Output pitch in bytes of luma plane, chroma pitch can be inferred based on chromaFormat*/ + uint32_t output_vstride; /**< Output vertical stride in case of using internal mem pointer **/ + uint32_t chroma_height; /**< Chroma plane height **/ + Rect disp_rect; /**< Display area **/ + uint32_t bytes_per_pixel; /**< Output BytesPerPixel of decoded image*/ + uint32_t bit_depth; /**< Output BitDepth of the image*/ + uint32_t num_chroma_planes; /**< Output Chroma number of planes*/ + uint64_t output_surface_size_in_bytes; /**< Output Image Size in Bytes; including both luma and chroma planes*/ + rocDecVideoSurfaceFormat surface_format; /**< Chroma format of the decoded image*/ + OutputSurfaceMemoryType mem_type; /**< Output mem_type of the surface*/ } OutputSurfaceInfo; 
typedef struct ReconfigParams_t { @@ -371,27 +369,6 @@ class RocVideoDecoder { */ virtual void ResetSaveFrameToFile(); - /** - * @brief Helper function to start MD5 calculation - */ - void InitMd5(); - - void UpdateMd5ForDataBuffer(void *pDevMem, int rgb_image_size); - - /** - * @brief Helper function to dump decoded output surface to file - * - * @param dev_mem - pointer to surface memory - * @param surf_info - surface info - */ - void UpdateMd5ForFrame(void *surf_mem, OutputSurfaceInfo *surf_info); - - /** - * @brief Helper function to complete MD5 calculation - * - * @param [out] digest Pointer to the 16 byte message digest - */ - void FinalizeMd5(uint8_t **digest); /** * @brief Get the Num Of Flushed Frames from video decoder object * @@ -542,8 +519,6 @@ class RocVideoDecoder { Rect crop_rect_ = {}; // user specified region of interest within diplayable area disp_rect_ FILE *fp_sei_ = NULL; FILE *fp_out_ = NULL; - struct AVMD5 *md5_ctx_; - uint8_t md5_digest_[16]; bool is_decoder_reconfigured_ = false; std::string current_output_filename = ""; uint32_t extra_output_file_count_ = 0;