From b51ce09f9bcd7027eee86ca90dc9a0260b477a56 Mon Sep 17 00:00:00 2001 From: jeffqjiangNew <142832361+jeffqjiangNew@users.noreply.github.com> Date: Thu, 19 Oct 2023 14:07:36 -0400 Subject: [PATCH] Jj/hevc decode wip (#18) * rocDecode/HEVC parsing: Implemented correct logic to set active VPS, SPS and PPS: start from slice header and up. Added video image size change detection. Fixed slice_segment_address bits calculation in slice segment header parsing. * rocDecode/HEVC: Changed variable naming to Google style. [ROCm/rocdecode commit: 94ff35e03c0b6d42fdbf3ecf92a76292d3ac0e98] --- projects/rocdecode/src/parser/hevc_parser.cpp | 99 +++++++++++-------- projects/rocdecode/src/parser/hevc_parser.h | 8 +- .../rocdecode/src/parser/roc_video_parser.cpp | 8 +- .../rocdecode/src/parser/roc_video_parser.h | 6 +- 4 files changed, 72 insertions(+), 49 deletions(-) diff --git a/projects/rocdecode/src/parser/hevc_parser.cpp b/projects/rocdecode/src/parser/hevc_parser.cpp index 23070d971c..887c099e78 100644 --- a/projects/rocdecode/src/parser/hevc_parser.cpp +++ b/projects/rocdecode/src/parser/hevc_parser.cpp @@ -23,6 +23,9 @@ THE SOFTWARE. 
#include "hevc_parser.h" HEVCVideoParser::HEVCVideoParser() { + m_active_vps_id_ = -1; + m_active_sps_id_ = -1; + m_active_pps_id_ = -1; b_new_picture_ = false; m_vps_ = NULL; m_sps_ = NULL; @@ -131,9 +134,6 @@ HEVCVideoParser::SliceHeaderData* HEVCVideoParser::AllocSliceHeader() { } ParserResult HEVCVideoParser::Init() { - m_active_vps_ = 0; - m_active_sps_ = 0; - m_active_pps_ = 0; b_new_picture_ = false; m_vps_ = AllocVps(); m_sps_ = AllocSps(); @@ -204,7 +204,7 @@ bool HEVCVideoParser::ParseFrameData(const uint8_t* p_stream, uint32_t frame_dat case NAL_UNIT_CODED_SLICE_BLA_N_LP: case NAL_UNIT_CODED_SLICE_IDR_W_RADL: case NAL_UNIT_CODED_SLICE_IDR_N_LP: - case NAL_UNIT_CODED_SLICE_CRA: + case NAL_UNIT_CODED_SLICE_CRA_NUT: case NAL_UNIT_CODED_SLICE_RADL_N: case NAL_UNIT_CODED_SLICE_RADL_R: case NAL_UNIT_CODED_SLICE_RASL_N: @@ -288,6 +288,7 @@ void HEVCVideoParser::ParsePtl(H265ProfileTierLevel *ptl, bool profile_present_f ptl->general_frame_only_constraint_flag = Parser::GetBit(nalu, offset); //ReadBits is limited to 32 offset += 44; + // Todo: add constraint flags parsing. 
} ptl->general_level_idc = Parser::ReadBits(nalu, offset, 8); @@ -657,7 +658,7 @@ void HEVCVideoParser::ParseVps(uint8_t *nalu, size_t size) { void HEVCVideoParser::ParseSps(uint8_t *nalu, size_t size) { size_t offset = 0; - m_active_vps_ = Parser::ReadBits(nalu, offset, 4); + uint32_t vps_id = Parser::ReadBits(nalu, offset, 4); uint32_t max_sub_layer_minus1 = Parser::ReadBits(nalu, offset, 3); uint32_t sps_temporal_id_nesting_flag = Parser::GetBit(nalu, offset); H265ProfileTierLevel ptl; @@ -665,7 +666,7 @@ void HEVCVideoParser::ParseSps(uint8_t *nalu, size_t size) { ParsePtl(&ptl, true, max_sub_layer_minus1, nalu, size, offset); uint32_t sps_id = Parser::ExpGolomb::ReadUe(nalu, offset); memset(&m_sps_[sps_id], 0, sizeof(m_sps_[sps_id])); - m_sps_[sps_id].sps_video_parameter_set_id = m_active_vps_; + m_sps_[sps_id].sps_video_parameter_set_id = vps_id; m_sps_[sps_id].sps_max_sub_layers_minus1 = max_sub_layer_minus1; m_sps_[sps_id].sps_temporal_id_nesting_flag = sps_temporal_id_nesting_flag; memcpy (&m_sps_[sps_id].profile_tier_level, &ptl, sizeof(ptl)); @@ -769,9 +770,7 @@ void HEVCVideoParser::ParsePps(uint8_t *nalu, size_t size) { memset(&m_pps_[pps_id], 0, sizeof(m_pps_[pps_id])); m_pps_[pps_id].pps_pic_parameter_set_id = pps_id; - m_active_sps_ = Parser::ExpGolomb::ReadUe(nalu, offset); - - m_pps_[pps_id].pps_seq_parameter_set_id = m_active_sps_; + m_pps_[pps_id].pps_seq_parameter_set_id = Parser::ExpGolomb::ReadUe(nalu, offset); m_pps_[pps_id].dependent_slice_segments_enabled_flag = Parser::GetBit(nalu, offset); m_pps_[pps_id].output_flag_present_flag = Parser::GetBit(nalu, offset); m_pps_[pps_id].num_extra_slice_header_bits = Parser::ReadBits(nalu, offset, 3); @@ -833,44 +832,58 @@ void HEVCVideoParser::ParsePps(uint8_t *nalu, size_t size) { } bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, size_t size) { + PpsData *pps_ptr = NULL; + SpsData *sps_ptr = NULL; size_t offset = 0; SliceHeaderData temp_sh; memset(&temp_sh, 0, 
sizeof(temp_sh)); temp_sh.first_slice_segment_in_pic_flag = m_sh_->first_slice_segment_in_pic_flag = Parser::GetBit(nalu, offset); - if (nal_unit_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL - || nal_unit_type == NAL_UNIT_CODED_SLICE_IDR_N_LP - || nal_unit_type == NAL_UNIT_CODED_SLICE_BLA_N_LP - || nal_unit_type == NAL_UNIT_CODED_SLICE_BLA_W_RADL - || nal_unit_type == NAL_UNIT_CODED_SLICE_BLA_W_LP - || nal_unit_type == NAL_UNIT_CODED_SLICE_CRA - ) { + if (nal_unit_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nal_unit_type <= NAL_UNIT_RESERVED_IRAP_VCL23) { temp_sh.no_output_of_prior_pics_flag = m_sh_->no_output_of_prior_pics_flag = Parser::GetBit(nalu, offset); } - m_active_pps_ = Parser::ExpGolomb::ReadUe(nalu, offset); - temp_sh.slice_pic_parameter_set_id = m_sh_->slice_pic_parameter_set_id = m_active_pps_; + // Set active VPS, SPS and PPS for the current slice + m_active_pps_id_ = Parser::ExpGolomb::ReadUe(nalu, offset); + temp_sh.slice_pic_parameter_set_id = m_sh_->slice_pic_parameter_set_id = m_active_pps_id_; + pps_ptr = &m_pps_[m_active_pps_id_]; + m_active_sps_id_ = pps_ptr->pps_seq_parameter_set_id; + sps_ptr = &m_sps_[m_active_sps_id_]; + m_active_vps_id_ = sps_ptr->sps_video_parameter_set_id; + + // Check video dimension change + if ( pic_width_ != sps_ptr->pic_width_in_luma_samples || pic_height_ != sps_ptr->pic_height_in_luma_samples) + { + pic_width_ = sps_ptr->pic_width_in_luma_samples; + pic_height_ = sps_ptr->pic_height_in_luma_samples; + pic_dimension_changed_ = true; // Note: clear this flag after the actions with size change are taken. 
+ } + if (!m_sh_->first_slice_segment_in_pic_flag) { - if (m_pps_[m_active_pps_].dependent_slice_segments_enabled_flag) { + if (pps_ptr->dependent_slice_segments_enabled_flag) { temp_sh.dependent_slice_segment_flag = m_sh_->dependent_slice_segment_flag = Parser::GetBit(nalu, offset); } - int num_ctus = 0; - int max_parts = (1 << (m_sps_[m_active_sps_].max_cu_depth << 1)); - int bits_slice_segment_address = 0; - while(num_ctus > (1 << bits_slice_segment_address)) { - bits_slice_segment_address++; - } - temp_sh.slice_segment_address = m_sh_->slice_segment_address = Parser::ReadBits(nalu, offset, bits_slice_segment_address); + + int min_cb_log2_size_y = sps_ptr->log2_min_luma_coding_block_size_minus3 + 3; // MinCbLog2SizeY + int ctb_log2_size_y = min_cb_log2_size_y + sps_ptr->log2_diff_max_min_luma_coding_block_size; // CtbLog2SizeY + int ctb_size_y = 1 << ctb_log2_size_y; // CtbSizeY + int pic_width_in_ctbs_y = (sps_ptr->pic_width_in_luma_samples + ctb_size_y - 1) / ctb_size_y; // PicWidthInCtbsY + int pic_height_in_ctbs_y = (sps_ptr->pic_height_in_luma_samples + ctb_size_y - 1) / ctb_size_y; // PicHeightInCtbsY + int pic_size_in_ctbs_y = pic_width_in_ctbs_y * pic_height_in_ctbs_y; // PicSizeInCtbsY + int bits_slice_segment_address = (int)ceilf(log2f((float)pic_size_in_ctbs_y)); + + temp_sh.slice_segment_address = m_sh_->slice_segment_address = Parser::ReadBits(nalu, offset, bits_slice_segment_address); } + if (!m_sh_->dependent_slice_segment_flag) { - for (int i = 0; i < m_pps_[m_active_pps_].num_extra_slice_header_bits; i++) { + for (int i = 0; i < pps_ptr->num_extra_slice_header_bits; i++) { m_sh_->slice_reserved_flag[i] = Parser::GetBit(nalu, offset); } m_sh_->slice_type = Parser::ExpGolomb::ReadUe(nalu, offset); - if (m_pps_[m_active_pps_].output_flag_present_flag) { + if (pps_ptr->output_flag_present_flag) { m_sh_->pic_output_flag = Parser::GetBit(nalu, offset); } - if (m_sps_[m_active_sps_].separate_colour_plane_flag) { + if 
(sps_ptr->separate_colour_plane_flag) { m_sh_->colour_plane_id = Parser::ReadBits(nalu, offset, 2); } if (nal_unit_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nal_unit_type == NAL_UNIT_CODED_SLICE_IDR_N_LP) { @@ -883,13 +896,13 @@ bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, si } else { //length of slice_pic_order_cnt_lsb is log2_max_pic_order_cnt_lsb_minus4 + 4 bits. - m_sh_->slice_pic_order_cnt_lsb = Parser::ReadBits(nalu, offset, (m_sps_[m_active_sps_].log2_max_pic_order_cnt_lsb_minus4 + 4)); + m_sh_->slice_pic_order_cnt_lsb = Parser::ReadBits(nalu, offset, (sps_ptr->log2_max_pic_order_cnt_lsb_minus4 + 4)); //get POC m_slice_->curr_poc_lsb = m_sh_->slice_pic_order_cnt_lsb; - m_slice_->max_poc_lsb = 1 << (m_sps_[m_active_sps_].log2_max_pic_order_cnt_lsb_minus4 + 4); + m_slice_->max_poc_lsb = 1 << (sps_ptr->log2_max_pic_order_cnt_lsb_minus4 + 4); - if (nal_unit_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nal_unit_type < NAL_UNIT_CODED_SLICE_CRA) { + if (nal_unit_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nal_unit_type < NAL_UNIT_CODED_SLICE_CRA_NUT) { m_slice_->curr_poc_msb = 0; } else { @@ -909,11 +922,11 @@ bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, si m_sh_->short_term_ref_pic_set_sps_flag = Parser::GetBit(nalu, offset); int32_t pos = offset; if (!m_sh_->short_term_ref_pic_set_sps_flag) { - ParseShortTermRefPicSet(&m_sh_->st_rps, m_sps_[m_active_sps_].num_short_term_ref_pic_sets, m_sps_[m_active_sps_].num_short_term_ref_pic_sets, m_sps_[m_active_sps_].st_rps, nalu, size, offset); + ParseShortTermRefPicSet(&m_sh_->st_rps, sps_ptr->num_short_term_ref_pic_sets, sps_ptr->num_short_term_ref_pic_sets, sps_ptr->st_rps, nalu, size, offset); } - else if (m_sps_[m_active_sps_].num_short_term_ref_pic_sets > 1) { + else if (sps_ptr->num_short_term_ref_pic_sets > 1) { int num_bits = 0; - while ((1 << num_bits) < m_sps_[m_active_sps_].num_short_term_ref_pic_sets) { + while ((1 << num_bits) < 
sps_ptr->num_short_term_ref_pic_sets) { num_bits++; } if (num_bits > 0) { @@ -922,29 +935,29 @@ bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, si } m_sh_->short_term_ref_pic_set_size = offset - pos; - if (m_sps_[m_active_sps_].long_term_ref_pics_present_flag) { - if (m_sps_[m_active_sps_].num_long_term_ref_pics_sps > 0) { + if (sps_ptr->long_term_ref_pics_present_flag) { + if (sps_ptr->num_long_term_ref_pics_sps > 0) { m_sh_->num_long_term_sps = Parser::ExpGolomb::ReadUe(nalu, offset); } m_sh_->num_long_term_pics = Parser::ExpGolomb::ReadUe(nalu, offset); int bits_for_ltrp_in_sps = 0; - while (m_sps_[m_active_sps_].num_long_term_ref_pics_sps > (1 << bits_for_ltrp_in_sps)) { + while (sps_ptr->num_long_term_ref_pics_sps > (1 << bits_for_ltrp_in_sps)) { bits_for_ltrp_in_sps++; } m_sh_->lt_rps.num_of_pics = m_sh_->num_long_term_sps + m_sh_->num_long_term_pics; for (int i = 0; i < (m_sh_->num_long_term_sps + m_sh_->num_long_term_pics); i++) { if (i < m_sh_->num_long_term_sps) { - if (m_sps_[m_active_sps_].num_long_term_ref_pics_sps > 1) { + if (sps_ptr->num_long_term_ref_pics_sps > 1) { if( bits_for_ltrp_in_sps > 0) { m_sh_->lt_idx_sps[i] = Parser::ReadBits(nalu, offset, bits_for_ltrp_in_sps); - m_sh_->lt_rps.pocs[i] = m_sps_[m_active_sps_].lt_rps.pocs[m_sh_->lt_idx_sps[i]]; - m_sh_->lt_rps.used_by_curr_pic[i] = m_sps_[m_active_sps_].lt_rps.used_by_curr_pic[m_sh_->lt_idx_sps[i]]; + m_sh_->lt_rps.pocs[i] = sps_ptr->lt_rps.pocs[m_sh_->lt_idx_sps[i]]; + m_sh_->lt_rps.used_by_curr_pic[i] = sps_ptr->lt_rps.used_by_curr_pic[m_sh_->lt_idx_sps[i]]; } } } else { - m_sh_->poc_lsb_lt[i] = Parser::ReadBits(nalu, offset, (m_sps_[m_active_sps_].log2_max_pic_order_cnt_lsb_minus4 + 4)); + m_sh_->poc_lsb_lt[i] = Parser::ReadBits(nalu, offset, (sps_ptr->log2_max_pic_order_cnt_lsb_minus4 + 4)); m_sh_->used_by_curr_pic_lt_flag[i] = Parser::GetBit(nalu, offset); m_sh_->lt_rps.pocs[i] = m_sh_->poc_lsb_lt[i]; m_sh_->lt_rps.used_by_curr_pic[i] = 
m_sh_->used_by_curr_pic_lt_flag[i]; @@ -955,7 +968,7 @@ bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, si } } } - if (m_sps_[m_active_sps_].sps_temporal_mvp_enabled_flag) { + if (sps_ptr->sps_temporal_mvp_enabled_flag) { m_sh_->slice_temporal_mvp_enabled_flag = Parser::GetBit(nalu, offset); } } diff --git a/projects/rocdecode/src/parser/hevc_parser.h b/projects/rocdecode/src/parser/hevc_parser.h index b14364fa82..89fecebbe0 100644 --- a/projects/rocdecode/src/parser/hevc_parser.h +++ b/projects/rocdecode/src/parser/hevc_parser.h @@ -93,7 +93,7 @@ protected: NAL_UNIT_CODED_SLICE_BLA_N_LP, // 18 NAL_UNIT_CODED_SLICE_IDR_W_RADL, // 19 NAL_UNIT_CODED_SLICE_IDR_N_LP, // 20 - NAL_UNIT_CODED_SLICE_CRA, // 21 + NAL_UNIT_CODED_SLICE_CRA_NUT, // 21 NAL_UNIT_RESERVED_IRAP_VCL22, NAL_UNIT_RESERVED_IRAP_VCL23, @@ -568,9 +568,9 @@ protected: size_t EBSPtoRBSP(uint8_t *stream_buffer, size_t begin_bytepos, size_t end_bytepos); // Data members of HEVC class - uint32_t m_active_vps_; - uint32_t m_active_sps_; - uint32_t m_active_pps_; + int32_t m_active_vps_id_; + int32_t m_active_sps_id_; + int32_t m_active_pps_id_; VpsData* m_vps_; SpsData* m_sps_; PpsData* m_pps_; diff --git a/projects/rocdecode/src/parser/roc_video_parser.cpp b/projects/rocdecode/src/parser/roc_video_parser.cpp index ea869935ae..8f588111ab 100644 --- a/projects/rocdecode/src/parser/roc_video_parser.cpp +++ b/projects/rocdecode/src/parser/roc_video_parser.cpp @@ -22,6 +22,12 @@ THE SOFTWARE. 
#include "roc_video_parser.h" +RocVideoParser::RocVideoParser() { + pic_width_ = 0; + pic_height_ = 0; + pic_dimension_changed_ = false; +} + /** * @brief Initializes any parser related stuff for all parsers * @@ -39,6 +45,6 @@ rocDecStatus RocVideoParser::Initialize(RocdecParserParams *pParams) { pfn_get_sei_message_cb_ = pParams->pfnGetSEIMsg; /**< Called when all SEI messages are parsed for particular frame */ parser_params_ = pParams; - + return ROCDEC_SUCCESS; } diff --git a/projects/rocdecode/src/parser/roc_video_parser.h b/projects/rocdecode/src/parser/roc_video_parser.h index 5a190aa277..64a123eb1f 100644 --- a/projects/rocdecode/src/parser/roc_video_parser.h +++ b/projects/rocdecode/src/parser/roc_video_parser.h @@ -32,7 +32,7 @@ THE SOFTWARE. */ class RocVideoParser { public: - RocVideoParser() {}; // default constructor + RocVideoParser(); // default constructor RocVideoParser(RocdecParserParams *pParams) : parser_params_(pParams) {}; virtual ~RocVideoParser() = default ; virtual void SetParserParams(RocdecParserParams *pParams) { parser_params_ = pParams; }; @@ -50,6 +50,10 @@ protected: PFNVIDDECODECALLBACK pfn_decode_picture_cb_; /**< Called when a picture is ready to be decoded (decode order) */ PFNVIDDISPLAYCALLBACK pfn_display_picture_cb_; /**< Called whenever a picture is ready to be displayed (display order) */ PFNVIDSEIMSGCALLBACK pfn_get_sei_message_cb_; /**< Called when all SEI messages are parsed for particular frame */ + + uint32_t pic_width_; + uint32_t pic_height_; + bool pic_dimension_changed_; }; enum ParserSeekOrigin {