From 5a4735ea8794c76df40e66cb2535fd2d6f67bc07 Mon Sep 17 00:00:00 2001
From: jeffqjiangNew <142832361+jeffqjiangNew@users.noreply.github.com>
Date: Fri, 19 Apr 2024 16:51:41 -0400
Subject: [PATCH] * rocDecode/AVC: Added support for gaps in frame_num. (#319)
- Fixed decode failure of conformance stream MR3_TANDBERG_B.264.
[ROCm/rocdecode commit: a8bcc1e62f181c839d3cadd6ef86e595eb438955]
---
projects/rocdecode/src/parser/avc_defines.h | 2 +-
projects/rocdecode/src/parser/avc_parser.cpp | 217 ++++++++++++++++++-
projects/rocdecode/src/parser/avc_parser.h | 6 +
3 files changed, 220 insertions(+), 5 deletions(-)
diff --git a/projects/rocdecode/src/parser/avc_defines.h b/projects/rocdecode/src/parser/avc_defines.h
index 5bb13eeb3a..7465fa9c8d 100644
--- a/projects/rocdecode/src/parser/avc_defines.h
+++ b/projects/rocdecode/src/parser/avc_defines.h
@@ -33,7 +33,7 @@ THE SOFTWARE.
#define AVC_MAX_REF_FRAME_NUM 16
#define AVC_MAX_REF_PICTURE_NUM 32
-#define AVC_MAX_DPB_FRAMES 16
+#define AVC_MAX_DPB_FRAMES 18
#define AVC_MACRO_BLOCK_SIZE 16
diff --git a/projects/rocdecode/src/parser/avc_parser.cpp b/projects/rocdecode/src/parser/avc_parser.cpp
index e3858c586e..b4285e3a18 100644
--- a/projects/rocdecode/src/parser/avc_parser.cpp
+++ b/projects/rocdecode/src/parser/avc_parser.cpp
@@ -29,6 +29,7 @@ AvcVideoParser::AvcVideoParser() {
prev_top_field_order_cnt_ = 0;
prev_frame_num_offset_ = 0;
prev_frame_num_ = 0;
+ prev_ref_frame_num_ = 0;
prev_has_mmco_5_ = 0;
curr_has_mmco_5_ = 0;
prev_ref_pic_bottom_field_ = 0;
@@ -80,6 +81,13 @@ rocDecStatus AvcVideoParser::ParseVideoData(RocdecSourceDataPacket *p_data) {
return ROCDEC_SUCCESS;
}
+ // Output decoded pictures from DPB if any are ready in case of frame_num gaps.
+ if (pfn_display_picture_cb_ && dpb_buffer_.num_output_pics > 0) {
+ if (OutputDecodedPictures() != PARSER_OK) {
+ return ROCDEC_RUNTIME_ERROR;
+ }
+ }
+
// Decode the picture
if (SendPicForDecode() != PARSER_OK) {
ERR(STR("Failed to decode!"));
@@ -190,6 +198,8 @@ ParserResult AvcVideoParser::ParsePictureData(const uint8_t *p_stream, uint32_t
// This is to consider the possibility of non-slice NAL units between slices.
pic_stream_data_size_ = pic_data_size - curr_start_code_offset_;
+ // Decode gaps in frame_num if needed (8.2.5.2)
+ DecodeFrameNumGaps();
// Set current picture properties
CalculateCurrPoc(); // 8.2.1
@@ -475,7 +485,7 @@ ParserResult AvcVideoParser::SendPicForDecode() {
}
}
- for (i = buf_index; i < AVC_MAX_DPB_FRAMES; i++) {
+ for (i = buf_index; i < 16; i++) {
p_pic_param->ref_frames[i].pic_idx = 0xFF;
p_pic_param->ref_frames[i].frame_idx = 0;
p_pic_param->ref_frames[i].flags = RocdecAvcPicture_FLAGS_INVALID;
@@ -1137,7 +1147,7 @@ ParserResult AvcVideoParser::ParseSliceHeader(uint8_t *p_stream, size_t stream_s
active_sps_id_ = p_pps->seq_parameter_set_id;
p_sps = &sps_list_[active_sps_id_];
// Re-set DPB size.
- dpb_buffer_.dpb_size = p_sps->max_num_ref_frames + 2;
+ dpb_buffer_.dpb_size = p_sps->max_num_ref_frames + 3;
dpb_buffer_.dpb_size = dpb_buffer_.dpb_size > AVC_MAX_DPB_FRAMES ? AVC_MAX_DPB_FRAMES : dpb_buffer_.dpb_size;
new_sps_activated_ = true; // Note: clear this flag after the actions are taken.
}
@@ -1151,7 +1161,7 @@ ParserResult AvcVideoParser::ParseSliceHeader(uint8_t *p_stream, size_t stream_s
pic_height_ = curr_pic_height;
// Take care of the case where a new SPS replaces the old SPS with the same id but with different dimensions
// Re-set DPB size.
- dpb_buffer_.dpb_size = p_sps->max_num_ref_frames + 2;
+ dpb_buffer_.dpb_size = p_sps->max_num_ref_frames + 3;
dpb_buffer_.dpb_size = dpb_buffer_.dpb_size > AVC_MAX_DPB_FRAMES ? AVC_MAX_DPB_FRAMES : dpb_buffer_.dpb_size;
new_sps_activated_ = true; // Note: clear this flag after the actions are taken.
}
@@ -1736,6 +1746,204 @@ void AvcVideoParser::CalculateCurrPoc() {
}
}
+// 8.2.5.2 Decoding process for gaps in frame_num. Does nothing unless the active SPS sets gaps_in_frame_num_value_allowed_flag. For every frame_num skipped between prev_ref_frame_num_ and the current slice's frame_num, a zero-initialized "non-existing" short-term reference picture is placed in dpb_buffer_. Returns PARSER_FAIL when no short-term reference can be released, BumpPicFromDpb() fails, or no free DPB slot is found; PARSER_OK otherwise. NOTE(review): the call added in ParsePictureData does not check this return value.
+ParserResult AvcVideoParser::DecodeFrameNumGaps() {
+ AvcSeqParameterSet *p_sps = &sps_list_[active_sps_id_];
+ if (p_sps->gaps_in_frame_num_value_allowed_flag == 0) {
+ return PARSER_OK; // gaps are not allowed by the active SPS: nothing to infer
+ }
+
+ AvcSliceHeader *p_slice_header = &slice_info_list_[0].slice_header;
+ int max_frame_num = 1 << (p_sps->log2_max_frame_num_minus4 + 4); // MaxFrameNum
+ int i; // index shared by the DPB scans below; the POC type 1 loops declare their own i that shadows this one
+
+ if (slice_nal_unit_header_.nal_unit_type == kAvcNalTypeSlice_IDR) {
+ prev_ref_frame_num_ = 0; // an IDR slice restarts frame_num tracking
+ } else if ((p_slice_header->frame_num != prev_ref_frame_num_) && (p_slice_header->frame_num != ((prev_ref_frame_num_ + 1) % max_frame_num))) { // gap: frame_num is neither PrevRefFrameNum nor its successor modulo MaxFrameNum
+ int unused_short_term_frame_num = (prev_ref_frame_num_ + 1) % max_frame_num; // UnusedShortTermFrameNum: first skipped value
+ while (unused_short_term_frame_num != p_slice_header->frame_num) {
+ AvcPicture non_existing_pic = {0}; // every field not assigned below keeps its zero value
+ non_existing_pic.frame_num = unused_short_term_frame_num;
+ non_existing_pic.is_reference = kUsedForShortTerm;
+ non_existing_pic.pic_output_flag = 0; // inferred frames are flagged as not for output
+
+ // Calculate the POC of the non-existing frame; the derivation depends on pic_order_cnt_type.
+ int max_pic_order_cnt_lsb = 1 << (p_sps->log2_max_pic_order_cnt_lsb_minus4 + 4); // MaxPicOrderCntLsb. NOTE(review): never read in this function
+ int frame_num_offset; // FrameNumOffset; only assigned in the type 1 and type 2 branches
+ if (p_sps->pic_order_cnt_type == 0) {
+ // The -1 tries to keep the POC of the non-existing reference frame from equalling that of an existing reference.
+ // This is a heuristic; a more robust derivation still needs to be implemented.
+ non_existing_pic.top_field_order_cnt = prev_top_field_order_cnt_ - 1;
+ non_existing_pic.bottom_field_order_cnt = non_existing_pic.top_field_order_cnt + p_slice_header->delta_pic_order_cnt_bottom - 1;
+
+ non_existing_pic.pic_order_cnt = non_existing_pic.top_field_order_cnt <= non_existing_pic.bottom_field_order_cnt ? non_existing_pic.top_field_order_cnt : non_existing_pic.bottom_field_order_cnt;
+ } else if (p_sps->pic_order_cnt_type == 1) { // NOTE(review): this branch uses the current slice's frame_num, nal_ref_idc, field flags and delta_pic_order_cnt rather than non_existing_pic.frame_num (which the type 2 branch uses), and never assigns non_existing_pic.pic_order_cnt - confirm this is intended
+ int abs_frame_num; // absFrameNum
+ if (prev_has_mmco_5_) {
+ prev_frame_num_offset_ = 0;
+ prev_frame_num_ = 0;
+ }
+ if (prev_frame_num_ > p_slice_header->frame_num) {
+ frame_num_offset = prev_frame_num_offset_ + max_frame_num; // frame_num wrapped around
+ } else {
+ frame_num_offset = prev_frame_num_offset_;
+ }
+
+ if (p_sps->num_ref_frames_in_pic_order_cnt_cycle) {
+ abs_frame_num = frame_num_offset + p_slice_header->frame_num;
+ } else {
+ abs_frame_num = 0;
+ }
+ if ((!slice_nal_unit_header_.nal_ref_idc) && abs_frame_num > 0) {
+ abs_frame_num--;
+ }
+
+ int expected_delta_per_pic_order_cnt_cycle = 0; // ExpectedDeltaPerPicOrderCntCycle
+ if (p_sps->num_ref_frames_in_pic_order_cnt_cycle) {
+ for (int i = 0; i < p_sps->num_ref_frames_in_pic_order_cnt_cycle; i++) { // this i shadows the function-scope i
+ expected_delta_per_pic_order_cnt_cycle += p_sps->offset_for_ref_frame[i];
+ }
+ }
+
+ int expected_pic_order_cnt; // expectedPicOrderCnt
+ if( abs_frame_num > 0 ) { // abs_frame_num can only be positive when num_ref_frames_in_pic_order_cnt_cycle is non-zero, so the divisions below are safe
+ int pic_order_cnt_cycle_cnt = (abs_frame_num - 1) / p_sps->num_ref_frames_in_pic_order_cnt_cycle; // picOrderCntCycleCnt
+ int frame_num_in_pic_order_cnt_cycle = (abs_frame_num - 1) % p_sps->num_ref_frames_in_pic_order_cnt_cycle; // frameNumInPicOrderCntCycle
+ expected_pic_order_cnt = pic_order_cnt_cycle_cnt * expected_delta_per_pic_order_cnt_cycle;
+ for (int i = 0; i <= frame_num_in_pic_order_cnt_cycle; i++) { // this i shadows the function-scope i
+ expected_pic_order_cnt += p_sps->offset_for_ref_frame[i];
+ }
+ } else {
+ expected_pic_order_cnt = 0;
+ }
+ if (!slice_nal_unit_header_.nal_ref_idc ) {
+ expected_pic_order_cnt += p_sps->offset_for_non_ref_pic;
+ }
+
+ if (!p_slice_header->field_pic_flag) { // frame: both field order counts are set
+ non_existing_pic.top_field_order_cnt = expected_pic_order_cnt + p_slice_header->delta_pic_order_cnt[0];
+ non_existing_pic.bottom_field_order_cnt = non_existing_pic.top_field_order_cnt + p_sps->offset_for_top_to_bottom_field + p_slice_header->delta_pic_order_cnt[1];
+ } else if (p_slice_header->bottom_field_flag) { // bottom field: only the bottom count is set
+ non_existing_pic.bottom_field_order_cnt = expected_pic_order_cnt + p_sps->offset_for_top_to_bottom_field + p_slice_header->delta_pic_order_cnt[0];
+ } else { // top field: only the top count is set
+ non_existing_pic.top_field_order_cnt = expected_pic_order_cnt + p_slice_header->delta_pic_order_cnt[0];
+ }
+
+ prev_frame_num_ = p_slice_header->frame_num; // records the current slice's frame_num, not the inferred one
+ prev_frame_num_offset_ = frame_num_offset;
+ } else if (p_sps->pic_order_cnt_type == 2) {
+ if (prev_has_mmco_5_) {
+ prev_frame_num_offset_ = 0;
+ }
+ if (prev_frame_num_ > non_existing_pic.frame_num) {
+ frame_num_offset = prev_frame_num_offset_ + max_frame_num; // frame_num wrapped around
+ } else {
+ frame_num_offset = prev_frame_num_offset_;
+ }
+
+ int temp_pic_order_cnt; // tempPicOrderCnt
+ // nal_ref_idc is inferred to be non-zero for the non-existing frame
+ temp_pic_order_cnt = 2 * (frame_num_offset + non_existing_pic.frame_num);
+ // field_pic_flag is inferred to be 0, so both fields receive the same count
+ non_existing_pic.top_field_order_cnt = temp_pic_order_cnt;
+ non_existing_pic.bottom_field_order_cnt = temp_pic_order_cnt;
+ non_existing_pic.pic_order_cnt = non_existing_pic.top_field_order_cnt <= non_existing_pic.bottom_field_order_cnt ? non_existing_pic.top_field_order_cnt : non_existing_pic.bottom_field_order_cnt;
+
+ prev_frame_num_ = non_existing_pic.frame_num;
+ prev_frame_num_offset_ = frame_num_offset;
+ }
+
+ // Decode picture numbers (8.2.4.1): refresh frame_num_wrap/pic_num of short-term and long_term_pic_num of long-term DPB entries relative to the non-existing frame.
+ for (i = 0; i < dpb_buffer_.dpb_size; i++) {
+ AvcPicture *p_ref_pic = &dpb_buffer_.frame_buffer_list[i];
+ if (p_ref_pic->is_reference == kUsedForShortTerm) {
+ // Eq. 8-27
+ if (p_ref_pic->frame_num > non_existing_pic.frame_num) {
+ p_ref_pic->frame_num_wrap = p_ref_pic->frame_num - max_frame_num;
+ } else {
+ p_ref_pic->frame_num_wrap = p_ref_pic->frame_num;
+ }
+
+ if (non_existing_pic.pic_structure == kFrame) { // NOTE(review): pic_structure is never assigned after the zero-initialization above, so this relies on the value of kFrame - confirm
+ p_ref_pic->pic_num = p_ref_pic->frame_num_wrap; // Eq. 8-28
+ } else if (((non_existing_pic.pic_structure == kTopField) && (p_ref_pic->pic_structure == kTopField)) || ((non_existing_pic.pic_structure == kBottomField) && (p_ref_pic->pic_structure == kBottomField))) {
+ p_ref_pic->pic_num = 2 * p_ref_pic->frame_num_wrap + 1; // Eq. 8-30
+ } else {
+ p_ref_pic->pic_num = 2 * p_ref_pic->frame_num_wrap; // Eq. 8-31
+ }
+ } else if (p_ref_pic->is_reference == kUsedForLongTerm) {
+ if (non_existing_pic.pic_structure == kFrame) {
+ p_ref_pic->long_term_pic_num = p_ref_pic->long_term_frame_idx; // Eq. 8-29
+ } else if (((non_existing_pic.pic_structure == kTopField) && (p_ref_pic->pic_structure == kTopField)) || ((non_existing_pic.pic_structure == kBottomField) && (p_ref_pic->pic_structure == kBottomField))) {
+ p_ref_pic->long_term_pic_num = 2 * p_ref_pic->long_term_frame_idx + 1; // Eq. 8-32
+ } else {
+ p_ref_pic->long_term_pic_num = 2 * p_ref_pic->long_term_frame_idx; // Eq. 8-33
+ }
+ }
+ }
+
+ // Sliding window decoded reference picture marking (8.2.5.3): when the reference count equals max_num_ref_frames, release the short-term reference with the smallest frame_num_wrap.
+ if (dpb_buffer_.num_short_term + dpb_buffer_.num_long_term == p_sps->max_num_ref_frames) {
+ int32_t min_frame_num_wrap = 0xFFFFFF; // more than the largest possible value of FrameNumWrap (2 ^ 16)
+ int min_index = AVC_MAX_DPB_FRAMES; // sentinel: only replaced when a short-term reference is found
+ for (i = 0; i < dpb_buffer_.dpb_size; i++) {
+ if (dpb_buffer_.frame_buffer_list[i].is_reference == kUsedForShortTerm) {
+ if (dpb_buffer_.frame_buffer_list[i].frame_num_wrap < min_frame_num_wrap) {
+ min_frame_num_wrap = dpb_buffer_.frame_buffer_list[i].frame_num_wrap;
+ min_index = i;
+ }
+ }
+ }
+ if (min_index < dpb_buffer_.dpb_size) {
+ dpb_buffer_.frame_buffer_list[min_index].is_reference = kUnusedForReference; // only the reference marking changes; use_status and dpb_fullness are left untouched here
+ } else {
+ ERR("Could not find any short term ref picture.");
+ return PARSER_FAIL;
+ }
+ dpb_buffer_.num_short_term--;
+ }
+
+ // Insert the non-existing short-term reference picture into the DPB, bumping a picture out first when the DPB is full.
+ if (dpb_buffer_.dpb_fullness == dpb_buffer_.dpb_size) {
+ if (BumpPicFromDpb() != PARSER_OK) {
+ return PARSER_FAIL;
+ }
+ }
+
+ for (i = 0; i < dpb_buffer_.dpb_size; i++) {
+ if (dpb_buffer_.frame_buffer_list[i].use_status == 0) { // use_status 0 is treated as a free slot
+ break;
+ }
+ }
+ if (i < dpb_buffer_.dpb_size) {
+ non_existing_pic.pic_idx = dpb_buffer_.frame_buffer_list[i].pic_idx; // keep the slot's pic_idx across the struct copy below
+ non_existing_pic.use_status = 3; // NOTE(review): presumably marks the whole frame as in use - confirm against the use_status definition
+ dpb_buffer_.frame_buffer_list[i] = non_existing_pic;
+ dpb_buffer_.dpb_fullness++;
+ dpb_buffer_.num_short_term++;
+ } else {
+ ERR("Could not find any free frame buffer in DPB.");
+ return PARSER_FAIL;
+ }
+
+ // Update prev_ref_frame_num_ to the frame_num of the last "non-existing" reference frame inferred
+ // by the decoding process for gaps in frame_num specified in clause 8.2.5.2.
+ prev_ref_frame_num_ = unused_short_term_frame_num;
+
+ unused_short_term_frame_num = (unused_short_term_frame_num + 1) % max_frame_num; // advance to the next skipped frame_num, wrapping at MaxFrameNum
+ }
+ if (slice_nal_unit_header_.nal_ref_idc) { // gap filled: a reference picture becomes the new PrevRefFrameNum
+ prev_ref_frame_num_ = p_slice_header->frame_num;
+ }
+ } else { // no gap detected
+ if (slice_nal_unit_header_.nal_ref_idc) {
+ prev_ref_frame_num_ = p_slice_header->frame_num;
+ }
+ }
+
+ return PARSER_OK;
+}
+
// 8.2.4
static inline int ComparePicNumDesc(const void *p_pic_info_1, const void *p_pic_info_2) {
int pic_num_1 = ((AvcVideoParser::AvcPicture*)p_pic_info_1)->pic_num;
@@ -2276,6 +2484,7 @@ ParserResult AvcVideoParser::MarkDecodedRefPics() {
curr_pic_.frame_num = 0;
curr_pic_.pic_num = 0;
prev_frame_num_ = 0;
+ prev_ref_frame_num_ = 0;
if (p_slice_header->field_pic_flag) {
if (p_slice_header->bottom_field_flag) {
curr_pic_.bottom_field_order_cnt = 0;
@@ -2722,7 +2931,7 @@ void AvcVideoParser::PrintVappiBufInfo() {
MSG(std::dec);
MSG("Reference pictures:");
- for (int i = 0; i < AVC_MAX_DPB_FRAMES; i++) {
+ for (int i = 0; i < 16; i++) {
RocdecAvcPicture *p_ref_pic = &p_pic_param->ref_frames[i];
MSG_NO_NEWLINE("Ref pic " << i << ": " << "pic_idx = " << p_ref_pic->pic_idx << ", frame_idx = " << p_ref_pic->frame_idx << ", top_field_order_cnt = " << p_ref_pic->top_field_order_cnt << ", bottom_field_order_cnt = " << p_ref_pic->bottom_field_order_cnt);
MSG(", flags = 0x" << std::hex << p_ref_pic->flags);
diff --git a/projects/rocdecode/src/parser/avc_parser.h b/projects/rocdecode/src/parser/avc_parser.h
index 1978665fd1..f7007d7bae 100644
--- a/projects/rocdecode/src/parser/avc_parser.h
+++ b/projects/rocdecode/src/parser/avc_parser.h
@@ -119,6 +119,7 @@ protected:
int prev_top_field_order_cnt_;
int prev_frame_num_offset_; // prevFrameNumOffset
int prev_frame_num_; // prevFrameNum
+ int prev_ref_frame_num_; // PrevRefFrameNum
int prev_has_mmco_5_;
int curr_has_mmco_5_;
int prev_ref_pic_bottom_field_;
@@ -217,6 +218,11 @@ protected:
*/
void CalculateCurrPoc();
+ /*! \brief Function to check and decode gaps in frame_num. 8.2.5.2.
+ * \return ParserResult
+ */
+ ParserResult DecodeFrameNumGaps();
+
/*! \brief Function to set up the reference picutre lists for each slice. 8.2.4.
* \param [in] p_slice_info Poiner to slice info struct
* \return ParserResult