Jj/hevc decode wip (#18)

* rocDecode/HEVC parsing: Implemented correct logic to set active VPS, SPS and PPS: start from slice header and up. Added video image size change detection. Fixed slice_segment_address bits calculation in slice segment header parsing.

* rocDecode/HEVC: Changed variable naming to Google style.

[ROCm/rocdecode commit: 94ff35e03c]
Tento commit je obsažen v:
jeffqjiangNew
2023-10-19 14:07:36 -04:00
odevzdal GitHub
rodič 97c97bcf39
revize b51ce09f9b
4 změnil soubory, kde provedl 72 přidání a 49 odebrání
+56 -43
Zobrazit soubor
@@ -23,6 +23,9 @@ THE SOFTWARE.
#include "hevc_parser.h"
HEVCVideoParser::HEVCVideoParser() {
m_active_vps_id_ = -1;
m_active_sps_id_ = -1;
m_active_pps_id_ = -1;
b_new_picture_ = false;
m_vps_ = NULL;
m_sps_ = NULL;
@@ -131,9 +134,6 @@ HEVCVideoParser::SliceHeaderData* HEVCVideoParser::AllocSliceHeader() {
}
ParserResult HEVCVideoParser::Init() {
m_active_vps_ = 0;
m_active_sps_ = 0;
m_active_pps_ = 0;
b_new_picture_ = false;
m_vps_ = AllocVps();
m_sps_ = AllocSps();
@@ -204,7 +204,7 @@ bool HEVCVideoParser::ParseFrameData(const uint8_t* p_stream, uint32_t frame_dat
case NAL_UNIT_CODED_SLICE_BLA_N_LP:
case NAL_UNIT_CODED_SLICE_IDR_W_RADL:
case NAL_UNIT_CODED_SLICE_IDR_N_LP:
case NAL_UNIT_CODED_SLICE_CRA:
case NAL_UNIT_CODED_SLICE_CRA_NUT:
case NAL_UNIT_CODED_SLICE_RADL_N:
case NAL_UNIT_CODED_SLICE_RADL_R:
case NAL_UNIT_CODED_SLICE_RASL_N:
@@ -288,6 +288,7 @@ void HEVCVideoParser::ParsePtl(H265ProfileTierLevel *ptl, bool profile_present_f
ptl->general_frame_only_constraint_flag = Parser::GetBit(nalu, offset);
//ReadBits is limited to 32
offset += 44;
// Todo: add constrant flags parsing.
}
ptl->general_level_idc = Parser::ReadBits(nalu, offset, 8);
@@ -657,7 +658,7 @@ void HEVCVideoParser::ParseVps(uint8_t *nalu, size_t size) {
void HEVCVideoParser::ParseSps(uint8_t *nalu, size_t size) {
size_t offset = 0;
m_active_vps_ = Parser::ReadBits(nalu, offset, 4);
uint32_t vps_id = Parser::ReadBits(nalu, offset, 4);
uint32_t max_sub_layer_minus1 = Parser::ReadBits(nalu, offset, 3);
uint32_t sps_temporal_id_nesting_flag = Parser::GetBit(nalu, offset);
H265ProfileTierLevel ptl;
@@ -665,7 +666,7 @@ void HEVCVideoParser::ParseSps(uint8_t *nalu, size_t size) {
ParsePtl(&ptl, true, max_sub_layer_minus1, nalu, size, offset);
uint32_t sps_id = Parser::ExpGolomb::ReadUe(nalu, offset);
memset(&m_sps_[sps_id], 0, sizeof(m_sps_[sps_id]));
m_sps_[sps_id].sps_video_parameter_set_id = m_active_vps_;
m_sps_[sps_id].sps_video_parameter_set_id = vps_id;
m_sps_[sps_id].sps_max_sub_layers_minus1 = max_sub_layer_minus1;
m_sps_[sps_id].sps_temporal_id_nesting_flag = sps_temporal_id_nesting_flag;
memcpy (&m_sps_[sps_id].profile_tier_level, &ptl, sizeof(ptl));
@@ -769,9 +770,7 @@ void HEVCVideoParser::ParsePps(uint8_t *nalu, size_t size) {
memset(&m_pps_[pps_id], 0, sizeof(m_pps_[pps_id]));
m_pps_[pps_id].pps_pic_parameter_set_id = pps_id;
m_active_sps_ = Parser::ExpGolomb::ReadUe(nalu, offset);
m_pps_[pps_id].pps_seq_parameter_set_id = m_active_sps_;
m_pps_[pps_id].pps_seq_parameter_set_id = Parser::ExpGolomb::ReadUe(nalu, offset);
m_pps_[pps_id].dependent_slice_segments_enabled_flag = Parser::GetBit(nalu, offset);
m_pps_[pps_id].output_flag_present_flag = Parser::GetBit(nalu, offset);
m_pps_[pps_id].num_extra_slice_header_bits = Parser::ReadBits(nalu, offset, 3);
@@ -833,44 +832,58 @@ void HEVCVideoParser::ParsePps(uint8_t *nalu, size_t size) {
}
bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, size_t size) {
PpsData *pps_ptr = NULL;
SpsData *sps_ptr = NULL;
size_t offset = 0;
SliceHeaderData temp_sh;
memset(&temp_sh, 0, sizeof(temp_sh));
temp_sh.first_slice_segment_in_pic_flag = m_sh_->first_slice_segment_in_pic_flag = Parser::GetBit(nalu, offset);
if (nal_unit_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL
|| nal_unit_type == NAL_UNIT_CODED_SLICE_IDR_N_LP
|| nal_unit_type == NAL_UNIT_CODED_SLICE_BLA_N_LP
|| nal_unit_type == NAL_UNIT_CODED_SLICE_BLA_W_RADL
|| nal_unit_type == NAL_UNIT_CODED_SLICE_BLA_W_LP
|| nal_unit_type == NAL_UNIT_CODED_SLICE_CRA
) {
if (nal_unit_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nal_unit_type <= NAL_UNIT_RESERVED_IRAP_VCL23) {
temp_sh.no_output_of_prior_pics_flag = m_sh_->no_output_of_prior_pics_flag = Parser::GetBit(nalu, offset);
}
m_active_pps_ = Parser::ExpGolomb::ReadUe(nalu, offset);
temp_sh.slice_pic_parameter_set_id = m_sh_->slice_pic_parameter_set_id = m_active_pps_;
// Set active VPS, SPS and PPS for the current slice
m_active_pps_id_ = Parser::ExpGolomb::ReadUe(nalu, offset);
temp_sh.slice_pic_parameter_set_id = m_sh_->slice_pic_parameter_set_id = m_active_pps_id_;
pps_ptr = &m_pps_[m_active_pps_id_];
m_active_sps_id_ = pps_ptr->pps_seq_parameter_set_id;
sps_ptr = &m_sps_[m_active_sps_id_];
m_active_vps_id_ = sps_ptr->sps_video_parameter_set_id;
// Check video dimension change
if ( pic_width_ != sps_ptr->pic_width_in_luma_samples || pic_height_ != sps_ptr->pic_height_in_luma_samples)
{
pic_width_ = sps_ptr->pic_width_in_luma_samples;
pic_height_ = sps_ptr->pic_height_in_luma_samples;
pic_dimension_changed_ = true; // Note: clear this flag after the actions with size change are taken.
}
if (!m_sh_->first_slice_segment_in_pic_flag) {
if (m_pps_[m_active_pps_].dependent_slice_segments_enabled_flag) {
if (pps_ptr->dependent_slice_segments_enabled_flag) {
temp_sh.dependent_slice_segment_flag = m_sh_->dependent_slice_segment_flag = Parser::GetBit(nalu, offset);
}
int num_ctus = 0;
int max_parts = (1 << (m_sps_[m_active_sps_].max_cu_depth << 1));
int bits_slice_segment_address = 0;
while(num_ctus > (1 << bits_slice_segment_address)) {
bits_slice_segment_address++;
}
temp_sh.slice_segment_address = m_sh_->slice_segment_address = Parser::ReadBits(nalu, offset, bits_slice_segment_address);
int min_cb_log2_size_y = sps_ptr->log2_min_luma_coding_block_size_minus3 + 3; // MinCbLog2SizeY
int ctb_log2_size_y = min_cb_log2_size_y + sps_ptr->log2_diff_max_min_luma_coding_block_size; // CtbLog2SizeY
int ctb_size_y = 1 << ctb_log2_size_y; // CtbSizeY
int pic_width_in_ctbs_y = (sps_ptr->pic_width_in_luma_samples + ctb_size_y - 1) / ctb_size_y; // PicWidthInCtbsY
int pic_height_in_ctbs_y = (sps_ptr->pic_height_in_luma_samples + ctb_size_y - 1) / ctb_size_y; // PicHeightInCtbsY
int pic_size_in_ctbs_y = pic_width_in_ctbs_y * pic_height_in_ctbs_y; // PicSizeInCtbsY
int bits_slice_segment_address = (int)ceilf(log2f((float)pic_size_in_ctbs_y));
temp_sh.slice_segment_address = m_sh_->slice_segment_address = Parser::ReadBits(nalu, offset, bits_slice_segment_address);
}
if (!m_sh_->dependent_slice_segment_flag) {
for (int i = 0; i < m_pps_[m_active_pps_].num_extra_slice_header_bits; i++) {
for (int i = 0; i < pps_ptr->num_extra_slice_header_bits; i++) {
m_sh_->slice_reserved_flag[i] = Parser::GetBit(nalu, offset);
}
m_sh_->slice_type = Parser::ExpGolomb::ReadUe(nalu, offset);
if (m_pps_[m_active_pps_].output_flag_present_flag) {
if (pps_ptr->output_flag_present_flag) {
m_sh_->pic_output_flag = Parser::GetBit(nalu, offset);
}
if (m_sps_[m_active_sps_].separate_colour_plane_flag) {
if (sps_ptr->separate_colour_plane_flag) {
m_sh_->colour_plane_id = Parser::ReadBits(nalu, offset, 2);
}
if (nal_unit_type == NAL_UNIT_CODED_SLICE_IDR_W_RADL || nal_unit_type == NAL_UNIT_CODED_SLICE_IDR_N_LP) {
@@ -883,13 +896,13 @@ bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, si
}
else {
//length of slice_pic_order_cnt_lsb is log2_max_pic_order_cnt_lsb_minus4 + 4 bits.
m_sh_->slice_pic_order_cnt_lsb = Parser::ReadBits(nalu, offset, (m_sps_[m_active_sps_].log2_max_pic_order_cnt_lsb_minus4 + 4));
m_sh_->slice_pic_order_cnt_lsb = Parser::ReadBits(nalu, offset, (sps_ptr->log2_max_pic_order_cnt_lsb_minus4 + 4));
//get POC
m_slice_->curr_poc_lsb = m_sh_->slice_pic_order_cnt_lsb;
m_slice_->max_poc_lsb = 1 << (m_sps_[m_active_sps_].log2_max_pic_order_cnt_lsb_minus4 + 4);
m_slice_->max_poc_lsb = 1 << (sps_ptr->log2_max_pic_order_cnt_lsb_minus4 + 4);
if (nal_unit_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nal_unit_type < NAL_UNIT_CODED_SLICE_CRA) {
if (nal_unit_type >= NAL_UNIT_CODED_SLICE_BLA_W_LP && nal_unit_type < NAL_UNIT_CODED_SLICE_CRA_NUT) {
m_slice_->curr_poc_msb = 0;
}
else {
@@ -909,11 +922,11 @@ bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, si
m_sh_->short_term_ref_pic_set_sps_flag = Parser::GetBit(nalu, offset);
int32_t pos = offset;
if (!m_sh_->short_term_ref_pic_set_sps_flag) {
ParseShortTermRefPicSet(&m_sh_->st_rps, m_sps_[m_active_sps_].num_short_term_ref_pic_sets, m_sps_[m_active_sps_].num_short_term_ref_pic_sets, m_sps_[m_active_sps_].st_rps, nalu, size, offset);
ParseShortTermRefPicSet(&m_sh_->st_rps, sps_ptr->num_short_term_ref_pic_sets, sps_ptr->num_short_term_ref_pic_sets, sps_ptr->st_rps, nalu, size, offset);
}
else if (m_sps_[m_active_sps_].num_short_term_ref_pic_sets > 1) {
else if (sps_ptr->num_short_term_ref_pic_sets > 1) {
int num_bits = 0;
while ((1 << num_bits) < m_sps_[m_active_sps_].num_short_term_ref_pic_sets) {
while ((1 << num_bits) < sps_ptr->num_short_term_ref_pic_sets) {
num_bits++;
}
if (num_bits > 0) {
@@ -922,29 +935,29 @@ bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, si
}
m_sh_->short_term_ref_pic_set_size = offset - pos;
if (m_sps_[m_active_sps_].long_term_ref_pics_present_flag) {
if (m_sps_[m_active_sps_].num_long_term_ref_pics_sps > 0) {
if (sps_ptr->long_term_ref_pics_present_flag) {
if (sps_ptr->num_long_term_ref_pics_sps > 0) {
m_sh_->num_long_term_sps = Parser::ExpGolomb::ReadUe(nalu, offset);
}
m_sh_->num_long_term_pics = Parser::ExpGolomb::ReadUe(nalu, offset);
int bits_for_ltrp_in_sps = 0;
while (m_sps_[m_active_sps_].num_long_term_ref_pics_sps > (1 << bits_for_ltrp_in_sps)) {
while (sps_ptr->num_long_term_ref_pics_sps > (1 << bits_for_ltrp_in_sps)) {
bits_for_ltrp_in_sps++;
}
m_sh_->lt_rps.num_of_pics = m_sh_->num_long_term_sps + m_sh_->num_long_term_pics;
for (int i = 0; i < (m_sh_->num_long_term_sps + m_sh_->num_long_term_pics); i++) {
if (i < m_sh_->num_long_term_sps) {
if (m_sps_[m_active_sps_].num_long_term_ref_pics_sps > 1) {
if (sps_ptr->num_long_term_ref_pics_sps > 1) {
if( bits_for_ltrp_in_sps > 0) {
m_sh_->lt_idx_sps[i] = Parser::ReadBits(nalu, offset, bits_for_ltrp_in_sps);
m_sh_->lt_rps.pocs[i] = m_sps_[m_active_sps_].lt_rps.pocs[m_sh_->lt_idx_sps[i]];
m_sh_->lt_rps.used_by_curr_pic[i] = m_sps_[m_active_sps_].lt_rps.used_by_curr_pic[m_sh_->lt_idx_sps[i]];
m_sh_->lt_rps.pocs[i] = sps_ptr->lt_rps.pocs[m_sh_->lt_idx_sps[i]];
m_sh_->lt_rps.used_by_curr_pic[i] = sps_ptr->lt_rps.used_by_curr_pic[m_sh_->lt_idx_sps[i]];
}
}
}
else {
m_sh_->poc_lsb_lt[i] = Parser::ReadBits(nalu, offset, (m_sps_[m_active_sps_].log2_max_pic_order_cnt_lsb_minus4 + 4));
m_sh_->poc_lsb_lt[i] = Parser::ReadBits(nalu, offset, (sps_ptr->log2_max_pic_order_cnt_lsb_minus4 + 4));
m_sh_->used_by_curr_pic_lt_flag[i] = Parser::GetBit(nalu, offset);
m_sh_->lt_rps.pocs[i] = m_sh_->poc_lsb_lt[i];
m_sh_->lt_rps.used_by_curr_pic[i] = m_sh_->used_by_curr_pic_lt_flag[i];
@@ -955,7 +968,7 @@ bool HEVCVideoParser::ParseSliceHeader(uint32_t nal_unit_type, uint8_t *nalu, si
}
}
}
if (m_sps_[m_active_sps_].sps_temporal_mvp_enabled_flag) {
if (sps_ptr->sps_temporal_mvp_enabled_flag) {
m_sh_->slice_temporal_mvp_enabled_flag = Parser::GetBit(nalu, offset);
}
}