From 190f2d9d33110e1ed9cd8cfdd871675ad02bb885 Mon Sep 17 00:00:00 2001 From: jeffqjiangNew <142832361+jeffqjiangNew@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:04:18 -0400 Subject: [PATCH] * rocDecode/AV1: Added stream syntax defines and sequence header parsing. (#306) [ROCm/rocdecode commit: e96b782dcf32a5501a6d22c00eb3462a921595fa] --- projects/rocdecode/src/parser/av1_defines.h | 439 ++++++++++++++++++- projects/rocdecode/src/parser/av1_parser.cpp | 223 +++++++++- projects/rocdecode/src/parser/av1_parser.h | 128 ++++++ 3 files changed, 788 insertions(+), 2 deletions(-) diff --git a/projects/rocdecode/src/parser/av1_defines.h b/projects/rocdecode/src/parser/av1_defines.h index 5e86733d05..bbdbbcdfc3 100644 --- a/projects/rocdecode/src/parser/av1_defines.h +++ b/projects/rocdecode/src/parser/av1_defines.h @@ -24,4 +24,441 @@ THE SOFTWARE. #include -//todo: add the defines \ No newline at end of file +#define OPERATING_POINTS_CNT_MAX 32 + +#define SELECT_SCREEN_CONTENT_TOOLS 2 +#define SELECT_INTEGER_MV 2 + +#define CP_BT_709 1 +#define CP_UNSPECIFIED 2 + +#define TC_SRGB 13 +#define TC_UNSPECIFIED 2 + +#define MC_IDENTITY 0 +#define MC_UNSPECIFIED 2 + +#define CSP_UNKNOWN 0 + +#define NUM_REF_FRAMES 8 +#define PRIMARY_REF_NONE 7 + +#define REFS_PER_FRAME 7 // Number of reference frames that can be used for inter prediction +#define TOTAL_REFS_PER_FRAME 8 // Number of reference frame types (including intra type) + +#define MAX_TILE_WIDTH 4096 // Maximum width of a tile in units of luma samples +#define MAX_TILE_AREA (4096 * 2304) // Maximum area of a tile in units of luma samples (parenthesized so the macro expands safely inside larger expressions) +#define MAX_TILE_ROWS 64 // Maximum number of tile rows +#define MAX_TILE_COLS 64 // Maximum number of tile columns + +#define SUPERRES_NUM 8 // Numerator for upscaling ratio +#define SUPERRES_DENOM_MIN 9 // Smallest denominator for upscaling ratio +#define SUPERRES_DENOM_BITS 3 // Number of bits sent to specify denominator of upscaling ratio + +#define MAX_SEGMENTS 8 // Number of
segments allowed in segmentation map +#define SEG_LVL_REF_FRAME 5 // Index for reference frame segment feature +#define SEG_LVL_MAX 8 // Number of segment features + +#define MAX_LOOP_FILTER 63 // Maximum value used for loop filtering +#define RESTORATION_TILESIZE_MAX 256 // Maximum size of a loop restoration tile + +#define WARPEDMODEL_PREC_BITS 16 // Internal precision of warped motion models +#define GM_ABS_TRANS_BITS 12 // Number of bits encoded for translational components of global motion models, if part of a ROTZOOM or AFFINE model +#define GM_ABS_TRANS_ONLY_BITS 9 // Number of bits encoded for translational components of global motion models, if part of a TRANSLATION model +#define GM_ABS_ALPHA_BITS 12 // Number of bits encoded for non-translational components of global motion models +#define GM_ALPHA_PREC_BITS 15 // Number of fractional bits for sending non-translational warp model coefficients +#define GM_TRANS_PREC_BITS 6 // Number of fractional bits for sending translational warp model coefficients +#define GM_TRANS_ONLY_PREC_BITS 3 // Number of fractional bits used for pure translational warps + +typedef enum { + kObuSequenceHeader = 1, + kObuTemporalDelimiter = 2, + kObuFrameHeader = 3, + kObuTileGroup = 4, + kObuMetaData = 5, + kObuFrame = 6, + kObuRedundantFrameHeader = 7, + kObuTileList = 8, + kObuPadding = 15, +} ObuType; + +typedef enum { + kKeyFrame = 0, + kInterFrame = 1, + kIntraOnlyFrame = 2, + kSwitchFrame = 3, +} FrameType; + +typedef enum { + kEightTap = 0, + kEightTapSmooth = 1, + kEightTapSharp = 2, + kBilinear = 3, + kSwitchable = 4, +} InterpolotionFilterType; + +typedef enum { + kNone = -1, + kIntraFrame = 0, + kLastFrame = 1, + kLast2Frame = 2, + kLast3Frame = 3, + kGoldenFrame = 4, + kBwdRefFrame = 5, + kAltRef2Frame = 6, + kAltRefFrame = 7, +} RefFrame; + +typedef enum { + kRestoreNone = 0, + kRestoreSwitchable = 3, // listed out of numeric order; its value is 3 + kRestoreWiener = 1, + kRestoreSgrproj = 2, +} FrameRestorationType; + +typedef enum { + kOnly4x4 = 0, +
kTxModeLargest = 1, + kTxModeSelect = 2, +} TX_Mode; + +typedef enum { + kIdentity = 0, // Warp model is just an identity transform + kTranslation = 1, // Warp model is a pure translation + kRotZoom = 2, // Warp model is a rotation + symmetric zoom + translation + kAffine = 3, // Warp model is a general affine transform +} WarpModel; + +typedef struct { + uint32_t size; + uint32_t obu_forbidden_bit; + uint32_t obu_type; + uint32_t obu_extension_flag; + uint32_t obu_has_size_field; + uint32_t obu_reserved_1bit; + uint32_t temporal_id; + uint32_t spatial_id; + uint32_t extension_header_reserved_3bits; +} Av1ObuHeader; + +typedef struct { + uint32_t num_units_in_display_tick; + uint32_t time_scale; + uint32_t equal_picture_interval; + uint32_t num_ticks_per_picture_minus_1; +} Av1TimingInfo; + +typedef struct { + uint32_t buffer_delay_length_minus_1; + uint32_t num_units_in_decoding_tick; + uint32_t buffer_removal_time_length_minus_1; + uint32_t frame_presentation_time_length_minus_1; +} Av1DecoderModelInfo; + +typedef struct { + uint32_t decoder_buffer_delay; + uint32_t encoder_buffer_delay; + uint32_t low_delay_mode_flag; +} Av1OperatingParametersInfo; + +typedef struct { + uint32_t high_bitdepth; + uint32_t twelve_bit; + uint32_t bit_depth; // BitDepth: derived value, set to 8, 10 or 12 by ParseColorConfig() + uint32_t mono_chrome; + uint32_t num_planes; // NumPlanes: derived value, 1 when mono_chrome is set, otherwise 3 + uint32_t color_description_present_flag; + uint32_t color_primaries; + uint32_t transfer_characteristics; + uint32_t matrix_coefficients; + uint32_t color_range; + uint32_t subsampling_x; + uint32_t subsampling_y; + uint32_t chroma_sample_position; + uint32_t separate_uv_delta_q; +} Av1ColorConfig; + +typedef struct { + uint32_t seq_profile; + uint32_t still_picture; + uint32_t reduced_still_picture_header; + uint32_t timing_info_present_flag; + Av1TimingInfo timing_info; + uint32_t decoder_model_info_present_flag; + Av1DecoderModelInfo decoder_model_info; + uint32_t initial_display_delay_present_flag; + uint32_t operating_points_cnt_minus_1; + uint32_t
operating_point_idc[OPERATING_POINTS_CNT_MAX]; + uint32_t seq_level_idx[OPERATING_POINTS_CNT_MAX]; + uint32_t seq_tier[OPERATING_POINTS_CNT_MAX]; + uint32_t decoder_model_present_for_this_op[OPERATING_POINTS_CNT_MAX]; + Av1OperatingParametersInfo operating_parameters_info[OPERATING_POINTS_CNT_MAX]; + uint32_t initial_display_delay_present_for_this_op[OPERATING_POINTS_CNT_MAX]; + uint32_t initial_display_delay_minus_1[OPERATING_POINTS_CNT_MAX]; + uint32_t frame_width_bits_minus_1; + uint32_t frame_height_bits_minus_1; + uint32_t max_frame_width_minus_1; + uint32_t max_frame_height_minus_1; + uint32_t frame_id_numbers_present_flag; + uint32_t delta_frame_id_length_minus_2; + uint32_t additional_frame_id_length_minus_1; + uint32_t use_128x128_superblock; + uint32_t enable_filter_intra; + uint32_t enable_intra_edge_filter; + uint32_t enable_interintra_compound; + uint32_t enable_masked_compound; + uint32_t enable_warped_motion; + uint32_t enable_dual_filter; + uint32_t enable_order_hint; + uint32_t enable_jnt_comp; + uint32_t enable_ref_frame_mvs; + uint32_t seq_choose_screen_content_tools; + uint32_t seq_force_screen_content_tools; + uint32_t seq_choose_integer_mv; + uint32_t seq_force_integer_mv; + uint32_t order_hint_bits_minus_1; + uint32_t order_hint_bits; // OrderHintBits: derived value, order_hint_bits_minus_1 + 1 when enable_order_hint is set, otherwise 0 + uint32_t enable_superres; + uint32_t enable_cdef; + uint32_t enable_restoration; + Av1ColorConfig color_config; + uint32_t film_grain_params_present; +} Av1SequenceHeader; + +typedef struct { + uint32_t frame_presentation_time; +} Av1TemporalPointInfo; + +typedef struct { + uint32_t use_superres; + uint32_t coded_denom; +} Av1SuperResParams; + +typedef struct { + uint32_t frame_width_minus_1; + uint32_t frame_height_minus_1; + Av1SuperResParams superres_params; + uint32_t mi_cols; + uint32_t mi_rows; +} Av1FrameSize; + +typedef struct { + uint32_t render_and_frame_size_different; + uint32_t render_width_minus_1; + uint32_t render_height_minus_1; +} Av1RenderSize; + +typedef struct { + uint32_t
uniform_tile_spacing_flag; + int32_t tile_cols_log2; + int32_t tile_rows_log2; + uint32_t increment_tile_cols_log2; + uint32_t increment_tile_rows_log2; + int32_t mi_col_starts[MAX_TILE_COLS + 1]; + int32_t mi_row_starts[MAX_TILE_ROWS + 1]; + int32_t tile_cols; + int32_t tile_rows; + uint32_t width_in_sbs_minus_1; + uint32_t height_in_sbs_minus_1; + uint32_t context_update_tile_id; + uint32_t tile_size_bytes_minus_1; +} Av1TileInfoSyntx; + +typedef struct { + uint32_t base_q_idx; + uint32_t delta_coded; + uint32_t delta_q; // NOTE(review): the AV1 spec codes delta_q as signed su(1+6); confirm that unsigned storage here and in the delta_q_* fields below is intended + uint32_t delta_q_y_dc; + uint32_t diff_uv_delta; + uint32_t delta_q_u_dc; + uint32_t delta_q_u_ac; + uint32_t delta_q_v_dc; + uint32_t delta_q_v_ac; + uint32_t using_qmatrix; + uint32_t qm_y; + uint32_t qm_u; + uint32_t qm_v; +} Av1QuantizationParams; + +typedef struct { + uint32_t segmentation_enabled; + uint32_t segmentation_update_map; + uint32_t segmentation_temporal_update; + uint32_t segmentation_update_data; + uint32_t feature_enabled; + uint32_t feature_enabled_flags[MAX_SEGMENTS][SEG_LVL_MAX]; + uint32_t feature_value; + int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; + uint32_t seg_id_pre_skip; + uint32_t last_active_seg_id; +} Av1SegmentationParams; + +typedef struct { + uint32_t delta_q_present; + uint32_t delta_q_res; +} Av1DeltaQParams; + +typedef struct { + uint32_t delta_lf_present; + uint32_t delta_lf_res; + uint32_t delta_lf_multi; +} Av1DeltaLFParams; + +typedef struct { + uint32_t loop_filter_level[4]; + uint32_t loop_filter_sharpness; + uint32_t loop_filter_delta_enabled; + uint32_t loop_filter_delta_update; + uint32_t update_ref_delta; + uint32_t loop_filter_ref_deltas[TOTAL_REFS_PER_FRAME]; // NOTE(review): loop filter deltas are signed in the AV1 spec; confirm unsigned storage is intended + uint32_t update_mode_delta; + uint32_t loop_filter_mode_deltas[2]; +} Av1LoopFilterParams; + +typedef struct { + uint32_t cdef_damping_minus_3; + uint32_t cdef_bits; + uint32_t cdef_y_pri_strength[8]; + uint32_t cdef_y_sec_strength[8]; + uint32_t cdef_uv_pri_strength[8]; + uint32_t cdef_uv_sec_strength[8]; + uint32_t cdef_damping;
+} Av1CdefParams; + +typedef struct { + uint32_t frame_restoration_type[3]; + uint32_t uses_lr; + uint32_t lr_type; + uint32_t lr_unit_shift; + uint32_t lr_unit_extra_shift; + uint32_t loop_restoration_size[3]; + uint32_t lr_uv_shift; +} Av1LRParams; + +typedef struct { + uint32_t tx_mode_select; + uint32_t tx_mode; +} Av1TxMode; + +typedef struct { + uint32_t reference_select; +} Av1FrameReferenceMode; + +typedef struct { + uint32_t skip_mode_frame[2]; + uint32_t skip_mode_present; +} Av1SkipModeParams; + +typedef struct { + uint32_t gm_type[NUM_REF_FRAMES]; + uint32_t gm_params[NUM_REF_FRAMES][6]; // NOTE(review): warp parameters can be negative in the AV1 spec; confirm unsigned storage is intended + uint32_t prev_gm_params[NUM_REF_FRAMES][6]; + uint32_t is_global; + uint32_t is_rot_zoom; + uint32_t is_translation; +} Av1GlobalMotionParams; + +typedef struct { + uint32_t apply_grain; + uint32_t grain_seed; + uint32_t update_grain; + uint32_t film_grain_params_ref_idx; + uint32_t num_y_points; + uint32_t point_y_value[14]; + uint32_t point_y_scaling[14]; + uint32_t chroma_scaling_from_luma; + uint32_t num_cb_points; + uint32_t num_cr_points; + uint32_t point_cb_value[10]; + uint32_t point_cb_scaling[10]; + uint32_t point_cr_value[10]; + uint32_t point_cr_scaling[10]; + uint32_t grain_scaling_minus_8; + uint32_t ar_coeff_lag; + uint32_t ar_coeffs_y_plus_128[24]; + uint32_t ar_coeffs_cb_plus_128[25]; + uint32_t ar_coeffs_cr_plus_128[25]; + uint32_t ar_coeff_shift_minus_6; + uint32_t grain_scale_shift; + uint32_t cb_mult; + uint32_t cb_luma_mult; + uint32_t cb_offset; + uint32_t cr_mult; + uint32_t cr_luma_mult; + uint32_t cr_offset; + uint32_t overlap_flag; + uint32_t clip_to_restricted_range; +} Av1FilmGrainParams; + +typedef struct { + uint32_t show_existing_frame; + uint32_t frame_to_show_map_idx; + Av1TemporalPointInfo temporal_point_info; + uint32_t display_frame_id; + uint32_t frame_type; + uint32_t frame_isIntra; + uint32_t show_frame; + uint32_t showable_frame; + uint32_t error_resilient_mode; + uint32_t disable_cdf_update; + uint32_t
allow_screen_content_tools; + uint32_t force_integer_mv; + uint32_t current_frame_id; + uint32_t prev_frame_id; + uint32_t frame_size_override_flag; + uint32_t order_hint; + uint32_t order_hints[NUM_REF_FRAMES]; + uint32_t primary_ref_frame; + uint32_t buffer_removal_time_present_flag; + uint32_t buffer_removal_time[OPERATING_POINTS_CNT_MAX]; + uint32_t refresh_frame_flags; + uint32_t ref_order_hint[NUM_REF_FRAMES]; + uint32_t ref_frame_sign_bias[NUM_REF_FRAMES]; + uint32_t found_ref; + Av1FrameSize frame_size; + Av1RenderSize render_size; + uint32_t allow_intrabc; + uint32_t frame_refs_short_signaling; + uint32_t last_frame_idx; + uint32_t gold_frame_idx; + int32_t ref_frame_idx[REFS_PER_FRAME]; + uint32_t delta_frame_id_minus_1; + uint32_t expectedFrameId[REFS_PER_FRAME]; + uint32_t allow_high_precision_mv; + uint32_t is_filter_switchable; + uint32_t interpolation_filter; + uint32_t is_motion_mode_switchable; + uint32_t use_ref_frame_mvs; + uint32_t disable_frame_end_update_cdf; + Av1TileInfoSyntx tile_info; + Av1QuantizationParams quantization_params; + Av1SegmentationParams segmentation_params; + Av1DeltaQParams delta_q_params; + Av1DeltaLFParams delta_lf_params; + uint32_t coded_lossless; + uint32_t lossless_array[MAX_SEGMENTS]; + uint32_t seg_qm__level[3][MAX_SEGMENTS]; // NOTE(review): the double underscore makes this a reserved identifier in C++; consider renaming in a follow-up + uint32_t all_lossless; + Av1LoopFilterParams loop_filter_params; + Av1CdefParams cdef_params; + Av1LRParams lr_params; + Av1TxMode tx_mode; + Av1FrameReferenceMode frame_reference_mode; + Av1SkipModeParams skip_mode_params; + uint32_t allow_warped_motion; + uint32_t reduced_tx_set; + Av1GlobalMotionParams global_motion_params; + Av1FilmGrainParams film_grain_params; +} Av1FrameHeader; + +typedef struct { + uint32_t offset; + uint32_t size; +} Av1TileDataInfo; + +typedef struct { + uint32_t buffer_id; // buffer ID in the bitstream buffer pool. + uint8_t *buffer_ptr; // pointer of the tile group data buffer.
+ uint32_t buffer_size; // total size of the data buffer, may include the header bytes. + Av1TileDataInfo tile_data_info[MAX_TILE_ROWS][MAX_TILE_COLS]; +} Av1TileGroupDataInfo; \ No newline at end of file diff --git a/projects/rocdecode/src/parser/av1_parser.cpp b/projects/rocdecode/src/parser/av1_parser.cpp index ecb72f6002..538b831d83 100644 --- a/projects/rocdecode/src/parser/av1_parser.cpp +++ b/projects/rocdecode/src/parser/av1_parser.cpp @@ -41,4 +41,225 @@ rocDecStatus Av1VideoParser::UnInitialize() { rocDecStatus Av1VideoParser::ParseVideoData(RocdecSourceDataPacket *p_data) { //to be implemented return ROCDEC_NOT_IMPLEMENTED; -} \ No newline at end of file +} + +void Av1VideoParser::ParseSequenceHeader(uint8_t *p_stream, size_t size) { // NOTE(review): size is never read in this function, so no read is checked against the stream length here + Av1SequenceHeader *p_seq_header = &seq_header_; + size_t offset = 0; // current bit offset + + memset(p_seq_header, 0, sizeof(Av1SequenceHeader)); + + p_seq_header->seq_profile = Parser::ReadBits(p_stream, offset, 3); + p_seq_header->still_picture = Parser::GetBit(p_stream, offset); + p_seq_header->reduced_still_picture_header = Parser::GetBit(p_stream, offset); + + if (p_seq_header->reduced_still_picture_header) { + p_seq_header->timing_info_present_flag = 0; + p_seq_header->decoder_model_info_present_flag = 0; + p_seq_header->initial_display_delay_present_flag = 0; + p_seq_header->operating_points_cnt_minus_1 = 0; + p_seq_header->operating_point_idc[0] = 0; + p_seq_header->seq_level_idx[0] = Parser::ReadBits(p_stream, offset, 5); + p_seq_header->seq_tier[0] = 0; + p_seq_header->decoder_model_present_for_this_op[0] = 0; + p_seq_header->initial_display_delay_present_for_this_op[0] = 0; + } else { + p_seq_header->timing_info_present_flag = Parser::GetBit(p_stream, offset); + if (p_seq_header->timing_info_present_flag) { + // timing_info() + p_seq_header->timing_info.num_units_in_display_tick = Parser::ReadBits(p_stream, offset, 32); + p_seq_header->timing_info.time_scale = Parser::ReadBits(p_stream, offset, 32); +
p_seq_header->timing_info.equal_picture_interval = Parser::GetBit(p_stream, offset); + if (p_seq_header->timing_info.equal_picture_interval) { + p_seq_header->timing_info.num_ticks_per_picture_minus_1 = ReadUVLC(p_stream, offset); + } + + p_seq_header->decoder_model_info_present_flag = Parser::GetBit(p_stream, offset); + if (p_seq_header->decoder_model_info_present_flag) { + p_seq_header->decoder_model_info.buffer_delay_length_minus_1 = Parser::ReadBits(p_stream, offset, 5); + p_seq_header->decoder_model_info.num_units_in_decoding_tick = Parser::ReadBits(p_stream, offset, 32); + p_seq_header->decoder_model_info.buffer_removal_time_length_minus_1 = Parser::ReadBits(p_stream, offset, 5); + p_seq_header->decoder_model_info.frame_presentation_time_length_minus_1 = Parser::ReadBits(p_stream, offset, 5); + } + } else { + p_seq_header->decoder_model_info_present_flag = 0; + } + + p_seq_header->initial_display_delay_present_flag = Parser::GetBit(p_stream, offset); + p_seq_header->operating_points_cnt_minus_1 = Parser::ReadBits(p_stream, offset, 5); + for (int i = 0; i < p_seq_header->operating_points_cnt_minus_1 + 1; i++) { // the count comes from a 5-bit field, so at most 32 (OPERATING_POINTS_CNT_MAX) iterations + p_seq_header->operating_point_idc[i] = Parser::ReadBits(p_stream, offset, 12); + p_seq_header->seq_level_idx[i] = Parser::ReadBits(p_stream, offset, 5); + if (p_seq_header->seq_level_idx[i] > 7) { // seq_tier is only read from the stream for level indices above 7 + p_seq_header->seq_tier[i] = Parser::GetBit(p_stream, offset); + } else { + p_seq_header->seq_tier[i] = 0; + } + + if (p_seq_header->decoder_model_info_present_flag) { + p_seq_header->decoder_model_present_for_this_op[i] = Parser::GetBit(p_stream, offset); + if (p_seq_header->decoder_model_present_for_this_op[i]) { + p_seq_header->operating_parameters_info[i].decoder_buffer_delay = Parser::ReadBits(p_stream, offset, p_seq_header->decoder_model_info.buffer_delay_length_minus_1 + 1); + p_seq_header->operating_parameters_info[i].encoder_buffer_delay = Parser::ReadBits(p_stream, offset, p_seq_header->decoder_model_info.buffer_delay_length_minus_1 + 1); +
p_seq_header->operating_parameters_info[i].low_delay_mode_flag = Parser::GetBit(p_stream, offset); + } + } else { + p_seq_header->decoder_model_present_for_this_op[i] = 0; + } + + if (p_seq_header->initial_display_delay_present_flag) { + p_seq_header->initial_display_delay_present_for_this_op[i] = Parser::GetBit(p_stream, offset); + if (p_seq_header->initial_display_delay_present_for_this_op[i]) { + p_seq_header->initial_display_delay_minus_1[i] = Parser::ReadBits(p_stream, offset, 4); + } + } + } + } + + // Todo: Choose operating point. + + p_seq_header->frame_width_bits_minus_1 = Parser::ReadBits(p_stream, offset, 4); + p_seq_header->frame_height_bits_minus_1 = Parser::ReadBits(p_stream, offset, 4); + p_seq_header->max_frame_width_minus_1 = Parser::ReadBits(p_stream, offset, p_seq_header->frame_width_bits_minus_1 + 1); + p_seq_header->max_frame_height_minus_1 = Parser::ReadBits(p_stream, offset, p_seq_header->frame_height_bits_minus_1 + 1); + if (p_seq_header->reduced_still_picture_header) { + p_seq_header->frame_id_numbers_present_flag = 0; + } else { + p_seq_header->frame_id_numbers_present_flag = Parser::GetBit(p_stream, offset); + } + if (p_seq_header->frame_id_numbers_present_flag) { + p_seq_header->delta_frame_id_length_minus_2 = Parser::ReadBits(p_stream, offset, 4); + p_seq_header->additional_frame_id_length_minus_1 = Parser::ReadBits(p_stream, offset, 3); + } + p_seq_header->use_128x128_superblock = Parser::GetBit(p_stream, offset); + p_seq_header->enable_filter_intra = Parser::GetBit(p_stream, offset); + p_seq_header->enable_intra_edge_filter = Parser::GetBit(p_stream, offset); + + if (p_seq_header->reduced_still_picture_header) { + p_seq_header->enable_interintra_compound = 0; + p_seq_header->enable_masked_compound = 0; + p_seq_header->enable_warped_motion = 0; + p_seq_header->enable_dual_filter = 0; + p_seq_header->enable_order_hint = 0; + p_seq_header->enable_jnt_comp = 0; + p_seq_header->enable_ref_frame_mvs = 0; +
p_seq_header->seq_force_screen_content_tools = SELECT_SCREEN_CONTENT_TOOLS; + p_seq_header->seq_force_integer_mv = SELECT_INTEGER_MV; + p_seq_header->order_hint_bits = 0; + } else { + p_seq_header->enable_interintra_compound = Parser::GetBit(p_stream, offset); + p_seq_header->enable_masked_compound = Parser::GetBit(p_stream, offset); + p_seq_header->enable_warped_motion = Parser::GetBit(p_stream, offset); + p_seq_header->enable_dual_filter = Parser::GetBit(p_stream, offset); + p_seq_header->enable_order_hint = Parser::GetBit(p_stream, offset); + if (p_seq_header->enable_order_hint) { + p_seq_header->enable_jnt_comp = Parser::GetBit(p_stream, offset); + p_seq_header->enable_ref_frame_mvs = Parser::GetBit(p_stream, offset); + } else { + p_seq_header->enable_jnt_comp = 0; + p_seq_header->enable_ref_frame_mvs = 0; + } + + p_seq_header->seq_choose_screen_content_tools = Parser::GetBit(p_stream, offset); + if (p_seq_header->seq_choose_screen_content_tools) { + p_seq_header->seq_force_screen_content_tools = SELECT_SCREEN_CONTENT_TOOLS; + } else { + p_seq_header->seq_force_screen_content_tools = Parser::GetBit(p_stream, offset); + } + if (p_seq_header->seq_force_screen_content_tools > 0) { + p_seq_header->seq_choose_integer_mv = Parser::GetBit(p_stream, offset); + if (p_seq_header->seq_choose_integer_mv) { + p_seq_header->seq_force_integer_mv = SELECT_INTEGER_MV; + } else { + p_seq_header->seq_force_integer_mv = Parser::GetBit(p_stream, offset); + } + } else { + p_seq_header->seq_force_integer_mv = SELECT_INTEGER_MV; + } + + if (p_seq_header->enable_order_hint) { + p_seq_header->order_hint_bits_minus_1 = Parser::ReadBits(p_stream, offset, 3); + p_seq_header->order_hint_bits = p_seq_header->order_hint_bits_minus_1 + 1; + } else { + p_seq_header->order_hint_bits = 0; + } + } + + p_seq_header->enable_superres = Parser::GetBit(p_stream, offset); + p_seq_header->enable_cdef = Parser::GetBit(p_stream, offset); + p_seq_header->enable_restoration = Parser::GetBit(p_stream,
offset); + + ParseColorConfig(p_stream, offset, p_seq_header); + + p_seq_header->film_grain_params_present = Parser::GetBit(p_stream, offset); +} + +void Av1VideoParser::ParseColorConfig(const uint8_t *p_stream, size_t &offset, Av1SequenceHeader *p_seq_header) { + p_seq_header->color_config.bit_depth = 8; // default; replaced below whenever seq_profile <= 2 + + p_seq_header->color_config.high_bitdepth = Parser::GetBit(p_stream, offset); + if (p_seq_header->seq_profile == 2 && p_seq_header->color_config.high_bitdepth) { + p_seq_header->color_config.twelve_bit = Parser::GetBit(p_stream, offset); + p_seq_header->color_config.bit_depth = p_seq_header->color_config.twelve_bit ? 12 : 10; + } else if (p_seq_header->seq_profile <= 2) { + p_seq_header->color_config.bit_depth = p_seq_header->color_config.high_bitdepth ? 10 : 8; + } + + if (p_seq_header->seq_profile == 1) { + p_seq_header->color_config.mono_chrome = 0; + } else { + p_seq_header->color_config.mono_chrome = Parser::GetBit(p_stream, offset); + } + p_seq_header->color_config.num_planes = p_seq_header->color_config.mono_chrome ?
1 : 3; + + p_seq_header->color_config.color_description_present_flag = Parser::GetBit(p_stream, offset); + if (p_seq_header->color_config.color_description_present_flag) { + p_seq_header->color_config.color_primaries = Parser::ReadBits(p_stream, offset, 8); + p_seq_header->color_config.transfer_characteristics = Parser::ReadBits(p_stream, offset, 8); + p_seq_header->color_config.matrix_coefficients = Parser::ReadBits(p_stream, offset, 8); + } else { + p_seq_header->color_config.color_primaries = CP_UNSPECIFIED; + p_seq_header->color_config.transfer_characteristics = TC_UNSPECIFIED; + p_seq_header->color_config.matrix_coefficients = MC_UNSPECIFIED; + } + + if (p_seq_header->color_config.mono_chrome) { + p_seq_header->color_config.color_range = Parser::GetBit(p_stream, offset); + p_seq_header->color_config.subsampling_x = 1; + p_seq_header->color_config.subsampling_y = 1; + p_seq_header->color_config.chroma_sample_position = CSP_UNKNOWN; + p_seq_header->color_config.separate_uv_delta_q = 0; + return; // monochrome: separate_uv_delta_q is forced to 0 above and not read from the stream + } else if (p_seq_header->color_config.color_primaries == CP_BT_709 && p_seq_header->color_config.transfer_characteristics == TC_SRGB && p_seq_header->color_config.matrix_coefficients == MC_IDENTITY) { + p_seq_header->color_config.color_range = 1; + p_seq_header->color_config.subsampling_x = 0; + p_seq_header->color_config.subsampling_y = 0; + } else { + p_seq_header->color_config.color_range = Parser::GetBit(p_stream, offset); + if (p_seq_header->seq_profile == 0) { + p_seq_header->color_config.subsampling_x = 1; + p_seq_header->color_config.subsampling_y = 1; + } else if (p_seq_header->seq_profile == 1) { + p_seq_header->color_config.subsampling_x = 0; + p_seq_header->color_config.subsampling_y = 0; + } else { + if (p_seq_header->color_config.bit_depth == 12) { + p_seq_header->color_config.subsampling_x = Parser::GetBit(p_stream, offset); + if (p_seq_header->color_config.subsampling_x) { + p_seq_header->color_config.subsampling_y = Parser::GetBit(p_stream, offset); +
} else { + p_seq_header->color_config.subsampling_y = 0; + } + } else { + p_seq_header->color_config.subsampling_x = 1; + p_seq_header->color_config.subsampling_y = 0; + } + } + + if (p_seq_header->color_config.subsampling_x && p_seq_header->color_config.subsampling_y) { + p_seq_header->color_config.chroma_sample_position = Parser::ReadBits(p_stream, offset, 2); + } + } + + p_seq_header->color_config.separate_uv_delta_q = Parser::GetBit(p_stream, offset); +} diff --git a/projects/rocdecode/src/parser/av1_parser.h b/projects/rocdecode/src/parser/av1_parser.h index cb8e24f578..160b521a31 100644 --- a/projects/rocdecode/src/parser/av1_parser.h +++ b/projects/rocdecode/src/parser/av1_parser.h @@ -24,6 +24,9 @@ THE SOFTWARE. #include "av1_defines.h" #include "roc_video_parser.h" +#define OBU_HEADER_SIZE 1 +#define OBU_EXTENSION_SIZE 1 + class Av1VideoParser : public RocVideoParser { public: /*! \brief Av1VideoParser constructor @@ -50,4 +53,129 @@ public: * @return rocDecStatus */ virtual rocDecStatus UnInitialize(); // derived method + +protected: + Av1SequenceHeader seq_header_; + + /*! \brief Function to parse a sequence header OBU into seq_header_ (the struct is zeroed first) + * \param p_stream Pointer to the bit stream + * \param size Byte size of the stream (currently not read by the implementation) + * \return None + */ + void ParseSequenceHeader(uint8_t *p_stream, size_t size); + + /*! \brief Function to parse color config in sequence header + * \param p_stream Pointer to the bit stream + * \param [in] offset Starting bit offset + * \param [out] offset Updated bit offset + * \param [in,out] p_seq_header Pointer to sequence header struct; seq_profile is read and color_config is written + * \return None + */ + void ParseColorConfig(const uint8_t *p_stream, size_t &offset, Av1SequenceHeader *p_seq_header); + + /*!
\brief Function to calculate the floor of the base 2 logarithm of the input x + * \param [in] x A 32-bit unsigned integer + * \return the location of the most significant bit in x (0 when x is 0 or 1) + */ + inline uint32_t FloorLog2(uint32_t x) { + uint32_t s = 0; + while (x != 0) { + x = x >> 1; + s++; + } + return s ? s - 1 : 0; // x is always 0 after the loop, so test the bit count s instead + } + + /*! \brief Function to read variable length unsigned n-bit number appearing directly in the bitstream. 4.10.3. uvlc(). + * \param [in] p_stream Bit stream pointer + * \param [in] bit_offset Starting bit offset + * \param [out] bit_offset Updated bit offset + * \return The unsigned value + */ + inline uint32_t ReadUVLC(const uint8_t *p_stream, size_t &bit_offset) { + int leading_zeros = 0; + while (!Parser::GetBit(p_stream, bit_offset)) { + ++leading_zeros; + } + // Maximum 32 bits. + if (leading_zeros >= 32) { + return 0xFFFFFFFF; + } + uint32_t base = (1u << leading_zeros) - 1; + uint32_t value = Parser::ReadBits(p_stream, bit_offset, leading_zeros); + return base + value; + } + + /*! \brief Function to read unsigned little-endian num_bytes-byte number appearing directly in the bitstream. 4.10.4. le(n). + * \param [in] p_stream Bit stream pointer + * \param [in] num_bytes Number of bytes to read (at most 4 fit the 32-bit result) + * \return The unsigned value + */ + inline uint32_t ReadLeBytes(const uint8_t *p_stream, int num_bytes) { + uint32_t t = 0; + for (int i = 0; i < num_bytes; i++) { + t |= static_cast<uint32_t>(p_stream[i]) << (i * 8); // widen first so the fourth byte cannot shift into the sign bit of int + } + return t; + } + + /*! \brief Function to read unsigned integer represented by a variable number of little-endian bytes, which + * is less than or equal to (1 << 32) - 1. 4.10.5. leb128().
+ * \param [in] p_stream Bit stream pointer + * \param [out] p_num_bytes_read Number of bytes read (0 if no terminating byte is found within 8 bytes) + * \return The unsigned value + */ + inline uint32_t ReadLeb128(const uint8_t *p_stream, uint32_t *p_num_bytes_read) { + uint64_t value = 0; // 64-bit accumulator: 8 groups of 7 bits do not fit in 32 bits + *p_num_bytes_read = 0; + uint32_t len; + for (len = 0; len < 8; ++len) { // leb128() allows up to 8 bytes, including zero-padded encodings of small values + value |= static_cast<uint64_t>(p_stream[len] & 0x7F) << (len * 7); + if ((p_stream[len] & 0x80) == 0) { + ++len; + *p_num_bytes_read = len; + break; + } + } + return static_cast<uint32_t>(value); // conforming streams keep the value within 32 bits + } + + /*! \brief Function to read signed integer converted from an n bits unsigned integer in the bitstream. 4.10.6. su(n). + * \param [in] p_stream Bit stream pointer + * \param [in] bit_offset Starting bit offset + * \param [out] bit_offset Updated bit offset + * \param [in] num_bits Number of bits to read (1..32) + * \return The signed value + */ + inline int32_t ReadSigned(const uint8_t *p_stream, size_t &bit_offset, int num_bits) { + int32_t value; + uint32_t u_value = Parser::ReadBits(p_stream, bit_offset, num_bits); + uint32_t sign_mask = 1u << (num_bits - 1); // unsigned literal: shifting a signed 1 by 31 overflows int + if ( u_value & sign_mask ) { + value = u_value - 2 * sign_mask; + } else { + value = u_value; + } + return value; + } + + /*! \brief Function to read unsigned encoded (non-symmetric) integer with maximum number of values num_bits + * (i.e. output in range 0..num_bits-1). This encoding is non-symmetric because the values are not all + * coded with the same number of bits. 4.10.7. ns(n).
+ * \param [in] p_stream Bit stream pointer + * \param [in] bit_offset Starting bit offset + * \param [out] bit_offset Updated bit offset + * \param [in] num_bits Number of distinct values n that can be coded (not a bit count) + * \return The unsigned value + */ + inline uint32_t ReadUnsignedNonSymmetic(const uint8_t *p_stream, size_t &bit_offset, int num_bits) { + uint32_t w = FloorLog2(num_bits) + 1; + uint32_t m = (1u << w) - num_bits; // count of values that are coded with only w - 1 bits + uint32_t v = Parser::ReadBits(p_stream, bit_offset, w - 1); + if (v < m) { + return v; + } + uint32_t extra_bit = Parser::GetBit(p_stream, bit_offset); + return (v << 1) - m + extra_bit; + } }; \ No newline at end of file