From c8a7b82794731aaf169bcd8b41f125449d0a3509 Mon Sep 17 00:00:00 2001 From: jeffqjiangNew <142832361+jeffqjiangNew@users.noreply.github.com> Date: Wed, 12 Feb 2025 17:18:09 -0500 Subject: [PATCH] VP9: Added support for video size change (scaling down) on inter frames. (#508) * * rocDecode/VP9: Added support for video size change (scaling down) on inter frames. - A new reconfigure mode is added, where we keep the existing VAAPI surfaces and context, and work on the smaller images and store the images in the surfaces. * * rocDecode/VP9 resize: Added changes based on review comments. [ROCm/rocdecode commit: 2af7bbf4e751fb9048ca1692166a34b75958085a] --- projects/rocdecode/api/rocparser.h | 14 ++++++++-- projects/rocdecode/src/parser/av1_parser.cpp | 1 + projects/rocdecode/src/parser/avc_parser.cpp | 1 + projects/rocdecode/src/parser/hevc_parser.cpp | 1 + projects/rocdecode/src/parser/vp9_parser.cpp | 28 +++++++++++++++++-- projects/rocdecode/src/parser/vp9_parser.h | 6 ++++ .../utils/rocvideodecode/roc_video_dec.cpp | 20 +++++++------ 7 files changed, 58 insertions(+), 13 deletions(-) diff --git a/projects/rocdecode/api/rocparser.h b/projects/rocdecode/api/rocparser.h index ebf70f8b43..431a1a00b4 100644 --- a/projects/rocdecode/api/rocparser.h +++ b/projects/rocdecode/api/rocparser.h @@ -46,6 +46,16 @@ extern "C" { typedef void *RocdecVideoParser; typedef uint64_t RocdecTimeStamp; +/***************************************************************/ +//! \enum RocdecReconfigOptions +//! Decoder reconfigure options for VP9 decode +//! 
Used in RocdecVideoFormat structure +/***************************************************************/ +typedef enum { + ROCDEC_RECONFIG_NEW_SURFACES = 0, /**< Allocate new surfaces during decoder reconfig */ + ROCDEC_RECONFIG_KEEP_SURFACES = 1, /**< Keep existing surfaces during decoder reconfig */ +} RocdecReconfigOptions; + /** * @brief ROCDEC_VIDEO_FORMAT struct * @ingroup group_rocdec_struct @@ -89,7 +99,6 @@ typedef struct { int right; /**< OUT: right position of display rect */ int bottom; /**< OUT: bottom position of display rect */ } display_area; - rocDecVideoChromaFormat chroma_format; /**< OUT: Chroma format */ uint32_t bitrate; /**< OUT: video bitrate (bps, 0=unknown) */ /** @@ -111,7 +120,8 @@ typedef struct { uint8_t transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the source picture */ uint8_t matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB primaries */ } video_signal_description; - uint32_t seqhdr_data_length; /**< OUT: Additional bytes following (RocdecVideoFormatEx) */ + uint32_t seqhdr_data_length; /**< OUT: Additional bytes following (RocdecVideoFormatEx) */ + uint32_t reconfig_options; /**< OUT: Refer to RocdecReconfigOptions */ } RocdecVideoFormat; /****************************************************************/ diff --git a/projects/rocdecode/src/parser/av1_parser.cpp b/projects/rocdecode/src/parser/av1_parser.cpp index 50722258e9..3b40dd3e26 100644 --- a/projects/rocdecode/src/parser/av1_parser.cpp +++ b/projects/rocdecode/src/parser/av1_parser.cpp @@ -230,6 +230,7 @@ ParserResult Av1VideoParser::NotifyNewSequence(Av1SequenceHeader *p_seq_header, video_format_params_.display_aspect_ratio.x = disp_width / gcd; video_format_params_.display_aspect_ratio.y = disp_height / gcd; + video_format_params_.reconfig_options = ROCDEC_RECONFIG_NEW_SURFACES; video_format_params_.video_signal_description = {0}; video_format_params_.seqhdr_data_length = 0; diff --git 
a/projects/rocdecode/src/parser/avc_parser.cpp b/projects/rocdecode/src/parser/avc_parser.cpp index f1a7572f17..b18012cc69 100644 --- a/projects/rocdecode/src/parser/avc_parser.cpp +++ b/projects/rocdecode/src/parser/avc_parser.cpp @@ -387,6 +387,7 @@ ParserResult AvcVideoParser::NotifyNewSps(AvcSeqParameterSet *p_sps) { video_format_params_.display_aspect_ratio.x = disp_width / gcd; video_format_params_.display_aspect_ratio.y = disp_height / gcd; + video_format_params_.reconfig_options = ROCDEC_RECONFIG_NEW_SURFACES; if (p_sps->vui_parameters_present_flag) { video_format_params_.video_signal_description.video_format = p_sps->vui_seq_parameters.video_format; video_format_params_.video_signal_description.video_full_range_flag = p_sps->vui_seq_parameters.video_full_range_flag; diff --git a/projects/rocdecode/src/parser/hevc_parser.cpp b/projects/rocdecode/src/parser/hevc_parser.cpp index 4e137930e2..93d271939f 100644 --- a/projects/rocdecode/src/parser/hevc_parser.cpp +++ b/projects/rocdecode/src/parser/hevc_parser.cpp @@ -190,6 +190,7 @@ int HevcVideoParser::FillSeqCallbackFn(HevcSeqParamSet* sps_data) { video_format_params_.display_aspect_ratio.x = disp_width / gcd; video_format_params_.display_aspect_ratio.y = disp_height / gcd; + video_format_params_.reconfig_options = ROCDEC_RECONFIG_NEW_SURFACES; if (sps_data->vui_parameters_present_flag) { video_format_params_.video_signal_description.video_format = sps_data->vui_parameters.video_format; video_format_params_.video_signal_description.video_full_range_flag = sps_data->vui_parameters.video_full_range_flag; diff --git a/projects/rocdecode/src/parser/vp9_parser.cpp b/projects/rocdecode/src/parser/vp9_parser.cpp index 442fbd3c3a..420f1da7d3 100644 --- a/projects/rocdecode/src/parser/vp9_parser.cpp +++ b/projects/rocdecode/src/parser/vp9_parser.cpp @@ -35,6 +35,9 @@ Vp9VideoParser::Vp9VideoParser() { InitDpb(); num_frames_in_chunck_ = 1; frame_sizes_.assign(1, 0); + curr_surface_width_ = 0; + curr_surface_height_ = 
0; + reconfig_option_ = ROCDEC_RECONFIG_NEW_SURFACES; } Vp9VideoParser::~Vp9VideoParser() { @@ -88,6 +91,9 @@ ParserResult Vp9VideoParser::ParsePictureData(const uint8_t *p_stream, uint32_t } // Init Roc decoder for the first time or reconfigure the existing decoder if (new_seq_activated_) { + if ((ret = FlushDpb()) != PARSER_OK) { + return ret; + } if ((ret = NotifyNewSequence(&uncompressed_header_)) != PARSER_OK) { return ret; } @@ -212,8 +218,8 @@ ParserResult Vp9VideoParser::NotifyNewSequence(Vp9UncompressedHeader *p_uncomp_h video_format_params_.display_area.left = 0; video_format_params_.display_area.top = 0; - video_format_params_.display_area.right = p_uncomp_header->render_size.render_width; - video_format_params_.display_area.bottom = p_uncomp_header->render_size.render_height; + video_format_params_.display_area.right = p_uncomp_header->frame_size.frame_width; + video_format_params_.display_area.bottom = p_uncomp_header->frame_size.frame_height; video_format_params_.bitrate = 0; // Dispaly aspect ratio @@ -223,6 +229,7 @@ ParserResult Vp9VideoParser::NotifyNewSequence(Vp9UncompressedHeader *p_uncomp_h video_format_params_.display_aspect_ratio.x = disp_width / gcd; video_format_params_.display_aspect_ratio.y = disp_height / gcd; + video_format_params_.reconfig_options = reconfig_option_; video_format_params_.video_signal_description = {0}; video_format_params_.seqhdr_data_length = 0; @@ -546,12 +553,27 @@ ParserResult Vp9VideoParser::ParseUncompressedHeader(uint8_t *p_stream, size_t s p_uncomp_header->header_size_in_bytes = Parser::ReadBits(p_stream, offset, 16); + // Arbitrary size change is only supported on key frames. For other frame types, particularly inter-coded frames, only size down is + // supported where the existing surface can be re-used. 
if (pic_width_ != p_uncomp_header->frame_size.frame_width || pic_height_ != p_uncomp_header->frame_size.frame_height) { pic_width_ = p_uncomp_header->frame_size.frame_width; pic_height_ = p_uncomp_header->frame_size.frame_height; + if (p_uncomp_header->frame_type == kVp9KeyFrame) { + curr_surface_width_ = pic_width_; + curr_surface_height_ = pic_height_; + reconfig_option_ = ROCDEC_RECONFIG_NEW_SURFACES; // Normal mode: free existing surfaces and allocate new surfaces. + } else { + if (pic_width_ <= curr_surface_width_ && pic_height_ <= curr_surface_height_) { + reconfig_option_ = ROCDEC_RECONFIG_KEEP_SURFACES; // Keep the existing surfaces + } else { + ERR("VP9 video size (up) change on non-key frames is not supported. Decode errors can occur."); + curr_surface_width_ = pic_width_; + curr_surface_height_ = pic_height_; + reconfig_option_ = ROCDEC_RECONFIG_NEW_SURFACES; // Normal mode: free existing surfaces and allocate new surfaces. + } + } new_seq_activated_ = true; } - uncomp_header_size_ = (offset + 7) >> 3; return PARSER_OK; } diff --git a/projects/rocdecode/src/parser/vp9_parser.h b/projects/rocdecode/src/parser/vp9_parser.h index 3996207e70..9d9bcf1d27 100644 --- a/projects/rocdecode/src/parser/vp9_parser.h +++ b/projects/rocdecode/src/parser/vp9_parser.h @@ -90,6 +90,12 @@ protected: DecodedPictureBuffer dpb_buffer_; Vp9Picture curr_pic_; + // Current surface size, used to support size change (down) on inter frames, where we keep the + // previously allocated surfaces and use them to store the smaller images. + uint32_t curr_surface_width_; + uint32_t curr_surface_height_; + uint32_t reconfig_option_; + /*! \brief Function to parse one picture bit stream received from the demuxer. 
* \param [in] p_stream A pointer of uint8_t for the input stream to be parsed * \param [in] pic_data_size Size of the input stream diff --git a/projects/rocdecode/utils/rocvideodecode/roc_video_dec.cpp b/projects/rocdecode/utils/rocvideodecode/roc_video_dec.cpp index 6ea6fdd93a..ce7db2363a 100644 --- a/projects/rocdecode/utils/rocvideodecode/roc_video_dec.cpp +++ b/projects/rocdecode/utils/rocvideodecode/roc_video_dec.cpp @@ -528,15 +528,19 @@ int RocVideoDecoder::ReconfigureDecoder(RocdecVideoFormat *p_video_format) { } } - if (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL || out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) { - GetSurfaceStrideInternal(video_surface_format_, coded_width_, coded_height_, &surface_stride_, &surface_vstride_); - } else { - surface_stride_ = target_width_ * byte_per_pixel_; + if (p_video_format->reconfig_options == ROCDEC_RECONFIG_NEW_SURFACES) { + if (out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL || out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED) { + GetSurfaceStrideInternal(video_surface_format_, coded_width_, coded_height_, &surface_stride_, &surface_vstride_); + } else { + surface_stride_ = target_width_ * byte_per_pixel_; + } } chroma_height_ = static_cast<int>(ceil(target_height_ * GetChromaHeightFactor(video_surface_format_))); num_chroma_planes_ = GetChromaPlaneCount(video_surface_format_); if (p_video_format->chroma_format == rocDecVideoChromaFormat_Monochrome) num_chroma_planes_ = 0; - chroma_vstride_ = static_cast<int>(std::ceil(surface_vstride_ * GetChromaHeightFactor(video_surface_format_))); + if (p_video_format->reconfig_options == ROCDEC_RECONFIG_NEW_SURFACES) { + chroma_vstride_ = static_cast<int>(std::ceil(surface_vstride_ * GetChromaHeightFactor(video_surface_format_))); + } // Fill output_surface_info_ output_surface_info_.output_width = target_width_; output_surface_info_.output_height = target_height_; @@ -590,8 +594,9 @@ int RocVideoDecoder::ReconfigureDecoder(RocdecVideoFormat *p_video_format) { ROCDEC_THROW("Reconfigurition of the
decoder detected but the decoder was not initialized previoulsy!", ROCDEC_NOT_SUPPORTED); return 0; } - ROCDEC_API_CALL(rocDecReconfigureDecoder(roc_decoder_, &reconfig_params)); - + if (p_video_format->reconfig_options == ROCDEC_RECONFIG_NEW_SURFACES) { + ROCDEC_API_CALL(rocDecReconfigureDecoder(roc_decoder_, &reconfig_params)); + } input_video_info_str_.str(""); input_video_info_str_.clear(); @@ -908,7 +913,6 @@ bool RocVideoDecoder::ReleaseInternalFrames() { return true; } - void RocVideoDecoder::SaveFrameToFile(std::string output_file_name, void *surf_mem, OutputSurfaceInfo *surf_info, size_t rgb_image_size) { uint8_t *hst_ptr = nullptr; bool is_rgb = (rgb_image_size != 0);