From 97be0bb3ca2747cf3bf34a99385f1b40db49a352 Mon Sep 17 00:00:00 2001 From: Lakshmi Kumar Date: Wed, 1 May 2024 14:03:27 -0700 Subject: [PATCH] Seek - Bug Fixes + Sample Update (#327) * adds seek to videodecode and seek bug fixes * fixes both frames & timestamp with prev_key_frame option * switch to exact key frame and bug fix * fix decoded_frame_number from seek * fix out_frame_duration * clean up + exact frame fix [ROCm/rocdecode commit: 2d50d685f77232fb86a0ce1c24291c902cd8373e] --- .../samples/videoDecode/videodecode.cpp | 53 ++++++++++++++++- projects/rocdecode/utils/video_demuxer.h | 59 ++++++++++--------- 2 files changed, 81 insertions(+), 31 deletions(-) diff --git a/projects/rocdecode/samples/videoDecode/videodecode.cpp b/projects/rocdecode/samples/videoDecode/videodecode.cpp index 5434b613f3..457d872138 100644 --- a/projects/rocdecode/samples/videoDecode/videodecode.cpp +++ b/projects/rocdecode/samples/videoDecode/videodecode.cpp @@ -52,7 +52,11 @@ void ShowHelpAndExit(const char *option = NULL) { << "-md5_check MD5 File Path - generate MD5 message digest on the decoded YUV image sequence and compare to the reference MD5 string in a file; optional;" << std::endl << "-crop crop rectangle for output (not used when using interopped decoded frame); optional; default: 0" << std::endl << "-m output_surface_memory_type - decoded surface memory; optional; default - 0" - << " [0 : OUT_SURFACE_MEM_DEV_INTERNAL/ 1 : OUT_SURFACE_MEM_DEV_COPIED/ 2 : OUT_SURFACE_MEM_HOST_COPIED/ 3 : OUT_SURFACE_MEM_NOT_MAPPED]" << std::endl; + << " [0 : OUT_SURFACE_MEM_DEV_INTERNAL/ 1 : OUT_SURFACE_MEM_DEV_COPIED/ 2 : OUT_SURFACE_MEM_HOST_COPIED/ 3 : OUT_SURFACE_MEM_NOT_MAPPED]" << std::endl + << "-seek_criteria - Demux seek criteria & value - optional; default - 0,0; " + << "[0: no seek; 1: SEEK_CRITERIA_FRAME_NUM, frame number; 2: SEEK_CRITERIA_TIME_STAMP, frame number (time calculated internally)]" << std::endl + << "-seek_mode - Seek to previous key frame or exact - optional; 
default - 0" + << "[0: SEEK_MODE_PREV_KEY_FRAME; 1: SEEK_MODE_EXACT_FRAME]" << std::endl; exit(0); } @@ -73,6 +77,9 @@ int main(int argc, char **argv) { ReconfigParams reconfig_params = { 0 }; ReconfigDumpFileStruct reconfig_user_struct = { 0 }; uint32_t num_decoded_frames = 0; // default value is 0, meaning decode the entire stream + // seek options + uint64_t seek_to_frame = 0; + int seek_criteria = 0, seek_mode = 0; // Parse command-line arguments if(argc <= 1) { @@ -163,6 +170,23 @@ int main(int argc, char **argv) { b_flush_frames_during_reconfig = atoi(argv[i]) ? true : false; continue; } + if (!strcmp(argv[i], "-seek_criteria")) { + if (++i == argc || 2 != sscanf(argv[i], "%d,%lu", &seek_criteria, &seek_to_frame)) { + ShowHelpAndExit("-seek_criteria"); + } + if (0 > seek_criteria || seek_criteria >= 3) + ShowHelpAndExit("-seek_criteria"); + continue; + } + if (!strcmp(argv[i], "-seek_mode")) { + if (++i == argc) { + ShowHelpAndExit("-seek_mode"); + } + seek_mode = atoi(argv[i]); + if (seek_mode != 0 && seek_mode != 1) + ShowHelpAndExit("-seek_mode"); + continue; + } ShowHelpAndExit(argv[i]); } @@ -171,6 +195,7 @@ int main(int argc, char **argv) { std::size_t found_file = input_file_path.find_last_of('/'); std::cout << "info: Input file: " << input_file_path.substr(found_file + 1) << std::endl; VideoDemuxer demuxer(input_file_path.c_str()); + VideoSeekContext video_seek_ctx; rocDecVideoCodec rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer.GetCodecID()); RocVideoDecoder viddec(device_id, mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, b_extract_sei_messages); @@ -191,6 +216,7 @@ int main(int argc, char **argv) { OutputSurfaceInfo *surf_info; uint32_t width, height; double total_dec_time = 0; + bool first_frame = true; // initialize reconfigure params: the following is configured to dump to output which is relevant for this sample reconfig_params.p_fn_reconfigure_flush = ReconfigureFlushCallback; reconfig_user_struct.b_dump_frames_to_file = 
dump_output_frames; @@ -214,13 +240,33 @@ int main(int argc, char **argv) { do { auto start_time = std::chrono::high_resolution_clock::now(); - demuxer.Demux(&pvideo, &n_video_bytes, &pts); + if (seek_criteria == 1 && first_frame) { + // use VideoSeekContext class to seek to given frame number + video_seek_ctx.seek_frame_ = seek_to_frame; + video_seek_ctx.seek_crit_ = SEEK_CRITERIA_FRAME_NUM; + video_seek_ctx.seek_mode_ = (seek_mode ? SEEK_MODE_EXACT_FRAME : SEEK_MODE_PREV_KEY_FRAME); + demuxer.Seek(video_seek_ctx, &pvideo, &n_video_bytes); + pts = video_seek_ctx.out_frame_pts_; + std::cout << "info: Number of frames that were decoded during seek - " << video_seek_ctx.num_frames_decoded_ << std::endl; + first_frame = false; + } else if (seek_criteria == 2 && first_frame) { + // use VideoSeekContext class to seek to given timestamp + video_seek_ctx.seek_frame_ = seek_to_frame; + video_seek_ctx.seek_crit_ = SEEK_CRITERIA_TIME_STAMP; + video_seek_ctx.seek_mode_ = (seek_mode ? SEEK_MODE_EXACT_FRAME : SEEK_MODE_PREV_KEY_FRAME); + demuxer.Seek(video_seek_ctx, &pvideo, &n_video_bytes); + pts = video_seek_ctx.out_frame_pts_; + std::cout << "info: Duration of frame found after seek - " << video_seek_ctx.out_frame_duration_ << " ms" << std::endl; + first_frame = false; + } else { + demuxer.Demux(&pvideo, &n_video_bytes, &pts); + } // Treat 0 bitstream size as end of stream indicator if (n_video_bytes == 0) { pkg_flags |= ROCDEC_PKT_ENDOFSTREAM; } n_frame_returned = viddec.DecodeFrame(pvideo, n_video_bytes, pkg_flags, pts); - + if (!n_frame && !viddec.GetOutputSurfaceInfo(&surf_info)) { std::cerr << "Error: Failed to get Output Surface Info!" 
<< std::endl; break; @@ -243,6 +289,7 @@ int main(int argc, char **argv) { if (num_decoded_frames && num_decoded_frames <= n_frame) { break; } + } while (n_video_bytes); n_frame += viddec.GetNumOfFlushedFrames(); diff --git a/projects/rocdecode/utils/video_demuxer.h b/projects/rocdecode/utils/video_demuxer.h index e4a2eebc66..9caeb9643a 100644 --- a/projects/rocdecode/utils/video_demuxer.h +++ b/projects/rocdecode/utils/video_demuxer.h @@ -143,7 +143,6 @@ class VideoDemuxer { int64_t TsFromTime(double ts_sec) { // Convert integer timestamp representation to AV_TIME_BASE and switch to fixed_point auto const ts_tbu = llround(ts_sec * AV_TIME_BASE); - // Rescale the timestamp to value represented in stream base units; AVRational time_factor = {1, AV_TIME_BASE}; return av_rescale_q(ts_tbu, time_factor, av_fmt_input_ctx_->streams[av_stream_]->time_base); @@ -183,6 +182,7 @@ class VideoDemuxer { uint32_t bit_depth_ = 0; uint32_t byte_per_pixel_ = 0; uint32_t bit_rate_ = 0; + int64_t pkt_dts_ = 0; // used for Seek Exact frame }; VideoDemuxer::~VideoDemuxer() { @@ -237,28 +237,29 @@ bool VideoDemuxer::Demux(uint8_t **video, int *video_size, int64_t *pts) { } *video = packet_filtered_->data; *video_size = packet_filtered_->size; + pkt_dts_ = packet_filtered_->dts; if (pts) *pts = (int64_t) (packet_filtered_->pts * default_time_scale_ * time_base_); - } else { - if (is_mpeg4_ && (frame_count_ == 0)) { - int ext_data_size = av_fmt_input_ctx_->streams[av_stream_]->codecpar->extradata_size; - if (ext_data_size > 0) { - data_with_header_ = (uint8_t *)av_malloc(ext_data_size + packet_->size - 3 * sizeof(uint8_t)); - if (!data_with_header_) { - std::cerr << "ERROR: av_malloc failed!" 
<< std::endl; - return false; - } - memcpy(data_with_header_, av_fmt_input_ctx_->streams[av_stream_]->codecpar->extradata, ext_data_size); - memcpy(data_with_header_ + ext_data_size, packet_->data + 3, packet_->size - 3 * sizeof(uint8_t)); - *video = data_with_header_; - *video_size = ext_data_size + packet_->size - 3 * sizeof(uint8_t); + } else { + if (is_mpeg4_ && (frame_count_ == 0)) { + int ext_data_size = av_fmt_input_ctx_->streams[av_stream_]->codecpar->extradata_size; + if (ext_data_size > 0) { + data_with_header_ = (uint8_t *)av_malloc(ext_data_size + packet_->size - 3 * sizeof(uint8_t)); + if (!data_with_header_) { + std::cerr << "ERROR: av_malloc failed!" << std::endl; + return false; } - } else { - *video = packet_->data; - *video_size = packet_->size; + memcpy(data_with_header_, av_fmt_input_ctx_->streams[av_stream_]->codecpar->extradata, ext_data_size); + memcpy(data_with_header_ + ext_data_size, packet_->data + 3, packet_->size - 3 * sizeof(uint8_t)); + *video = data_with_header_; + *video_size = ext_data_size + packet_->size - 3 * sizeof(uint8_t); } - if (pts) - *pts = (int64_t)(packet_->pts * default_time_scale_ * time_base_); + } else { + *video = packet_->data; + *video_size = packet_->size; + } + if (pts) + *pts = (int64_t)(packet_->pts * default_time_scale_ * time_base_); } frame_count_++; return true; @@ -394,7 +395,7 @@ VideoDemuxer::VideoDemuxer(AVFormatContext *av_fmt_input_ctx) : av_fmt_input_ctx } } -bool VideoDemuxer::Seek(VideoSeekContext& seek_ctx, uint8_t** pp_video, int* video_size) { +bool VideoDemuxer::Seek(VideoSeekContext& seek_ctx, uint8_t** pp_video, int* video_size) { /* !!! IMPORTANT !!! * Across this function, packet decode timestamp (DTS) values are used to * compare given timestamp against. 
This is done because DTS values shall @@ -433,7 +434,7 @@ bool VideoDemuxer::Seek(VideoSeekContext& seek_ctx, uint8_t** pp_video, int* vid } if (ret < 0) { - throw std::runtime_error("ERROR: seeking for frame: "); + throw std::runtime_error("ERROR: seeking for frame"); } }; @@ -453,10 +454,10 @@ bool VideoDemuxer::Seek(VideoSeekContext& seek_ctx, uint8_t** pp_video, int* vid return -1; } - if (pkt_data.dts == target_ts) { + if (pkt_dts_ == target_ts) { return 0; } - else if (pkt_data.dts > target_ts) { + else if (pkt_dts_ > target_ts) { return 1; } else { @@ -473,7 +474,7 @@ bool VideoDemuxer::Seek(VideoSeekContext& seek_ctx, uint8_t** pp_video, int* vid int seek_done = 0; do { - if (!Demux(pp_video, video_size)) { + if (!Demux(pp_video, video_size, &pkt_data.pts)) { break; } seek_done = is_seek_done(pkt_data, seek_ctx); @@ -485,7 +486,8 @@ bool VideoDemuxer::Seek(VideoSeekContext& seek_ctx, uint8_t** pp_video, int* vid } // Need to read more frames until we reach requested number; else if (seek_done < 0) { - continue; + tmp_ctx.seek_frame_++; + seek_frame(tmp_ctx, AVSEEK_FLAG_ANY); } } while (seek_done != 0); @@ -495,10 +497,11 @@ bool VideoDemuxer::Seek(VideoSeekContext& seek_ctx, uint8_t** pp_video, int* vid // Seek for closest key frame in the past; auto seek_for_prev_key_frame = [&](PacketData& pkt_data, VideoSeekContext& seek_ctx) { - seek_frame(seek_ctx.seek_crit_, AVSEEK_FLAG_BACKWARD); - Demux(pp_video, video_size); + seek_frame(seek_ctx, AVSEEK_FLAG_BACKWARD); + Demux(pp_video, video_size, &pkt_data.pts); + seek_ctx.num_frames_decoded_ = static_cast(pkt_data.pts / 1000 * frame_rate_); seek_ctx.out_frame_pts_ = pkt_data.pts; - seek_ctx.out_frame_duration_ = pkt_data.duration; + seek_ctx.out_frame_duration_ = static_cast(pkt_data.pts / 1000); }; PacketData pktData;