* WIP: ffmpeg based software decoder

* minor update

* WIP: FFMpeg SW decoder support

* FFMpeg decoder working version

* working version without threading

* multithreading working with FFMpeg decoder

* moved ffmpeg decoder to separate folder

* updated changelog

* minor change

* resolved review comments

* resolved review comments

* fixed a bug in hipMemcpy for copying decoded output to GPU

* added support for multithreading in FFMpeg decoder

* addressed review comments

* fixed format changes requested

---------

Co-authored-by: Lakshmi Kumar <lakshmi.kumar@amd.com>

[ROCm/rocdecode commit: 5365ceabec]
Этот коммит содержится в:
Rajy Rawther
2024-12-20 13:50:15 -08:00
коммит произвёл GitHub
родитель 6bcc0a4067
Коммит 8136f59dc0
2 изменённых файлов: 77 добавлений и 29 удалений
+76 -29
Просмотреть файл
@@ -105,7 +105,7 @@ FFMpegVideoDecoder::FFMpegVideoDecoder(int device_id, OutputSurfaceMemoryType ou
parser_params.user_data = this;
parser_params.pfn_sequence_callback = FFMpegHandleVideoSequenceProc;
parser_params.pfn_decode_picture = FFMpegHandlePictureDecodeProc;
parser_params.pfn_display_picture = FFMpegHandlePictureDisplayProc;
parser_params.pfn_display_picture = nullptr; // HandlePictureDisplay will be controlled by FFMpegDecoder
parser_params.pfn_get_sei_msg = b_extract_sei_message_ ? RocVideoDecoder::HandleSEIMessagesProc : NULL;
ROCDEC_API_CALL(rocDecCreateVideoParser(&rocdec_parser_, &parser_params));
}
@@ -194,19 +194,28 @@ int FFMpegVideoDecoder::HandleVideoSequence(RocdecVideoFormat *p_video_format) {
if (!dec_context_) {
THROW("Could not allocate video codec context");
}
// set codec to automatically determine how many threads suits best for the decoding job
dec_context_->thread_count = 0;
if (decoder_->capabilities & AV_CODEC_CAP_FRAME_THREADS)
dec_context_->thread_type = FF_THREAD_FRAME;
else if (decoder_->capabilities & AV_CODEC_CAP_SLICE_THREADS)
dec_context_->thread_type = FF_THREAD_SLICE;
else
dec_context_->thread_count = 1;
// open the codec
if (avcodec_open2(dec_context_, decoder_, NULL) < 0) {
THROW("Could not open codec");
}
// get the output pixel format from dec_context_
decoder_pixel_format_ = (dec_context_->pix_fmt == AV_PIX_FMT_NONE) ? AV_PIX_FMT_YUV420P : dec_context_->pix_fmt;
dec_context_->thread_count = 4;
dec_context_->thread_type = FF_THREAD_FRAME;
}
// allocate av_frame buffer pool for number of surfaces
// allocate av_frame buffer pool for number of surfaces to be in the decoder pool
// Note: with multi-threading, av_codec needs (dec_context_->delay + max_num_B_frames) number of av_frames.
// max_num_B_frames is assumed to be 4 here: revisit this to fix with the correct number from seq_header
if (dec_frames_.empty()) {
for (int i = 0; i < num_decode_surfaces; i++) {
for (int i = 0; i < (dec_context_->delay + 4); i++) {
AVFrame *p_frame = av_frame_alloc();
dec_frames_.push_back(p_frame);
}
@@ -452,9 +461,29 @@ int FFMpegVideoDecoder::HandlePictureDecode(RocdecPicParams *pPicParams) {
if (no_multithreading_) {
DecodeAvFrame(av_pkt, dec_frames_[av_frame_cnt_]);
int num_frames_to_display = decoded_pic_cnt_;
while (num_frames_to_display) {
RocdecParserDispInfo dispInfo = {0}; // dispinfo is not used in ffmpeg decoder, so setting it to zero
HandlePictureDisplay(&dispInfo);
num_frames_to_display--;
};
if (!last_packet_.payload_size && !end_of_stream_) {
AVPacket pkt = {0};
DecodeAvFrame(&pkt, dec_frames_[av_frame_cnt_]);
int num_frames_to_display = decoded_pic_cnt_;
while (num_frames_to_display) {
RocdecParserDispInfo dispInfo = {0}; // don't care about this as this will be igonored
HandlePictureDisplay(&dispInfo);
num_frames_to_display--;
};
}
} else {
//push packet into packet q for decoding
PushPacket(av_pkt);
if (!last_packet_.payload_size && !end_of_stream_) {
AVPacket pkt = {0};
PushPacket(&pkt);
}
}
av_pkt_cnt_ = (av_pkt_cnt_ + 1) % av_packets_.size();
if (!av_pkt->data || !av_pkt->size) {
@@ -471,18 +500,6 @@ int FFMpegVideoDecoder::HandlePictureDecode(RocdecPicParams *pPicParams) {
* @return int 0:fail 1: success
*/
int FFMpegVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) {
// check if we reached eos here. This is a hack since HandlePictureDecode won't be called at the end_of_sequece.
// so we need to flush FFMpeg decoder when we have received the lastpacket with 0 bytes
if (!last_packet_.payload_size && !end_of_stream_) {
AVPacket pkt = { 0 };
if (no_multithreading_) {
DecodeAvFrame(&pkt, dec_frames_[av_frame_cnt_]);
} else {
//push packet into packet q for decoding
PushPacket(&pkt);
}
}
if (b_extract_sei_message_) {
if (sei_message_display_q_[pDispInfo->picture_index].sei_data) {
// Write SEI Message
@@ -515,12 +532,13 @@ int FFMpegVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) {
sei_message_display_q_[pDispInfo->picture_index].sei_message = NULL; // to avoid double free
}
}
// vp_frames_ffmpeg_.size() is empty, wait for decoding to finish
// this will happen during PopFrame()
AVFrame *p_av_frame;
if (no_multithreading_) {
p_av_frame = av_frame_q_.front();
av_frame_q_.pop();
AVFrame *p_av_frame = nullptr;
if (no_multithreading_ ) {
if (!av_frame_q_.empty()) {
p_av_frame = av_frame_q_.front();
av_frame_q_.pop();
}
} else {
p_av_frame = PopFrame();
}
@@ -529,8 +547,8 @@ int FFMpegVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) {
std::cerr << "Invalid avframe decode output" << std::endl;
return 0;
}
void* src_ptr[3] = { 0 };
int32_t src_pitch[3] = { 0 };
void* src_ptr[3] = {0};
int32_t src_pitch[3] = {0};
src_ptr[0] = p_av_frame->data[0];
src_ptr[1] = p_av_frame->data[1];
src_ptr[2] = p_av_frame->data[2];
@@ -545,7 +563,7 @@ int FFMpegVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) {
// if not enough frames in stock, allocate
if (++output_frame_cnt_ > vp_frames_ffmpeg_.size()) {
num_alloced_frames_++;
DecFrameBufferFFMpeg dec_frame = { 0 };
DecFrameBufferFFMpeg dec_frame = {0};
if (out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) {
// allocate device memory
HIP_API_CALL(hipMalloc((void **)&dec_frame.frame_ptr, GetFrameSize()));
@@ -554,8 +572,8 @@ int FFMpegVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) {
}
dec_frame.av_frame_ptr = p_av_frame;
dec_frame.pts = pDispInfo->pts;
dec_frame.picture_index = pDispInfo->picture_index;
dec_frame.pts = p_av_frame->pts;
dec_frame.picture_index = p_av_frame->display_picture_number;
vp_frames_ffmpeg_.push_back(dec_frame);
}
p_dec_frame = vp_frames_ffmpeg_[output_frame_cnt_ - 1].frame_ptr;
@@ -651,7 +669,7 @@ int FFMpegVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) {
int FFMpegVideoDecoder::DecodeFrame(const uint8_t *data, size_t size, int pkt_flags, int64_t pts, int *num_decoded_pics) {
output_frame_cnt_ = 0, output_frame_cnt_ret_ = 0;
decoded_pic_cnt_ = 0;
last_packet_ = { 0 };
last_packet_ = {0};
last_packet_.payload = data;
last_packet_.payload_size = size;
last_packet_.flags = pkt_flags | ROCDEC_PKT_TIMESTAMP;
@@ -660,6 +678,11 @@ int FFMpegVideoDecoder::DecodeFrame(const uint8_t *data, size_t size, int pkt_fl
last_packet_.flags |= ROCDEC_PKT_ENDOFSTREAM;
}
ROCDEC_API_CALL(rocDecParseVideoData(rocdec_parser_, &last_packet_));
if (last_packet_.flags & ROCDEC_PKT_ENDOFSTREAM) {
// flush last packet and let FFMpeg decode last frames
FlushDecoder();
}
if (num_decoded_pics) {
*num_decoded_pics = decoded_pic_cnt_;
}
@@ -724,6 +747,30 @@ void FFMpegVideoDecoder::InitOutputFrameInfo(AVFrame *p_frame) {
}
}
/**
* @brief Flush decoder with decoding of last frames
*
* @return int 1: success 0: fail
*/
int FFMpegVideoDecoder::FlushDecoder() {
AVPacket pkt = {0};
if (no_multithreading_) {
DecodeAvFrame(&pkt, dec_frames_[av_frame_cnt_]);
int num_frames_to_display = decoded_pic_cnt_;
while (num_frames_to_display) {
RocdecParserDispInfo dispInfo = {0}; // don't care about this as this will be igonored
HandlePictureDisplay(&dispInfo);
num_frames_to_display--;
};
} else {
//push packet into packet q for decoding
PushPacket(&pkt);
}
return 0;
}
void FFMpegVideoDecoder::DecodeThread()
{
AVPacket *pkt;
+1
Просмотреть файл
@@ -174,6 +174,7 @@ class FFMpegVideoDecoder: public RocVideoDecoder {
void DecodeThread();
int DecodeAvFrame(AVPacket *av_pkt, AVFrame *p_frame);
void InitOutputFrameInfo(AVFrame *p_frame);
int FlushDecoder();
void PushPacket(AVPacket *pkt) {
{
std::lock_guard<std::mutex> lock(mtx_pkt_q_);