Updated videoDecodePerf Sample with host backend (#635)

* set min: number of decoded surfaces to non-zero value for ffmpeg based decoding

* added host decoding option to videodecode perf app

* fix GetOutputSurfaceInfo() for FFMpeg

* updated changelog

* added host decoding option to videodecode perf app

* fix GetOutputSurfaceInfo() for FFMpeg

* updated changelog

* removed GPU device info printing for HOST backend

* fix for review comment

[ROCm/rocdecode commit: eeecc3e8c8]
This commit is contained in:
Rajy Rawther
2025-08-28 09:02:13 -07:00
committed by GitHub
parent 87baa53ebd
commit 74ea277fdf
8 changed files with 113 additions and 39 deletions
+2 -1
View File
@@ -2,7 +2,7 @@
Full documentation for rocDecode is available at [https://rocm.docs.amd.com/projects/rocDecode/en/latest/](https://rocm.docs.amd.com/projects/rocDecode/en/latest/)
## rocDecode 1.2.0 (unreleased)
## rocDecode 1.3.0 (unreleased)
### Added
@@ -20,6 +20,7 @@ Full documentation for rocDecode is available at [https://rocm.docs.amd.com/proj
* rocdecode now uses the Cmake CMAKE_PREFIX_PATH directive.
* rocdecode - A new avcodec-based decoder built as a separate "rocdecode-host" library
* The host backend in the `videoDecode` sample now uses the rocdecode-host library
* The host backend using rocdecode-host library is added in `videoDecodePerf` sample
### Optimized
+1 -1
View File
@@ -40,7 +40,7 @@ if (NOT DEFINED CMAKE_CXX_COMPILER)
endif()
# rocdecode Version
set(VERSION "1.2.0")
set(VERSION "1.3.0")
# Set Project Version and Language
project(rocdecode VERSION ${VERSION} LANGUAGES CXX)
@@ -65,6 +65,7 @@ find_package(HIP QUIET)
find_package(rocdecode QUIET)
find_package(rocprofiler-register QUIET)
find_package(FFmpeg QUIET)
find_package(Threads QUIET)
if(HIP_FOUND AND FFMPEG_FOUND AND rocdecode_FOUND AND Threads_FOUND AND rocprofiler-register_FOUND)
# HIP
@@ -74,7 +75,7 @@ if(HIP_FOUND AND FFMPEG_FOUND AND rocdecode_FOUND AND Threads_FOUND AND rocprofi
${SWSCALE_INCLUDE_DIR} ${AVFORMAT_INCLUDE_DIR})
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES})
# rocdecode and utils
include_directories (${rocdecode_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode ${CMAKE_CURRENT_SOURCE_DIR}/..)
include_directories (${rocdecode_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/ffmpegvideodecode/ ${CMAKE_CURRENT_SOURCE_DIR}/..)
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} rocdecode::rocdecode)
# threads
set(THREADS_PREFER_PTHREAD_FLAG ON)
@@ -82,10 +83,19 @@ if(HIP_FOUND AND FFMPEG_FOUND AND rocdecode_FOUND AND Threads_FOUND AND rocprofi
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} Threads::Threads)
# rocprofiler-register
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} rocprofiler-register::rocprofiler-register)
# rocdecodehost
find_library(ROCDECODE_HOST_LIBRARY NAMES rocdecodehost HINTS ${ROCM_PATH}/lib)
# sample app exe
list(APPEND SOURCES ${PROJECT_SOURCE_DIR} videodecodeperf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp)
list(APPEND SOURCES ${PROJECT_SOURCE_DIR} videodecodeperf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/ffmpegvideodecode/ffmpeg_video_dec.cpp)
add_executable(${PROJECT_NAME} ${SOURCES})
if(ROCDECODE_HOST_LIBRARY)
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCDECODE_HOST_LIBRARY})
target_compile_definitions(${PROJECT_NAME} PUBLIC ENABLE_HOST_DECODE=1)
else()
target_compile_definitions(${PROJECT_NAME} PUBLIC ENABLE_HOST_DECODE=0)
endif()
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
# FFMPEG multi-version support
if(_FFMPEG_AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100)
target_compile_definitions(${PROJECT_NAME} PUBLIC USE_AVCODEC_GREATER_THAN_58_134=0)
@@ -35,6 +35,7 @@ THE SOFTWARE.
#endif
#include "video_demuxer.h"
#include "roc_video_dec.h"
#include "ffmpeg_video_dec.h"
#include "common.h"
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, int *pn_pic_dec, double *pn_fps, double *pn_fps_dec, int max_num_frames, OutputSurfaceMemoryType mem_type) {
@@ -98,6 +99,7 @@ int main(int argc, char **argv) {
bool b_force_zero_latency = false;
uint32_t max_num_frames = 0; // max number of frames to be decoded. default value is 0, meaning decode the entire stream
int disp_delay = 1;
int backend = 0;
// Parse command-line arguments
if(argc <= 1) {
@@ -162,6 +164,13 @@ int main(int argc, char **argv) {
mem_type = static_cast<OutputSurfaceMemoryType>(atoi(argv[i]));
continue;
}
if (!strcmp(argv[i], "-backend")) {
if (++i == argc) {
ShowHelpAndExit("-backend");
}
backend = atoi(argv[i]);
continue;
}
ShowHelpAndExit(argv[i]);
}
@@ -219,7 +228,22 @@ int main(int argc, char **argv) {
} else {
v_device_id[i] = i % hip_vis_dev_count;
}
std::unique_ptr<RocVideoDecoder> dec(new RocVideoDecoder(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, false, disp_delay));
std::unique_ptr<RocVideoDecoder> dec;
if (!backend) { // gpu backend
dec = std::make_unique<RocVideoDecoder>(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, false, disp_delay);
} else {
#if ENABLE_HOST_DECODE
std::cout << "info: RocDecode is using CPU backend!" << std::endl;
uint32_t max_width = demuxer->GetWidth();
uint32_t max_height =demuxer->GetHeight();
mem_type = OUT_SURFACE_MEM_HOST_COPIED;
dec = std::make_unique<FFMpegVideoDecoder>(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, false, disp_delay, max_width, max_height);
#else
std::cout << "Error: RocDecode HOST library is not found and backend is not supported!" << std::endl;
return 0;
#endif
}
if (!dec->CodecSupported(v_device_id[i], rocdec_codec_id, demuxer->GetBitDepth())) {
std::cerr << "Codec not supported on GPU, skipping this file!" << std::endl;
continue;
@@ -246,7 +270,7 @@ int main(int argc, char **argv) {
for (int i = 0; i < n_thread; i++) {
v_viddec[i]->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id);
std::cout << "info: stream " << i << " using GPU device " << v_device_id[i] << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " <<
if (!backend) std::cout << "info: stream " << i << " using GPU device " << v_device_id[i] << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " <<
std::setfill('0') << std::setw(2) << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') << std::setw(2) <<
std::right << std::hex << pci_domain_id << "." << pci_device_id << std::dec << std::endl;
std::cout << "info: decoding started for thread " << i << " ,please wait!" << std::endl;
@@ -45,11 +45,16 @@ static inline float GetChromaWidthFactor(rocDecVideoSurfaceFormat surface_format
FFMpegVideoDecoder::FFMpegVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_type, rocDecVideoCodec codec, bool force_zero_latency,
const Rect *p_crop_rect, bool extract_user_sei_Message, uint32_t disp_delay, int max_width, int max_height, uint32_t clk_rate) :
RocVideoDecoder(device_id, out_mem_type, codec, force_zero_latency, p_crop_rect, extract_user_sei_Message, disp_delay, max_width, max_height, clk_rate) {
RocVideoDecoder(device_id, out_mem_type, codec, force_zero_latency, p_crop_rect, extract_user_sei_Message, disp_delay, max_width, max_height, clk_rate, true) {
if ((out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) || (out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED)) {
ROCDEC_THROW("Unsupported output memory type", ROCDEC_INVALID_PARAMETER);
}
if (out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) {
if (!InitHIP(device_id_)) {
ROCDEC_THROW("Failed to initilize the HIP", ROCDEC_DEVICE_INVALID);
}
}
// many of the decoder parameters are hardcoded below for just creating the decoder.
// In the handlevideosequence callback, the decoder will get reconfigured to the actual parameters in the sequence header
RocDecoderHostCreateInfo create_info = {};
@@ -74,7 +79,19 @@ FFMpegVideoDecoder::FFMpegVideoDecoder(int device_id, OutputSurfaceMemoryType ou
create_info.pfn_display_picture = FFMpegHandlePictureDisplayProc;
create_info.pfn_get_sei_msg = nullptr; // tobe supported in future
ROCDEC_API_CALL(rocDecCreateDecoderHost(&roc_decoder_, &create_info));
// set disp_width and height to non_zero values for it doesn't trigger decoding error before actual start of decoding
disp_width_ = max_width;
disp_height_ = max_height;
// fill output_surface_info_
output_surface_info_.output_width = max_width;
output_surface_info_.output_height = max_height;
output_surface_info_.output_pitch = max_width * 2; // bytes_per_pixel 2
output_surface_info_.output_vstride = max_height;
output_surface_info_.bit_depth = bitdepth_minus_8_ + 8;
output_surface_info_.bytes_per_pixel = 2;
output_surface_info_.surface_format = rocDecVideoSurfaceFormat_P016;
output_surface_info_.num_chroma_planes = 2;
output_surface_info_.mem_type = OUT_SURFACE_MEM_HOST_COPIED;
}
@@ -201,6 +218,15 @@ int FFMpegVideoDecoder::HandleVideoSequence(RocdecVideoFormatHost *format_host)
return num_decode_surfaces;
}
/**
 * @brief Get the pointer to the Output Image Info
 *
 * On success, *surface_info is set to the address of this decoder's
 * output_surface_info_; no copy is made.
 *
 * @param surface_info - receives the address of output_surface_info_; must not be null
 * @return true - *surface_info was set
 * @return false - surface_info is null, or disp_width_ / disp_height_ is still zero
 */
bool FFMpegVideoDecoder::GetOutputSurfaceInfo(OutputSurfaceInfo **surface_info) {
    // The out-parameter is dereferenced below, so reject a null pointer up front
    // instead of crashing inside the decoder.
    if (surface_info == nullptr) {
        std::cerr << "ERROR: surface_info is null" << std::endl;
        return false;
    }
    // Zero display dimensions are treated as "decoder not initialized yet".
    if (!disp_width_ || !disp_height_) {
        std::cerr << "ERROR: FFMpegVideo is not intialized" << std::endl;
        return false;
    }
    *surface_info = &output_surface_info_;
    return true;
}
/**
* @brief function to reconfigure decoder if there is a change in sequence params.
*
@@ -72,13 +72,22 @@ class FFMpegVideoDecoder: public RocVideoDecoder {
* @param num_decoded_pics - number of pictures decoded in this call
* @return int - num of frames to display
*/
int DecodeFrame(const uint8_t *data, size_t size, int pkt_flags, int64_t pts = 0, int *num_decoded_pics = nullptr);
int DecodeFrame(const uint8_t *data, size_t size, int pkt_flags, int64_t pts = 0, int *num_decoded_pics = nullptr) override;
/**
* @brief Get the pointer to the Output Image Info
*
* @param surface_info ptr to output surface info
* @return true
* @return false
*/
bool GetOutputSurfaceInfo(OutputSurfaceInfo **surface_info) override;
/**
* @brief This function returns a decoded frame and timestamp. This should be called in a loop fetching all the available frames
*
*/
uint8_t* GetFrame(int64_t *pts);
uint8_t* GetFrame(int64_t *pts) override;
/**
* @brief function to release frame after use by the application: Only used with "OUT_SURFACE_MEM_DEV_INTERNAL"
@@ -88,7 +97,7 @@ class FFMpegVideoDecoder: public RocVideoDecoder {
* @return true - success
* @return false - failed
*/
bool ReleaseFrame(int64_t pTimestamp, bool b_flushing = false);
bool ReleaseFrame(int64_t pTimestamp, bool b_flushing = false) override;
/**
* @brief Helper function to dump decoded output surface to file
@@ -98,12 +107,17 @@ class FFMpegVideoDecoder: public RocVideoDecoder {
* @param surf_info - surface info
* @param rgb_image_size - image size for rgb (optional). A non_zero value indicates the surf_mem holds an rgb interleaved image and the entire size will be dumped to file
*/
void SaveFrameToFile(std::string output_file_name, void *surf_mem, OutputSurfaceInfo *surf_info, size_t rgb_image_size = 0);
void SaveFrameToFile(std::string output_file_name, void *surf_mem, OutputSurfaceInfo *surf_info, size_t rgb_image_size = 0) override;
/**
* @brief This function is used to get the current frame size based on pixel format.
*/
virtual int GetFrameSize() {CHECK_ZERO("Display width", disp_width_); return ((disp_width_ * disp_height_) + ((chroma_height_ * chroma_width_) * num_chroma_planes_)) * byte_per_pixel_; }
virtual int GetFrameSize() override {CHECK_ZERO("Display width", disp_width_); return ((disp_width_ * disp_height_) + ((chroma_height_ * chroma_width_) * num_chroma_planes_)) * byte_per_pixel_; }
/**
* @brief This function reconfigure decoder if there is a change in sequence params.
*/
int ReconfigureDecoder(RocdecVideoFormat *p_video_format) override;
private:
/**
@@ -138,9 +152,4 @@ class FFMpegVideoDecoder: public RocVideoDecoder {
*/
int GetSEIMessage(RocdecSeiMessageInfo *p_sei_message_info) { return RocVideoDecoder::GetSEIMessage(p_sei_message_info);};
/**
* @brief This function reconfigure decoder if there is a change in sequence params.
*/
int ReconfigureDecoder(RocdecVideoFormat *p_video_format);
};
@@ -23,30 +23,34 @@ THE SOFTWARE.
#include "roc_video_dec.h"
RocVideoDecoder::RocVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_type, rocDecVideoCodec codec, bool force_zero_latency,
const Rect *p_crop_rect, bool extract_user_sei_Message, uint32_t disp_delay, int max_width, int max_height, uint32_t clk_rate) :
const Rect *p_crop_rect, bool extract_user_sei_Message, uint32_t disp_delay, int max_width, int max_height, uint32_t clk_rate, bool skip_init) :
device_id_{device_id}, out_mem_type_(out_mem_type), codec_id_(codec), b_force_zero_latency_(force_zero_latency),
b_extract_sei_message_(extract_user_sei_Message), disp_delay_(disp_delay), max_width_ (max_width), max_height_(max_height) {
if (!InitHIP(device_id_)) {
ROCDEC_THROW("Failed to initilize the HIP", ROCDEC_DEVICE_INVALID);
}
if (p_crop_rect) crop_rect_ = *p_crop_rect;
if (b_extract_sei_message_) {
fp_sei_ = fopen("rocdec_sei_message.txt", "wb");
curr_sei_message_ptr_ = new RocdecSeiMessageInfo;
memset(&sei_message_display_q_, 0, sizeof(sei_message_display_q_));
}
// create rocdec videoparser
RocdecParserParams parser_params = {};
parser_params.codec_type = codec_id_;
parser_params.max_num_decode_surfaces = 1; // let the parser to determine the decode buffer pool size
parser_params.clock_rate = clk_rate;
parser_params.max_display_delay = disp_delay_;
parser_params.user_data = this;
parser_params.pfn_sequence_callback = HandleVideoSequenceProc;
parser_params.pfn_decode_picture = HandlePictureDecodeProc;
parser_params.pfn_display_picture = b_force_zero_latency_ ? NULL : HandlePictureDisplayProc;
parser_params.pfn_get_sei_msg = b_extract_sei_message_ ? HandleSEIMessagesProc : NULL;
ROCDEC_API_CALL(rocDecCreateVideoParser(&rocdec_parser_, &parser_params));
// derived class can skip the following initialization by setting skip_init flag
if (!skip_init) {
if (!InitHIP(device_id_)) {
ROCDEC_THROW("Failed to initilize the HIP", ROCDEC_DEVICE_INVALID);
}
// create rocdec videoparser
RocdecParserParams parser_params = {};
parser_params.codec_type = codec_id_;
parser_params.max_num_decode_surfaces = 1; // let the parser to determine the decode buffer pool size
parser_params.clock_rate = clk_rate;
parser_params.max_display_delay = disp_delay_;
parser_params.user_data = this;
parser_params.pfn_sequence_callback = HandleVideoSequenceProc;
parser_params.pfn_decode_picture = HandlePictureDecodeProc;
parser_params.pfn_display_picture = b_force_zero_latency_ ? NULL : HandlePictureDisplayProc;
parser_params.pfn_get_sei_msg = b_extract_sei_message_ ? HandleSEIMessagesProc : NULL;
ROCDEC_API_CALL(rocDecCreateVideoParser(&rocdec_parser_, &parser_params));
}
}
@@ -76,7 +80,6 @@ RocVideoDecoder::~RocVideoDecoder() {
delete curr_video_format_ptr_;
curr_video_format_ptr_ = nullptr;
}
std::lock_guard<std::mutex> lock(mtx_vp_frame_);
if (out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) {
for (auto &p_frame : vp_frames_) {
@@ -100,11 +103,11 @@ RocVideoDecoder::~RocVideoDecoder() {
std::cerr << "ERROR: hipStream_Destroy failed! (" << hip_status << ")" << std::endl;
}
}
if (fp_out_) {
fclose(fp_out_);
fp_out_ = nullptr;
}
double elapsed_time = StopTimer(start_time);
AddDecoderSessionOverHead(std::this_thread::get_id(), elapsed_time);
}
@@ -219,7 +219,8 @@ class RocVideoDecoder {
*/
RocVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_type, rocDecVideoCodec codec, bool force_zero_latency = false,
const Rect *p_crop_rect = nullptr, bool extract_user_SEI_Message = false, uint32_t disp_delay = 0, int max_width = 0, int max_height = 0,
uint32_t clk_rate = 1000);
uint32_t clk_rate = 1000, bool skip_init = false);
virtual ~RocVideoDecoder();
rocDecVideoCodec GetCodecId() { return codec_id_; }
@@ -291,7 +292,7 @@ class RocVideoDecoder {
* @return true
* @return false
*/
bool GetOutputSurfaceInfo(OutputSurfaceInfo **surface_info);
virtual bool GetOutputSurfaceInfo(OutputSurfaceInfo **surface_info);
/**
* @brief Function to set the Reconfig Params object
@@ -486,7 +487,7 @@ class RocVideoDecoder {
bool b_force_recofig_flush_ = false;
int32_t num_frames_flushed_during_reconfig_ = 0;
hipDeviceProp_t hip_dev_prop_;
hipStream_t hip_stream_;
hipStream_t hip_stream_ = nullptr;
rocDecVideoChromaFormat video_chroma_format_ = rocDecVideoChromaFormat_420;
rocDecVideoSurfaceFormat video_surface_format_ = rocDecVideoSurfaceFormat_NV12;
RocdecSeiMessageInfo *curr_sei_message_ptr_ = nullptr;