diff --git a/CHANGELOG.md b/CHANGELOG.md index 21790a0c3e..316f2f95ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ Full documentation for rocDecode is available at [https://rocm.docs.amd.com/projects/rocDecode/en/latest/](https://rocm.docs.amd.com/projects/rocDecode/en/latest/) -## rocDecode 1.2.0 (unreleased) +## rocDecode 1.3.0 (unreleased) ### Added @@ -20,6 +20,7 @@ Full documentation for rocDecode is available at [https://rocm.docs.amd.com/proj * rocdecode now uses the Cmake CMAKE_PREFIX_PATH directive. * rocdecode - A new avcodec-based decoder built as a separate "rocdecode-host" library * The host backend in the `videoDeode` sample now uses the rocdecode-host library +* The `videoDecodePerf` sample now supports a host backend that uses the rocdecode-host library ### Optimized diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f335a2cd5..316a337a75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,7 +40,7 @@ if (NOT DEFINED CMAKE_CXX_COMPILER) endif() # rocdecode Version -set(VERSION "1.2.0") +set(VERSION "1.3.0") # Set Project Version and Language project(rocdecode VERSION ${VERSION} LANGUAGES CXX) diff --git a/samples/videoDecodePerf/CMakeLists.txt b/samples/videoDecodePerf/CMakeLists.txt index 1eb89155db..8e0625d736 100644 --- a/samples/videoDecodePerf/CMakeLists.txt +++ b/samples/videoDecodePerf/CMakeLists.txt @@ -65,6 +65,7 @@ find_package(HIP QUIET) find_package(rocdecode QUIET) find_package(rocprofiler-register QUIET) find_package(FFmpeg QUIET) +find_package(Threads QUIET) if(HIP_FOUND AND FFMPEG_FOUND AND rocdecode_FOUND AND Threads_FOUND AND rocprofiler-register_FOUND) # HIP @@ -74,7 +75,7 @@ if(HIP_FOUND AND FFMPEG_FOUND AND rocdecode_FOUND AND Threads_FOUND AND rocprofi ${SWSCALE_INCLUDE_DIR} ${AVFORMAT_INCLUDE_DIR}) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES}) # rocdecode and utils - include_directories (${rocdecode_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils 
${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode ${CMAKE_CURRENT_SOURCE_DIR}/..) + include_directories (${rocdecode_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/ffmpegvideodecode/ ${CMAKE_CURRENT_SOURCE_DIR}/..) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} rocdecode::rocdecode) # threads set(THREADS_PREFER_PTHREAD_FLAG ON) @@ -82,10 +83,20 @@ if(HIP_FOUND AND FFMPEG_FOUND AND rocdecode_FOUND AND Threads_FOUND AND rocprofi set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} Threads::Threads) # rocprofiler-register set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} rocprofiler-register::rocprofiler-register) + # rocdecodehost (optional: the host decoder source is only compiled when the library it links against is found) + find_library(ROCDECODE_HOST_LIBRARY NAMES rocdecodehost HINTS ${ROCM_PATH}/lib) # sample app exe list(APPEND SOURCES ${PROJECT_SOURCE_DIR} videodecodeperf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp) add_executable(${PROJECT_NAME} ${SOURCES}) + if(ROCDECODE_HOST_LIBRARY) + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCDECODE_HOST_LIBRARY}) + target_sources(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/ffmpegvideodecode/ffmpeg_video_dec.cpp) + target_compile_definitions(${PROJECT_NAME} PUBLIC ENABLE_HOST_DECODE=1) + else() + target_compile_definitions(${PROJECT_NAME} PUBLIC ENABLE_HOST_DECODE=0) + endif() target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST}) + # FFMPEG multi-version support if(_FFMPEG_AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100) target_compile_definitions(${PROJECT_NAME} PUBLIC USE_AVCODEC_GREATER_THAN_58_134=0) diff --git a/samples/videoDecodePerf/videodecodeperf.cpp b/samples/videoDecodePerf/videodecodeperf.cpp index c449abff33..d789f2ee47 100644 --- a/samples/videoDecodePerf/videodecodeperf.cpp +++ b/samples/videoDecodePerf/videodecodeperf.cpp @@ -35,6 +35,7 @@ THE SOFTWARE. 
#endif #include "video_demuxer.h" #include "roc_video_dec.h" +#include "ffmpeg_video_dec.h" #include "common.h" void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, int *pn_pic_dec, double *pn_fps, double *pn_fps_dec, int max_num_frames, OutputSurfaceMemoryType mem_type) { @@ -98,6 +99,7 @@ int main(int argc, char **argv) { bool b_force_zero_latency = false; uint32_t max_num_frames = 0; // max number of frames to be decoded. default value is 0, meaning decode the entire stream int disp_delay = 1; + int backend = 0; // 0: GPU backend (default); non-zero: host (CPU) backend // Parse command-line arguments if(argc <= 1) { @@ -162,6 +164,13 @@ int main(int argc, char **argv) { mem_type = static_cast<OutputSurfaceMemoryType>(atoi(argv[i])); continue; } + if (!strcmp(argv[i], "-backend")) { + if (++i == argc) { + ShowHelpAndExit("-backend"); + } + backend = atoi(argv[i]); + continue; + } ShowHelpAndExit(argv[i]); } @@ -219,7 +228,22 @@ int main(int argc, char **argv) { } else { v_device_id[i] = i % hip_vis_dev_count; } - std::unique_ptr<RocVideoDecoder> dec(new RocVideoDecoder(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, false, disp_delay)); + std::unique_ptr<RocVideoDecoder> dec; + if (!backend) { // gpu backend + dec = std::make_unique<RocVideoDecoder>(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, false, disp_delay); + } else { + #if ENABLE_HOST_DECODE + std::cout << "info: RocDecode is using CPU backend!" << std::endl; + uint32_t max_width = demuxer->GetWidth(); + uint32_t max_height = demuxer->GetHeight(); + mem_type = OUT_SURFACE_MEM_HOST_COPIED; + dec = std::make_unique<FFMpegVideoDecoder>(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, false, disp_delay, max_width, max_height); + #else + std::cerr << "Error: RocDecode HOST library is not found and backend is not supported!" << std::endl; + return 1; + #endif + } + if (!dec->CodecSupported(v_device_id[i], rocdec_codec_id, demuxer->GetBitDepth())) { std::cerr << "Codec not supported on GPU, skipping this file!" 
<< std::endl; continue; @@ -246,7 +270,7 @@ int main(int argc, char **argv) { for (int i = 0; i < n_thread; i++) { v_viddec[i]->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id); - std::cout << "info: stream " << i << " using GPU device " << v_device_id[i] << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " << + if (!backend) std::cout << "info: stream " << i << " using GPU device " << v_device_id[i] << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " << std::setfill('0') << std::setw(2) << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') << std::setw(2) << std::right << std::hex << pci_domain_id << "." << pci_device_id << std::dec << std::endl; std::cout << "info: decoding started for thread " << i << " ,please wait!" << std::endl; diff --git a/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp b/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp index 7ca7834cde..daae59875e 100644 --- a/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp +++ b/utils/ffmpegvideodecode/ffmpeg_video_dec.cpp @@ -45,11 +45,16 @@ static inline float GetChromaWidthFactor(rocDecVideoSurfaceFormat surface_format FFMpegVideoDecoder::FFMpegVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_type, rocDecVideoCodec codec, bool force_zero_latency, const Rect *p_crop_rect, bool extract_user_sei_Message, uint32_t disp_delay, int max_width, int max_height, uint32_t clk_rate) : - RocVideoDecoder(device_id, out_mem_type, codec, force_zero_latency, p_crop_rect, extract_user_sei_Message, disp_delay, max_width, max_height, clk_rate) { + RocVideoDecoder(device_id, out_mem_type, codec, force_zero_latency, p_crop_rect, extract_user_sei_Message, disp_delay, max_width, max_height, clk_rate, /*skip_init=*/true) { if ((out_mem_type_ == OUT_SURFACE_MEM_DEV_INTERNAL) || (out_mem_type_ == OUT_SURFACE_MEM_NOT_MAPPED)) { ROCDEC_THROW("Unsupported output memory type", ROCDEC_INVALID_PARAMETER); } + if (out_mem_type_ == OUT_SURFACE_MEM_DEV_COPIED) { + if 
(!InitHIP(device_id_)) { + ROCDEC_THROW("Failed to initilize the HIP", ROCDEC_DEVICE_INVALID); + } + } // many of the decoder parameters are hardcoded below for just creating the decoder. // In the handlevideosequence callback, the decoder will get reconfigured to the actual parameters in the sequence header RocDecoderHostCreateInfo create_info = {}; @@ -74,7 +79,19 @@ FFMpegVideoDecoder::FFMpegVideoDecoder(int device_id, OutputSurfaceMemoryType ou create_info.pfn_display_picture = FFMpegHandlePictureDisplayProc; create_info.pfn_get_sei_msg = nullptr; // tobe supported in future ROCDEC_API_CALL(rocDecCreateDecoderHost(&roc_decoder_, &create_info)); - + // set disp_width_ and disp_height_ to the caller-supplied maximums so that a non-zero size is available before the actual start of decoding + disp_width_ = max_width; + disp_height_ = max_height; + // fill output_surface_info_ with placeholder values based on max_width/max_height + output_surface_info_.output_width = max_width; + output_surface_info_.output_height = max_height; + output_surface_info_.output_pitch = max_width * 2; // bytes_per_pixel 2 + output_surface_info_.output_vstride = max_height; + output_surface_info_.bit_depth = bitdepth_minus_8_ + 8; + output_surface_info_.bytes_per_pixel = 2; + output_surface_info_.surface_format = rocDecVideoSurfaceFormat_P016; + output_surface_info_.num_chroma_planes = 2; + output_surface_info_.mem_type = OUT_SURFACE_MEM_HOST_COPIED; } @@ -201,6 +218,15 @@ int FFMpegVideoDecoder::HandleVideoSequence(RocdecVideoFormatHost *format_host) return num_decode_surfaces; } +bool FFMpegVideoDecoder::GetOutputSurfaceInfo(OutputSurfaceInfo **surface_info) { + if (!disp_width_ || !disp_height_) { + std::cerr << "ERROR: FFMpegVideo is not initialized" << std::endl; + return false; + } + *surface_info = &output_surface_info_; + return true; +} + /** * @brief function to reconfigure decoder if there is a change in sequence params. 
* diff --git a/utils/ffmpegvideodecode/ffmpeg_video_dec.h b/utils/ffmpegvideodecode/ffmpeg_video_dec.h index fd567c0ac8..369274c202 100644 --- a/utils/ffmpegvideodecode/ffmpeg_video_dec.h +++ b/utils/ffmpegvideodecode/ffmpeg_video_dec.h @@ -72,13 +72,22 @@ class FFMpegVideoDecoder: public RocVideoDecoder { * @param num_decoded_pics - nummber of pictures decoded in this call * @return int - num of frames to display */ - int DecodeFrame(const uint8_t *data, size_t size, int pkt_flags, int64_t pts = 0, int *num_decoded_pics = nullptr); + int DecodeFrame(const uint8_t *data, size_t size, int pkt_flags, int64_t pts = 0, int *num_decoded_pics = nullptr) override; + + /** + * @brief Get the pointer to the Output Image Info + * + * @param surface_info - receives a pointer to the decoder's output surface info + * @return true - surface info is available + * @return false - display width or height is still zero + */ + bool GetOutputSurfaceInfo(OutputSurfaceInfo **surface_info) override; /** * @brief This function returns a decoded frame and timestamp. This should be called in a loop fetching all the available frames * */ - uint8_t* GetFrame(int64_t *pts); + uint8_t* GetFrame(int64_t *pts) override; /** * @brief function to release frame after use by the application: Only used with "OUT_SURFACE_MEM_DEV_INTERNAL" @@ -88,7 +97,7 @@ class FFMpegVideoDecoder: public RocVideoDecoder { * @return true - success * @return false - falied */ - bool ReleaseFrame(int64_t pTimestamp, bool b_flushing = false); + bool ReleaseFrame(int64_t pTimestamp, bool b_flushing = false) override; /** * @brief Helper function to dump decoded output surface to file @@ -98,12 +107,17 @@ class FFMpegVideoDecoder: public RocVideoDecoder { * @param surf_info - surface info * @param rgb_image_size - image size for rgb (optional). 
A non_zero value indicates the surf_mem holds an rgb interleaved image and the entire size will be dumped to file */ - void SaveFrameToFile(std::string output_file_name, void *surf_mem, OutputSurfaceInfo *surf_info, size_t rgb_image_size = 0); + void SaveFrameToFile(std::string output_file_name, void *surf_mem, OutputSurfaceInfo *surf_info, size_t rgb_image_size = 0) override; /** * @brief This function is used to get the current frame size based on pixel format. */ - virtual int GetFrameSize() {CHECK_ZERO("Display width", disp_width_); return ((disp_width_ * disp_height_) + ((chroma_height_ * chroma_width_) * num_chroma_planes_)) * byte_per_pixel_; } + int GetFrameSize() override {CHECK_ZERO("Display width", disp_width_); return ((disp_width_ * disp_height_) + ((chroma_height_ * chroma_width_) * num_chroma_planes_)) * byte_per_pixel_; } + + /** + * @brief This function reconfigures the decoder if there is a change in sequence params. + */ + int ReconfigureDecoder(RocdecVideoFormat *p_video_format) override; private: /** @@ -138,9 +152,4 @@ class FFMpegVideoDecoder: public RocVideoDecoder { */ int GetSEIMessage(RocdecSeiMessageInfo *p_sei_message_info) { return RocVideoDecoder::GetSEIMessage(p_sei_message_info);}; - /** - * @brief This function reconfigure decoder if there is a change in sequence params. - */ - int ReconfigureDecoder(RocdecVideoFormat *p_video_format); - }; diff --git a/utils/rocvideodecode/roc_video_dec.cpp b/utils/rocvideodecode/roc_video_dec.cpp index f34165f4fd..0c4ffc39e5 100644 --- a/utils/rocvideodecode/roc_video_dec.cpp +++ b/utils/rocvideodecode/roc_video_dec.cpp @@ -23,30 +23,34 @@ THE SOFTWARE. 
#include "roc_video_dec.h" RocVideoDecoder::RocVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_type, rocDecVideoCodec codec, bool force_zero_latency, - const Rect *p_crop_rect, bool extract_user_sei_Message, uint32_t disp_delay, int max_width, int max_height, uint32_t clk_rate) : + const Rect *p_crop_rect, bool extract_user_sei_Message, uint32_t disp_delay, int max_width, int max_height, uint32_t clk_rate, bool skip_init) : device_id_{device_id}, out_mem_type_(out_mem_type), codec_id_(codec), b_force_zero_latency_(force_zero_latency), b_extract_sei_message_(extract_user_sei_Message), disp_delay_(disp_delay), max_width_ (max_width), max_height_(max_height) { - if (!InitHIP(device_id_)) { - ROCDEC_THROW("Failed to initilize the HIP", ROCDEC_DEVICE_INVALID); - } + if (p_crop_rect) crop_rect_ = *p_crop_rect; if (b_extract_sei_message_) { fp_sei_ = fopen("rocdec_sei_message.txt", "wb"); curr_sei_message_ptr_ = new RocdecSeiMessageInfo; memset(&sei_message_display_q_, 0, sizeof(sei_message_display_q_)); } - // create rocdec videoparser - RocdecParserParams parser_params = {}; - parser_params.codec_type = codec_id_; - parser_params.max_num_decode_surfaces = 1; // let the parser to determine the decode buffer pool size - parser_params.clock_rate = clk_rate; - parser_params.max_display_delay = disp_delay_; - parser_params.user_data = this; - parser_params.pfn_sequence_callback = HandleVideoSequenceProc; - parser_params.pfn_decode_picture = HandlePictureDecodeProc; - parser_params.pfn_display_picture = b_force_zero_latency_ ? NULL : HandlePictureDisplayProc; - parser_params.pfn_get_sei_msg = b_extract_sei_message_ ? 
HandleSEIMessagesProc : NULL; - ROCDEC_API_CALL(rocDecCreateVideoParser(&rocdec_parser_, &parser_params)); + // a derived class that sets up its own decoder (e.g. FFMpegVideoDecoder) passes skip_init = true to bypass the HIP and parser initialization below + if (!skip_init) { + if (!InitHIP(device_id_)) { + ROCDEC_THROW("Failed to initilize the HIP", ROCDEC_DEVICE_INVALID); + } + // create rocdec videoparser + RocdecParserParams parser_params = {}; + parser_params.codec_type = codec_id_; + parser_params.max_num_decode_surfaces = 1; // let the parser to determine the decode buffer pool size + parser_params.clock_rate = clk_rate; + parser_params.max_display_delay = disp_delay_; + parser_params.user_data = this; + parser_params.pfn_sequence_callback = HandleVideoSequenceProc; + parser_params.pfn_decode_picture = HandlePictureDecodeProc; + parser_params.pfn_display_picture = b_force_zero_latency_ ? nullptr : HandlePictureDisplayProc; + parser_params.pfn_get_sei_msg = b_extract_sei_message_ ? HandleSEIMessagesProc : nullptr; + ROCDEC_API_CALL(rocDecCreateVideoParser(&rocdec_parser_, &parser_params)); + } } @@ -76,7 +80,6 @@ RocVideoDecoder::~RocVideoDecoder() { delete curr_video_format_ptr_; curr_video_format_ptr_ = nullptr; } - std::lock_guard lock(mtx_vp_frame_); if (out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) { for (auto &p_frame : vp_frames_) { @@ -100,11 +103,11 @@ RocVideoDecoder::~RocVideoDecoder() { std::cerr << "ERROR: hipStream_Destroy failed! 
(" << hip_status << ")" << std::endl; } } + if (fp_out_) { fclose(fp_out_); fp_out_ = nullptr; } - double elapsed_time = StopTimer(start_time); AddDecoderSessionOverHead(std::this_thread::get_id(), elapsed_time); } diff --git a/utils/rocvideodecode/roc_video_dec.h b/utils/rocvideodecode/roc_video_dec.h index 69359318aa..4c1b5332d4 100644 --- a/utils/rocvideodecode/roc_video_dec.h +++ b/utils/rocvideodecode/roc_video_dec.h @@ -219,7 +219,8 @@ class RocVideoDecoder { */ RocVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_type, rocDecVideoCodec codec, bool force_zero_latency = false, const Rect *p_crop_rect = nullptr, bool extract_user_SEI_Message = false, uint32_t disp_delay = 0, int max_width = 0, int max_height = 0, - uint32_t clk_rate = 1000); + uint32_t clk_rate = 1000, bool skip_init = false); // skip_init: when true, the constructor skips InitHIP and video parser creation + virtual ~RocVideoDecoder(); rocDecVideoCodec GetCodecId() { return codec_id_; } @@ -291,7 +292,7 @@ class RocVideoDecoder { * @return true * @return false */ - bool GetOutputSurfaceInfo(OutputSurfaceInfo **surface_info); + virtual bool GetOutputSurfaceInfo(OutputSurfaceInfo **surface_info); /** * @brief Function to set the Reconfig Params object @@ -486,7 +487,7 @@ class RocVideoDecoder { bool b_force_recofig_flush_ = false; int32_t num_frames_flushed_during_reconfig_ = 0; - hipDeviceProp_t hip_dev_prop_; - hipStream_t hip_stream_; + hipDeviceProp_t hip_dev_prop_ = {}; + hipStream_t hip_stream_ = nullptr; rocDecVideoChromaFormat video_chroma_format_ = rocDecVideoChromaFormat_420; rocDecVideoSurfaceFormat video_surface_format_ = rocDecVideoSurfaceFormat_NV12; RocdecSeiMessageInfo *curr_sei_message_ptr_ = nullptr;