From 5bac02fab38b66b113a256a247cecbddbb71b285 Mon Sep 17 00:00:00 2001 From: Lakshmi Kumar Date: Fri, 10 Nov 2023 08:23:44 -0800 Subject: [PATCH] Add a sample for measuring the video decoding performance using multi-threads (#50) * introducing perf example * chnages for google standards * adding exception handling * review comments * change logic to find vcn devices * adding comments --- samples/videoDecodePerf/CMakeLists.txt | 81 ++++++++ samples/videoDecodePerf/README.md | 37 ++++ samples/videoDecodePerf/videodecodeperf.cpp | 212 ++++++++++++++++++++ 3 files changed, 330 insertions(+) create mode 100644 samples/videoDecodePerf/CMakeLists.txt create mode 100644 samples/videoDecodePerf/README.md create mode 100644 samples/videoDecodePerf/videodecodeperf.cpp diff --git a/samples/videoDecodePerf/CMakeLists.txt b/samples/videoDecodePerf/CMakeLists.txt new file mode 100644 index 0000000000..b76444b095 --- /dev/null +++ b/samples/videoDecodePerf/CMakeLists.txt @@ -0,0 +1,81 @@ +################################################################################ +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+################################################################################
+
+cmake_minimum_required (VERSION 3.5)
+project(videodecodeperf)
+set(CMAKE_CXX_STANDARD 17)
+
+# ROCM Path: the ROCM_PATH environment variable wins, then a -DROCM_PATH cache entry, then /opt/rocm
+if(DEFINED ENV{ROCM_PATH})
+    set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
+elseif(ROCM_PATH)
+    message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
+else()
+    set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
+endif()
+
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake)
+list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
+set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++)
+
+set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1030;gfx1031;gfx1032;gfx1100")
+set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")
+
+find_package(HIP QUIET)
+find_package(FFmpeg QUIET)
+# find rocDecode (library and headers) under the configured ROCm installation; note the ${...} expansion of ROCM_PATH
+find_library(ROCDECODE_LIBRARY NAMES rocdecode HINTS ${ROCM_PATH}/lib)
+find_path(ROCDECODE_INCLUDE_DIR NAMES rocdecode.h PATHS /opt/rocm/include/rocdecode ${ROCM_PATH}/include/rocdecode)
+
+if(ROCDECODE_LIBRARY AND ROCDECODE_INCLUDE_DIR)
+    set(ROCDECODE_FOUND TRUE)
+    message("-- ${White}Using rocDecode -- \n\tLibraries:${ROCDECODE_LIBRARY} \n\tIncludes:${ROCDECODE_INCLUDE_DIR}${ColourReset}")
+endif()
+
+if(HIP_FOUND AND FFMPEG_FOUND AND ROCDECODE_FOUND)
+    # HIP
+    set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device)
+    # FFMPEG
+    include_directories(${AVUTIL_INCLUDE_DIR} ${AVCODEC_INCLUDE_DIR}
+                        ${SWSCALE_INCLUDE_DIR} ${AVFORMAT_INCLUDE_DIR})
+    set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES})
+    # rocDecode
+    include_directories (${ROCDECODE_INCLUDE_DIR})
+    set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCDECODE_LIBRARY})
+
+    list(APPEND SOURCES ${PROJECT_SOURCE_DIR} videodecodeperf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp)
+    add_executable(${PROJECT_NAME} ${SOURCES})
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
+    target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
+else()
+    message("-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!")
+    if (NOT HIP_FOUND)
+        message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!")
+    endif()
+    if (NOT FFMPEG_FOUND)
+        message(FATAL_ERROR "-- ERROR!: FFMPEG Not Found! - please install FFMPEG!")
+    endif()
+    if (NOT ROCDECODE_FOUND)
+        message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
+    endif()
+endif()
\ No newline at end of file
diff --git a/samples/videoDecodePerf/README.md b/samples/videoDecodePerf/README.md
new file mode 100644
index 0000000000..7b65a956ce
--- /dev/null
+++ b/samples/videoDecodePerf/README.md
@@ -0,0 +1,37 @@
+# Video Decode Sample
+This sample illustrates using the FFMPEG demuxer to get individual frames, which are then decoded on AMD hardware using VAAPI.
+
+This sample supports both YUV420 8-bit and 10-bit streams.
+
+This sample uses multiple threads to decode the same input video in parallel.
+
+## Prerequisites:
+
+* Linux distribution
+  + Ubuntu - `20.04` / `22.04`
+
+* [ROCm supported hardware](https://rocm.docs.amd.com/en/latest/release/gpu_os_support.html)
+
+* Install [ROCm 5.5 or later](https://rocmdocs.amd.com/en/latest/deploy/linux/installer/install.html) with `--usecase=graphics,rocm --no-32`
+
+* rocDecode
+
+* CMake `3.5` or later
+
+* [FFMPEG](https://ffmpeg.org/about.html)
+  ```
+  sudo apt install ffmpeg libavcodec-dev libavformat-dev libswscale-dev
+  ```
+
+## Build
+```
+mkdir build
+cd build
+cmake ../
+make -j
+```
+## Run
+```
+./videodecodeperf -i <input video file - required>
+                  -t <number of threads - optional; default: 4>
+```
\ No newline at end of file
diff --git a/samples/videoDecodePerf/videodecodeperf.cpp b/samples/videoDecodePerf/videodecodeperf.cpp
new file mode 100644
index 0000000000..e290cf3cbe
--- /dev/null
+++ b/samples/videoDecodePerf/videodecodeperf.cpp
@@ -0,0 +1,212 @@
+/*
+Copyright (c) 2023 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+// NOTE(review): the system header names were reconstructed from what this file uses — confirm against the project tree.
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include <memory>
+#include <vector>
+#include <string>
+#include <chrono>
+#include <thread>
+#if __cplusplus >= 201703L && __has_include(<filesystem>)
+    #include <filesystem>
+#else
+    #include <experimental/filesystem>
+#endif
+#include "video_demuxer.hpp"
+#include "roc_video_dec.h"
+
+// Thread entry point: demuxes the whole input and feeds every packet to the decoder, timing the loop.
+//   p_dec    - decoder used by this thread only
+//   demuxer  - demuxer used by this thread only
+//   pn_frame - [out] number of frames reported by DecodeFrame() over the whole stream
+//   pn_fps   - [out] frames per second over the measured wall-clock time; 0 if nothing was decoded
+// Exceptions are caught here because an exception escaping a std::thread entry point calls std::terminate.
+void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, double *pn_fps) {
+    int n_video_bytes = 0, n_frame = 0;
+    uint8_t *p_video = nullptr;
+    int64_t pts = 0;
+
+    // Defined results even if decoding fails part-way through
+    *pn_frame = 0;
+    *pn_fps = 0.0;
+
+    try {
+        auto start_time = std::chrono::high_resolution_clock::now();
+
+        // The final iteration passes a zero-sized packet to the decoder; the loop ends after it
+        do {
+            demuxer->Demux(&p_video, &n_video_bytes, &pts);
+            n_frame += p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts);
+        } while (n_video_bytes);
+
+        auto end_time = std::chrono::high_resolution_clock::now();
+        // Total decode time in milliseconds
+        const double total_dec_time = std::chrono::duration<double, std::milli>(end_time - start_time).count();
+
+        *pn_frame = n_frame;
+        // fps = 1000 / (average ms per frame); skipped when no frame was decoded to avoid dividing by zero
+        if (n_frame > 0 && total_dec_time > 0.0) {
+            *pn_fps = (n_frame * 1000.0) / total_dec_time;
+        }
+    } catch (const std::exception &ex) {
+        std::cerr << "ERROR: decoding thread failed: " << ex.what() << std::endl;
+    }
+}
+
+// Prints the usage text and terminates the process.
+//   option - when non-null, the option or value that was rejected; it is reported on stderr and
+//            the exit status is 1. When null (plain help request) the exit status is 0.
+void ShowHelpAndExit(const char *option = nullptr) {
+    if (option) {
+        std::cerr << "ERROR: invalid or missing argument: " << option << std::endl;
+    }
+    std::cout << "Options:" << std::endl
+    << "-i Input File Path - required" << std::endl
+    << "-t Number of threads (>= 1) - optional; default: 4" << std::endl
+    << "-d Device ID (>= 0) - optional; default: 0" << std::endl;
+    exit(option ? 1 : 0);
+}
+
+// Decodes the same input file on n_thread threads concurrently and reports the aggregate throughput.
+// Returns 0 on success, -1 on a device/query error and 1 when an exception is caught.
+int main(int argc, char **argv) {
+
+    std::string input_file_path;
+    int device_id = 0;
+    int n_thread = 4;
+    Rect *p_crop_rect = nullptr;
+    OUTPUT_SURF_MEMORY_TYPE mem_type = OUT_SURFACE_MEM_DEV_INTERNAL;      // set to internal
+    // Parse command-line arguments
+    if (argc <= 1) {
+        ShowHelpAndExit();
+    }
+    for (int i = 1; i < argc; i++) {
+        if (!strcmp(argv[i], "-h")) {
+            ShowHelpAndExit();
+        }
+        if (!strcmp(argv[i], "-i")) {
+            if (++i == argc) {
+                ShowHelpAndExit("-i");
+            }
+            input_file_path = argv[i];
+            continue;
+        }
+        if (!strcmp(argv[i], "-t")) {
+            if (++i == argc) {
+                ShowHelpAndExit("-t");
+            }
+            n_thread = atoi(argv[i]);
+            if (n_thread <= 0) {
+                ShowHelpAndExit(argv[i]);
+            }
+            continue;
+        }
+        if (!strcmp(argv[i], "-d")) {
+            if (++i == argc) {
+                ShowHelpAndExit("-d");
+            }
+            device_id = atoi(argv[i]);
+            if (device_id < 0) {
+                ShowHelpAndExit(argv[i]);
+            }
+            continue;
+        }
+        ShowHelpAndExit(argv[i]);
+    }
+    // The input file is required; fail early instead of handing an empty path to the demuxer
+    if (input_file_path.empty()) {
+        ShowHelpAndExit("-i");
+    }
+
+    try {
+        // TODO: Change this block to use VCN query API
+        int num_devices = 0;
+        hipError_t hip_status = hipSuccess;
+        hipDeviceProp_t hip_dev_prop;
+        std::string gcn_arch_name;
+        hip_status = hipGetDeviceCount(&num_devices);
+        if (hip_status != hipSuccess) {
+            std::cout << "ERROR: hipGetDeviceCount failed! (" << hip_status << ")" << std::endl;
+            return -1;
+        }
+
+        if (num_devices < 1) {
+            ERR("ERROR: didn't find any GPU!");
+            return -1;
+        }
+        if (device_id >= num_devices) {
+            ERR("ERROR: the requested device_id is not found! ");
+            return -1;
+        }
+
+        hip_status = hipGetDeviceProperties(&hip_dev_prop, device_id);
+        if (hip_status != hipSuccess) {
+            ERR("ERROR: hipGetDeviceProperties for device (" +TOSTR(device_id) + " ) failed! (" + TOSTR(hip_status) + ")" );
+            return -1;
+        }
+
+        // Keep only the architecture name, dropping anything after the first ':'
+        gcn_arch_name = hip_dev_prop.gcnArchName;
+        std::size_t pos = gcn_arch_name.find_first_of(":");
+        std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name;
+
+        // Devices used by even- and odd-numbered streams; by default every stream uses the requested device
+        int dev_even = device_id, dev_odd = device_id;
+        // gfx90a has two GCDs as two separate devices: alternate the streams between both of them
+        // NOTE(review): assumes the two GCDs are enumerated as consecutive even/odd device IDs — confirm
+        if (!gcn_arch_name_base.compare("gfx90a") && (device_id | 1) < num_devices) {
+            dev_even = device_id & ~1;
+            dev_odd = device_id | 1;
+        }
+
+        std::vector<std::unique_ptr<VideoDemuxer>> v_demuxer;
+        std::vector<std::unique_ptr<RocVideoDecoder>> v_viddec;
+        std::vector<int> v_device_id(n_thread);
+        v_demuxer.reserve(n_thread);
+        v_viddec.reserve(n_thread);
+
+        // One demuxer/decoder pair per thread, all reading the same input file
+        for (int i = 0; i < n_thread; i++) {
+            auto demuxer = std::make_unique<VideoDemuxer>(input_file_path.c_str());
+            rocDecVideoCodec rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer->GetCodecID());
+            v_device_id[i] = (i % 2 == 0) ? dev_even : dev_odd;
+            auto dec = std::make_unique<RocVideoDecoder>(v_device_id[i], mem_type, rocdec_codec_id, false, true, p_crop_rect);
+            v_demuxer.push_back(std::move(demuxer));
+            v_viddec.push_back(std::move(dec));
+        }
+
+        double total_fps = 0;
+        std::vector<std::thread> v_thread;
+        std::vector<double> v_fps(n_thread, 0.0);
+        std::vector<int> v_frame(n_thread, 0);
+        int n_total = 0;
+        OutputSurfaceInfo *p_surf_info = nullptr;
+
+        std::string device_name;
+        int pci_bus_id, pci_domain_id, pci_device_id;
+
+        for (int i = 0; i < n_thread; i++) {
+            v_viddec[i]->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id);
+            std::cout << "info: stream " << i << " using GPU device " << v_device_id[i] << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " <<
+            std::setfill('0') << std::setw(2) << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') << std::setw(2) <<
+            std::right << std::hex << pci_domain_id << "." << pci_device_id << std::dec << std::endl;
+            std::cout << "info: decoding started for thread " << i << " ,please wait!" << std::endl;
+        }
+
+        // v_fps and v_frame are fully sized before any thread starts, so the element addresses stay valid
+        v_thread.reserve(n_thread);
+        for (int i = 0; i < n_thread; i++) {
+            v_thread.emplace_back(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_fps[i]);
+        }
+
+        // The aggregate throughput is the sum of the per-thread rates
+        for (int i = 0; i < n_thread; i++) {
+            v_thread[i].join();
+            total_fps += v_fps[i];
+            n_total += v_frame[i];
+        }
+
+        if (!v_viddec[0]->GetOutputSurfaceInfo(&p_surf_info)) {
+            std::cerr << "Error: Failed to get Output Surface Info!" << std::endl;
+            return -1;
+        }
+
+        std::cout << "info: Video codec format: " << v_viddec[0]->GetCodecFmtName(v_viddec[0]->GetCodecId()) << std::endl;
+        std::cout << "info: Video size: [ " << p_surf_info->output_width << ", " << p_surf_info->output_height << " ]" << std::endl;
+        std::cout << "info: Video surface format: " << v_viddec[0]->GetSurfaceFmtName(p_surf_info->surface_format) << std::endl;
+        std::cout << "info: Video Bit depth: " << p_surf_info->bit_depth << std::endl;
+        std::cout << "info: Total frame decoded: " << n_total << std::endl;
+        // Guard the division so a run that decoded nothing prints 0 instead of inf
+        std::cout << "info: avg decoding time per frame (ms): " << ((total_fps > 0) ? 1000 / total_fps : 0.0) << std::endl;
+        std::cout << "info: avg FPS: " << total_fps << std::endl;
+    } catch (const std::exception &ex) {
+        std::cerr << ex.what() << std::endl;
+        return 1;
+    }
+
+    return 0;
+}