Add a sample for measuring the video decoding performance using multi-threads (#50)
* introducing perf example * chnages for google standards * adding exception handling * review comments * change logic to find vcn devices * adding comments
This commit is contained in:
@@ -0,0 +1,81 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required (VERSION 3.5)
project(videodecodeperf)
set(CMAKE_CXX_STANDARD 17)

# ROCM Path: the ROCM_PATH environment variable is checked first, then a
# ROCM_PATH already defined for this configure run, and finally /opt/rocm.
if(DEFINED ENV{ROCM_PATH})
  set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
elseif(ROCM_PATH)
  message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
else()
  set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
endif()

list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake)
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++)

set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908;gfx90a;gfx940;gfx1030;gfx1031;gfx1032;gfx1100")
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")

find_package(HIP QUIET)
find_package(FFmpeg QUIET)
# The sample spawns std::thread workers, so a thread library must be linked.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)

# find rocDecode
# NOTE: the search locations must dereference the variable (${ROCM_PATH});
# a bare {ROCM_PATH} is a literal string and never matches the install prefix.
find_library(ROCDECODE_LIBRARY NAMES rocdecode HINTS ${ROCM_PATH}/lib)
find_path(ROCDECODE_INCLUDE_DIR NAMES rocdecode.h PATHS /opt/rocm/include/rocdecode ${ROCM_PATH}/include/rocdecode)

if(ROCDECODE_LIBRARY AND ROCDECODE_INCLUDE_DIR)
  set(ROCDECODE_FOUND TRUE)
  message("-- ${White}Using rocDecode -- \n\tLibraries:${ROCDECODE_LIBRARY} \n\tIncludes:${ROCDECODE_INCLUDE_DIR}${ColourReset}")
endif()

if(HIP_FOUND AND FFMPEG_FOUND AND ROCDECODE_FOUND)
  # HIP
  set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::device)
  # FFMPEG
  include_directories(${AVUTIL_INCLUDE_DIR} ${AVCODEC_INCLUDE_DIR}
                      ${SWSCALE_INCLUDE_DIR} ${AVFORMAT_INCLUDE_DIR})
  set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES})
  # rocDecode
  include_directories (${ROCDECODE_INCLUDE_DIR})
  set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCDECODE_LIBRARY})
  # Threads
  set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} Threads::Threads)

  # Only real source files belong in SOURCES (the project directory itself is not a source).
  list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/videodecodeperf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp)
  add_executable(${PROJECT_NAME} ${SOURCES})
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
  target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
else()
  message("-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!")
  # Report the first missing dependency and abort the configure step.
  if (NOT HIP_FOUND)
    message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!")
  endif()
  if (NOT FFMPEG_FOUND)
    message(FATAL_ERROR "-- ERROR!: FFMPEG Not Found! - please install FFMPEG!")
  endif()
  if (NOT ROCDECODE_FOUND)
    message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
  endif()
endif()
|
||||
@@ -0,0 +1,37 @@
|
||||
# Video Decode Sample
|
||||
This sample illustrates how to use the FFMPEG demuxer to extract individual frames, which are then decoded on AMD hardware using VAAPI.
|
||||
|
||||
This sample supports both YUV420 8-bit and 10-bit streams.
|
||||
|
||||
This sample uses multiple threads to decode the same input video in parallel.
|
||||
|
||||
## Prerequisites:
|
||||
|
||||
* Linux distribution
|
||||
+ Ubuntu - `20.04` / `22.04`
|
||||
|
||||
* [ROCm supported hardware](https://rocm.docs.amd.com/en/latest/release/gpu_os_support.html)
|
||||
|
||||
* Install [ROCm 5.5 or later](https://rocmdocs.amd.com/en/latest/deploy/linux/installer/install.html) with `--usecase=graphics,rocm --no-32`
|
||||
|
||||
* rocDecode
|
||||
|
||||
* CMake `3.5` or later
|
||||
|
||||
* [FFMPEG](https://ffmpeg.org/about.html)
|
||||
```
|
||||
sudo apt install ffmpeg libavcodec-dev libavformat-dev libswscale-dev
|
||||
```
|
||||
|
||||
## Build
|
||||
```
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ../
|
||||
make -j
|
||||
```
|
||||
## Run
|
||||
```
|
||||
./videodecodeperf -i <input video file [required]>
|
||||
-t <number of threads [optional - default:4]>
-d <device ID [optional - default:0]>
|
||||
```
|
||||
@@ -0,0 +1,212 @@
|
||||
/*
|
||||
Copyright (c) 2023 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
#include <sys/stat.h>
|
||||
#include <libgen.h>
|
||||
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
||||
#include <filesystem>
|
||||
#else
|
||||
#include <experimental/filesystem>
|
||||
#endif
|
||||
#include "video_demuxer.hpp"
|
||||
#include "roc_video_dec.h"
|
||||
|
||||
/**
 * @brief Worker routine: demuxes and decodes one stream to the end and reports its throughput.
 *
 * Packets are pulled from the demuxer and handed to the decoder until the demuxer
 * reports a zero-length packet. That final zero-length packet is still passed to
 * DecodeFrame() before the loop ends (presumably this signals end-of-stream to the
 * decoder so it can return any remaining frames - confirm against RocVideoDecoder).
 *
 * @param p_dec     Decoder used for this stream.
 * @param demuxer   Demuxer that supplies the compressed packets.
 * @param pn_frame  [out] Total number of frames returned by the decoder.
 * @param pn_fps    [out] Frames per second, measured over the wall-clock time of the
 *                  whole demux + decode loop; 0 when no frame was decoded.
 */
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, double *pn_fps) {
    int n_video_bytes = 0;
    int n_frame = 0;
    uint8_t *p_video = nullptr;
    int64_t pts = 0;

    const auto start_time = std::chrono::high_resolution_clock::now();

    do {
        demuxer->Demux(&p_video, &n_video_bytes, &pts);
        n_frame += p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts);
    } while (n_video_bytes);

    const auto end_time = std::chrono::high_resolution_clock::now();
    const double elapsed_ms = std::chrono::duration<double, std::milli>(end_time - start_time).count();

    // Publish the frame count: the caller sums it up for the "Total frame decoded" report.
    *pn_frame = n_frame;
    // fps = 1000 / (average ms per frame); guarded so an empty stream yields 0 instead of
    // depending on a floating-point division by zero.
    *pn_fps = (n_frame > 0 && elapsed_ms > 0.0) ? (1000.0 * n_frame) / elapsed_ms : 0.0;
}
|
||||
|
||||
/**
 * @brief Prints the command-line usage and terminates the process.
 *
 * @param option  The option (or option value) that could not be processed, or nullptr
 *                when help was requested explicitly. When non-null it is reported on
 *                stderr and the process exits with status 1; otherwise the exit
 *                status is 0.
 */
void ShowHelpAndExit(const char *option = nullptr) {
    if (option != nullptr) {
        std::cerr << "Error: invalid or incomplete option: " << option << std::endl;
    }
    std::cout << "Options:" << std::endl
              << "-i Input File Path - required" << std::endl
              << "-t Number of threads (>= 1) - optional; default: 4" << std::endl
              << "-d Device ID (>= 0) - optional; default: 0" << std::endl;
    exit(option != nullptr ? 1 : 0);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
std::string input_file_path;
|
||||
int device_id = 0;
|
||||
int n_thread = 4;
|
||||
Rect *p_crop_rect = nullptr;
|
||||
OUTPUT_SURF_MEMORY_TYPE mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; // set to internal
|
||||
// Parse command-line arguments
|
||||
if(argc < 1) {
|
||||
ShowHelpAndExit();
|
||||
}
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (!strcmp(argv[i], "-h")) {
|
||||
ShowHelpAndExit();
|
||||
}
|
||||
if (!strcmp(argv[i], "-i")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-i");
|
||||
}
|
||||
input_file_path = argv[i];
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-t")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-t");
|
||||
}
|
||||
n_thread = atoi(argv[i]);
|
||||
if (n_thread <= 0) {
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-d")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-d");
|
||||
}
|
||||
device_id = atoi(argv[i]);
|
||||
if (device_id < 0) {
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
|
||||
try {
|
||||
// TODO: Change this block to use VCN query API
|
||||
int num_devices = 0, sd = 0;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hipDeviceProp_t hip_dev_prop;
|
||||
std::string gcn_arch_name;
|
||||
hip_status = hipGetDeviceCount(&num_devices);
|
||||
if (hip_status != hipSuccess) {
|
||||
std::cout << "ERROR: hipGetDeviceCount failed! (" << hip_status << ")" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (num_devices < 1) {
|
||||
ERR("ERROR: didn't find any GPU!");
|
||||
return -1;
|
||||
}
|
||||
if (device_id >= num_devices) {
|
||||
ERR("ERROR: the requested device_id is not found! ");
|
||||
return -1;
|
||||
}
|
||||
|
||||
hip_status = hipGetDeviceProperties(&hip_dev_prop, device_id);
|
||||
if (hip_status != hipSuccess) {
|
||||
ERR("ERROR: hipGetDeviceProperties for device (" +TOSTR(device_id) + " ) failed! (" + TOSTR(hip_status) + ")" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
gcn_arch_name = hip_dev_prop.gcnArchName;
|
||||
std::size_t pos = gcn_arch_name.find_first_of(":");
|
||||
std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name;
|
||||
|
||||
// gfx90a has two GCDs as two separate devices
|
||||
if (!gcn_arch_name_base.compare("gfx90a")) {
|
||||
sd = 1;
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<VideoDemuxer>> v_demuxer;
|
||||
std::vector<std::unique_ptr<RocVideoDecoder>> v_viddec;
|
||||
std::vector<int> v_device_id(n_thread);
|
||||
|
||||
for (int i = 0; i < n_thread; i++) {
|
||||
std::unique_ptr<VideoDemuxer> demuxer(new VideoDemuxer(input_file_path.c_str()));
|
||||
rocDecVideoCodec rocdec_codec_id = AVCodec2RocDecVideoCodec(demuxer->GetCodecID());
|
||||
v_device_id[i] = (i % 2 == 0) ? 0 : sd;
|
||||
std::unique_ptr<RocVideoDecoder> dec(new RocVideoDecoder(v_device_id[i], mem_type, rocdec_codec_id, false, true, p_crop_rect));
|
||||
v_demuxer.push_back(std::move(demuxer));
|
||||
v_viddec.push_back(std::move(dec));
|
||||
}
|
||||
|
||||
float total_fps = 0;
|
||||
std::vector<std::thread> v_thread;
|
||||
std::vector<double> v_fps;
|
||||
std::vector<int> v_frame;
|
||||
v_fps.resize(n_thread, 0);
|
||||
v_frame.resize(n_thread, 0);
|
||||
int n_total = 0;
|
||||
OutputSurfaceInfo *p_surf_info;
|
||||
|
||||
std::string device_name;
|
||||
int pci_bus_id, pci_domain_id, pci_device_id;
|
||||
|
||||
for (int i = 0; i < n_thread; i++) {
|
||||
v_viddec[i]->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id);
|
||||
std::cout << "info: stream " << i << " using GPU device " << v_device_id[i] << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " <<
|
||||
std::setfill('0') << std::setw(2) << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') << std::setw(2) <<
|
||||
std::right << std::hex << pci_domain_id << "." << pci_device_id << std::dec << std::endl;
|
||||
std::cout << "info: decoding started for thread " << i << " ,please wait!" << std::endl;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_thread; i++) {
|
||||
v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_fps[i]));
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_thread; i++) {
|
||||
v_thread[i].join();
|
||||
total_fps += v_fps[i];
|
||||
n_total += v_frame[i];
|
||||
}
|
||||
|
||||
if (!v_viddec[0]->GetOutputSurfaceInfo(&p_surf_info)) {
|
||||
std::cerr << "Error: Failed to get Output Surface Info!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::cout << "info: Video codec format: " << v_viddec[0]->GetCodecFmtName(v_viddec[0]->GetCodecId()) << std::endl;
|
||||
std::cout << "info: Video size: [ " << p_surf_info->output_width << ", " << p_surf_info->output_height << " ]" << std::endl;
|
||||
std::cout << "info: Video surface format: " << v_viddec[0]->GetSurfaceFmtName(p_surf_info->surface_format) << std::endl;
|
||||
std::cout << "info: Video Bit depth: " << p_surf_info->bit_depth << std::endl;
|
||||
std::cout << "info: Total frame decoded: " << n_total << std::endl;
|
||||
std::cout << "info: avg decoding time per frame (ms): " << 1000 / total_fps << std::endl;
|
||||
std::cout << "info: avg FPS: " << total_fps << std::endl;
|
||||
} catch (const std::exception &ex) {
|
||||
std::cout << ex.what() << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
مرجع در شماره جدید
Block a user