VideoToSequence sample (#373)
* videoToSequence sample initial commit * add frame_num calculation when seeking * update README * addressed review comments * added FlushAndReconfigure() to flush out frames with seek
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
ac82d17e9f
Коммит
fe5f63428c
@@ -0,0 +1,101 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2024 - 2024 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required (VERSION 3.5)
|
||||
project(videotosequence)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
# ROCM Path
|
||||
if(DEFINED ENV{ROCM_PATH})
|
||||
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}")
|
||||
elseif(ROCM_PATH)
|
||||
message("-- ${White}${PROJECT_NAME} :ROCM_PATH Set -- ${ROCM_PATH}${ColourReset}")
|
||||
else()
|
||||
set(ROCM_PATH /opt/rocm CACHE PATH "${White}${PROJECT_NAME}: Default ROCm installation path${ColourReset}")
|
||||
endif()
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake)
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
|
||||
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/llvm/bin/clang++)
|
||||
|
||||
# rocDecode sample build type
|
||||
set(DEFAULT_BUILD_TYPE "Release")
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE STRING "rocDecode Default Build Type" FORCE)
|
||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release")
|
||||
endif()
|
||||
if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
# -O0 -- Don't Optimize output file
|
||||
# -gdwarf-4 -- generate debugging information, dwarf-4 for making valgrind work
|
||||
# -Og -- Optimize for debugging experience rather than speed or size
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -gdwarf-4 -Og")
|
||||
else()
|
||||
# -O3 -- Optimize output file
|
||||
# -DNDEBUG -- turn off asserts
|
||||
# -fPIC -- Generate position-independent code if possible
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DNDEBUG -fPIC")
|
||||
endif()
|
||||
|
||||
find_package(HIP QUIET)
|
||||
find_package(FFmpeg QUIET)
|
||||
# find rocDecode
|
||||
find_library(ROCDECODE_LIBRARY NAMES rocdecode HINTS ${ROCM_PATH}/lib)
|
||||
find_path(ROCDECODE_INCLUDE_DIR NAMES rocdecode.h PATHS /opt/rocm/include/rocdecode ${ROCM_PATH}/include/rocdecode)
|
||||
if(ROCDECODE_LIBRARY AND ROCDECODE_INCLUDE_DIR)
|
||||
set(ROCDECODE_FOUND TRUE)
|
||||
message("-- ${White}${PROJECT_NAME}: Using rocDecode -- \n\tLibraries:${ROCDECODE_LIBRARY} \n\tIncludes:${ROCDECODE_INCLUDE_DIR}${ColourReset}")
|
||||
endif()
|
||||
|
||||
if(HIP_FOUND AND FFMPEG_FOUND AND ROCDECODE_FOUND)
|
||||
# HIP
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::host)
|
||||
# FFMPEG
|
||||
include_directories(${AVUTIL_INCLUDE_DIR} ${AVCODEC_INCLUDE_DIR}
|
||||
${AVFORMAT_INCLUDE_DIR})
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${FFMPEG_LIBRARIES})
|
||||
# rocDecode and utils
|
||||
include_directories (${ROCDECODE_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/../../utils ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode)
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCDECODE_LIBRARY})
|
||||
# sample app exe
|
||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR} videotosequence.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/rocvideodecode/roc_video_dec.cpp)
|
||||
add_executable(${PROJECT_NAME} ${SOURCES})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
|
||||
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
|
||||
# FFMPEG multi-version support
|
||||
if(_FFMPEG_AVCODEC_VERSION VERSION_LESS_EQUAL 58.134.100)
|
||||
target_compile_definitions(${PROJECT_NAME} PUBLIC USE_AVCODEC_GREATER_THAN_58_134=0)
|
||||
else()
|
||||
target_compile_definitions(${PROJECT_NAME} PUBLIC USE_AVCODEC_GREATER_THAN_58_134=1)
|
||||
endif()
|
||||
else()
|
||||
message("-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!")
|
||||
if (NOT HIP_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!")
|
||||
endif()
|
||||
if (NOT FFMPEG_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: FFMPEG Not Found! - please install FFMPEG!")
|
||||
endif()
|
||||
if (NOT ROCDECODE_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
|
||||
endif()
|
||||
endif()
|
||||
@@ -0,0 +1,41 @@
|
||||
# Video decode sample
|
||||
|
||||
The VideoToSequence sample illustrates decoding a single packetized video stream using FFMPEG demuxer and splitting it into multiple video sequences. This uses seek functionality to seek to a random position and extract a batch of video sequences in YUV format with step and stride. This sample can be configured with a device ID and optionally able to dump the output to a file. This sample uses the high-level RocVideoDecoder class which connects both the video parser and Rocdecoder. This process repeats until a batch of sequences are extracted or EOS is reached.
|
||||
|
||||
## Prerequisites:
|
||||
|
||||
* Install [rocDecode](../../README.md#build-and-install-instructions)
|
||||
|
||||
* [FFMPEG](https://ffmpeg.org/about.html)
|
||||
|
||||
* On `Ubuntu`
|
||||
|
||||
```shell
|
||||
sudo apt install ffmpeg libavcodec-dev libavformat-dev libavutil-dev
|
||||
```
|
||||
|
||||
* On `RHEL`/`SLES` - install ffmpeg development packages manually or use [rocDecode-setup.py](../../rocDecode-setup.py) script
|
||||
|
||||
## Build
|
||||
|
||||
```shell
|
||||
mkdir video_decode_sample && cd video_decode_sample
|
||||
cmake ../
|
||||
make -j
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
```shell
|
||||
./videotosequence -i <Input file/folder Path [required]>
|
||||
-o <Output folder to dump sequences - dumps output if requested [optional]>
|
||||
-d <GPU device ID - 0:device 0 / 1:device 1/ ... [optional - default:0]>
|
||||
-b <batch_size - specify the number of sequences to be decoded [optional - default:1]>
|
||||
-step <frame interval between each sequence [optional - default:1]>
|
||||
-stride <distance between consective frames in a sequence [optional - default:1]>
|
||||
-l <Number of frames in each sequence [optional - default:1]>
|
||||
-crop <crop rectangle for output (not used when using interopped decoded frame) [optional - default:1]>
|
||||
-seek_mode <option for seeking (0: no seek 1: seek to prev key frame) [optional - default: 0]>
|
||||
-crop <crop rectangle for output (not used when using interopped decoded frame) [optional - default: 0,0,0,0]>
|
||||
-m <output_surface_memory_type - decoded surface memory [optional - default: 0][0 : OUT_SURFACE_MEM_DEV_INTERNAL/ 1 : OUT_SURFACE_MEM_DEV_COPIED/ 2 : OUT_SURFACE_MEM_HOST_COPIED/3 : OUT_SURFACE_MEM_NOT_MAPPED]>
|
||||
```
|
||||
@@ -0,0 +1,504 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <iomanip>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <queue>
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
#include <functional>
|
||||
#include <sys/stat.h>
|
||||
#include <libgen.h>
|
||||
|
||||
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
||||
#include <filesystem>
|
||||
#else
|
||||
#include <experimental/filesystem>
|
||||
#endif
|
||||
#include "video_demuxer.h"
|
||||
#include "roc_video_dec.h"
|
||||
#include "common.h"
|
||||
|
||||
class ThreadPool {
|
||||
public:
|
||||
ThreadPool(int nthreads) : shutdown_(false) {
|
||||
// Create the specified number of threads
|
||||
threads_.reserve(nthreads);
|
||||
for (int i = 0; i < nthreads; ++i)
|
||||
threads_.emplace_back(std::bind(&ThreadPool::ThreadEntry, this, i));
|
||||
}
|
||||
|
||||
~ThreadPool() {}
|
||||
|
||||
void JoinThreads() {
|
||||
{
|
||||
// Unblock any threads and tell them to stop
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
shutdown_ = true;
|
||||
cond_var_.notify_all();
|
||||
}
|
||||
|
||||
// Wait for all threads to stop
|
||||
for (auto& thread : threads_)
|
||||
thread.join();
|
||||
}
|
||||
|
||||
void ExecuteJob(std::function<void()> func) {
|
||||
// Place a job on the queue and unblock a thread
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
decode_jobs_queue_.emplace(std::move(func));
|
||||
cond_var_.notify_one();
|
||||
}
|
||||
|
||||
protected:
|
||||
void ThreadEntry(int i) {
|
||||
std::function<void()> execute_decode_job;
|
||||
|
||||
while (true) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
cond_var_.wait(lock, [&] {return shutdown_ || !decode_jobs_queue_.empty();});
|
||||
if (decode_jobs_queue_.empty()) {
|
||||
// No jobs to do; shutting down
|
||||
return;
|
||||
}
|
||||
|
||||
execute_decode_job = std::move(decode_jobs_queue_.front());
|
||||
decode_jobs_queue_.pop();
|
||||
}
|
||||
|
||||
// Execute the decode job without holding any locks
|
||||
execute_decode_job();
|
||||
}
|
||||
}
|
||||
|
||||
std::mutex mutex_;
|
||||
std::condition_variable cond_var_;
|
||||
bool shutdown_;
|
||||
std::queue<std::function<void()>> decode_jobs_queue_;
|
||||
std::vector<std::thread> threads_;
|
||||
};
|
||||
|
||||
struct DecoderInfo {
|
||||
int dec_device_id;
|
||||
std::unique_ptr<RocVideoDecoder> viddec;
|
||||
std::uint32_t bit_depth;
|
||||
rocDecVideoCodec rocdec_codec_id;
|
||||
std::atomic_bool decoding_complete;
|
||||
|
||||
DecoderInfo() : dec_device_id(0), viddec(nullptr), bit_depth(8) , decoding_complete(false) {}
|
||||
};
|
||||
|
||||
struct SeqInfo {
|
||||
int batch_size; // seq_info.batch_size: #of sequences in output
|
||||
int seq_length; // length of each sequence: #frames per seq
|
||||
int step; // step in number of frames to skip from one sequence to next
|
||||
int stride; // stride in muber of frames to skip between consecutive frames in a seq
|
||||
};
|
||||
|
||||
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, double *pn_fps, std::atomic_bool &decoding_complete, int &seek_mode, bool &b_dump_output_frames, SeqInfo &seq_info, std::string *p_output_file_name, OutputSurfaceMemoryType mem_type) {
|
||||
|
||||
int n_video_bytes = 0, n_frame_returned = 0;
|
||||
int64_t n_frame = 0;
|
||||
uint8_t *p_video = nullptr, *p_frame = nullptr;
|
||||
int64_t pts = 0;
|
||||
double total_dec_time = 0.0;
|
||||
int seq_id = 0;
|
||||
OutputSurfaceInfo *surf_info;
|
||||
VideoSeekContext video_seek_ctx;
|
||||
int seq_frame_start[seq_info.batch_size];
|
||||
seq_frame_start[0] = 0;
|
||||
for (int i = 1; i < seq_info.batch_size; i++) {
|
||||
seq_frame_start[i] = seq_frame_start[i-1] + (seq_info.seq_length - 1) * seq_info.stride + seq_info.step;
|
||||
//std::cout << "seq: " << i << " seq_start: " << seq_frame_start[i] << std::endl;
|
||||
}
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
int n_frames_skipped = 0, n_frame_seq = 0, num_seq = 0;
|
||||
int next_frame_num = 0;
|
||||
bool seq_start = true;
|
||||
std::string seq_output_file_name = p_output_file_name[num_seq];
|
||||
//set reconfig before decode start
|
||||
ReconfigParams reconfig_params = { 0 };
|
||||
ReconfigDumpFileStruct reconfig_user_struct = { 0 };
|
||||
reconfig_params.p_fn_reconfigure_flush = ReconfigureFlushCallback;
|
||||
reconfig_user_struct.b_dump_frames_to_file = false;
|
||||
reconfig_params.reconfig_flush_mode = RECONFIG_FLUSH_MODE_NONE;
|
||||
reconfig_params.p_reconfig_user_struct = &reconfig_user_struct;
|
||||
p_dec->SetReconfigParams(&reconfig_params, true); // force reconfig flush mode
|
||||
|
||||
do {
|
||||
if (seek_mode && seq_start) {
|
||||
// todo:: reconfigure before seeking
|
||||
video_seek_ctx.seek_frame_ = seq_frame_start[num_seq];
|
||||
video_seek_ctx.seek_crit_ = SEEK_CRITERIA_FRAME_NUM;
|
||||
video_seek_ctx.seek_mode_ = SEEK_MODE_PREV_KEY_FRAME;
|
||||
demuxer->Seek(video_seek_ctx, &p_video, &n_video_bytes);
|
||||
pts = video_seek_ctx.out_frame_pts_;
|
||||
n_frame = static_cast<int64_t> (pts * demuxer->GetFrameRate()); // start frame number
|
||||
seq_start = false;
|
||||
p_dec->FlushAndReconfigure();
|
||||
|
||||
} else {
|
||||
demuxer->Demux(&p_video, &n_video_bytes, &pts);
|
||||
}
|
||||
n_frame_returned = p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts);
|
||||
if (b_dump_output_frames && mem_type != OUT_SURFACE_MEM_NOT_MAPPED) {
|
||||
if (!n_frame && !p_dec->GetOutputSurfaceInfo(&surf_info)) {
|
||||
std::cerr << "Error: Failed to get Output Surface Info!" << std::endl;
|
||||
break;
|
||||
}
|
||||
for (int i = 0; i < n_frame_returned; i++) {
|
||||
if ((n_frame + i) == next_frame_num) {
|
||||
p_frame = p_dec->GetFrame(&pts);
|
||||
if (n_frame_seq < seq_info.seq_length) {
|
||||
p_dec->SaveFrameToFile(seq_output_file_name, p_frame, surf_info);
|
||||
//std::cout << "saving " << next_frame_num << " to " << seq_output_file_name << std::endl;
|
||||
n_frame_seq ++;
|
||||
}
|
||||
p_dec->ReleaseFrame(pts);
|
||||
next_frame_num += seq_info.stride;
|
||||
}else {
|
||||
p_frame = p_dec->GetFrame(&pts);
|
||||
p_dec->ReleaseFrame(pts);
|
||||
}
|
||||
}
|
||||
}
|
||||
n_frame += n_frame_returned;
|
||||
if (n_frame_seq >= seq_info.seq_length) {
|
||||
n_frame_seq = 0; //reset for next sequence
|
||||
seq_start = true;
|
||||
num_seq ++;
|
||||
if (num_seq < seq_info.batch_size) {
|
||||
next_frame_num = seq_frame_start[num_seq];
|
||||
seq_output_file_name = p_output_file_name[num_seq];
|
||||
}
|
||||
p_dec->ResetSaveFrameToFile();
|
||||
}
|
||||
} while (n_video_bytes && num_seq < seq_info.batch_size);
|
||||
|
||||
//n_frame += p_dec->GetNumOfFlushedFrames();
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto time_per_decode = std::chrono::duration<double, std::milli>(end_time - start_time).count();
|
||||
|
||||
// Calculate average decoding time
|
||||
total_dec_time = time_per_decode;
|
||||
double average_decoding_time = total_dec_time / n_frame;
|
||||
double n_fps = 1000 / average_decoding_time;
|
||||
*pn_fps = n_fps;
|
||||
*pn_frame = n_frame;
|
||||
p_dec->ResetSaveFrameToFile();
|
||||
decoding_complete = true;
|
||||
}
|
||||
|
||||
|
||||
void ShowHelpAndExit(const char *option = NULL) {
|
||||
std::cout << "Options:" << std::endl
|
||||
<< "-i Input File / Folder Path - required" << std::endl
|
||||
<< "-o Output folder to dump sequences - dumps output if requested; optional" << std::endl
|
||||
<< "-d GPU device ID (0 for the first device, 1 for the second, etc.); optional; default: 0" << std::endl
|
||||
<< "-b seq_info.batch_size - specify the number of sequences to be decoded; (default: all sequences till eof)" << std::endl
|
||||
<< "-step - frame interval between each sequence; (default: sequence length)" << std::endl
|
||||
<< "-stride - distance between consective frames in a sequence; (default: 1)" << std::endl
|
||||
<< "-l - Number of frames in each sequence; (default: 3)" << std::endl
|
||||
<< "-crop crop rectangle for output (not used when using interopped decoded frame); optional; default: 0" << std::endl
|
||||
<< "-seek_mode option for seeking (0: no seek 1: seek to prev key frame); optional; default: 0" << std::endl
|
||||
<< "-m output_surface_memory_type - decoded surface memory; optional; default - 0"
|
||||
<< " [0 : OUT_SURFACE_MEM_DEV_INTERNAL/ 1 : OUT_SURFACE_MEM_DEV_COPIED/ 2 : OUT_SURFACE_MEM_HOST_COPIED/ 3 : OUT_SURFACE_MEM_NOT_MAPPED]" << std::endl;
|
||||
exit(0);
|
||||
}
|
||||
// input_folder_path, output_folder_path, device_id, n_threads, seq_info, seek_mode, mem_type, argc, argv
|
||||
void ParseCommandLine(std::string &input_folder_path, std::string &output_folder_path, int &device_id, int &n_thread, SeqInfo &seq_info, int &seek_mode,
|
||||
bool &b_dump_output_frames, OutputSurfaceMemoryType &mem_type, int argc, char *argv[]) {
|
||||
// Parse command-line arguments
|
||||
if(argc <= 1) {
|
||||
ShowHelpAndExit();
|
||||
}
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (!strcmp(argv[i], "-h")) {
|
||||
ShowHelpAndExit();
|
||||
}
|
||||
if (!strcmp(argv[i], "-i")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-i");
|
||||
}
|
||||
input_folder_path = argv[i];
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-t")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-t");
|
||||
}
|
||||
n_thread = atoi(argv[i]);
|
||||
if (n_thread <= 0 || n_thread > 64) {
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-d")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-d");
|
||||
}
|
||||
device_id = atoi(argv[i]);
|
||||
if (device_id < 0) {
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-o")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-o");
|
||||
}
|
||||
output_folder_path = argv[i];
|
||||
if (!output_folder_path.empty()) {
|
||||
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
||||
if (std::filesystem::is_directory(output_folder_path)) {
|
||||
std::filesystem::remove_all(output_folder_path);
|
||||
}
|
||||
std::filesystem::create_directory(output_folder_path);
|
||||
#else
|
||||
if (std::experimental::filesystem::is_directory(output_folder_path)) {
|
||||
std::experimental::filesystem::remove_all(output_folder_path);
|
||||
}
|
||||
std::experimental::filesystem::create_directory(output_folder_path);
|
||||
#endif
|
||||
b_dump_output_frames = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-m")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-m");
|
||||
}
|
||||
mem_type = static_cast<OutputSurfaceMemoryType>(atoi(argv[i]));
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-b")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-b");
|
||||
}
|
||||
seq_info.batch_size = atoi(argv[i]);
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-l")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-l");
|
||||
}
|
||||
seq_info.seq_length = atoi(argv[i]);
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-step")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-step");
|
||||
}
|
||||
seq_info.step = atoi(argv[i]);
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-stride")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-stride");
|
||||
}
|
||||
seq_info.stride = atoi(argv[i]);
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-seek_mode")) {
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-seek_mode");
|
||||
}
|
||||
seek_mode = atoi(argv[i]);
|
||||
if (seek_mode != 0 && seek_mode != 1)
|
||||
ShowHelpAndExit("-seek_mode");
|
||||
continue;
|
||||
}
|
||||
ShowHelpAndExit(argv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
std::string input_folder_path, output_folder_path;
|
||||
int dump_output_frames = 0;
|
||||
int device_id = 0, num_files = 0, seek_mode = 0;
|
||||
SeqInfo seq_info = {4, 1, 1, 4}; //default values
|
||||
int n_threads = 1;
|
||||
bool b_flush_frames_during_reconfig = true, b_dump_output_frames = false;
|
||||
Rect *p_crop_rect = nullptr; // specify crop_rect if output cropping is needed
|
||||
OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_DEV_INTERNAL; // set to internal
|
||||
|
||||
uint32_t num_decoded_frames = 0; // default value is 0, meaning decode the entire stream
|
||||
std::vector<std::string> input_file_names;
|
||||
|
||||
ParseCommandLine(input_folder_path, output_folder_path, device_id, n_threads, seq_info, seek_mode, b_dump_output_frames, mem_type, argc, argv);
|
||||
|
||||
try {
|
||||
|
||||
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
||||
for (const auto& entry : std::filesystem::directory_iterator(input_folder_path)) {
|
||||
#else
|
||||
for (const auto& entry : std::experimental::filesystem::directory_iterator(input_folder_path)) {
|
||||
#endif
|
||||
input_file_names.push_back(entry.path());
|
||||
num_files++;
|
||||
}
|
||||
n_threads = ((n_threads > num_files) ? num_files : n_threads);
|
||||
std::vector<std::string> output_seq_file_names;
|
||||
output_seq_file_names.resize(seq_info.batch_size * num_files);
|
||||
int num_devices = 0, sd = 0;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hipDeviceProp_t hip_dev_prop;
|
||||
std::string gcn_arch_name;
|
||||
if (hipGetDeviceCount(&num_devices) != hipSuccess) {
|
||||
std::cout << "ERROR: hipGetDeviceCount failed! (" << hip_status << ")" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
if (num_devices < 1) {
|
||||
ERR("ERROR: didn't find any GPU!");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (hipSuccess != hipGetDeviceProperties(&hip_dev_prop, device_id)) {
|
||||
ERR("ERROR: hipGetDeviceProperties for device (" +TOSTR(device_id) + " ) failed! (" + hipGetErrorName(hip_status) + ")" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
gcn_arch_name = hip_dev_prop.gcnArchName;
|
||||
std::size_t pos = gcn_arch_name.find_first_of(":");
|
||||
std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name;
|
||||
|
||||
// gfx90a has two GCDs as two separate devices
|
||||
if (!gcn_arch_name_base.compare("gfx90a") && num_devices > 1) {
|
||||
sd = 1;
|
||||
}
|
||||
|
||||
std::string device_name;
|
||||
int pci_bus_id, pci_domain_id, pci_device_id;
|
||||
double total_fps = 0;
|
||||
int n_total = 0;
|
||||
std::vector<double> v_fps;
|
||||
std::vector<int> v_frame;
|
||||
v_fps.resize(num_files, 0);
|
||||
v_frame.resize(num_files, 0);
|
||||
int hip_vis_dev_count = 0;
|
||||
GetEnvVar("HIP_VISIBLE_DEVICES", hip_vis_dev_count);
|
||||
|
||||
std::vector<std::unique_ptr<VideoDemuxer>> v_demuxer;
|
||||
std::vector<std::unique_ptr<DecoderInfo>> v_dec_info;
|
||||
ThreadPool thread_pool(n_threads);
|
||||
std::mutex mutex;
|
||||
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
v_demuxer.push_back(std::make_unique<VideoDemuxer>(input_file_names[i].c_str()));
|
||||
std::size_t found_file = input_file_names[i].find_last_of('/');
|
||||
input_file_names[i] = input_file_names[i].substr(found_file + 1);
|
||||
if (b_dump_output_frames) {
|
||||
std::size_t found_ext = input_file_names[i].find_last_of('.');
|
||||
std::string path = output_folder_path + "/output_" + input_file_names[i].substr(0, found_ext);
|
||||
for (int n = 0; n < seq_info.batch_size; n++) {
|
||||
output_seq_file_names[i * seq_info.batch_size + n] = path + "_seq_" + std::to_string(n) + ".yuv";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_threads; i++) {
|
||||
v_dec_info.emplace_back(std::make_unique<DecoderInfo>());
|
||||
if (!hip_vis_dev_count) {
|
||||
if (device_id % 2 == 0) {
|
||||
v_dec_info[i]->dec_device_id = (i % 2 == 0) ? device_id : device_id + sd;
|
||||
} else
|
||||
v_dec_info[i]->dec_device_id = (i % 2 == 0) ? device_id - sd : device_id;
|
||||
} else {
|
||||
v_dec_info[i]->dec_device_id = i % hip_vis_dev_count;
|
||||
}
|
||||
v_dec_info[i]->rocdec_codec_id = AVCodec2RocDecVideoCodec(v_demuxer[i]->GetCodecID());
|
||||
v_dec_info[i]->bit_depth = v_demuxer[i]->GetBitDepth();
|
||||
v_dec_info[i]->viddec = std::make_unique<RocVideoDecoder>(v_dec_info[i]->dec_device_id, mem_type, v_dec_info[i]->rocdec_codec_id, false, p_crop_rect);
|
||||
v_dec_info[i]->viddec->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id);
|
||||
std::cout << "info: decoding " << input_file_names[i] << " using GPU device " << v_dec_info[i]->dec_device_id << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " <<
|
||||
std::setfill('0') << std::setw(2) << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') << std::setw(2) <<
|
||||
std::right << std::hex << pci_domain_id << "." << pci_device_id << std::dec << std::endl;
|
||||
}
|
||||
|
||||
for (int j = 0; j < num_files; j++) {
|
||||
int thread_idx = j % n_threads;
|
||||
if (j >= n_threads) {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
while (!v_dec_info[thread_idx]->decoding_complete)
|
||||
sleep(1);
|
||||
v_dec_info[thread_idx]->decoding_complete = false;
|
||||
}
|
||||
uint32_t bit_depth = v_demuxer[j]->GetBitDepth();
|
||||
rocDecVideoCodec codec_id = AVCodec2RocDecVideoCodec(v_demuxer[j]->GetCodecID());
|
||||
// If the codec_type or bit_depth has changed, recreate the decoder
|
||||
//if (v_dec_info[thread_idx]->bit_depth != bit_depth || v_dec_info[thread_idx]->rocdec_codec_id != codec_id) {
|
||||
(v_dec_info[thread_idx]->viddec).release();
|
||||
v_dec_info[thread_idx]->viddec = std::make_unique<RocVideoDecoder>(v_dec_info[thread_idx]->dec_device_id, mem_type, codec_id, false, p_crop_rect);
|
||||
//}
|
||||
v_dec_info[thread_idx]->viddec->GetDeviceinfo(device_name, gcn_arch_name, pci_bus_id, pci_domain_id, pci_device_id);
|
||||
std::cout << "info: decoding " << input_file_names[j] << " using GPU device " << v_dec_info[thread_idx]->dec_device_id << " - " << device_name << "[" << gcn_arch_name << "] on PCI bus " <<
|
||||
std::setfill('0') << std::setw(2) << std::right << std::hex << pci_bus_id << ":" << std::setfill('0') << std::setw(2) <<
|
||||
std::right << std::hex << pci_domain_id << "." << pci_device_id << std::dec << std::endl;
|
||||
}
|
||||
thread_pool.ExecuteJob(std::bind(DecProc, v_dec_info[thread_idx]->viddec.get(), v_demuxer[j].get(), &v_frame[j], &v_fps[j], std::ref(v_dec_info[thread_idx]->decoding_complete),
|
||||
seek_mode, b_dump_output_frames, seq_info, &output_seq_file_names[j*seq_info.batch_size], mem_type));
|
||||
}
|
||||
|
||||
thread_pool.JoinThreads();
|
||||
for (int i = 0; i < num_files; i++) {
|
||||
total_fps += v_fps[i] * static_cast<double>(n_threads) / static_cast<double>(num_files);
|
||||
n_total += v_frame[i];
|
||||
}
|
||||
if (!b_dump_output_frames) {
|
||||
std::cout << "info: Total frame decoded: " << n_total << std::endl;
|
||||
std::cout << "info: avg decoding time per frame: " << 1000 / total_fps << " ms" << std::endl;
|
||||
std::cout << "info: avg FPS: " << total_fps << std::endl;
|
||||
}
|
||||
else {
|
||||
if (mem_type == OUT_SURFACE_MEM_NOT_MAPPED) {
|
||||
std::cout << "info: saving frames with -m 3 option is not supported!" << std::endl;
|
||||
} else {
|
||||
for (int i = 0; i < num_files; i++){
|
||||
for (int n = 0; n < seq_info.batch_size; n++) {
|
||||
std::cout << "info: saved frames into " << output_seq_file_names[i * seq_info.batch_size + n] << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (const std::exception &ex) {
|
||||
std::cout << ex.what() << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -73,6 +73,11 @@ RocVideoDecoder::~RocVideoDecoder() {
|
||||
roc_decoder_ = nullptr;
|
||||
}
|
||||
|
||||
if (curr_video_format_ptr_) {
|
||||
delete curr_video_format_ptr_;
|
||||
curr_video_format_ptr_ = nullptr;
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(mtx_vp_frame_);
|
||||
if (out_mem_type_ != OUT_SURFACE_MEM_DEV_INTERNAL) {
|
||||
for (auto &p_frame : vp_frames_) {
|
||||
@@ -250,6 +255,11 @@ static void GetSurfaceStrideInternal(rocDecVideoSurfaceFormat surface_format, ui
|
||||
* 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::max_num_decode_surfaces while creating parser)
|
||||
*/
|
||||
int RocVideoDecoder::HandleVideoSequence(RocdecVideoFormat *p_video_format) {
|
||||
|
||||
if (p_video_format == nullptr) {
|
||||
ROCDEC_THROW("Rocdec:: Invalid video format in HandleVideoSequence: ", ROCDEC_INVALID_PARAMETER);
|
||||
return 0;
|
||||
}
|
||||
auto start_time = StartTimer();
|
||||
input_video_info_str_.str("");
|
||||
input_video_info_str_.clear();
|
||||
@@ -280,7 +290,6 @@ int RocVideoDecoder::HandleVideoSequence(RocdecVideoFormat *p_video_format) {
|
||||
ROCDEC_THROW("Rocdec:: Codec not supported on this GPU: ", ROCDEC_NOT_SUPPORTED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((p_video_format->coded_width > decode_caps.max_width) || (p_video_format->coded_height > decode_caps.max_height)) {
|
||||
std::ostringstream errorString;
|
||||
errorString << std::endl
|
||||
@@ -291,12 +300,16 @@ int RocVideoDecoder::HandleVideoSequence(RocdecVideoFormat *p_video_format) {
|
||||
ROCDEC_THROW(cErr, ROCDEC_NOT_SUPPORTED);
|
||||
return 0;
|
||||
}
|
||||
if (curr_video_format_ptr_ == nullptr) {
|
||||
curr_video_format_ptr_ = new RocdecVideoFormat();
|
||||
}
|
||||
// store current video format: this is required to call reconfigure from application in case of random seek
|
||||
if (curr_video_format_ptr_) memcpy(curr_video_format_ptr_, p_video_format, sizeof(RocdecVideoFormat));
|
||||
|
||||
if (coded_width_ && coded_height_) {
|
||||
// rocdecCreateDecoder() has been called before, and now there's possible config change
|
||||
return ReconfigureDecoder(p_video_format);
|
||||
}
|
||||
|
||||
// e_codec has been set in the constructor (for parser). Here it's set again for potential correction
|
||||
codec_id_ = p_video_format->codec;
|
||||
video_chroma_format_ = p_video_format->chroma_format;
|
||||
@@ -431,17 +444,38 @@ int RocVideoDecoder::HandleVideoSequence(RocdecVideoFormat *p_video_format) {
|
||||
* @return true : success
|
||||
* @return false : fail
|
||||
*/
|
||||
bool RocVideoDecoder::SetReconfigParams(ReconfigParams *p_reconfig_params) {
|
||||
bool RocVideoDecoder::SetReconfigParams(ReconfigParams *p_reconfig_params, bool b_force_reconfig_flush) {
|
||||
if (!p_reconfig_params) {
|
||||
std::cerr << "ERROR: Invalid reconfig struct passed! "<< std::endl;
|
||||
return false;
|
||||
}
|
||||
//save it
|
||||
p_reconfig_params_ = p_reconfig_params;
|
||||
b_force_recofig_flush_ = b_force_reconfig_flush;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function to force Reconfigure Flush: needed for random seeking to key frames
|
||||
*
|
||||
* @return int 1: Success 0: Fail
|
||||
*/
|
||||
int RocVideoDecoder::FlushAndReconfigure() {
|
||||
if (!p_reconfig_params_) {
|
||||
std::cerr << "ERROR: Reconfig params is not set! "<< std::endl;
|
||||
return 0;
|
||||
}
|
||||
if (!curr_video_format_ptr_ ) {
|
||||
std::cerr << "ERROR: video format is not initialized! "<< std::endl;
|
||||
return 0;
|
||||
}
|
||||
//call reconfigure
|
||||
b_force_recofig_flush_ = true; // if not already set to force reconfigure
|
||||
ReconfigureDecoder(curr_video_format_ptr_);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief function to reconfigure decoder if there is a change in sequence params.
|
||||
*
|
||||
@@ -466,7 +500,8 @@ int RocVideoDecoder::ReconfigureDecoder(RocdecVideoFormat *p_video_format) {
|
||||
p_video_format->display_area.top == disp_rect_.top &&
|
||||
p_video_format->display_area.left == disp_rect_.left &&
|
||||
p_video_format->display_area.right == disp_rect_.right);
|
||||
if (!is_decode_res_changed && !is_display_rect_changed) {
|
||||
|
||||
if (!is_decode_res_changed && !is_display_rect_changed && !b_force_recofig_flush_) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -932,7 +967,7 @@ void RocVideoDecoder::SaveFrameToFile(std::string output_file_name, void *surf_m
|
||||
}
|
||||
}
|
||||
is_decoder_reconfigured_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (fp_out_ == nullptr) {
|
||||
fp_out_ = fopen(output_file_name.c_str(), "wb");
|
||||
|
||||
@@ -267,7 +267,14 @@ class RocVideoDecoder {
|
||||
* @return true : success
|
||||
* @return false : fail
|
||||
*/
|
||||
bool SetReconfigParams(ReconfigParams *p_reconfig_params);
|
||||
bool SetReconfigParams(ReconfigParams *p_reconfig_params, bool b_force_reconfig_flush = false);
|
||||
|
||||
/**
|
||||
* @brief Function to force Reconfigure Flush: needed for random seeking to key frames
|
||||
*
|
||||
* @return int 1: Success 0: Fail
|
||||
*/
|
||||
int FlushAndReconfigure();
|
||||
/**
|
||||
* @brief this function decodes a frame and returns the number of frames avalable for display
|
||||
*
|
||||
@@ -451,6 +458,7 @@ class RocVideoDecoder {
|
||||
bool b_force_zero_latency_ = false;
|
||||
uint32_t disp_delay_;
|
||||
ReconfigParams *p_reconfig_params_ = nullptr;
|
||||
bool b_force_recofig_flush_ = false;
|
||||
int32_t num_frames_flushed_during_reconfig_ = 0;
|
||||
hipDeviceProp_t hip_dev_prop_;
|
||||
hipStream_t hip_stream_;
|
||||
@@ -459,6 +467,7 @@ class RocVideoDecoder {
|
||||
rocDecVideoSurfaceFormat video_surface_format_ = rocDecVideoSurfaceFormat_NV12;
|
||||
RocdecSeiMessageInfo *curr_sei_message_ptr_ = nullptr;
|
||||
RocdecSeiMessageInfo sei_message_display_q_[MAX_FRAME_NUM];
|
||||
RocdecVideoFormat *curr_video_format_ptr_ = nullptr;
|
||||
int decoded_frame_cnt_ = 0, decoded_frame_cnt_ret_ = 0;
|
||||
int decode_poc_ = 0, pic_num_in_dec_order_[MAX_FRAME_NUM];
|
||||
int num_alloced_frames_ = 0;
|
||||
|
||||
Ссылка в новой задаче
Block a user