From b9ba594d2eecd698a9d0bb4a092748be0737aabc Mon Sep 17 00:00:00 2001
From: jeffqjiangNew <142832361+jeffqjiangNew@users.noreply.github.com>
Date: Mon, 12 Aug 2024 09:39:43 -0400
Subject: [PATCH] Perf sample: Added decoded frame number and display delay
options to video decode performance sample. (#405)
* * rocDecode/Perf sample: Added decoded frame number and display delay options to video decode performance sample. Also changed default thread number from 4 to 1.
* * rocDecode: Added max number of decoded frames option to decode sample script. This is useful to do partial decoding test on long streams.
* * rocDecode: Updated README.md.
* * rocDecode: Minor correction.
[ROCm/rocdecode commit: c486a7f6b46f08c23953e63fe1b0bc9c5838421c]
---
.../samples/videoDecodePerf/README.md | 4 ++-
.../videoDecodePerf/videodecodeperf.cpp | 30 +++++++++++++++----
projects/rocdecode/test/testScripts/README.md | 4 ++-
.../test/testScripts/run_rocDecodeSamples.py | 11 ++++---
4 files changed, 38 insertions(+), 11 deletions(-)
diff --git a/projects/rocdecode/samples/videoDecodePerf/README.md b/projects/rocdecode/samples/videoDecodePerf/README.md
index 2109288a28..becaf9c486 100644
--- a/projects/rocdecode/samples/videoDecodePerf/README.md
+++ b/projects/rocdecode/samples/videoDecodePerf/README.md
@@ -30,7 +30,9 @@ make -j
```shell
./videodecodeperf -i
- -t
+ -t
+ -f
+ -disp_delay
-d <Device ID (>= 0) [optional - default:0]>
-z
```
\ No newline at end of file
diff --git a/projects/rocdecode/samples/videoDecodePerf/videodecodeperf.cpp b/projects/rocdecode/samples/videoDecodePerf/videodecodeperf.cpp
index d161cebc1d..d771395768 100644
--- a/projects/rocdecode/samples/videoDecodePerf/videodecodeperf.cpp
+++ b/projects/rocdecode/samples/videoDecodePerf/videodecodeperf.cpp
@@ -37,7 +37,7 @@ THE SOFTWARE.
#include "roc_video_dec.h"
#include "common.h"
-void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, double *pn_fps) {
+void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, double *pn_fps, int num_decoded_frames) {
int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0;
uint8_t *p_video = nullptr;
int64_t pts = 0;
@@ -48,6 +48,9 @@ void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, doubl
demuxer->Demux(&p_video, &n_video_bytes, &pts);
n_frame_returned = p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts);
n_frame += n_frame_returned;
+ if (num_decoded_frames && num_decoded_frames <= n_frame) {
+ break;
+ }
} while (n_video_bytes);
auto end_time = std::chrono::high_resolution_clock::now();
@@ -64,7 +67,7 @@ void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, doubl
void ShowHelpAndExit(const char *option = NULL) {
std::cout << "Options:" << std::endl
<< "-i Input File Path - required" << std::endl
- << "-t Number of threads (>= 1) - optional; default: 4" << std::endl
+ << "-t Number of threads (>= 1) - optional; default: 1" << std::endl
<< "-d Device ID (>= 0) - optional; default: 0" << std::endl
<< "-z force_zero_latency (force_zero_latency, Decoded frames will be flushed out for display immediately); optional;" << std::endl;
exit(0);
@@ -74,10 +77,13 @@ int main(int argc, char **argv) {
std::string input_file_path;
int device_id = 0;
- int n_thread = 4;
+ int n_thread = 1;
Rect *p_crop_rect = nullptr;
OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_NOT_MAPPED; // set to decode only for performance
bool b_force_zero_latency = false;
+ uint32_t num_decoded_frames = 0; // default value is 0, meaning decode the entire stream
+ int disp_delay = 0;
+
// Parse command-line arguments
if(argc <= 1) {
ShowHelpAndExit();
@@ -113,6 +119,20 @@ int main(int argc, char **argv) {
}
continue;
}
+ if (!strcmp(argv[i], "-disp_delay")) {
+ if (++i == argc) {
+ ShowHelpAndExit("-disp_delay");
+ }
+ disp_delay = atoi(argv[i]);
+ continue;
+ }
+ if (!strcmp(argv[i], "-f")) { // -f <N>: upper bound on the number of frames to decode
+ if (++i == argc) {
+ ShowHelpAndExit("-f"); // a missing value is reported against -f itself; "-d" is the device ID option
+ }
+ num_decoded_frames = atoi(argv[i]); // 0 keeps the default: decode the entire stream
+ continue;
+ }
if (!strcmp(argv[i], "-z")) {
if (i == argc) {
ShowHelpAndExit("-z");
@@ -177,7 +197,7 @@ int main(int argc, char **argv) {
} else {
v_device_id[i] = i % hip_vis_dev_count;
}
- std::unique_ptr<RocVideoDecoder> dec(new RocVideoDecoder(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect));
+ std::unique_ptr<RocVideoDecoder> dec(new RocVideoDecoder(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, false, disp_delay));
if (!dec->CodecSupported(v_device_id[i], rocdec_codec_id, demuxer->GetBitDepth())) {
std::cerr << "Codec not supported on GPU, skipping this file!" << std::endl;
continue;
@@ -207,7 +227,7 @@ int main(int argc, char **argv) {
}
for (int i = 0; i < n_thread; i++) {
- v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_fps[i]));
+ v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_fps[i], num_decoded_frames));
}
for (int i = 0; i < n_thread; i++) {
diff --git a/projects/rocdecode/test/testScripts/README.md b/projects/rocdecode/test/testScripts/README.md
index b629d5fe94..7f02cd72d0 100644
--- a/projects/rocdecode/test/testScripts/README.md
+++ b/projects/rocdecode/test/testScripts/README.md
@@ -43,7 +43,9 @@ optional arguments:
--sample_mode SAMPLE_MODE
The sample to run - optional (default:0 [range:0-1] 0: videoDecode, 1: videoDecodePerf)
--num_threads NUM_THREADS
- The number of threads is only for the videoDecodePerf sample (sample_mode = 1) - optional (default:4)
+ The number of threads is only for the videoDecodePerf sample (sample_mode = 1) - optional (default:1)
+ --max_num_decoded_frames MAX_NUM_DECODED_FRAMES
+ The max number of decoded frames. Useful for partial decoding of a long stream. - optional (default:0, meaning no limit)
```
* **run_rocDecode_Conformance.py**
diff --git a/projects/rocdecode/test/testScripts/run_rocDecodeSamples.py b/projects/rocdecode/test/testScripts/run_rocDecodeSamples.py
index afbcba7f18..6c88e98b27 100644
--- a/projects/rocdecode/test/testScripts/run_rocDecodeSamples.py
+++ b/projects/rocdecode/test/testScripts/run_rocDecodeSamples.py
@@ -64,8 +64,10 @@ parser.add_argument('--files_directory', type=str, default='',
help='The path to a dirctory containing one or more supported files for decoding (e.g., mp4, mov, etc.) - required')
parser.add_argument('--sample_mode', type=int, default=0,
help='The sample to run - optional (default:0 [range:0-1] 0: videoDecode, 1: videoDecodePerf)')
-parser.add_argument('--num_threads', type=int, default=4,
- help='The number of threads is only for the videoDecodePerf sample (sample_mode = 1) - optional (default:4)')
+parser.add_argument('--num_threads', type=int, default=1,
+ help='The number of threads is only for the videoDecodePerf sample (sample_mode = 1) - optional (default:1)')
+parser.add_argument('--max_num_decoded_frames', type=int, default=0,
+ help='The max number of decoded frames. Useful for partial decoding of a long stream. - optional (default:0, meaning no limit)')
args = parser.parse_args()
@@ -75,6 +77,7 @@ filesDir = args.files_directory
filesDirPath = Path(filesDir)
sampleMode = args.sample_mode
numThreads = args.num_threads
+maxNumFrames = args.max_num_decoded_frames
print("\nrunrocDecodeTests V"+__version__+"\n")
@@ -113,7 +116,7 @@ if os.path.exists(resultsPath+'/rocDecode_test_results.csv'):
if sampleMode == 0:
for current_file in iter_files(filesDirPath):
- os.system(run_rocDecode_app+' -i '+str(current_file)+' -d '+str(gpuDeviceID)+' | tee -a '+resultsPath+'/rocDecode_output.log')
+ os.system(run_rocDecode_app+' -i '+str(current_file)+' -d '+str(gpuDeviceID)+' -f '+str(maxNumFrames)+' | tee -a '+resultsPath+'/rocDecode_output.log')
print("\n\n")
orig_stdout = sys.stdout
@@ -144,7 +147,7 @@ if sampleMode == 0:
os.system(runAwk_csv)
elif sampleMode == 1:
for current_file in iter_files(filesDirPath):
- os.system(run_rocDecode_app+' -i '+str(current_file)+' -t '+str(numThreads)+' | tee -a '+resultsPath+'/rocDecode_output.log')
+ os.system(run_rocDecode_app+' -i '+str(current_file)+' -t '+str(numThreads)+' -f '+str(maxNumFrames)+' | tee -a '+resultsPath+'/rocDecode_output.log')
print("\n\n")
orig_stdout = sys.stdout