Perf sample: Added decoded frame number and display delay options to video decode performance sample. (#405)

* * rocDecode/Perf sample: Added decoded-frame-count and display-delay options to the video decode performance sample. Also changed the default number of threads from 4 to 1.

* * rocDecode: Added a max-number-of-decoded-frames option to the decode sample script. This is useful for partial decoding tests on long streams.

* * rocDecode: Updated README.md.

* * rocDecode: Minor correction.
Этот коммит содержится в:
jeffqjiangNew
2024-08-12 09:39:43 -04:00
коммит произвёл GitHub
родитель 30ee6787b2
Коммит c486a7f6b4
4 изменённых файлов: 38 добавлений и 11 удалений
+3 -1
Просмотреть файл
@@ -30,7 +30,9 @@ make -j
```shell
./videodecodeperf -i <input video file [required]>
-t <number of threads [optional - default:4]>
-t <number of threads [optional - default:1]>
-f <Number of decoded frames - specify the number of pictures to be decoded [optional]>
-disp_delay <display delay - specify the number of frames to be delayed for display [optional]>
-d <Device ID (>= 0) [optional - default:0]>
-z <force_zero_latency - Decoded frames will be flushed out for display immediately [optional]>
```
+25 -5
Просмотреть файл
@@ -37,7 +37,7 @@ THE SOFTWARE.
#include "roc_video_dec.h"
#include "common.h"
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, double *pn_fps) {
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, double *pn_fps, int num_decoded_frames) {
int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0;
uint8_t *p_video = nullptr;
int64_t pts = 0;
@@ -48,6 +48,9 @@ void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, doubl
demuxer->Demux(&p_video, &n_video_bytes, &pts);
n_frame_returned = p_dec->DecodeFrame(p_video, n_video_bytes, 0, pts);
n_frame += n_frame_returned;
if (num_decoded_frames && num_decoded_frames <= n_frame) {
break;
}
} while (n_video_bytes);
auto end_time = std::chrono::high_resolution_clock::now();
@@ -64,7 +67,7 @@ void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, doubl
void ShowHelpAndExit(const char *option = NULL) {
std::cout << "Options:" << std::endl
<< "-i Input File Path - required" << std::endl
<< "-t Number of threads (>= 1) - optional; default: 4" << std::endl
<< "-t Number of threads (>= 1) - optional; default: 1" << std::endl
<< "-d Device ID (>= 0) - optional; default: 0" << std::endl
<< "-z force_zero_latency (force_zero_latency, Decoded frames will be flushed out for display immediately); optional;" << std::endl;
exit(0);
@@ -74,10 +77,13 @@ int main(int argc, char **argv) {
std::string input_file_path;
int device_id = 0;
int n_thread = 4;
int n_thread = 1;
Rect *p_crop_rect = nullptr;
OutputSurfaceMemoryType mem_type = OUT_SURFACE_MEM_NOT_MAPPED; // set to decode only for performance
bool b_force_zero_latency = false;
uint32_t num_decoded_frames = 0; // default value is 0, meaning decode the entire stream
int disp_delay = 0;
// Parse command-line arguments
if(argc <= 1) {
ShowHelpAndExit();
@@ -113,6 +119,20 @@ int main(int argc, char **argv) {
}
continue;
}
if (!strcmp(argv[i], "-disp_delay")) {
if (++i == argc) {
ShowHelpAndExit("-disp_delay");
}
disp_delay = atoi(argv[i]);
continue;
}
if (!strcmp(argv[i], "-f")) {
if (++i == argc) {
ShowHelpAndExit("-d");
}
num_decoded_frames = atoi(argv[i]);
continue;
}
if (!strcmp(argv[i], "-z")) {
if (i == argc) {
ShowHelpAndExit("-z");
@@ -177,7 +197,7 @@ int main(int argc, char **argv) {
} else {
v_device_id[i] = i % hip_vis_dev_count;
}
std::unique_ptr<RocVideoDecoder> dec(new RocVideoDecoder(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect));
std::unique_ptr<RocVideoDecoder> dec(new RocVideoDecoder(v_device_id[i], mem_type, rocdec_codec_id, b_force_zero_latency, p_crop_rect, false, disp_delay));
if (!dec->CodecSupported(v_device_id[i], rocdec_codec_id, demuxer->GetBitDepth())) {
std::cerr << "Codec not supported on GPU, skipping this file!" << std::endl;
continue;
@@ -207,7 +227,7 @@ int main(int argc, char **argv) {
}
for (int i = 0; i < n_thread; i++) {
v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_fps[i]));
v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_fps[i], num_decoded_frames));
}
for (int i = 0; i < n_thread; i++) {
+3 -1
Просмотреть файл
@@ -43,7 +43,9 @@ optional arguments:
--sample_mode SAMPLE_MODE
The sample to run - optional (default:0 [range:0-1] 0: videoDecode, 1: videoDecodePerf)
--num_threads NUM_THREADS
The number of threads is only for the videoDecodePerf sample (sample_mode = 1) - optional (default:4)
The number of threads is only for the videoDecodePerf sample (sample_mode = 1) - optional (default:1)
--max_num_decoded_frames MAX_NUM_DECODED_FRAMES
The max number of decoded frames. Useful for partial decoding of a long stream. - optional (default:0, meaning no limit)
```
* **run_rocDecode_Conformance.py**
+7 -4
Просмотреть файл
@@ -64,8 +64,10 @@ parser.add_argument('--files_directory', type=str, default='',
help='The path to a dirctory containing one or more supported files for decoding (e.g., mp4, mov, etc.) - required')
parser.add_argument('--sample_mode', type=int, default=0,
help='The sample to run - optional (default:0 [range:0-1] 0: videoDecode, 1: videoDecodePerf)')
parser.add_argument('--num_threads', type=int, default=4,
help='The number of threads is only for the videoDecodePerf sample (sample_mode = 1) - optional (default:4)')
parser.add_argument('--num_threads', type=int, default=1,
help='The number of threads is only for the videoDecodePerf sample (sample_mode = 1) - optional (default:1)')
parser.add_argument('--max_num_decoded_frames', type=int, default=0,
help='The max number of decoded frames. Useful for partial decoding of a long stream. - optional (default:0, meaning no limit)')
args = parser.parse_args()
@@ -75,6 +77,7 @@ filesDir = args.files_directory
filesDirPath = Path(filesDir)
sampleMode = args.sample_mode
numThreads = args.num_threads
maxNumFrames = args.max_num_decoded_frames
print("\nrunrocDecodeTests V"+__version__+"\n")
@@ -113,7 +116,7 @@ if os.path.exists(resultsPath+'/rocDecode_test_results.csv'):
if sampleMode == 0:
for current_file in iter_files(filesDirPath):
os.system(run_rocDecode_app+' -i '+str(current_file)+' -d '+str(gpuDeviceID)+' | tee -a '+resultsPath+'/rocDecode_output.log')
os.system(run_rocDecode_app+' -i '+str(current_file)+' -d '+str(gpuDeviceID)+' -f '+str(maxNumFrames)+' | tee -a '+resultsPath+'/rocDecode_output.log')
print("\n\n")
orig_stdout = sys.stdout
@@ -144,7 +147,7 @@ if sampleMode == 0:
os.system(runAwk_csv)
elif sampleMode == 1:
for current_file in iter_files(filesDirPath):
os.system(run_rocDecode_app+' -i '+str(current_file)+' -t '+str(numThreads)+' | tee -a '+resultsPath+'/rocDecode_output.log')
os.system(run_rocDecode_app+' -i '+str(current_file)+' -t '+str(numThreads)+' -f '+str(maxNumFrames)+' | tee -a '+resultsPath+'/rocDecode_output.log')
print("\n\n")
orig_stdout = sys.stdout