* rocDecode/Perf: Improved the accuracy of decode performance measurement for the performance sample. We need to wait for the decode completion of the last picture before sampling the end time. (#425)

[ROCm/rocdecode commit: 7ef4e29262]
This commit is contained in:
jeffqjiangNew
2024-09-25 21:27:49 -04:00
committed by GitHub
vanhempi 6f1871777f
commit 2b84f90795
3 muutettua tiedostoa jossa 21 lisäystä ja 3 poistoa
@@ -37,7 +37,7 @@ THE SOFTWARE.
#include "roc_video_dec.h"
#include "common.h"
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, int *pn_pic_dec, double *pn_fps, double *pn_fps_dec, int max_num_frames) {
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, int *pn_pic_dec, double *pn_fps, double *pn_fps_dec, int max_num_frames, OutputSurfaceMemoryType mem_type) {
int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0;
int n_pic_decoded = 0, decoded_pics = 0;
uint8_t *p_video = nullptr;
@@ -54,6 +54,9 @@ void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, int *
break;
}
} while (n_video_bytes);
if (mem_type == OUT_SURFACE_MEM_NOT_MAPPED) {
p_dec->WaitForDecodeCompletion();
}
auto end_time = std::chrono::high_resolution_clock::now();
auto time_per_decode = std::chrono::duration<double, std::milli>(end_time - start_time).count();
@@ -76,6 +79,7 @@ void ShowHelpAndExit(const char *option = NULL) {
<< "-t Number of threads (>= 1) - optional; default: 1" << std::endl
<< "-d Device ID (>= 0) - optional; default: 0" << std::endl
<< "-z Force zero latency (decoded frames will be flushed out for display immediately) - optional" << std::endl
<< "-disp_delay -specify the number of frames to be delayed for display; optional" << std::endl
<< "-m Memory type (integer values between 0 to 3: specifies where to store the decoded output:" << std::endl
<< " 0 = decoded output will be in internal interopped memory," << std::endl
<< " 1 = decoded output will be copied to a separate device memory," << std::endl
@@ -249,7 +253,7 @@ int main(int argc, char **argv) {
}
for (int i = 0; i < n_thread; i++) {
v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_frame_dec[i], &v_fps[i], &v_fps_dec[i], max_num_frames));
v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_frame_dec[i], &v_fps[i], &v_fps_dec[i], max_num_frames, mem_type));
}
for (int i = 0; i < n_thread; i++) {
@@ -643,6 +643,7 @@ int RocVideoDecoder::HandlePictureDecode(RocdecPicParams *pPicParams) {
}
pic_num_in_dec_order_[pPicParams->curr_pic_idx] = decode_poc_++;
ROCDEC_API_CALL(rocDecDecodeFrame(roc_decoder_, pPicParams));
last_decode_surf_idx_ = pPicParams->curr_pic_idx;
decoded_pic_cnt_++;
if (b_force_zero_latency_ && ((!pPicParams->field_pic_flag) || (pPicParams->second_field))) {
RocdecParserDispInfo disp_info;
@@ -1189,3 +1190,11 @@ bool RocVideoDecoder::CodecSupported(int device_id, rocDecVideoCodec codec_id, u
}
return true;
}
void RocVideoDecoder::WaitForDecodeCompletion() {
RocdecDecodeStatus dec_status;
memset(&dec_status, 0, sizeof(dec_status));
do {
rocDecStatus result = rocDecGetDecodeStatus(roc_decoder_, last_decode_surf_idx_, &dec_status);
} while (dec_status.decode_status == rocDecodeStatus_InProgress);
}
@@ -336,7 +336,7 @@ class RocVideoDecoder {
/**
* @brief Helper function to close an existing file and dump to a new file in case of multiple files using the same decoder
*/
void ResetSaveFrameToFile();
void ResetSaveFrameToFile();
/**
* @brief Helper function to start MD5 calculation
@@ -366,6 +366,10 @@ class RocVideoDecoder {
*/
int32_t GetNumOfFlushedFrames() {
    // Plain accessor: hands back the stored counter unchanged.
    return num_frames_flushed_during_reconfig_;
}
/*! \brief Function to wait for the decode completion of the last submitted picture
 *
 * Queries the decode status of the most recently submitted picture and returns
 * when the decoder stops reporting that picture as in progress. Intended to be
 * called before sampling an end time, so the measurement includes the decode
 * of the last picture.
*/
void WaitForDecodeCompletion();
// Session overhead refers to decoder initialization and deinitialization time
void AddDecoderSessionOverHead(std::thread::id session_id, double duration) {
    // Fold this duration into the running total kept for the given session.
    auto &accumulated = session_overhead_[session_id];
    accumulated += duration;
}
double GetDecoderSessionOverHead(std::thread::id session_id) {
@@ -480,6 +484,7 @@ class RocVideoDecoder {
int decoded_pic_cnt_ = 0;
int decode_poc_ = 0, pic_num_in_dec_order_[MAX_FRAME_NUM];
int num_alloced_frames_ = 0;
int last_decode_surf_idx_ = 0;
std::ostringstream input_video_info_str_;
int bitdepth_minus_8_ = 0;
uint32_t byte_per_pixel_ = 1;