* rocDecode/Perf: Improved the accuracy of decode performance measurement for the performance sample. We need to wait for the decode completion of the last picture before sampling the end time. (#425)
[ROCm/rocdecode commit: 7ef4e29262]
This commit is contained in:
@@ -37,7 +37,7 @@ THE SOFTWARE.
|
||||
#include "roc_video_dec.h"
|
||||
#include "common.h"
|
||||
|
||||
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, int *pn_pic_dec, double *pn_fps, double *pn_fps_dec, int max_num_frames) {
|
||||
void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, int *pn_pic_dec, double *pn_fps, double *pn_fps_dec, int max_num_frames, OutputSurfaceMemoryType mem_type) {
|
||||
int n_video_bytes = 0, n_frame_returned = 0, n_frame = 0;
|
||||
int n_pic_decoded = 0, decoded_pics = 0;
|
||||
uint8_t *p_video = nullptr;
|
||||
@@ -54,6 +54,9 @@ void DecProc(RocVideoDecoder *p_dec, VideoDemuxer *demuxer, int *pn_frame, int *
|
||||
break;
|
||||
}
|
||||
} while (n_video_bytes);
|
||||
if (mem_type == OUT_SURFACE_MEM_NOT_MAPPED) {
|
||||
p_dec->WaitForDecodeCompletion();
|
||||
}
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto time_per_decode = std::chrono::duration<double, std::milli>(end_time - start_time).count();
|
||||
@@ -76,6 +79,7 @@ void ShowHelpAndExit(const char *option = NULL) {
|
||||
<< "-t Number of threads (>= 1) - optional; default: 1" << std::endl
|
||||
<< "-d Device ID (>= 0) - optional; default: 0" << std::endl
|
||||
<< "-z Force zero latency (decoded frames will be flushed out for display immediately) - optional" << std::endl
|
||||
<< "-disp_delay -specify the number of frames to be delayed for display; optional" << std::endl
|
||||
<< "-m Memory type (integer values between 0 to 3: specifies where to store the decoded output:" << std::endl
|
||||
<< " 0 = decoded output will be in internal interopped memory," << std::endl
|
||||
<< " 1 = decoded output will be copied to a separate device memory," << std::endl
|
||||
@@ -249,7 +253,7 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_thread; i++) {
|
||||
v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_frame_dec[i], &v_fps[i], &v_fps_dec[i], max_num_frames));
|
||||
v_thread.push_back(std::thread(DecProc, v_viddec[i].get(), v_demuxer[i].get(), &v_frame[i], &v_frame_dec[i], &v_fps[i], &v_fps_dec[i], max_num_frames, mem_type));
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_thread; i++) {
|
||||
|
||||
@@ -643,6 +643,7 @@ int RocVideoDecoder::HandlePictureDecode(RocdecPicParams *pPicParams) {
|
||||
}
|
||||
pic_num_in_dec_order_[pPicParams->curr_pic_idx] = decode_poc_++;
|
||||
ROCDEC_API_CALL(rocDecDecodeFrame(roc_decoder_, pPicParams));
|
||||
last_decode_surf_idx_ = pPicParams->curr_pic_idx;
|
||||
decoded_pic_cnt_++;
|
||||
if (b_force_zero_latency_ && ((!pPicParams->field_pic_flag) || (pPicParams->second_field))) {
|
||||
RocdecParserDispInfo disp_info;
|
||||
@@ -1189,3 +1190,11 @@ bool RocVideoDecoder::CodecSupported(int device_id, rocDecVideoCodec codec_id, u
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void RocVideoDecoder::WaitForDecodeCompletion() {
|
||||
RocdecDecodeStatus dec_status;
|
||||
memset(&dec_status, 0, sizeof(dec_status));
|
||||
do {
|
||||
rocDecStatus result = rocDecGetDecodeStatus(roc_decoder_, last_decode_surf_idx_, &dec_status);
|
||||
} while (dec_status.decode_status == rocDecodeStatus_InProgress);
|
||||
}
|
||||
@@ -336,7 +336,7 @@ class RocVideoDecoder {
|
||||
/**
|
||||
* @brief Helper function to close an existing file and dump to a new file when multiple files use the same decoder
|
||||
*/
|
||||
void ResetSaveFrameToFile();
|
||||
void ResetSaveFrameToFile();
|
||||
|
||||
/**
|
||||
* @brief Helper function to start MD5 calculation
|
||||
@@ -366,6 +366,10 @@ class RocVideoDecoder {
|
||||
*/
|
||||
int32_t GetNumOfFlushedFrames() { return num_frames_flushed_during_reconfig_;}
|
||||
|
||||
/*! \brief Function to wait for the decode completion of the last submitted picture
|
||||
*/
|
||||
void WaitForDecodeCompletion();
|
||||
|
||||
// Session overhead refers to decoder initialization and deinitialization time
|
||||
void AddDecoderSessionOverHead(std::thread::id session_id, double duration) { session_overhead_[session_id] += duration; }
|
||||
double GetDecoderSessionOverHead(std::thread::id session_id) {
|
||||
@@ -480,6 +484,7 @@ class RocVideoDecoder {
|
||||
int decoded_pic_cnt_ = 0;
|
||||
int decode_poc_ = 0, pic_num_in_dec_order_[MAX_FRAME_NUM];
|
||||
int num_alloced_frames_ = 0;
|
||||
int last_decode_surf_idx_ = 0;
|
||||
std::ostringstream input_video_info_str_;
|
||||
int bitdepth_minus_8_ = 0;
|
||||
uint32_t byte_per_pixel_ = 1;
|
||||
|
||||
Viittaa uudesa ongelmassa
Block a user