Fix the performance issue introduced after PR#192 (#220)
* Fix the performance issue introduced after PR#192 * Destroy the current interop memories before reconfiguration * initialize the interop to 0 after unmapping a surface * code clean up * check the mapped device mem/ext mem to be valid before destrying them * code cleanup - add some comments * make changes based on the reviewer comments
This commit is contained in:
gecommit door
GitHub
bovenliggende
d4e3dac5e6
commit
929123abff
@@ -25,7 +25,23 @@ THE SOFTWARE.
|
||||
|
||||
RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_decoder_{decoder_create_info}, decoder_create_info_{decoder_create_info} {}
|
||||
|
||||
RocDecoder::~RocDecoder() {}
|
||||
RocDecoder::~RocDecoder() {
|
||||
// clean up the VA-API/HIP interop memories
|
||||
for(auto i = 0; i < hip_interop_.size(); i++) {
|
||||
if (hip_interop_[i].hip_mapped_device_mem != nullptr) {
|
||||
hipError_t hip_status = hipFree(hip_interop_[i].hip_mapped_device_mem);
|
||||
if (hip_status != hipSuccess) {
|
||||
ERR("ERROR: hipFree failed for picture id " + TOSTR(i));
|
||||
}
|
||||
}
|
||||
if (hip_interop_[i].hip_ext_mem != nullptr) {
|
||||
hipError_t hip_status = hipDestroyExternalMemory(hip_interop_[i].hip_ext_mem);
|
||||
if (hip_status != hipSuccess) {
|
||||
ERR("ERROR: hipDestroyExternalMemory failed for picture id " + TOSTR(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rocDecStatus RocDecoder::InitializeDecoder() {
|
||||
rocDecStatus rocdec_status = ROCDEC_SUCCESS;
|
||||
@@ -38,7 +54,10 @@ RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_deco
|
||||
ERR("ERROR: invalid number of decode surfaces ");
|
||||
return ROCDEC_INVALID_PARAMETER;
|
||||
}
|
||||
hip_ext_mem_.resize(decoder_create_info_.num_decode_surfaces);
|
||||
hip_interop_.resize(decoder_create_info_.num_decode_surfaces);
|
||||
for (auto i = 0; i < hip_interop_.size(); i++) {
|
||||
memset((void *)&hip_interop_[i], 0, sizeof(hip_interop_[i]));
|
||||
}
|
||||
|
||||
rocdec_status = va_video_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName);
|
||||
if (rocdec_status != ROCDEC_SUCCESS) {
|
||||
@@ -72,7 +91,15 @@ rocDecStatus RocDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconf
|
||||
if (reconfig_params == nullptr) {
|
||||
return ROCDEC_INVALID_PARAMETER;
|
||||
}
|
||||
rocDecStatus rocdec_status = va_video_decoder_.ReconfigureDecoder(reconfig_params);
|
||||
rocDecStatus rocdec_status;
|
||||
for (int pic_idx = 0; pic_idx < hip_interop_.size(); pic_idx++) {
|
||||
rocdec_status = UnMapVideoFrame(pic_idx);
|
||||
if (rocdec_status != ROCDEC_SUCCESS) {
|
||||
ERR("ERROR: Unmapping the video frame for picture idx " + TOSTR(pic_idx) + " failed during reconfiguration!");
|
||||
return rocdec_status;
|
||||
}
|
||||
}
|
||||
rocdec_status = va_video_decoder_.ReconfigureDecoder(reconfig_params);
|
||||
if (rocdec_status != ROCDEC_SUCCESS) {
|
||||
ERR("ERROR: Reconfiguration of the decoder failed with rocDecStatus# " + TOSTR(rocdec_status));
|
||||
return rocdec_status;
|
||||
@@ -81,49 +108,81 @@ rocDecStatus RocDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconf
|
||||
}
|
||||
|
||||
rocDecStatus RocDecoder::MapVideoFrame(int pic_idx, void *dev_mem_ptr[3], uint32_t horizontal_pitch[3], RocdecProcParams *vid_postproc_params) {
|
||||
if (pic_idx >= hip_ext_mem_.size() || &dev_mem_ptr[0] == nullptr || vid_postproc_params == nullptr) {
|
||||
if (pic_idx >= hip_interop_.size() || &dev_mem_ptr[0] == nullptr || vid_postproc_params == nullptr) {
|
||||
return ROCDEC_INVALID_PARAMETER;
|
||||
}
|
||||
rocDecStatus rocdec_status = ROCDEC_SUCCESS;
|
||||
hipExternalMemoryHandleDesc external_mem_handle_desc_ = {};
|
||||
hipExternalMemoryBufferDesc external_mem_buffer_desc_ = {};
|
||||
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
|
||||
|
||||
rocdec_status = va_video_decoder_.ExportSurface(pic_idx, va_drm_prime_surface_desc);
|
||||
// wait on current surface to make sure that it is ready for the HIP interop
|
||||
rocdec_status = va_video_decoder_.SyncSurface(pic_idx);
|
||||
if (rocdec_status != ROCDEC_SUCCESS) {
|
||||
ERR("ERROR: Failed to export surface for picture id" + TOSTR(pic_idx) + " , with rocDecStatus# " + TOSTR(rocdec_status));
|
||||
ERR("ERROR: Failed to export surface for picture id = " + TOSTR(pic_idx));
|
||||
return rocdec_status;
|
||||
}
|
||||
|
||||
external_mem_handle_desc_.type = hipExternalMemoryHandleTypeOpaqueFd;
|
||||
external_mem_handle_desc_.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
|
||||
external_mem_handle_desc_.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
CHECK_HIP(hipImportExternalMemory(&hip_ext_mem_[pic_idx], &external_mem_handle_desc_));
|
||||
// do the VA-API/HIP interop once per surface and save it for reusing
|
||||
if (hip_interop_[pic_idx].hip_mapped_device_mem == nullptr) {
|
||||
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
|
||||
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
|
||||
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
|
||||
|
||||
external_mem_buffer_desc_.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
CHECK_HIP(hipExternalMemoryGetMappedBuffer(&*&dev_mem_ptr[0], hip_ext_mem_[pic_idx], &external_mem_buffer_desc_));
|
||||
horizontal_pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
|
||||
if (va_drm_prime_surface_desc.num_layers == 2) {
|
||||
*&dev_mem_ptr[1] = static_cast<uint8_t*>(*&dev_mem_ptr[0]) + va_drm_prime_surface_desc.layers[1].offset[0];
|
||||
horizontal_pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
|
||||
} else if (va_drm_prime_surface_desc.num_layers == 3) {
|
||||
*&dev_mem_ptr[2] = static_cast<uint8_t*>(*&dev_mem_ptr[0]) + va_drm_prime_surface_desc.layers[2].offset[0];
|
||||
horizontal_pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
|
||||
rocdec_status = va_video_decoder_.ExportSurface(pic_idx, va_drm_prime_surface_desc);
|
||||
if (rocdec_status != ROCDEC_SUCCESS) {
|
||||
ERR("ERROR: Failed to export surface for picture id" + TOSTR(pic_idx) + " , with rocDecStatus# " + TOSTR(rocdec_status));
|
||||
return rocdec_status;
|
||||
}
|
||||
|
||||
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
|
||||
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
|
||||
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
|
||||
CHECK_HIP(hipImportExternalMemory(&hip_interop_[pic_idx].hip_ext_mem, &external_mem_handle_desc));
|
||||
|
||||
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
|
||||
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&hip_interop_[pic_idx].hip_mapped_device_mem, hip_interop_[pic_idx].hip_ext_mem, &external_mem_buffer_desc));
|
||||
|
||||
hip_interop_[pic_idx].width = va_drm_prime_surface_desc.width;
|
||||
hip_interop_[pic_idx].height = va_drm_prime_surface_desc.height;
|
||||
|
||||
hip_interop_[pic_idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
|
||||
hip_interop_[pic_idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
|
||||
hip_interop_[pic_idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
|
||||
|
||||
hip_interop_[pic_idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
|
||||
hip_interop_[pic_idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
|
||||
hip_interop_[pic_idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
|
||||
|
||||
hip_interop_[pic_idx].num_layers = va_drm_prime_surface_desc.num_layers;
|
||||
|
||||
for (auto i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
|
||||
close(va_drm_prime_surface_desc.objects[i].fd);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
|
||||
close(va_drm_prime_surface_desc.objects[i].fd);
|
||||
*&dev_mem_ptr[0] = hip_interop_[pic_idx].hip_mapped_device_mem;
|
||||
horizontal_pitch[0] = hip_interop_[pic_idx].pitch[0];
|
||||
if (hip_interop_[pic_idx].num_layers == 2) {
|
||||
*&dev_mem_ptr[1] = hip_interop_[pic_idx].hip_mapped_device_mem + hip_interop_[pic_idx].offset[1];
|
||||
horizontal_pitch[1] = hip_interop_[pic_idx].pitch[1];
|
||||
} else if (hip_interop_[pic_idx].num_layers == 3) {
|
||||
*&dev_mem_ptr[2] = hip_interop_[pic_idx].hip_mapped_device_mem + hip_interop_[pic_idx].offset[2];
|
||||
horizontal_pitch[2] = hip_interop_[pic_idx].pitch[2];
|
||||
}
|
||||
|
||||
return rocdec_status;
|
||||
}
|
||||
|
||||
rocDecStatus RocDecoder::UnMapVideoFrame(int pic_idx) {
|
||||
if (pic_idx >= hip_ext_mem_.size()) {
|
||||
if (pic_idx >= hip_interop_.size()) {
|
||||
return ROCDEC_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
CHECK_HIP(hipDestroyExternalMemory(hip_ext_mem_[pic_idx]));
|
||||
if (hip_interop_[pic_idx].hip_mapped_device_mem != nullptr)
|
||||
CHECK_HIP(hipFree(hip_interop_[pic_idx].hip_mapped_device_mem));
|
||||
if (hip_interop_[pic_idx].hip_ext_mem != nullptr)
|
||||
CHECK_HIP(hipDestroyExternalMemory(hip_interop_[pic_idx].hip_ext_mem));
|
||||
|
||||
memset((void *)&hip_interop_[pic_idx], 0, sizeof(hip_interop_[pic_idx]));
|
||||
|
||||
return ROCDEC_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -42,6 +42,16 @@ THE SOFTWARE.
|
||||
}\
|
||||
}
|
||||
|
||||
struct HipInteropDeviceMem {
|
||||
hipExternalMemory_t hip_ext_mem; // Interface to the vaapi-hip interop
|
||||
uint8_t* hip_mapped_device_mem; // Mapped device memory for the YUV plane
|
||||
uint32_t width; // Width of the surface in pixels.
|
||||
uint32_t height; // Height of the surface in pixels.
|
||||
uint32_t offset[3]; // Offset of each plane
|
||||
uint32_t pitch[3]; // Pitch of each plane
|
||||
uint32_t num_layers; // Number of layers making up the surface
|
||||
};
|
||||
|
||||
class RocDecoder {
|
||||
public:
|
||||
RocDecoder(RocDecoderCreateInfo &decoder_create_info);
|
||||
@@ -59,5 +69,5 @@ private:
|
||||
RocDecoderCreateInfo decoder_create_info_;
|
||||
VaapiVideoDecoder va_video_decoder_;
|
||||
hipDeviceProp_t hip_dev_prop_;
|
||||
std::vector<hipExternalMemory_t> hip_ext_mem_;
|
||||
std::vector<HipInteropDeviceMem> hip_interop_;
|
||||
};
|
||||
@@ -327,25 +327,6 @@ rocDecStatus VaapiVideoDecoder::ExportSurface(int pic_idx, VADRMPRIMESurfaceDesc
|
||||
if (pic_idx >= va_surface_ids_.size()) {
|
||||
return ROCDEC_INVALID_PARAMETER;
|
||||
}
|
||||
VASurfaceStatus surface_status;
|
||||
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status));
|
||||
while (surface_status != VASurfaceReady) {
|
||||
VAStatus va_status = vaSyncSurface(va_display_, va_surface_ids_[pic_idx]);
|
||||
/* Current implementation of vaSyncSurface() does not block indefinitely (contrary to VA-API spec), it returns
|
||||
* VA_STATUS_ERROR_TIMEDOUT error when it blocks for a certain amount of time. Although time out can come from
|
||||
* various reasons, we treat it as non-fatal and contiue waiting.
|
||||
*/
|
||||
if (va_status != VA_STATUS_SUCCESS) {
|
||||
if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) {
|
||||
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status));
|
||||
} else {
|
||||
std::cout << "VAAPI failure: vaSyncSurface() failed with error code: " << va_status << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;
|
||||
return ROCDEC_RUNTIME_ERROR;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, va_surface_ids_[pic_idx],
|
||||
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
|
||||
VA_EXPORT_SURFACE_READ_ONLY |
|
||||
@@ -384,4 +365,30 @@ rocDecStatus VaapiVideoDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo
|
||||
return rocdec_status;
|
||||
}
|
||||
return rocdec_status;
|
||||
}
|
||||
|
||||
rocDecStatus VaapiVideoDecoder::SyncSurface(int pic_idx) {
|
||||
if (pic_idx >= va_surface_ids_.size()) {
|
||||
return ROCDEC_INVALID_PARAMETER;
|
||||
}
|
||||
VASurfaceStatus surface_status;
|
||||
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status));
|
||||
while (surface_status != VASurfaceReady) {
|
||||
VAStatus va_status = vaSyncSurface(va_display_, va_surface_ids_[pic_idx]);
|
||||
/* Current implementation of vaSyncSurface() does not block indefinitely (contrary to VA-API spec), it returns
|
||||
* VA_STATUS_ERROR_TIMEDOUT error when it blocks for a certain amount of time. Although time out can come from
|
||||
* various reasons, we treat it as non-fatal and contiue waiting.
|
||||
*/
|
||||
if (va_status != VA_STATUS_SUCCESS) {
|
||||
if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) {
|
||||
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status));
|
||||
} else {
|
||||
std::cout << "VAAPI failure: vaSyncSurface() failed with error code: " << va_status << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;
|
||||
return ROCDEC_RUNTIME_ERROR;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ROCDEC_SUCCESS;
|
||||
}
|
||||
@@ -50,6 +50,7 @@ public:
|
||||
rocDecStatus SubmitDecode(RocdecPicParams *pPicParams);
|
||||
rocDecStatus GetDecodeStatus(int pic_idx, RocdecDecodeStatus* decode_status);
|
||||
rocDecStatus ExportSurface(int pic_idx, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc);
|
||||
rocDecStatus SyncSurface(int pic_idx);
|
||||
rocDecStatus ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconfig_params);
|
||||
private:
|
||||
RocDecoderCreateInfo decoder_create_info_;
|
||||
|
||||
@@ -729,10 +729,6 @@ int RocVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) {
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(hip_stream_));
|
||||
if(src_dev_ptr[0] != nullptr) {
|
||||
HIP_API_CALL(hipFree(src_dev_ptr[0]));
|
||||
}
|
||||
ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, pDispInfo->picture_index));
|
||||
}
|
||||
} else {
|
||||
RocdecDecodeStatus dec_status;
|
||||
@@ -849,10 +845,6 @@ bool RocVideoDecoder::ReleaseFrame(int64_t pTimestamp, bool b_flushing) {
|
||||
std::cerr << "Decoded Frame is released out of order" << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (mapped_frame_ptr != nullptr) {
|
||||
HIP_API_CALL(hipFree(mapped_frame_ptr));
|
||||
}
|
||||
ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, fb->picture_index));
|
||||
// pop decoded frame
|
||||
vp_frames_q_.pop();
|
||||
}
|
||||
@@ -872,13 +864,6 @@ bool RocVideoDecoder::ReleaseInternalFrames() {
|
||||
// only needed when using internal mapped buffer
|
||||
while (!vp_frames_q_.empty()) {
|
||||
std::lock_guard<std::mutex> lock(mtx_vp_frame_);
|
||||
DecFrameBuffer *fb = &vp_frames_q_.front();
|
||||
void *mapped_frame_ptr = fb->frame_ptr;
|
||||
|
||||
if (mapped_frame_ptr != nullptr) {
|
||||
HIP_API_CALL(hipFree(mapped_frame_ptr));
|
||||
}
|
||||
ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, fb->picture_index));
|
||||
// pop decoded frame
|
||||
vp_frames_q_.pop();
|
||||
}
|
||||
@@ -896,7 +881,7 @@ void RocVideoDecoder::SaveFrameToFile(std::string output_file_name, void *surf_m
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hip_status = hipMemcpyDtoH((void *)hst_ptr, surf_mem, output_image_size);
|
||||
if (hip_status != hipSuccess) {
|
||||
std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl;
|
||||
std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hipGetErrorName(hip_status) << ")" << std::endl;
|
||||
delete [] hst_ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
Verwijs in nieuw issue
Block a user