Fix the performance issue introduced after PR#192 (#220)

* Fix the performance issue introduced after PR#192

* Destroy the current interop memories before reconfiguration

* initialize the interop to 0 after unmapping a surface

* code clean up

* check that the mapped device mem/ext mem are valid before destroying them

* code cleanup - add some comments

* make changes based on the reviewer comments
This commit is contained in:
Aryan Salmanpour
2024-02-05 17:12:39 -05:00
zatwierdzone przez GitHub
rodzic d4e3dac5e6
commit 929123abff
5 zmienionych plików z 124 dodań i 62 usunięć
+85 -26
Wyświetl plik
@@ -25,7 +25,23 @@ THE SOFTWARE.
// Builds a decoder from the caller-supplied creation parameters: the same
// decoder_create_info initializes both va_video_decoder_ and decoder_create_info_.
RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info)
    : va_video_decoder_{decoder_create_info},
      decoder_create_info_{decoder_create_info} {
}
RocDecoder::~RocDecoder() {}
// Releases every VA-API/HIP interop resource still cached in hip_interop_.
// For each entry, a non-null mapped device buffer is freed first and a non-null
// external memory handle is destroyed afterwards. Failures are only reported
// through ERR, so the remaining entries are still cleaned up.
RocDecoder::~RocDecoder() {
    // size_t index: avoids comparing a signed int against the unsigned size()
    for (size_t i = 0; i < hip_interop_.size(); i++) {
        auto &interop = hip_interop_[i];
        if (interop.hip_mapped_device_mem != nullptr) {
            const hipError_t hip_status = hipFree(interop.hip_mapped_device_mem);
            if (hip_status != hipSuccess) {
                ERR("ERROR: hipFree failed for picture id " + TOSTR(i));
            }
        }
        if (interop.hip_ext_mem != nullptr) {
            const hipError_t hip_status = hipDestroyExternalMemory(interop.hip_ext_mem);
            if (hip_status != hipSuccess) {
                ERR("ERROR: hipDestroyExternalMemory failed for picture id " + TOSTR(i));
            }
        }
    }
}
rocDecStatus RocDecoder::InitializeDecoder() {
rocDecStatus rocdec_status = ROCDEC_SUCCESS;
@@ -38,7 +54,10 @@ RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_deco
ERR("ERROR: invalid number of decode surfaces ");
return ROCDEC_INVALID_PARAMETER;
}
hip_ext_mem_.resize(decoder_create_info_.num_decode_surfaces);
hip_interop_.resize(decoder_create_info_.num_decode_surfaces);
for (auto i = 0; i < hip_interop_.size(); i++) {
memset((void *)&hip_interop_[i], 0, sizeof(hip_interop_[i]));
}
rocdec_status = va_video_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName);
if (rocdec_status != ROCDEC_SUCCESS) {
@@ -72,7 +91,15 @@ rocDecStatus RocDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconf
if (reconfig_params == nullptr) {
return ROCDEC_INVALID_PARAMETER;
}
rocDecStatus rocdec_status = va_video_decoder_.ReconfigureDecoder(reconfig_params);
rocDecStatus rocdec_status;
for (int pic_idx = 0; pic_idx < hip_interop_.size(); pic_idx++) {
rocdec_status = UnMapVideoFrame(pic_idx);
if (rocdec_status != ROCDEC_SUCCESS) {
ERR("ERROR: Unmapping the video frame for picture idx " + TOSTR(pic_idx) + " failed during reconfiguration!");
return rocdec_status;
}
}
rocdec_status = va_video_decoder_.ReconfigureDecoder(reconfig_params);
if (rocdec_status != ROCDEC_SUCCESS) {
ERR("ERROR: Reconfiguration of the decoder failed with rocDecStatus# " + TOSTR(rocdec_status));
return rocdec_status;
@@ -81,49 +108,81 @@ rocDecStatus RocDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconf
}
// Maps the decoded surface identified by pic_idx for HIP access and hands back
// per-plane device pointers (dev_mem_ptr) and pitches (horizontal_pitch).
// Returns ROCDEC_INVALID_PARAMETER when pic_idx is out of range or
// vid_postproc_params is null; otherwise returns the status of the decoder calls.
// NOTE(review): this span is a rendered diff hunk without +/- markers, so the
// hip_ext_mem_-based lines and the hip_interop_-based lines appear side by side;
// it is not a single compilable function body as shown.
rocDecStatus RocDecoder::MapVideoFrame(int pic_idx, void *dev_mem_ptr[3], uint32_t horizontal_pitch[3], RocdecProcParams *vid_postproc_params) {
if (pic_idx >= hip_ext_mem_.size() || &dev_mem_ptr[0] == nullptr || vid_postproc_params == nullptr) {
if (pic_idx >= hip_interop_.size() || &dev_mem_ptr[0] == nullptr || vid_postproc_params == nullptr) {
return ROCDEC_INVALID_PARAMETER;
}
rocDecStatus rocdec_status = ROCDEC_SUCCESS;
hipExternalMemoryHandleDesc external_mem_handle_desc_ = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc_ = {};
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
rocdec_status = va_video_decoder_.ExportSurface(pic_idx, va_drm_prime_surface_desc);
// wait on current surface to make sure that it is ready for the HIP interop
rocdec_status = va_video_decoder_.SyncSurface(pic_idx);
if (rocdec_status != ROCDEC_SUCCESS) {
ERR("ERROR: Failed to export surface for picture id" + TOSTR(pic_idx) + " , with rocDecStatus# " + TOSTR(rocdec_status));
// NOTE(review): this message follows the SyncSurface call but still says "export surface" — confirm the wording.
ERR("ERROR: Failed to export surface for picture id = " + TOSTR(pic_idx));
return rocdec_status;
}
external_mem_handle_desc_.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc_.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc_.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipImportExternalMemory(&hip_ext_mem_[pic_idx], &external_mem_handle_desc_));
// do the VA-API/HIP interop once per surface and save it for reusing
// A null hip_mapped_device_mem marks a slot that has not been mapped yet
// (slots are zeroed with memset at initialization and after unmapping).
if (hip_interop_[pic_idx].hip_mapped_device_mem == nullptr) {
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
external_mem_buffer_desc_.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer(&*&dev_mem_ptr[0], hip_ext_mem_[pic_idx], &external_mem_buffer_desc_));
horizontal_pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
if (va_drm_prime_surface_desc.num_layers == 2) {
*&dev_mem_ptr[1] = static_cast<uint8_t*>(*&dev_mem_ptr[0]) + va_drm_prime_surface_desc.layers[1].offset[0];
horizontal_pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
} else if (va_drm_prime_surface_desc.num_layers == 3) {
*&dev_mem_ptr[2] = static_cast<uint8_t*>(*&dev_mem_ptr[0]) + va_drm_prime_surface_desc.layers[2].offset[0];
horizontal_pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
rocdec_status = va_video_decoder_.ExportSurface(pic_idx, va_drm_prime_surface_desc);
if (rocdec_status != ROCDEC_SUCCESS) {
ERR("ERROR: Failed to export surface for picture id" + TOSTR(pic_idx) + " , with rocDecStatus# " + TOSTR(rocdec_status));
return rocdec_status;
}
// Import the first exported object (objects[0]) as HIP external memory and map it as a buffer.
// NOTE(review): CHECK_HIP is defined elsewhere; if it returns early on failure, the exported
// fds are not closed on that path (they are only closed in the loop below) — TODO confirm.
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipImportExternalMemory(&hip_interop_[pic_idx].hip_ext_mem, &external_mem_handle_desc));
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&hip_interop_[pic_idx].hip_mapped_device_mem, hip_interop_[pic_idx].hip_ext_mem, &external_mem_buffer_desc));
// Cache the surface geometry so later calls can skip the export/import steps.
// Offsets and pitches for layers 1 and 2 are copied regardless of num_layers;
// they are only read below when num_layers is 2 or 3.
hip_interop_[pic_idx].width = va_drm_prime_surface_desc.width;
hip_interop_[pic_idx].height = va_drm_prime_surface_desc.height;
hip_interop_[pic_idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
hip_interop_[pic_idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
hip_interop_[pic_idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
hip_interop_[pic_idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
hip_interop_[pic_idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
hip_interop_[pic_idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
hip_interop_[pic_idx].num_layers = va_drm_prime_surface_desc.num_layers;
// Close the exported file descriptors once the import and mapping are done.
for (auto i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
}
}
for (auto i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
// Fill the caller's outputs from the cached interop entry.
*&dev_mem_ptr[0] = hip_interop_[pic_idx].hip_mapped_device_mem;
horizontal_pitch[0] = hip_interop_[pic_idx].pitch[0];
// NOTE(review): with 3 layers only index 2 is written here; dev_mem_ptr[1] and
// horizontal_pitch[1] are left untouched in that branch — confirm this is intended.
if (hip_interop_[pic_idx].num_layers == 2) {
*&dev_mem_ptr[1] = hip_interop_[pic_idx].hip_mapped_device_mem + hip_interop_[pic_idx].offset[1];
horizontal_pitch[1] = hip_interop_[pic_idx].pitch[1];
} else if (hip_interop_[pic_idx].num_layers == 3) {
*&dev_mem_ptr[2] = hip_interop_[pic_idx].hip_mapped_device_mem + hip_interop_[pic_idx].offset[2];
horizontal_pitch[2] = hip_interop_[pic_idx].pitch[2];
}
return rocdec_status;
}
// Releases the HIP interop resources cached for pic_idx and zeroes that slot.
// Returns ROCDEC_INVALID_PARAMETER when pic_idx is out of range and
// ROCDEC_SUCCESS at the end of the function.
// NOTE(review): CHECK_HIP is defined elsewhere and may return early on a HIP error — TODO confirm.
// NOTE(review): this span is a rendered diff hunk without +/- markers, so the
// hip_ext_mem_-based lines and the hip_interop_-based lines appear side by side.
rocDecStatus RocDecoder::UnMapVideoFrame(int pic_idx) {
if (pic_idx >= hip_ext_mem_.size()) {
if (pic_idx >= hip_interop_.size()) {
return ROCDEC_INVALID_PARAMETER;
}
CHECK_HIP(hipDestroyExternalMemory(hip_ext_mem_[pic_idx]));
// Free the mapped device buffer first, then destroy the external memory handle;
// each step is skipped when the corresponding pointer/handle is null.
if (hip_interop_[pic_idx].hip_mapped_device_mem != nullptr)
CHECK_HIP(hipFree(hip_interop_[pic_idx].hip_mapped_device_mem));
if (hip_interop_[pic_idx].hip_ext_mem != nullptr)
CHECK_HIP(hipDestroyExternalMemory(hip_interop_[pic_idx].hip_ext_mem));
// Zero the slot so a later MapVideoFrame sees a null hip_mapped_device_mem and redoes the interop.
memset((void *)&hip_interop_[pic_idx], 0, sizeof(hip_interop_[pic_idx]));
return ROCDEC_SUCCESS;
}