Fix the performance issue introduced after PR#192 (#220)

* Fix the performance issue introduced after PR#192

* Destroy the current interop memories before reconfiguration

* initialize the interop to 0 after unmapping a surface

* code clean up

* check the mapped device mem/ext mem to be valid before destrying them

* code cleanup - add some comments

* make changes based on the reviewer comments
This commit is contained in:
Aryan Salmanpour
2024-02-05 17:12:39 -05:00
gecommit door GitHub
bovenliggende d4e3dac5e6
commit 929123abff
5 gewijzigde bestanden met toevoegingen van 124 en 62 verwijderingen
+85 -26
Bestand weergeven
@@ -25,7 +25,23 @@ THE SOFTWARE.
RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_decoder_{decoder_create_info}, decoder_create_info_{decoder_create_info} {}
RocDecoder::~RocDecoder() {}
RocDecoder::~RocDecoder() {
// clean up the VA-API/HIP interop memories
for(auto i = 0; i < hip_interop_.size(); i++) {
if (hip_interop_[i].hip_mapped_device_mem != nullptr) {
hipError_t hip_status = hipFree(hip_interop_[i].hip_mapped_device_mem);
if (hip_status != hipSuccess) {
ERR("ERROR: hipFree failed for picture id " + TOSTR(i));
}
}
if (hip_interop_[i].hip_ext_mem != nullptr) {
hipError_t hip_status = hipDestroyExternalMemory(hip_interop_[i].hip_ext_mem);
if (hip_status != hipSuccess) {
ERR("ERROR: hipDestroyExternalMemory failed for picture id " + TOSTR(i));
}
}
}
}
rocDecStatus RocDecoder::InitializeDecoder() {
rocDecStatus rocdec_status = ROCDEC_SUCCESS;
@@ -38,7 +54,10 @@ RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_deco
ERR("ERROR: invalid number of decode surfaces ");
return ROCDEC_INVALID_PARAMETER;
}
hip_ext_mem_.resize(decoder_create_info_.num_decode_surfaces);
hip_interop_.resize(decoder_create_info_.num_decode_surfaces);
for (auto i = 0; i < hip_interop_.size(); i++) {
memset((void *)&hip_interop_[i], 0, sizeof(hip_interop_[i]));
}
rocdec_status = va_video_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName);
if (rocdec_status != ROCDEC_SUCCESS) {
@@ -72,7 +91,15 @@ rocDecStatus RocDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconf
if (reconfig_params == nullptr) {
return ROCDEC_INVALID_PARAMETER;
}
rocDecStatus rocdec_status = va_video_decoder_.ReconfigureDecoder(reconfig_params);
rocDecStatus rocdec_status;
for (int pic_idx = 0; pic_idx < hip_interop_.size(); pic_idx++) {
rocdec_status = UnMapVideoFrame(pic_idx);
if (rocdec_status != ROCDEC_SUCCESS) {
ERR("ERROR: Unmapping the video frame for picture idx " + TOSTR(pic_idx) + " failed during reconfiguration!");
return rocdec_status;
}
}
rocdec_status = va_video_decoder_.ReconfigureDecoder(reconfig_params);
if (rocdec_status != ROCDEC_SUCCESS) {
ERR("ERROR: Reconfiguration of the decoder failed with rocDecStatus# " + TOSTR(rocdec_status));
return rocdec_status;
@@ -81,49 +108,81 @@ rocDecStatus RocDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconf
}
rocDecStatus RocDecoder::MapVideoFrame(int pic_idx, void *dev_mem_ptr[3], uint32_t horizontal_pitch[3], RocdecProcParams *vid_postproc_params) {
if (pic_idx >= hip_ext_mem_.size() || &dev_mem_ptr[0] == nullptr || vid_postproc_params == nullptr) {
if (pic_idx >= hip_interop_.size() || &dev_mem_ptr[0] == nullptr || vid_postproc_params == nullptr) {
return ROCDEC_INVALID_PARAMETER;
}
rocDecStatus rocdec_status = ROCDEC_SUCCESS;
hipExternalMemoryHandleDesc external_mem_handle_desc_ = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc_ = {};
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
rocdec_status = va_video_decoder_.ExportSurface(pic_idx, va_drm_prime_surface_desc);
// wait on current surface to make sure that it is ready for the HIP interop
rocdec_status = va_video_decoder_.SyncSurface(pic_idx);
if (rocdec_status != ROCDEC_SUCCESS) {
ERR("ERROR: Failed to export surface for picture id" + TOSTR(pic_idx) + " , with rocDecStatus# " + TOSTR(rocdec_status));
ERR("ERROR: Failed to export surface for picture id = " + TOSTR(pic_idx));
return rocdec_status;
}
external_mem_handle_desc_.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc_.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc_.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipImportExternalMemory(&hip_ext_mem_[pic_idx], &external_mem_handle_desc_));
// do the VA-API/HIP interop once per surface and save it for reusing
if (hip_interop_[pic_idx].hip_mapped_device_mem == nullptr) {
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
external_mem_buffer_desc_.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer(&*&dev_mem_ptr[0], hip_ext_mem_[pic_idx], &external_mem_buffer_desc_));
horizontal_pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
if (va_drm_prime_surface_desc.num_layers == 2) {
*&dev_mem_ptr[1] = static_cast<uint8_t*>(*&dev_mem_ptr[0]) + va_drm_prime_surface_desc.layers[1].offset[0];
horizontal_pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
} else if (va_drm_prime_surface_desc.num_layers == 3) {
*&dev_mem_ptr[2] = static_cast<uint8_t*>(*&dev_mem_ptr[0]) + va_drm_prime_surface_desc.layers[2].offset[0];
horizontal_pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
rocdec_status = va_video_decoder_.ExportSurface(pic_idx, va_drm_prime_surface_desc);
if (rocdec_status != ROCDEC_SUCCESS) {
ERR("ERROR: Failed to export surface for picture id" + TOSTR(pic_idx) + " , with rocDecStatus# " + TOSTR(rocdec_status));
return rocdec_status;
}
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipImportExternalMemory(&hip_interop_[pic_idx].hip_ext_mem, &external_mem_handle_desc));
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&hip_interop_[pic_idx].hip_mapped_device_mem, hip_interop_[pic_idx].hip_ext_mem, &external_mem_buffer_desc));
hip_interop_[pic_idx].width = va_drm_prime_surface_desc.width;
hip_interop_[pic_idx].height = va_drm_prime_surface_desc.height;
hip_interop_[pic_idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
hip_interop_[pic_idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
hip_interop_[pic_idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
hip_interop_[pic_idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
hip_interop_[pic_idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
hip_interop_[pic_idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
hip_interop_[pic_idx].num_layers = va_drm_prime_surface_desc.num_layers;
for (auto i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
}
}
for (auto i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
*&dev_mem_ptr[0] = hip_interop_[pic_idx].hip_mapped_device_mem;
horizontal_pitch[0] = hip_interop_[pic_idx].pitch[0];
if (hip_interop_[pic_idx].num_layers == 2) {
*&dev_mem_ptr[1] = hip_interop_[pic_idx].hip_mapped_device_mem + hip_interop_[pic_idx].offset[1];
horizontal_pitch[1] = hip_interop_[pic_idx].pitch[1];
} else if (hip_interop_[pic_idx].num_layers == 3) {
*&dev_mem_ptr[2] = hip_interop_[pic_idx].hip_mapped_device_mem + hip_interop_[pic_idx].offset[2];
horizontal_pitch[2] = hip_interop_[pic_idx].pitch[2];
}
return rocdec_status;
}
rocDecStatus RocDecoder::UnMapVideoFrame(int pic_idx) {
if (pic_idx >= hip_ext_mem_.size()) {
if (pic_idx >= hip_interop_.size()) {
return ROCDEC_INVALID_PARAMETER;
}
CHECK_HIP(hipDestroyExternalMemory(hip_ext_mem_[pic_idx]));
if (hip_interop_[pic_idx].hip_mapped_device_mem != nullptr)
CHECK_HIP(hipFree(hip_interop_[pic_idx].hip_mapped_device_mem));
if (hip_interop_[pic_idx].hip_ext_mem != nullptr)
CHECK_HIP(hipDestroyExternalMemory(hip_interop_[pic_idx].hip_ext_mem));
memset((void *)&hip_interop_[pic_idx], 0, sizeof(hip_interop_[pic_idx]));
return ROCDEC_SUCCESS;
}
+11 -1
Bestand weergeven
@@ -42,6 +42,16 @@ THE SOFTWARE.
}\
}
struct HipInteropDeviceMem {
hipExternalMemory_t hip_ext_mem; // Interface to the vaapi-hip interop
uint8_t* hip_mapped_device_mem; // Mapped device memory for the YUV plane
uint32_t width; // Width of the surface in pixels.
uint32_t height; // Height of the surface in pixels.
uint32_t offset[3]; // Offset of each plane
uint32_t pitch[3]; // Pitch of each plane
uint32_t num_layers; // Number of layers making up the surface
};
class RocDecoder {
public:
RocDecoder(RocDecoderCreateInfo &decoder_create_info);
@@ -59,5 +69,5 @@ private:
RocDecoderCreateInfo decoder_create_info_;
VaapiVideoDecoder va_video_decoder_;
hipDeviceProp_t hip_dev_prop_;
std::vector<hipExternalMemory_t> hip_ext_mem_;
std::vector<HipInteropDeviceMem> hip_interop_;
};
@@ -327,25 +327,6 @@ rocDecStatus VaapiVideoDecoder::ExportSurface(int pic_idx, VADRMPRIMESurfaceDesc
if (pic_idx >= va_surface_ids_.size()) {
return ROCDEC_INVALID_PARAMETER;
}
VASurfaceStatus surface_status;
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status));
while (surface_status != VASurfaceReady) {
VAStatus va_status = vaSyncSurface(va_display_, va_surface_ids_[pic_idx]);
/* Current implementation of vaSyncSurface() does not block indefinitely (contrary to VA-API spec), it returns
* VA_STATUS_ERROR_TIMEDOUT error when it blocks for a certain amount of time. Although time out can come from
* various reasons, we treat it as non-fatal and contiue waiting.
*/
if (va_status != VA_STATUS_SUCCESS) {
if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) {
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status));
} else {
std::cout << "VAAPI failure: vaSyncSurface() failed with error code: " << va_status << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;
return ROCDEC_RUNTIME_ERROR;
}
} else {
break;
}
}
CHECK_VAAPI(vaExportSurfaceHandle(va_display_, va_surface_ids_[pic_idx],
VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
VA_EXPORT_SURFACE_READ_ONLY |
@@ -384,4 +365,30 @@ rocDecStatus VaapiVideoDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo
return rocdec_status;
}
return rocdec_status;
}
rocDecStatus VaapiVideoDecoder::SyncSurface(int pic_idx) {
if (pic_idx >= va_surface_ids_.size()) {
return ROCDEC_INVALID_PARAMETER;
}
VASurfaceStatus surface_status;
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status));
while (surface_status != VASurfaceReady) {
VAStatus va_status = vaSyncSurface(va_display_, va_surface_ids_[pic_idx]);
/* Current implementation of vaSyncSurface() does not block indefinitely (contrary to VA-API spec), it returns
* VA_STATUS_ERROR_TIMEDOUT error when it blocks for a certain amount of time. Although time out can come from
* various reasons, we treat it as non-fatal and contiue waiting.
*/
if (va_status != VA_STATUS_SUCCESS) {
if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) {
CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status));
} else {
std::cout << "VAAPI failure: vaSyncSurface() failed with error code: " << va_status << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;
return ROCDEC_RUNTIME_ERROR;
}
} else {
break;
}
}
return ROCDEC_SUCCESS;
}
@@ -50,6 +50,7 @@ public:
rocDecStatus SubmitDecode(RocdecPicParams *pPicParams);
rocDecStatus GetDecodeStatus(int pic_idx, RocdecDecodeStatus* decode_status);
rocDecStatus ExportSurface(int pic_idx, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc);
rocDecStatus SyncSurface(int pic_idx);
rocDecStatus ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconfig_params);
private:
RocDecoderCreateInfo decoder_create_info_;
+1 -16
Bestand weergeven
@@ -729,10 +729,6 @@ int RocVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) {
}
HIP_API_CALL(hipStreamSynchronize(hip_stream_));
if(src_dev_ptr[0] != nullptr) {
HIP_API_CALL(hipFree(src_dev_ptr[0]));
}
ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, pDispInfo->picture_index));
}
} else {
RocdecDecodeStatus dec_status;
@@ -849,10 +845,6 @@ bool RocVideoDecoder::ReleaseFrame(int64_t pTimestamp, bool b_flushing) {
std::cerr << "Decoded Frame is released out of order" << std::endl;
return false;
}
if (mapped_frame_ptr != nullptr) {
HIP_API_CALL(hipFree(mapped_frame_ptr));
}
ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, fb->picture_index));
// pop decoded frame
vp_frames_q_.pop();
}
@@ -872,13 +864,6 @@ bool RocVideoDecoder::ReleaseInternalFrames() {
// only needed when using internal mapped buffer
while (!vp_frames_q_.empty()) {
std::lock_guard<std::mutex> lock(mtx_vp_frame_);
DecFrameBuffer *fb = &vp_frames_q_.front();
void *mapped_frame_ptr = fb->frame_ptr;
if (mapped_frame_ptr != nullptr) {
HIP_API_CALL(hipFree(mapped_frame_ptr));
}
ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, fb->picture_index));
// pop decoded frame
vp_frames_q_.pop();
}
@@ -896,7 +881,7 @@ void RocVideoDecoder::SaveFrameToFile(std::string output_file_name, void *surf_m
hipError_t hip_status = hipSuccess;
hip_status = hipMemcpyDtoH((void *)hst_ptr, surf_mem, output_image_size);
if (hip_status != hipSuccess) {
std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl;
std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hipGetErrorName(hip_status) << ")" << std::endl;
delete [] hst_ptr;
return;
}