diff --git a/src/rocdecode/roc_decoder.cpp b/src/rocdecode/roc_decoder.cpp index 3602d3967b..c5d619e683 100644 --- a/src/rocdecode/roc_decoder.cpp +++ b/src/rocdecode/roc_decoder.cpp @@ -25,7 +25,23 @@ THE SOFTWARE. RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_decoder_{decoder_create_info}, decoder_create_info_{decoder_create_info} {} - RocDecoder::~RocDecoder() {} + RocDecoder::~RocDecoder() { + // clean up the VA-API/HIP interop memories + for(auto i = 0; i < hip_interop_.size(); i++) { + if (hip_interop_[i].hip_mapped_device_mem != nullptr) { + hipError_t hip_status = hipFree(hip_interop_[i].hip_mapped_device_mem); + if (hip_status != hipSuccess) { + ERR("ERROR: hipFree failed for picture id " + TOSTR(i)); + } + } + if (hip_interop_[i].hip_ext_mem != nullptr) { + hipError_t hip_status = hipDestroyExternalMemory(hip_interop_[i].hip_ext_mem); + if (hip_status != hipSuccess) { + ERR("ERROR: hipDestroyExternalMemory failed for picture id " + TOSTR(i)); + } + } + } + } rocDecStatus RocDecoder::InitializeDecoder() { rocDecStatus rocdec_status = ROCDEC_SUCCESS; @@ -38,7 +54,10 @@ RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_deco ERR("ERROR: invalid number of decode surfaces "); return ROCDEC_INVALID_PARAMETER; } - hip_ext_mem_.resize(decoder_create_info_.num_decode_surfaces); + hip_interop_.resize(decoder_create_info_.num_decode_surfaces); + for (auto i = 0; i < hip_interop_.size(); i++) { + memset((void *)&hip_interop_[i], 0, sizeof(hip_interop_[i])); + } rocdec_status = va_video_decoder_.InitializeDecoder(hip_dev_prop_.gcnArchName); if (rocdec_status != ROCDEC_SUCCESS) { @@ -72,7 +91,15 @@ rocDecStatus RocDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconf if (reconfig_params == nullptr) { return ROCDEC_INVALID_PARAMETER; } - rocDecStatus rocdec_status = va_video_decoder_.ReconfigureDecoder(reconfig_params); + rocDecStatus rocdec_status; + for (int pic_idx = 0; pic_idx < hip_interop_.size(); pic_idx++) { + rocdec_status = UnMapVideoFrame(pic_idx); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("ERROR: Unmapping the video frame for picture idx " + TOSTR(pic_idx) + " failed during reconfiguration!"); + return rocdec_status; + } + } + rocdec_status = va_video_decoder_.ReconfigureDecoder(reconfig_params); if (rocdec_status != ROCDEC_SUCCESS) { ERR("ERROR: Reconfiguration of the decoder failed with rocDecStatus# " + TOSTR(rocdec_status)); return rocdec_status; @@ -81,49 +108,81 @@ rocDecStatus RocDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconf } rocDecStatus RocDecoder::MapVideoFrame(int pic_idx, void *dev_mem_ptr[3], uint32_t horizontal_pitch[3], RocdecProcParams *vid_postproc_params) { - if (pic_idx >= hip_ext_mem_.size() || &dev_mem_ptr[0] == nullptr || vid_postproc_params == nullptr) { + if (pic_idx >= hip_interop_.size() || &dev_mem_ptr[0] == nullptr || vid_postproc_params == nullptr) { return ROCDEC_INVALID_PARAMETER; } rocDecStatus rocdec_status = ROCDEC_SUCCESS; - hipExternalMemoryHandleDesc external_mem_handle_desc_ = {}; - hipExternalMemoryBufferDesc external_mem_buffer_desc_ = {}; - VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {}; - rocdec_status = va_video_decoder_.ExportSurface(pic_idx, va_drm_prime_surface_desc); + // wait on current surface to make sure that it is ready for the HIP interop + rocdec_status = va_video_decoder_.SyncSurface(pic_idx); if (rocdec_status != ROCDEC_SUCCESS) { - ERR("ERROR: Failed to export surface for picture id" + TOSTR(pic_idx) + " , with rocDecStatus# " + TOSTR(rocdec_status)); + ERR("ERROR: Failed to export surface for picture id = " + TOSTR(pic_idx)); return rocdec_status; } - external_mem_handle_desc_.type = hipExternalMemoryHandleTypeOpaqueFd; - external_mem_handle_desc_.handle.fd = va_drm_prime_surface_desc.objects[0].fd; - external_mem_handle_desc_.size = va_drm_prime_surface_desc.objects[0].size; - CHECK_HIP(hipImportExternalMemory(&hip_ext_mem_[pic_idx], &external_mem_handle_desc_)); + // do the VA-API/HIP interop once per surface and save it for reusing + if (hip_interop_[pic_idx].hip_mapped_device_mem == nullptr) { + hipExternalMemoryHandleDesc external_mem_handle_desc = {}; + hipExternalMemoryBufferDesc external_mem_buffer_desc = {}; + VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {}; - external_mem_buffer_desc_.size = va_drm_prime_surface_desc.objects[0].size; - CHECK_HIP(hipExternalMemoryGetMappedBuffer(&*&dev_mem_ptr[0], hip_ext_mem_[pic_idx], &external_mem_buffer_desc_)); - horizontal_pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0]; - if (va_drm_prime_surface_desc.num_layers == 2) { - *&dev_mem_ptr[1] = static_cast(*&dev_mem_ptr[0]) + va_drm_prime_surface_desc.layers[1].offset[0]; - horizontal_pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0]; - } else if (va_drm_prime_surface_desc.num_layers == 3) { - *&dev_mem_ptr[2] = static_cast(*&dev_mem_ptr[0]) + va_drm_prime_surface_desc.layers[2].offset[0]; - horizontal_pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0]; + rocdec_status = va_video_decoder_.ExportSurface(pic_idx, va_drm_prime_surface_desc); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("ERROR: Failed to export surface for picture id" + TOSTR(pic_idx) + " , with rocDecStatus# " + TOSTR(rocdec_status)); + return rocdec_status; + } + + external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd; + external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd; + external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size; + + CHECK_HIP(hipImportExternalMemory(&hip_interop_[pic_idx].hip_ext_mem, &external_mem_handle_desc)); + + external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size; + CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&hip_interop_[pic_idx].hip_mapped_device_mem, hip_interop_[pic_idx].hip_ext_mem, &external_mem_buffer_desc)); + + hip_interop_[pic_idx].width = va_drm_prime_surface_desc.width; + hip_interop_[pic_idx].height = va_drm_prime_surface_desc.height; + + hip_interop_[pic_idx].offset[0] = va_drm_prime_surface_desc.layers[0].offset[0]; + hip_interop_[pic_idx].offset[1] = va_drm_prime_surface_desc.layers[1].offset[0]; + hip_interop_[pic_idx].offset[2] = va_drm_prime_surface_desc.layers[2].offset[0]; + + hip_interop_[pic_idx].pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0]; + hip_interop_[pic_idx].pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0]; + hip_interop_[pic_idx].pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0]; + + hip_interop_[pic_idx].num_layers = va_drm_prime_surface_desc.num_layers; + + for (auto i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) { + close(va_drm_prime_surface_desc.objects[i].fd); + } } - for (auto i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) { - close(va_drm_prime_surface_desc.objects[i].fd); + *&dev_mem_ptr[0] = hip_interop_[pic_idx].hip_mapped_device_mem; + horizontal_pitch[0] = hip_interop_[pic_idx].pitch[0]; + if (hip_interop_[pic_idx].num_layers == 2) { + *&dev_mem_ptr[1] = hip_interop_[pic_idx].hip_mapped_device_mem + hip_interop_[pic_idx].offset[1]; + horizontal_pitch[1] = hip_interop_[pic_idx].pitch[1]; + } else if (hip_interop_[pic_idx].num_layers == 3) { + *&dev_mem_ptr[2] = hip_interop_[pic_idx].hip_mapped_device_mem + hip_interop_[pic_idx].offset[2]; + horizontal_pitch[2] = hip_interop_[pic_idx].pitch[2]; } return rocdec_status; } rocDecStatus RocDecoder::UnMapVideoFrame(int pic_idx) { - if (pic_idx >= hip_ext_mem_.size()) { + if (pic_idx >= hip_interop_.size()) { return ROCDEC_INVALID_PARAMETER; } - CHECK_HIP(hipDestroyExternalMemory(hip_ext_mem_[pic_idx])); + if (hip_interop_[pic_idx].hip_mapped_device_mem != nullptr) + CHECK_HIP(hipFree(hip_interop_[pic_idx].hip_mapped_device_mem)); + if (hip_interop_[pic_idx].hip_ext_mem != nullptr) + CHECK_HIP(hipDestroyExternalMemory(hip_interop_[pic_idx].hip_ext_mem)); + + memset((void *)&hip_interop_[pic_idx], 0, sizeof(hip_interop_[pic_idx])); return ROCDEC_SUCCESS; } diff --git a/src/rocdecode/roc_decoder.h b/src/rocdecode/roc_decoder.h index 7bc705ecb8..770f7f60f3 100644 --- a/src/rocdecode/roc_decoder.h +++ b/src/rocdecode/roc_decoder.h @@ -42,6 +42,16 @@ THE SOFTWARE. }\ } +struct HipInteropDeviceMem { + hipExternalMemory_t hip_ext_mem; // Interface to the vaapi-hip interop + uint8_t* hip_mapped_device_mem; // Mapped device memory for the YUV plane + uint32_t width; // Width of the surface in pixels. + uint32_t height; // Height of the surface in pixels. + uint32_t offset[3]; // Offset of each plane + uint32_t pitch[3]; // Pitch of each plane + uint32_t num_layers; // Number of layers making up the surface +}; + class RocDecoder { public: RocDecoder(RocDecoderCreateInfo &decoder_create_info); @@ -59,5 +69,5 @@ private: RocDecoderCreateInfo decoder_create_info_; VaapiVideoDecoder va_video_decoder_; hipDeviceProp_t hip_dev_prop_; - std::vector hip_ext_mem_; + std::vector hip_interop_; }; \ No newline at end of file diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index 05e409d0ee..5073dbf7c1 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -327,25 +327,6 @@ rocDecStatus VaapiVideoDecoder::ExportSurface(int pic_idx, VADRMPRIMESurfaceDesc if (pic_idx >= va_surface_ids_.size()) { return ROCDEC_INVALID_PARAMETER; } - VASurfaceStatus surface_status; - CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status)); - while (surface_status != VASurfaceReady) { - VAStatus va_status = vaSyncSurface(va_display_, va_surface_ids_[pic_idx]); - /* Current implementation of vaSyncSurface() does not block indefinitely (contrary to VA-API spec), it returns - * VA_STATUS_ERROR_TIMEDOUT error when it blocks for a certain amount of time. Although time out can come from - * various reasons, we treat it as non-fatal and contiue waiting. - */ - if (va_status != VA_STATUS_SUCCESS) { - if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) { - CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status)); - } else { - std::cout << "VAAPI failure: vaSyncSurface() failed with error code: " << va_status << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl; - return ROCDEC_RUNTIME_ERROR; - } - } else { - break; - } - } CHECK_VAAPI(vaExportSurfaceHandle(va_display_, va_surface_ids_[pic_idx], VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, VA_EXPORT_SURFACE_READ_ONLY | @@ -384,4 +365,30 @@ rocDecStatus VaapiVideoDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo return rocdec_status; } return rocdec_status; +} + +rocDecStatus VaapiVideoDecoder::SyncSurface(int pic_idx) { + if (pic_idx >= va_surface_ids_.size()) { + return ROCDEC_INVALID_PARAMETER; + } + VASurfaceStatus surface_status; + CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status)); + while (surface_status != VASurfaceReady) { + VAStatus va_status = vaSyncSurface(va_display_, va_surface_ids_[pic_idx]); + /* Current implementation of vaSyncSurface() does not block indefinitely (contrary to VA-API spec), it returns + * VA_STATUS_ERROR_TIMEDOUT error when it blocks for a certain amount of time. Although time out can come from + * various reasons, we treat it as non-fatal and contiue waiting. + */ + if (va_status != VA_STATUS_SUCCESS) { + if (va_status == 0x26 /*VA_STATUS_ERROR_TIMEDOUT*/) { + CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status)); + } else { + std::cout << "VAAPI failure: vaSyncSurface() failed with error code: " << va_status << "', status: " << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl; + return ROCDEC_RUNTIME_ERROR; + } + } else { + break; + } + } + return ROCDEC_SUCCESS; } \ No newline at end of file diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index 15c5ac5f55..5550242c51 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -50,6 +50,7 @@ public: rocDecStatus SubmitDecode(RocdecPicParams *pPicParams); rocDecStatus GetDecodeStatus(int pic_idx, RocdecDecodeStatus* decode_status); rocDecStatus ExportSurface(int pic_idx, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc); + rocDecStatus SyncSurface(int pic_idx); rocDecStatus ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconfig_params); private: RocDecoderCreateInfo decoder_create_info_; diff --git a/utils/rocvideodecode/roc_video_dec.cpp b/utils/rocvideodecode/roc_video_dec.cpp index 93abb8fc99..216e831ceb 100644 --- a/utils/rocvideodecode/roc_video_dec.cpp +++ b/utils/rocvideodecode/roc_video_dec.cpp @@ -729,10 +729,6 @@ int RocVideoDecoder::HandlePictureDisplay(RocdecParserDispInfo *pDispInfo) { } HIP_API_CALL(hipStreamSynchronize(hip_stream_)); - if(src_dev_ptr[0] != nullptr) { - HIP_API_CALL(hipFree(src_dev_ptr[0])); - } - ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, pDispInfo->picture_index)); } } else { RocdecDecodeStatus dec_status; @@ -849,10 +845,6 @@ bool RocVideoDecoder::ReleaseFrame(int64_t pTimestamp, bool b_flushing) { std::cerr << "Decoded Frame is released out of order" << std::endl; return false; } - if (mapped_frame_ptr != nullptr) { - HIP_API_CALL(hipFree(mapped_frame_ptr)); - } - ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, fb->picture_index)); // pop decoded frame vp_frames_q_.pop(); } @@ -872,13 +864,6 @@ bool RocVideoDecoder::ReleaseInternalFrames() { // only needed when using internal mapped buffer while (!vp_frames_q_.empty()) { std::lock_guard lock(mtx_vp_frame_); - DecFrameBuffer *fb = &vp_frames_q_.front(); - void *mapped_frame_ptr = fb->frame_ptr; - - if (mapped_frame_ptr != nullptr) { - HIP_API_CALL(hipFree(mapped_frame_ptr)); - } - ROCDEC_API_CALL(rocDecUnMapVideoFrame(roc_decoder_, fb->picture_index)); // pop decoded frame vp_frames_q_.pop(); } @@ -896,7 +881,7 @@ void RocVideoDecoder::SaveFrameToFile(std::string output_file_name, void *surf_m hipError_t hip_status = hipSuccess; hip_status = hipMemcpyDtoH((void *)hst_ptr, surf_mem, output_image_size); if (hip_status != hipSuccess) { - std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hip_status << ")" << std::endl; + std::cerr << "ERROR: hipMemcpyDtoH failed! (" << hipGetErrorName(hip_status) << ")" << std::endl; delete [] hst_ptr; return; }