From 6046873054a912adb83f45671a4194e18e396e1d Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Fri, 13 Jun 2025 11:51:26 -0400 Subject: [PATCH] Improve the memory pool logic to enable the reuse of VA-API surfaces (#159) * Improve the memory pool logic to enable the reuse of VA-API surfaces. * Update the version --- CHANGELOG.md | 2 +- CMakeLists.txt | 2 +- src/rocjpeg_decoder.cpp | 57 ++++++++++++++++++----------------- src/rocjpeg_decoder.h | 3 +- src/rocjpeg_vaapi_decoder.cpp | 22 ++++++++------ src/rocjpeg_vaapi_decoder.h | 2 ++ 6 files changed, 48 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74dd4e9319..7029b07697 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ Documentation for rocJPEG is available at [https://rocm.docs.amd.com/projects/rocJPEG/en/latest/](https://rocm.docs.amd.com/projects/rocJPEG/en/latest/) -## rocJPEG 0.13.0 (unreleased) +## rocJPEG 0.14.0 (unreleased) ## Added * cmake config files diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f242f2a3d..2cd734335a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,7 @@ endif() # rocJPEG Version # NOTE: package version and rocjpeg_version.h is generated with this version -set(VERSION "0.13.0") +set(VERSION "0.14.0") # Set Project Version and Language project(rocjpeg VERSION ${VERSION} LANGUAGES CXX) diff --git a/src/rocjpeg_decoder.cpp b/src/rocjpeg_decoder.cpp index c09cf13c97..d4a8ffd413 100644 --- a/src/rocjpeg_decoder.cpp +++ b/src/rocjpeg_decoder.cpp @@ -150,16 +150,16 @@ RocJpegStatus RocJpegDecoder::Decode(RocJpegStreamHandle jpeg_stream_handle, con CHECK_ROCJPEG(GetChromaHeight(hip_interop_dev_mem.surface_format, picture_height, chroma_height)); // Copy Luma (first channel) for any surface format - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, 0, destination, decode_params, is_roi_valid)); if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) { // Copy the second channel (UV interleaved) for NV12 - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, destination, decode_params, is_roi_valid)); } else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P || hip_interop_dev_mem.surface_format == VA_FOURCC_422V) { // Copy the second and third channels for YUV444 and YUV440 (i.e., YUV422V) - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination, decode_params, is_roi_valid)); - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 2, destination, decode_params, is_roi_valid)); } break; case ROCJPEG_OUTPUT_YUV_PLANAR: @@ -255,15 +255,15 @@ RocJpegStatus RocJpegDecoder::DecodeBatched(RocJpegStreamHandle *jpeg_streams, i // Copy the native decoded output buffers from interop memory directly to the destination buffers CHECK_ROCJPEG(GetChromaHeight(hip_interop_dev_mem.surface_format, picture_height, chroma_height)); // Copy Luma (first channel) for any surface format - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, &destinations[k + i], &decode_params[k + i], is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, 0, &destinations[k + i], &decode_params[k + i], is_roi_valid)); if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) { // Copy the second channel (UV interleaved) for NV12 - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, &destinations[k + i], &decode_params[k + i], is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, &destinations[k + i], &decode_params[k + i], is_roi_valid)); } else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P || hip_interop_dev_mem.surface_format == VA_FOURCC_422V) { // Copy the second and third channels for YUV444 and YUV440 (i.e., YUV422V) - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, &destinations[k + i], &decode_params[k + i], is_roi_valid)); - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, &destinations[k + i], &decode_params[k + i], is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, &destinations[k + i], &decode_params[k + i], is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 2, &destinations[k + i], &decode_params[k + i], is_roi_valid)); } break; case ROCJPEG_OUTPUT_YUV_PLANAR: @@ -368,12 +368,13 @@ RocJpegStatus RocJpegDecoder::GetImageInfo(RocJpegStreamHandle jpeg_stream_handl * The `channel_height` parameter specifies the height of the channel. * * @param hip_interop_dev_mem The `HipInteropDeviceMem` object containing the source channel data. + * @param channel_width The width of the channel to be copied. * @param channel_height The height of the channel to be copied. * @param channel_index The index of the channel to be copied. * @param destination The `RocJpegImage` object representing the destination image. * @return The status of the operation. Returns `ROCJPEG_STATUS_SUCCESS` if the channel was copied successfully. */ -RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_mem, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination, const RocJpegDecodeParams *decode_params, bool is_roi_valid) { +RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_mem, uint16_t channel_width, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination, const RocJpegDecodeParams *decode_params, bool is_roi_valid) { if (hip_interop_dev_mem.pitch[channel_index] != 0 && destination->pitch[channel_index] != 0 && destination->channel[channel_index] != nullptr) { uint32_t roi_offset = 0; if (is_roi_valid) { @@ -394,24 +395,24 @@ RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_m uint32_t channel_widths[ROCJPEG_MAX_COMPONENT] = {}; uint32_t roi_width = decode_params->crop_rectangle.right - decode_params->crop_rectangle.left; - bool is_roi_width_valid = roi_width > 0 && roi_width <= hip_interop_dev_mem.width; + bool is_roi_width_valid = roi_width > 0 && roi_width <= channel_width; switch (decode_params->output_format) { case ROCJPEG_OUTPUT_NATIVE: switch (hip_interop_dev_mem.surface_format) { case VA_FOURCC_444P: - channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; case VA_FOURCC_422V: - channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; case VA_FOURCC_YUY2: - channel_widths[0] = (is_roi_width_valid ? roi_width : hip_interop_dev_mem.width) * 2; + channel_widths[0] = (is_roi_width_valid ? roi_width : channel_width) * 2; break; case VA_FOURCC_NV12: - channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; case VA_FOURCC_Y800: - channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; default: ERR("Unknown output format!"); @@ -421,21 +422,21 @@ RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_m case ROCJPEG_OUTPUT_YUV_PLANAR: switch (hip_interop_dev_mem.surface_format) { case VA_FOURCC_444P: - channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; case VA_FOURCC_422V: - channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; case VA_FOURCC_YUY2: - channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; channel_widths[2] = channel_widths[1] = channel_widths[0] >> 1; break; case VA_FOURCC_NV12: - channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; channel_widths[2] = channel_widths[1] = channel_widths[0] >> 1; break; case VA_FOURCC_Y800: - channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; default: ERR("Unknown output format!"); @@ -443,13 +444,13 @@ RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_m } break; case ROCJPEG_OUTPUT_Y: - channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; case ROCJPEG_OUTPUT_RGB: - channel_widths[0] = (is_roi_width_valid ? roi_width : hip_interop_dev_mem.width) * 3; + channel_widths[0] = (is_roi_width_valid ? roi_width : channel_width) * 3; break; case ROCJPEG_OUTPUT_RGB_PLANAR: - channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width; + channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width; break; default: ERR("Unknown output format!"); @@ -615,7 +616,7 @@ RocJpegStatus RocJpegDecoder::ColorConvertToRGBPlanar(HipInteropDeviceMem& hip_i case VA_FOURCC_RGBP: // Copy red, green, and blue channels from the interop memory into the destination for (uint8_t channel_index = 0; channel_index < 3; channel_index++) { - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, channel_index, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, channel_index, destination, decode_params, is_roi_valid)); } break; default: @@ -659,15 +660,15 @@ RocJpegStatus RocJpegDecoder::GetPlanarYUVOutputFormat(HipInteropDeviceMem& hip_ destination->pitch[0], destination->pitch[1], hip_interop_dev_mem.hip_mapped_device_mem + roi_offset, hip_interop_dev_mem.pitch[0]); } else { // Copy Luma - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, 0, destination, decode_params, is_roi_valid)); if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) { // Extract the interleaved UV channels and copy them into the second and third channels of the destination. ConvertInterleavedUVToPlanarUV(hip_stream_, picture_width >> 1, picture_height >> 1, destination->channel[1], destination->channel[2], destination->pitch[1], hip_interop_dev_mem.hip_mapped_device_mem + hip_interop_dev_mem.offset[1] + roi_offset, hip_interop_dev_mem.pitch[1]); } else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P || hip_interop_dev_mem.surface_format == VA_FOURCC_422V) { - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination, decode_params, is_roi_valid)); - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 2, destination, decode_params, is_roi_valid)); } } return ROCJPEG_STATUS_SUCCESS; @@ -699,7 +700,7 @@ RocJpegStatus RocJpegDecoder::GetYOutputFormat(HipInteropDeviceMem& hip_interop_ hip_interop_dev_mem.hip_mapped_device_mem + roi_offset, hip_interop_dev_mem.pitch[0]); } else { // Copy Luma - CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, destination, decode_params, is_roi_valid)); + CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, 0, destination, decode_params, is_roi_valid)); } return ROCJPEG_STATUS_SUCCESS; } \ No newline at end of file diff --git a/src/rocjpeg_decoder.h b/src/rocjpeg_decoder.h index 09daf8f02a..a1a1f2a827 100644 --- a/src/rocjpeg_decoder.h +++ b/src/rocjpeg_decoder.h @@ -119,12 +119,13 @@ private: /** * @brief Copies a channel from the HIP interop device memory to the destination image. * @param hip_interop The HIP interop device memory. + * @param channel_width The width of the channel. * @param channel_height The height of the channel. * @param channel_index The index of the channel. * @param destination Pointer to the destination image. * @return The status of the operation. */ - RocJpegStatus CopyChannel(HipInteropDeviceMem& hip_interop, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination, const RocJpegDecodeParams *decode_params, bool is_roi_valid); + RocJpegStatus CopyChannel(HipInteropDeviceMem& hip_interop, uint16_t channel_width, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination, const RocJpegDecodeParams *decode_params, bool is_roi_valid); /** * @brief Converts the image to RGB color space. diff --git a/src/rocjpeg_vaapi_decoder.cpp b/src/rocjpeg_vaapi_decoder.cpp index e914e028e4..6164d52d40 100644 --- a/src/rocjpeg_vaapi_decoder.cpp +++ b/src/rocjpeg_vaapi_decoder.cpp @@ -181,7 +181,7 @@ RocJpegStatus RocJpegVaapiMemoryPool::AddPoolEntry(uint32_t surface_format, cons */ RocJpegVaapiMemPoolEntry RocJpegVaapiMemoryPool::GetEntry(uint32_t surface_format, uint32_t image_width, uint32_t image_height, uint32_t num_surfaces) { for (auto& entry : mem_pool_[surface_format]) { - if (entry.image_width == image_width && entry.image_height == image_height && entry.va_surface_ids.size() == num_surfaces && entry.entry_status == kIdle) { + if (entry.image_width >= image_width && entry.image_height >= image_height && entry.va_surface_ids.size() == num_surfaces && entry.entry_status == kIdle) { entry.entry_status = kBusy; return entry; } @@ -334,7 +334,7 @@ bool RocJpegVaapiMemoryPool::SetSurfaceAsIdle(VASurfaceID surface_id) { * @param device_id The ID of the device to be used for decoding. */ RocJpegVappiDecoder::RocJpegVappiDecoder(int device_id) : device_id_{device_id}, drm_fd_{-1}, min_picture_width_{64}, min_picture_height_{64}, - max_picture_width_{4096}, max_picture_height_{4096}, supports_modifiers_{false}, va_display_{0}, va_config_attrib_{{}}, va_config_id_{0}, va_profile_{VAProfileJPEGBaseline}, + max_picture_width_{4096}, max_picture_height_{4096}, default_surface_width_{3840}, default_surface_height_{2160}, supports_modifiers_{false}, va_display_{0}, va_config_attrib_{{}}, va_config_id_{0}, va_profile_{VAProfileJPEGBaseline}, vaapi_mem_pool_(std::make_unique()), current_vcn_jpeg_spec_{}, va_picture_parameter_buf_id_{0}, va_quantization_matrix_buf_id_{0}, va_huffmantable_buf_id_{0}, va_slice_param_buf_id_{0}, va_slice_data_buf_id_{0} {}; @@ -410,7 +410,7 @@ RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, in vaapi_mem_pool_->SetVaapiDisplay(va_display_); GetNumJpegCores(); - vaapi_mem_pool_->SetPoolSize(current_vcn_jpeg_spec_.num_jpeg_cores + 1); + vaapi_mem_pool_->SetPoolSize(5 * current_vcn_jpeg_spec_.num_jpeg_cores + 1); return ROCJPEG_STATUS_SUCCESS; } @@ -694,9 +694,11 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecode(const JpegStreamParameters *jpeg RocJpegVaapiMemPoolEntry mem_pool_entry = vaapi_mem_pool_->GetEntry(surface_pixel_format, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, 1); if (mem_pool_entry.va_surface_ids.empty()) { mem_pool_entry.va_surface_ids.resize(1); - CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, mem_pool_entry.va_surface_ids.data(), 1, surface_attribs.data(), surface_attribs.size())); - mem_pool_entry.image_width = jpeg_stream_params->picture_parameter_buffer.picture_width; - mem_pool_entry.image_height = jpeg_stream_params->picture_parameter_buffer.picture_height; + uint32_t surface_width = (jpeg_stream_params->picture_parameter_buffer.picture_width > default_surface_width_) ? jpeg_stream_params->picture_parameter_buffer.picture_width : default_surface_width_; + uint32_t surface_height = (jpeg_stream_params->picture_parameter_buffer.picture_height > default_surface_height_) ? jpeg_stream_params->picture_parameter_buffer.picture_height : default_surface_height_; + CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, surface_width, surface_height, mem_pool_entry.va_surface_ids.data(), 1, surface_attribs.data(), surface_attribs.size())); + mem_pool_entry.image_width = surface_width; + mem_pool_entry.image_height = surface_height; mem_pool_entry.hip_interops.resize(1, HipInteropDeviceMem()); surface_id = mem_pool_entry.va_surface_ids[0]; mem_pool_entry.entry_status = kBusy; @@ -825,9 +827,11 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecodeBatched(JpegStreamParameters *jpe RocJpegVaapiMemPoolEntry mem_pool_entry = vaapi_mem_pool_->GetEntry(key.pixel_format, key.width, key.height, indices.size()); if (mem_pool_entry.va_surface_ids.empty()) { mem_pool_entry.va_surface_ids.resize(indices.size()); - CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, key.width, key.height, mem_pool_entry.va_surface_ids.data(), mem_pool_entry.va_surface_ids.size(), surface_attribs.data(), supports_modifiers_ ? 2 : 1)); - mem_pool_entry.image_width = key.width; - mem_pool_entry.image_height = key.height; + uint32_t surface_width = (key.width > default_surface_width_) ? key.width : default_surface_width_; + uint32_t surface_height = (key.height > default_surface_height_) ? key.height : default_surface_height_; + CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, surface_width, surface_height, mem_pool_entry.va_surface_ids.data(), mem_pool_entry.va_surface_ids.size(), surface_attribs.data(), supports_modifiers_ ? 2 : 1)); + mem_pool_entry.image_width = surface_width; + mem_pool_entry.image_height = surface_height; for (size_t i = 0; i < mem_pool_entry.va_surface_ids.size(); i++) { surface_ids[indices[i]] = mem_pool_entry.va_surface_ids[i]; } diff --git a/src/rocjpeg_vaapi_decoder.h b/src/rocjpeg_vaapi_decoder.h index c9ac454b7a..019311bbdb 100644 --- a/src/rocjpeg_vaapi_decoder.h +++ b/src/rocjpeg_vaapi_decoder.h @@ -338,6 +338,8 @@ private: uint32_t min_picture_height_; // The minimum height of the picture uint32_t max_picture_width_; // The maximum width of the picture uint32_t max_picture_height_; // The maximum height of the picture + uint32_t default_surface_width_; // The default width of the surface + uint32_t default_surface_height_; // The default height of the surface bool supports_modifiers_; // DRM format modifiers support VADisplay va_display_; // The VAAPI display VAContextID va_context_id_; // The VAAPI context ID