Improve the memory pool logic to enable the reuse of VA-API surfaces (#159)

* Improve the memory pool logic to enable the reuse of VA-API surfaces.

* Update the version

[ROCm/rocjpeg commit: 6046873054]
This commit is contained in:
Aryan Salmanpour
2025-06-13 11:51:26 -04:00
committad av GitHub
förälder 4509fc4165
incheckning 3b10d1948e
6 ändrade filer med 48 tillägg och 40 borttagningar
+1 -1
Visa fil
@@ -3,7 +3,7 @@
Documentation for rocJPEG is available at
[https://rocm.docs.amd.com/projects/rocJPEG/en/latest/](https://rocm.docs.amd.com/projects/rocJPEG/en/latest/)
## rocJPEG 0.13.0 (unreleased)
## rocJPEG 0.14.0 (unreleased)
## Added
* cmake config files
+1 -1
Visa fil
@@ -42,7 +42,7 @@ endif()
# rocJPEG Version
# NOTE: package version and rocjpeg_version.h is generated with this version
set(VERSION "0.13.0")
set(VERSION "0.14.0")
# Set Project Version and Language
project(rocjpeg VERSION ${VERSION} LANGUAGES CXX)
+29 -28
Visa fil
@@ -150,16 +150,16 @@ RocJpegStatus RocJpegDecoder::Decode(RocJpegStreamHandle jpeg_stream_handle, con
CHECK_ROCJPEG(GetChromaHeight(hip_interop_dev_mem.surface_format, picture_height, chroma_height));
// Copy Luma (first channel) for any surface format
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, 0, destination, decode_params, is_roi_valid));
if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) {
// Copy the second channel (UV interleaved) for NV12
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, destination, decode_params, is_roi_valid));
} else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P ||
hip_interop_dev_mem.surface_format == VA_FOURCC_422V) {
// Copy the second and third channels for YUV444 and YUV440 (i.e., YUV422V)
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 2, destination, decode_params, is_roi_valid));
}
break;
case ROCJPEG_OUTPUT_YUV_PLANAR:
@@ -255,15 +255,15 @@ RocJpegStatus RocJpegDecoder::DecodeBatched(RocJpegStreamHandle *jpeg_streams, i
// Copy the native decoded output buffers from interop memory directly to the destination buffers
CHECK_ROCJPEG(GetChromaHeight(hip_interop_dev_mem.surface_format, picture_height, chroma_height));
// Copy Luma (first channel) for any surface format
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, &destinations[k + i], &decode_params[k + i], is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, 0, &destinations[k + i], &decode_params[k + i], is_roi_valid));
if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) {
// Copy the second channel (UV interleaved) for NV12
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, &destinations[k + i], &decode_params[k + i], is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, &destinations[k + i], &decode_params[k + i], is_roi_valid));
} else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P ||
hip_interop_dev_mem.surface_format == VA_FOURCC_422V) {
// Copy the second and third channels for YUV444 and YUV440 (i.e., YUV422V)
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, &destinations[k + i], &decode_params[k + i], is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, &destinations[k + i], &decode_params[k + i], is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, &destinations[k + i], &decode_params[k + i], is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 2, &destinations[k + i], &decode_params[k + i], is_roi_valid));
}
break;
case ROCJPEG_OUTPUT_YUV_PLANAR:
@@ -368,12 +368,13 @@ RocJpegStatus RocJpegDecoder::GetImageInfo(RocJpegStreamHandle jpeg_stream_handl
* The `channel_height` parameter specifies the height of the channel.
*
* @param hip_interop_dev_mem The `HipInteropDeviceMem` object containing the source channel data.
* @param channel_width The width of the channel to be copied.
* @param channel_height The height of the channel to be copied.
* @param channel_index The index of the channel to be copied.
* @param destination The `RocJpegImage` object representing the destination image.
* @return The status of the operation. Returns `ROCJPEG_STATUS_SUCCESS` if the channel was copied successfully.
*/
RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_mem, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination, const RocJpegDecodeParams *decode_params, bool is_roi_valid) {
RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_mem, uint16_t channel_width, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination, const RocJpegDecodeParams *decode_params, bool is_roi_valid) {
if (hip_interop_dev_mem.pitch[channel_index] != 0 && destination->pitch[channel_index] != 0 && destination->channel[channel_index] != nullptr) {
uint32_t roi_offset = 0;
if (is_roi_valid) {
@@ -394,24 +395,24 @@ RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_m
uint32_t channel_widths[ROCJPEG_MAX_COMPONENT] = {};
uint32_t roi_width = decode_params->crop_rectangle.right - decode_params->crop_rectangle.left;
bool is_roi_width_valid = roi_width > 0 && roi_width <= hip_interop_dev_mem.width;
bool is_roi_width_valid = roi_width > 0 && roi_width <= channel_width;
switch (decode_params->output_format) {
case ROCJPEG_OUTPUT_NATIVE:
switch (hip_interop_dev_mem.surface_format) {
case VA_FOURCC_444P:
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
case VA_FOURCC_422V:
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
case VA_FOURCC_YUY2:
channel_widths[0] = (is_roi_width_valid ? roi_width : hip_interop_dev_mem.width) * 2;
channel_widths[0] = (is_roi_width_valid ? roi_width : channel_width) * 2;
break;
case VA_FOURCC_NV12:
channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
case VA_FOURCC_Y800:
channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
default:
ERR("Unknown output format!");
@@ -421,21 +422,21 @@ RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_m
case ROCJPEG_OUTPUT_YUV_PLANAR:
switch (hip_interop_dev_mem.surface_format) {
case VA_FOURCC_444P:
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
case VA_FOURCC_422V:
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
case VA_FOURCC_YUY2:
channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
channel_widths[2] = channel_widths[1] = channel_widths[0] >> 1;
break;
case VA_FOURCC_NV12:
channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
channel_widths[2] = channel_widths[1] = channel_widths[0] >> 1;
break;
case VA_FOURCC_Y800:
channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
default:
ERR("Unknown output format!");
@@ -443,13 +444,13 @@ RocJpegStatus RocJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_m
}
break;
case ROCJPEG_OUTPUT_Y:
channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
case ROCJPEG_OUTPUT_RGB:
channel_widths[0] = (is_roi_width_valid ? roi_width : hip_interop_dev_mem.width) * 3;
channel_widths[0] = (is_roi_width_valid ? roi_width : channel_width) * 3;
break;
case ROCJPEG_OUTPUT_RGB_PLANAR:
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : hip_interop_dev_mem.width;
channel_widths[2] = channel_widths[1] = channel_widths[0] = is_roi_width_valid ? roi_width : channel_width;
break;
default:
ERR("Unknown output format!");
@@ -615,7 +616,7 @@ RocJpegStatus RocJpegDecoder::ColorConvertToRGBPlanar(HipInteropDeviceMem& hip_i
case VA_FOURCC_RGBP:
// Copy red, green, and blue channels from the interop memory into the destination
for (uint8_t channel_index = 0; channel_index < 3; channel_index++) {
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, channel_index, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, channel_index, destination, decode_params, is_roi_valid));
}
break;
default:
@@ -659,15 +660,15 @@ RocJpegStatus RocJpegDecoder::GetPlanarYUVOutputFormat(HipInteropDeviceMem& hip_
destination->pitch[0], destination->pitch[1], hip_interop_dev_mem.hip_mapped_device_mem + roi_offset, hip_interop_dev_mem.pitch[0]);
} else {
// Copy Luma
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, 0, destination, decode_params, is_roi_valid));
if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) {
// Extract the interleaved UV channels and copy them into the second and third channels of the destination.
ConvertInterleavedUVToPlanarUV(hip_stream_, picture_width >> 1, picture_height >> 1, destination->channel[1], destination->channel[2],
destination->pitch[1], hip_interop_dev_mem.hip_mapped_device_mem + hip_interop_dev_mem.offset[1] + roi_offset, hip_interop_dev_mem.pitch[1]);
} else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P ||
hip_interop_dev_mem.surface_format == VA_FOURCC_422V) {
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 1, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, chroma_height, 2, destination, decode_params, is_roi_valid));
}
}
return ROCJPEG_STATUS_SUCCESS;
@@ -699,7 +700,7 @@ RocJpegStatus RocJpegDecoder::GetYOutputFormat(HipInteropDeviceMem& hip_interop_
hip_interop_dev_mem.hip_mapped_device_mem + roi_offset, hip_interop_dev_mem.pitch[0]);
} else {
// Copy Luma
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, destination, decode_params, is_roi_valid));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_width, picture_height, 0, destination, decode_params, is_roi_valid));
}
return ROCJPEG_STATUS_SUCCESS;
}
+2 -1
Visa fil
@@ -119,12 +119,13 @@ private:
/**
* @brief Copies a channel from the HIP interop device memory to the destination image.
* @param hip_interop The HIP interop device memory.
* @param channel_width The width of the channel.
* @param channel_height The height of the channel.
* @param channel_index The index of the channel.
* @param destination Pointer to the destination image.
* @return The status of the operation.
*/
RocJpegStatus CopyChannel(HipInteropDeviceMem& hip_interop, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination, const RocJpegDecodeParams *decode_params, bool is_roi_valid);
RocJpegStatus CopyChannel(HipInteropDeviceMem& hip_interop, uint16_t channel_width, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination, const RocJpegDecodeParams *decode_params, bool is_roi_valid);
/**
* @brief Converts the image to RGB color space.
+13 -9
Visa fil
@@ -181,7 +181,7 @@ RocJpegStatus RocJpegVaapiMemoryPool::AddPoolEntry(uint32_t surface_format, cons
*/
RocJpegVaapiMemPoolEntry RocJpegVaapiMemoryPool::GetEntry(uint32_t surface_format, uint32_t image_width, uint32_t image_height, uint32_t num_surfaces) {
for (auto& entry : mem_pool_[surface_format]) {
if (entry.image_width == image_width && entry.image_height == image_height && entry.va_surface_ids.size() == num_surfaces && entry.entry_status == kIdle) {
if (entry.image_width >= image_width && entry.image_height >= image_height && entry.va_surface_ids.size() == num_surfaces && entry.entry_status == kIdle) {
entry.entry_status = kBusy;
return entry;
}
@@ -334,7 +334,7 @@ bool RocJpegVaapiMemoryPool::SetSurfaceAsIdle(VASurfaceID surface_id) {
* @param device_id The ID of the device to be used for decoding.
*/
RocJpegVappiDecoder::RocJpegVappiDecoder(int device_id) : device_id_{device_id}, drm_fd_{-1}, min_picture_width_{64}, min_picture_height_{64},
max_picture_width_{4096}, max_picture_height_{4096}, supports_modifiers_{false}, va_display_{0}, va_config_attrib_{{}}, va_config_id_{0}, va_profile_{VAProfileJPEGBaseline},
max_picture_width_{4096}, max_picture_height_{4096}, default_surface_width_{3840}, default_surface_height_{2160}, supports_modifiers_{false}, va_display_{0}, va_config_attrib_{{}}, va_config_id_{0}, va_profile_{VAProfileJPEGBaseline},
vaapi_mem_pool_(std::make_unique<RocJpegVaapiMemoryPool>()), current_vcn_jpeg_spec_{}, va_picture_parameter_buf_id_{0}, va_quantization_matrix_buf_id_{0}, va_huffmantable_buf_id_{0},
va_slice_param_buf_id_{0}, va_slice_data_buf_id_{0} {};
@@ -410,7 +410,7 @@ RocJpegStatus RocJpegVappiDecoder::InitializeDecoder(std::string device_name, in
vaapi_mem_pool_->SetVaapiDisplay(va_display_);
GetNumJpegCores();
vaapi_mem_pool_->SetPoolSize(current_vcn_jpeg_spec_.num_jpeg_cores + 1);
vaapi_mem_pool_->SetPoolSize(5 * current_vcn_jpeg_spec_.num_jpeg_cores + 1);
return ROCJPEG_STATUS_SUCCESS;
}
@@ -694,9 +694,11 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecode(const JpegStreamParameters *jpeg
RocJpegVaapiMemPoolEntry mem_pool_entry = vaapi_mem_pool_->GetEntry(surface_pixel_format, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, 1);
if (mem_pool_entry.va_surface_ids.empty()) {
mem_pool_entry.va_surface_ids.resize(1);
CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, mem_pool_entry.va_surface_ids.data(), 1, surface_attribs.data(), surface_attribs.size()));
mem_pool_entry.image_width = jpeg_stream_params->picture_parameter_buffer.picture_width;
mem_pool_entry.image_height = jpeg_stream_params->picture_parameter_buffer.picture_height;
uint32_t surface_width = (jpeg_stream_params->picture_parameter_buffer.picture_width > default_surface_width_) ? jpeg_stream_params->picture_parameter_buffer.picture_width : default_surface_width_;
uint32_t surface_height = (jpeg_stream_params->picture_parameter_buffer.picture_height > default_surface_height_) ? jpeg_stream_params->picture_parameter_buffer.picture_height : default_surface_height_;
CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, surface_width, surface_height, mem_pool_entry.va_surface_ids.data(), 1, surface_attribs.data(), surface_attribs.size()));
mem_pool_entry.image_width = surface_width;
mem_pool_entry.image_height = surface_height;
mem_pool_entry.hip_interops.resize(1, HipInteropDeviceMem());
surface_id = mem_pool_entry.va_surface_ids[0];
mem_pool_entry.entry_status = kBusy;
@@ -825,9 +827,11 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecodeBatched(JpegStreamParameters *jpe
RocJpegVaapiMemPoolEntry mem_pool_entry = vaapi_mem_pool_->GetEntry(key.pixel_format, key.width, key.height, indices.size());
if (mem_pool_entry.va_surface_ids.empty()) {
mem_pool_entry.va_surface_ids.resize(indices.size());
CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, key.width, key.height, mem_pool_entry.va_surface_ids.data(), mem_pool_entry.va_surface_ids.size(), surface_attribs.data(), supports_modifiers_ ? 2 : 1));
mem_pool_entry.image_width = key.width;
mem_pool_entry.image_height = key.height;
uint32_t surface_width = (key.width > default_surface_width_) ? key.width : default_surface_width_;
uint32_t surface_height = (key.height > default_surface_height_) ? key.height : default_surface_height_;
CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, surface_width, surface_height, mem_pool_entry.va_surface_ids.data(), mem_pool_entry.va_surface_ids.size(), surface_attribs.data(), supports_modifiers_ ? 2 : 1));
mem_pool_entry.image_width = surface_width;
mem_pool_entry.image_height = surface_height;
for (size_t i = 0; i < mem_pool_entry.va_surface_ids.size(); i++) {
surface_ids[indices[i]] = mem_pool_entry.va_surface_ids[i];
}
@@ -338,6 +338,8 @@ private:
uint32_t min_picture_height_; // The minimum height of the picture
uint32_t max_picture_width_; // The maximum width of the picture
uint32_t max_picture_height_; // The maximum height of the picture
uint32_t default_surface_width_; // The default width of the surface
uint32_t default_surface_height_; // The default height of the surface
bool supports_modifiers_; // DRM format modifiers support
VADisplay va_display_; // The VAAPI display
VAContextID va_context_id_; // The VAAPI context ID