Add support for memory pool management for vaapi surfaces and vaapi-hip interops (#17)

* Add support for memory pool management for vaapi surfaces

* reuse memories for each channel if the sizes remain unchanged

* rename hip_interop to hip_interop_dev_mem
This commit is contained in:
Aryan Salmanpour
2024-04-22 14:37:45 -04:00
کامیت شده توسط GitHub
والد 6aaca70cd9
کامیت 694a4bc07f
5فایلهای تغییر یافته به همراه324 افزوده شده و 220 حذف شده
+51 -96
مشاهده پرونده
@@ -23,7 +23,7 @@ THE SOFTWARE.
#include "rocjpeg_decoder.h"
ROCJpegDecoder::ROCJpegDecoder(RocJpegBackend backend, int device_id) :
num_devices_{0}, device_id_ {device_id}, hip_stream_ {0}, backend_{backend}, hip_interop_{} {}
num_devices_{0}, device_id_ {device_id}, hip_stream_ {0}, backend_{backend} {}
ROCJpegDecoder::~ROCJpegDecoder() {
if (hip_stream_) {
@@ -81,43 +81,42 @@ RocJpegStatus ROCJpegDecoder::Decode(const uint8_t *data, size_t length, RocJpeg
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SubmitDecode(jpeg_stream_params, current_surface_id, output_format));
if (destination != nullptr) {
VADRMPRIMESurfaceDescriptor va_drm_prime_surface_desc = {};
HipInteropDeviceMem hip_interop_dev_mem = {};
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SyncSurface(current_surface_id));
CHECK_ROCJPEG(jpeg_vaapi_decoder_.ExportSurface(current_surface_id, va_drm_prime_surface_desc));
CHECK_ROCJPEG(GetHipInteropMem(va_drm_prime_surface_desc));
CHECK_ROCJPEG(jpeg_vaapi_decoder_.GetHipInteropMem(current_surface_id, hip_interop_dev_mem));
uint16_t chroma_height = 0;
switch (output_format) {
case ROCJPEG_OUTPUT_NATIVE:
// Copy the native decoded output buffers from interop memory directly to the destination buffers
CHECK_ROCJPEG(GetChromaHeight(jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height));
CHECK_ROCJPEG(GetChromaHeight(hip_interop_dev_mem.surface_format, jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height));
// Copy Luma (first channel) for any surface format
CHECK_ROCJPEG(CopyChannel(destination, jpeg_stream_params->picture_parameter_buffer.picture_height, 0));
if (hip_interop_.surface_format == VA_FOURCC_NV12) {
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_height, 0, destination));
if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) {
// Copy the second channel (UV interleaved) for NV12
CHECK_ROCJPEG(CopyChannel(destination, chroma_height, 1));
} else if (hip_interop_.surface_format == VA_FOURCC_444P) {
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination));
} else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P) {
// Copy the second and third channels for YUV444
CHECK_ROCJPEG(CopyChannel(destination, chroma_height, 1));
CHECK_ROCJPEG(CopyChannel(destination, chroma_height, 2));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, destination));
}
break;
case ROCJPEG_OUTPUT_YUV_PLANAR:
CHECK_ROCJPEG(GetChromaHeight(jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height));
CHECK_ROCJPEG(GetPlanarYUVOutputFormat(jpeg_stream_params->picture_parameter_buffer.picture_width,
CHECK_ROCJPEG(GetChromaHeight(hip_interop_dev_mem.surface_format, jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height));
CHECK_ROCJPEG(GetPlanarYUVOutputFormat(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height, destination));
break;
case ROCJPEG_OUTPUT_Y:
CHECK_ROCJPEG(GetYOutputFormat(jpeg_stream_params->picture_parameter_buffer.picture_width,
CHECK_ROCJPEG(GetYOutputFormat(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, destination));
break;
case ROCJPEG_OUTPUT_RGB:
CHECK_ROCJPEG(ColorConvertToRGB(jpeg_stream_params->picture_parameter_buffer.picture_width,
CHECK_ROCJPEG(ColorConvertToRGB(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, destination));
break;
case ROCJPEG_OUTPUT_RGB_PLANAR:
CHECK_ROCJPEG(ColorConvertToRGBPlanar(jpeg_stream_params->picture_parameter_buffer.picture_width,
CHECK_ROCJPEG(ColorConvertToRGBPlanar(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, destination));
break;
default:
@@ -125,8 +124,6 @@ RocJpegStatus ROCJpegDecoder::Decode(const uint8_t *data, size_t length, RocJpeg
}
CHECK_HIP(hipStreamSynchronize(hip_stream_));
CHECK_ROCJPEG(ReleaseHipInteropMem(current_surface_id));
}
return ROCJPEG_STATUS_SUCCESS;
@@ -188,63 +185,21 @@ RocJpegStatus ROCJpegDecoder::GetImageInfo(const uint8_t *data, size_t length, u
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetHipInteropMem(VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc) {
hipExternalMemoryHandleDesc external_mem_handle_desc = {};
hipExternalMemoryBufferDesc external_mem_buffer_desc = {};
external_mem_handle_desc.type = hipExternalMemoryHandleTypeOpaqueFd;
external_mem_handle_desc.handle.fd = va_drm_prime_surface_desc.objects[0].fd;
external_mem_handle_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipImportExternalMemory(&hip_interop_.hip_ext_mem, &external_mem_handle_desc));
external_mem_buffer_desc.size = va_drm_prime_surface_desc.objects[0].size;
CHECK_HIP(hipExternalMemoryGetMappedBuffer((void**)&hip_interop_.hip_mapped_device_mem, hip_interop_.hip_ext_mem, &external_mem_buffer_desc));
hip_interop_.surface_format = va_drm_prime_surface_desc.fourcc;
hip_interop_.width = va_drm_prime_surface_desc.width;
hip_interop_.height = va_drm_prime_surface_desc.height;
hip_interop_.offset[0] = va_drm_prime_surface_desc.layers[0].offset[0];
hip_interop_.offset[1] = va_drm_prime_surface_desc.layers[1].offset[0];
hip_interop_.offset[2] = va_drm_prime_surface_desc.layers[2].offset[0];
hip_interop_.pitch[0] = va_drm_prime_surface_desc.layers[0].pitch[0];
hip_interop_.pitch[1] = va_drm_prime_surface_desc.layers[1].pitch[0];
hip_interop_.pitch[2] = va_drm_prime_surface_desc.layers[2].pitch[0];
hip_interop_.num_layers = va_drm_prime_surface_desc.num_layers;
for (uint32_t i = 0; i < va_drm_prime_surface_desc.num_objects; ++i) {
close(va_drm_prime_surface_desc.objects[i].fd);
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::ReleaseHipInteropMem(VASurfaceID current_surface_id) {
if (hip_interop_.hip_mapped_device_mem != nullptr) {
CHECK_HIP(hipFree(hip_interop_.hip_mapped_device_mem));
}
if (hip_interop_.hip_ext_mem != nullptr) {
CHECK_HIP(hipDestroyExternalMemory(hip_interop_.hip_ext_mem));
}
memset((void*)&hip_interop_, 0, sizeof(hip_interop_));
CHECK_ROCJPEG(jpeg_vaapi_decoder_.ReleaseSurface(current_surface_id));
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::CopyChannel(RocJpegImage *destination, uint16_t channel_height, uint8_t channel_index) {
if (hip_interop_.pitch[channel_index] != 0 && destination->pitch[channel_index] != 0 && destination->channel[channel_index] != nullptr) {
if (destination->pitch[channel_index] == hip_interop_.pitch[channel_index]) {
RocJpegStatus ROCJpegDecoder::CopyChannel(HipInteropDeviceMem& hip_interop_dev_mem, uint16_t channel_height, uint8_t channel_index, RocJpegImage *destination) {
if (hip_interop_dev_mem.pitch[channel_index] != 0 && destination->pitch[channel_index] != 0 && destination->channel[channel_index] != nullptr) {
if (destination->pitch[channel_index] == hip_interop_dev_mem.pitch[channel_index]) {
uint32_t channel_size = destination->pitch[channel_index] * channel_height;
CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[channel_index], hip_interop_.hip_mapped_device_mem + hip_interop_.offset[channel_index], channel_size, hip_stream_));
CHECK_HIP(hipMemcpyDtoDAsync(destination->channel[channel_index], hip_interop_dev_mem.hip_mapped_device_mem + hip_interop_dev_mem.offset[channel_index], channel_size, hip_stream_));
} else {
CHECK_HIP(hipMemcpy2DAsync(destination->channel[channel_index], destination->pitch[channel_index], hip_interop_.hip_mapped_device_mem + hip_interop_.offset[channel_index], hip_interop_.pitch[channel_index],
CHECK_HIP(hipMemcpy2DAsync(destination->channel[channel_index], destination->pitch[channel_index], hip_interop_dev_mem.hip_mapped_device_mem + hip_interop_dev_mem.offset[channel_index], hip_interop_dev_mem.pitch[channel_index],
destination->pitch[channel_index], channel_height, hipMemcpyDeviceToDevice, hip_stream_));
}
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetChromaHeight(uint16_t picture_height, uint16_t &chroma_height) {
switch (hip_interop_.surface_format) {
RocJpegStatus ROCJpegDecoder::GetChromaHeight(uint32_t surface_format, uint16_t picture_height, uint16_t &chroma_height) {
switch (surface_format) {
case VA_FOURCC_NV12: /*NV12: two-plane 8-bit YUV 4:2:0*/
chroma_height = picture_height >> 1;
break;
@@ -263,28 +218,28 @@ RocJpegStatus ROCJpegDecoder::GetChromaHeight(uint16_t picture_height, uint16_t
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::ColorConvertToRGB(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
switch (hip_interop_.surface_format) {
RocJpegStatus ROCJpegDecoder::ColorConvertToRGB(HipInteropDeviceMem& hip_interop_dev_mem, uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
switch (hip_interop_dev_mem.surface_format) {
case VA_FOURCC_444P:
ColorConvertYUV444ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0], hip_interop_.offset[1]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0], hip_interop_dev_mem.offset[1]);
break;
case ROCJPEG_FOURCC_YUYV:
ColorConvertYUYVToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]);
break;
case VA_FOURCC_NV12:
ColorConvertNV12ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0],
hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1], hip_interop_.pitch[1]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0],
hip_interop_dev_mem.hip_mapped_device_mem + hip_interop_dev_mem.offset[1], hip_interop_dev_mem.pitch[1]);
break;
case VA_FOURCC_Y800:
ColorConvertYUV400ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]);
break;
case VA_FOURCC_RGBA:
ColorConvertRGBAToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]);
break;
default:
ERR("ERROR! surface format is not supported!");
@@ -293,29 +248,29 @@ RocJpegStatus ROCJpegDecoder::ColorConvertToRGB(uint32_t picture_width, uint32_t
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::ColorConvertToRGBPlanar(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
switch (hip_interop_.surface_format) {
RocJpegStatus ROCJpegDecoder::ColorConvertToRGBPlanar(HipInteropDeviceMem& hip_interop_dev_mem, uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
switch (hip_interop_dev_mem.surface_format) {
case VA_FOURCC_444P:
ColorConvertYUV444ToRGBPlanar(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0], hip_interop_.offset[1]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0], hip_interop_dev_mem.offset[1]);
break;
case ROCJPEG_FOURCC_YUYV:
ColorConvertYUYVToRGBPlanar(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]);
break;
case VA_FOURCC_NV12:
ColorConvertNV12ToRGBPlanar(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0],
hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1], hip_interop_.pitch[1]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0],
hip_interop_dev_mem.hip_mapped_device_mem + hip_interop_dev_mem.offset[1], hip_interop_dev_mem.pitch[1]);
break;
case VA_FOURCC_Y800:
ColorConvertYUV400ToRGBPlanar(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]);
break;
case VA_FOURCC_RGBP:
// Copy red, green, and blue channels from the interop memory into the destination
for (uint8_t channel_index = 0; channel_index < 3; channel_index++) {
CHECK_ROCJPEG(CopyChannel(destination, picture_height, channel_index));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, channel_index, destination));
}
break;
default:
@@ -325,33 +280,33 @@ RocJpegStatus ROCJpegDecoder::ColorConvertToRGBPlanar(uint32_t picture_width, ui
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetPlanarYUVOutputFormat(uint32_t picture_width, uint32_t picture_height, uint16_t chroma_height, RocJpegImage *destination) {
if (hip_interop_.surface_format == ROCJPEG_FOURCC_YUYV) {
RocJpegStatus ROCJpegDecoder::GetPlanarYUVOutputFormat(HipInteropDeviceMem& hip_interop_dev_mem, uint32_t picture_width, uint32_t picture_height, uint16_t chroma_height, RocJpegImage *destination) {
if (hip_interop_dev_mem.surface_format == ROCJPEG_FOURCC_YUYV) {
// Extract the packed YUYV and copy them into the first, second, and thrid channels of the destination.
ConvertPackedYUYVToPlanarYUV(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2],
destination->pitch[0], destination->pitch[1], hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
destination->pitch[0], destination->pitch[1], hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]);
} else {
// Copy Luma
CHECK_ROCJPEG(CopyChannel(destination, picture_height, 0));
if (hip_interop_.surface_format == VA_FOURCC_NV12) {
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, destination));
if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) {
// Extract the interleaved UV channels and copy them into the second and thrid channels of the destination.
ConvertInterleavedUVToPlanarUV(hip_stream_, picture_width >> 1, picture_height >> 1, destination->channel[1], destination->channel[2],
destination->pitch[1], hip_interop_.hip_mapped_device_mem + hip_interop_.offset[1] , hip_interop_.pitch[1]);
} else if (hip_interop_.surface_format == VA_FOURCC_444P) {
CHECK_ROCJPEG(CopyChannel(destination, chroma_height, 1));
CHECK_ROCJPEG(CopyChannel(destination, chroma_height, 2));
destination->pitch[1], hip_interop_dev_mem.hip_mapped_device_mem + hip_interop_dev_mem.offset[1] , hip_interop_dev_mem.pitch[1]);
} else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P) {
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, destination));
}
}
return ROCJPEG_STATUS_SUCCESS;
}
RocJpegStatus ROCJpegDecoder::GetYOutputFormat(uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
if (hip_interop_.surface_format == ROCJPEG_FOURCC_YUYV) {
RocJpegStatus ROCJpegDecoder::GetYOutputFormat(HipInteropDeviceMem& hip_interop_dev_mem, uint32_t picture_width, uint32_t picture_height, RocJpegImage *destination) {
if (hip_interop_dev_mem.surface_format == ROCJPEG_FOURCC_YUYV) {
ExtractYFromPackedYUYV(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0],
hip_interop_.hip_mapped_device_mem, hip_interop_.pitch[0]);
hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]);
} else {
// Copy Luma
CHECK_ROCJPEG(CopyChannel(destination, picture_height, 0));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, picture_height, 0, destination));
}
return ROCJPEG_STATUS_SUCCESS;
}