Add support for rocJpegDecodeBatched API - part 2 of 2 (#39)

* Modify the batchDecode submission

* Add support for grouping the jpeg streams based on their chroma_subsampling, width, and height

* Modify the RocJpegVappiMemPoolEntry to use vectors for va_surface_ids and hip_interops

* Add a function to get the current vcn jpeg spec

* Add support to get hip interop mem for the batched submission

* switch to std::mutext

* Updatet the rocJPEG version

* use std::find to find the surface_id

* correct the vcn_jpeg_spec_ table for gfx942-mi300a/mi300x

* Fix typos

* pass the decode_params to the SubmitDecode function
Этот коммит содержится в:
Aryan Salmanpour
2024-07-12 15:20:38 -04:00
коммит произвёл GitHub
родитель d9df1e04b5
Коммит 3f0be8387e
5 изменённых файлов: 385 добавлений и 147 удалений
+75 -6
Просмотреть файл
@@ -102,7 +102,7 @@ RocJpegStatus RocJpegDecoder::InitializeDecoder() {
* @return The status of the JPEG decoding operation.
*/
RocJpegStatus RocJpegDecoder::Decode(RocJpegStreamHandle jpeg_stream_handle, const RocJpegDecodeParams *decode_params, RocJpegImage *destination) {
std::lock_guard<std::recursive_mutex> lock(mutex_);
std::lock_guard<std::mutex> lock(mutex_);
RocJpegStatus rocjpeg_status = ROCJPEG_STATUS_SUCCESS;
if (jpeg_stream_handle == nullptr || decode_params == nullptr || destination == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
@@ -111,7 +111,7 @@ RocJpegStatus RocJpegDecoder::Decode(RocJpegStreamHandle jpeg_stream_handle, con
const JpegStreamParameters *jpeg_stream_params = rocjpeg_stream_handle->rocjpeg_stream->GetJpegStreamParameters();
VASurfaceID current_surface_id;
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SubmitDecode(jpeg_stream_params, current_surface_id, decode_params->output_format));
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SubmitDecode(jpeg_stream_params, current_surface_id, decode_params));
HipInteropDeviceMem hip_interop_dev_mem = {};
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SyncSurface(current_surface_id));
@@ -171,10 +171,79 @@ RocJpegStatus RocJpegDecoder::Decode(RocJpegStreamHandle jpeg_stream_handle, con
* @return A RocJpegStatus value indicating the success or failure of the decoding operation.
*/
RocJpegStatus RocJpegDecoder::DecodeBatched(RocJpegStreamHandle *jpeg_streams, int batch_size, const RocJpegDecodeParams *decode_params, RocJpegImage *destinations) {
std::lock_guard<std::recursive_mutex> lock(mutex_);
for(int i = 0; i < batch_size; i++) {
CHECK_ROCJPEG(Decode(jpeg_streams[i], decode_params, &destinations[i]));
std::lock_guard<std::mutex> lock(mutex_);
if (jpeg_streams == nullptr || decode_params == nullptr || destinations == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}
std::vector<VASurfaceID> current_surface_ids;
std::vector<JpegStreamParameters> jpeg_streams_params;
current_surface_ids.resize(batch_size);
jpeg_streams_params.resize(batch_size);
VcnJpegSpec current_vcn_jpeg_spec = jpeg_vaapi_decoder_.GetCurrentVcnJpegSpec();
for (int i = 0; i < batch_size; i += current_vcn_jpeg_spec.num_jpeg_cores) {
int batch_end = std::min(i + static_cast<int>(current_vcn_jpeg_spec.num_jpeg_cores), batch_size);
int current_batch_size = batch_end - i;
for (int j = i; j < batch_end; j++) {
auto rocjpeg_stream_handle = static_cast<RocJpegStreamParserHandle*>(jpeg_streams[j]);
const JpegStreamParameters *jpeg_stream_params = rocjpeg_stream_handle->rocjpeg_stream->GetJpegStreamParameters();
jpeg_streams_params[j] = std::move(*jpeg_stream_params);
}
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SubmitDecodeBatched(jpeg_streams_params.data() + i, current_batch_size, decode_params, current_surface_ids.data() + i));
for (int k = 0; k < current_batch_size; k++) {
HipInteropDeviceMem hip_interop_dev_mem = {};
VASurfaceID current_surface_id = *(current_surface_ids.data() + k + i);
const JpegStreamParameters *jpeg_stream_params = jpeg_streams_params.data() + k + i;
CHECK_ROCJPEG(jpeg_vaapi_decoder_.SyncSurface(current_surface_id));
CHECK_ROCJPEG(jpeg_vaapi_decoder_.GetHipInteropMem(current_surface_id, hip_interop_dev_mem));
uint16_t chroma_height = 0;
switch (decode_params->output_format) {
case ROCJPEG_OUTPUT_NATIVE:
// Copy the native decoded output buffers from interop memory directly to the destination buffers
CHECK_ROCJPEG(GetChromaHeight(hip_interop_dev_mem.surface_format, jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height));
// Copy Luma (first channel) for any surface format
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_height, 0, &destinations[k + i]));
if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) {
// Copy the second channel (UV interleaved) for NV12
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, &destinations[k + i]));
} else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P ||
hip_interop_dev_mem.surface_format == VA_FOURCC_422V) {
// Copy the second and third channels for YUV444 and YUV440 (i.e., YUV422V)
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, &destinations[k + i]));
CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, &destinations[k + i]));
}
break;
case ROCJPEG_OUTPUT_YUV_PLANAR:
CHECK_ROCJPEG(GetChromaHeight(hip_interop_dev_mem.surface_format, jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height));
CHECK_ROCJPEG(GetPlanarYUVOutputFormat(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, chroma_height, &destinations[k + i]));
break;
case ROCJPEG_OUTPUT_Y:
CHECK_ROCJPEG(GetYOutputFormat(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, &destinations[k + i]));
break;
case ROCJPEG_OUTPUT_RGB:
CHECK_ROCJPEG(ColorConvertToRGB(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, &destinations[k + i]));
break;
case ROCJPEG_OUTPUT_RGB_PLANAR:
CHECK_ROCJPEG(ColorConvertToRGBPlanar(hip_interop_dev_mem, jpeg_stream_params->picture_parameter_buffer.picture_width,
jpeg_stream_params->picture_parameter_buffer.picture_height, &destinations[k + i]));
break;
default:
break;
}
}
}
CHECK_HIP(hipStreamSynchronize(hip_stream_));
return ROCJPEG_STATUS_SUCCESS;
}
/**
@@ -192,7 +261,7 @@ RocJpegStatus RocJpegDecoder::DecodeBatched(RocJpegStreamHandle *jpeg_streams, i
* or ROCJPEG_STATUS_INVALID_PARAMETER if any of the input parameters are invalid.
*/
RocJpegStatus RocJpegDecoder::GetImageInfo(RocJpegStreamHandle jpeg_stream_handle, uint8_t *num_components, RocJpegChromaSubsampling *subsampling, uint32_t *widths, uint32_t *heights){
std::lock_guard<std::recursive_mutex> lock(mutex_);
std::lock_guard<std::mutex> lock(mutex_);
if (jpeg_stream_handle == nullptr || num_components == nullptr || subsampling == nullptr || widths == nullptr || heights == nullptr) {
return ROCJPEG_STATUS_INVALID_PARAMETER;
}