diff --git a/projects/rocjpeg/README.md b/projects/rocjpeg/README.md index f5274b2548..34a778b2ae 100644 --- a/projects/rocjpeg/README.md +++ b/projects/rocjpeg/README.md @@ -7,6 +7,7 @@ rocJPEG is a high performance JPEG decode SDK for AMD GPUs. Using the rocJPEG AP ## Supported JPEG chroma subsampling * YUV 4:4:4 +* YUV 4:4:0 * YUV 4:2:2 * YUV 4:2:0 * YUV 4:0:0 diff --git a/projects/rocjpeg/api/rocjpeg.h b/projects/rocjpeg/api/rocjpeg.h index 6217cd56f4..bc1603ff8a 100644 --- a/projects/rocjpeg/api/rocjpeg.h +++ b/projects/rocjpeg/api/rocjpeg.h @@ -122,7 +122,7 @@ typedef struct { */ typedef enum { /**< return native unchanged decoded YUV image from the VCN JPEG deocder. - For ROCJPEG_CSS_444 write Y, U, and V to first, second, and third channels of RocJpegImage + For ROCJPEG_CSS_444 and ROCJPEG_CSS_440 write Y, U, and V to first, second, and third channels of RocJpegImage For ROCJPEG_CSS_422 write YUYV (packed) to first channel of RocJpegImage For ROCJPEG_CSS_420 write Y to first channel and UV (interleaved) to second channel of RocJpegImage For ROCJPEG_CSS_400 write Y to first channel of RocJpegImage */ diff --git a/projects/rocjpeg/docs/how-to/using-rocjpeg.rst b/projects/rocjpeg/docs/how-to/using-rocjpeg.rst index 737b8e4488..1ac02162b8 100644 --- a/projects/rocjpeg/docs/how-to/using-rocjpeg.rst +++ b/projects/rocjpeg/docs/how-to/using-rocjpeg.rst @@ -103,7 +103,7 @@ list is composed of the chroma subsampling property retrieved from the JPEG imag .. note:: - The VCN hardware-accelerated JPEG decoder in AMD GPUs only supports decoding JPEG images with ``ROCJPEG_CSS_444``, ``ROCJPEG_CSS_422``, + The VCN hardware-accelerated JPEG decoder in AMD GPUs only supports decoding JPEG images with ``ROCJPEG_CSS_444``, ``ROCJPEG_CSS_440``, ``ROCJPEG_CSS_422``, ``ROCJPEG_CSS_420``, and ``ROCJPEG_CSS_400`` chroma subsampling. 6. Decode a JPEG stream @@ -149,7 +149,7 @@ You can set the ``RocJpegOutputFormat`` parameter of the ``RocJpegDecodeParams`` For example, if ``output_format`` is set to ``ROCJPEG_OUTPUT_NATIVE``, then based on the chroma subsampling of the input image, the ``rocJpegDecode()`` function does one of the following: -* For ``ROCJPEG_CSS_444`` write Y, U, and V to first, second, and third channels of ``RocJpegImage``. +* For ``ROCJPEG_CSS_444`` and ``ROCJPEG_CSS_440`` write Y, U, and V to first, second, and third channels of ``RocJpegImage``. * For ``ROCJPEG_CSS_422`` write YUYV (packed) to first channel of ``RocJpegImage``. * For ``ROCJPEG_CSS_420`` write Y to first channel and UV (interleaved) to second channel of ``RocJpegImage``. * For ``ROCJPEG_CSS_400`` write Y to first channel of ``RocJpegImage``. @@ -168,10 +168,11 @@ the required size for the output buffers for a single decode JPEG. To optimally :header: "output_format", "chroma subsampling", "destination.pitch[c] should be atleast:", "destination.channel[c] should be atleast:" "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_444", "destination.pitch[c] = widths[c] for c = 0, 1, 2", "destination.channel[c] = destination.pitch[c] * heights[0] for c = 0, 1, 2" + "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_440", "destination.pitch[c] = widths[c] for c = 0, 1, 2", "destination.channel[0] = destination.pitch[0] * heights[0], destination.channel[c] = destination.pitch[c] * heights[0] / 2 for c = 1, 2" "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_422", "destination.pitch[0] = widths[0] * 2", "destination.channel[0] = destination.pitch[0] * heights[0]" "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_420", "destination.pitch[1] = destination.pitch[0] = widths[0]", "destination.channel[0] = destination.pitch[0] * heights[0], destination.channel[1] = destination.pitch[1] * (heights[0] >> 1)" "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_400", "destination.pitch[0] = widths[0]", "destination.channel[0] = destination.pitch[0] * heights[0]" - "ROCJPEG_OUTPUT_YUV_PLANAR", "ROCJPEG_CSS_444, ROCJPEG_CSS_422, ROCJPEG_CSS_420", "destination.pitch[c] = widths[c] for c = 0, 1, 2", "destination.channel[c] = destination.pitch[c] * heights[c] for c = 0, 1, 2" + "ROCJPEG_OUTPUT_YUV_PLANAR", "ROCJPEG_CSS_444, ROCJPEG_CSS_440, ROCJPEG_CSS_422, ROCJPEG_CSS_420", "destination.pitch[c] = widths[c] for c = 0, 1, 2", "destination.channel[c] = destination.pitch[c] * heights[c] for c = 0, 1, 2" "ROCJPEG_OUTPUT_YUV_PLANAR", "ROCJPEG_CSS_400", "destination.pitch[0] = widths[0]", "destination.channel[0] = destination.pitch[0] * heights[0]" "ROCJPEG_OUTPUT_Y", "Any of the supported chroma subsampling", "destination.pitch[0] = widths[0]", "destination.channel[0] = destination.pitch[0] * heights[0]" "ROCJPEG_OUTPUT_RGB", "Any of the supported chroma subsampling", "destination.pitch[0] = widths[0] * 3", "destination.channel[0] = destination.pitch[0] * heights[0]" diff --git a/projects/rocjpeg/docs/install/install.rst b/projects/rocjpeg/docs/install/install.rst index 738f521581..a46c429f99 100644 --- a/projects/rocjpeg/docs/install/install.rst +++ b/projects/rocjpeg/docs/install/install.rst @@ -29,6 +29,7 @@ Supported JPEG chroma subsampling ======================================== * YUV 4:4:4 +* YUV 4:4:0 * YUV 4:2:2 * YUV 4:2:0 * YUV 4:0:0 @@ -45,17 +46,23 @@ Prerequisites * `ROCm-supported hardware `_ (``gfx908`` or higher is required) -* Install ROCm 6.1.0 or later with +* Install ROCm 6.3.0 or later with `amdgpu-install `_ * Run: ``--usecase=rocm`` * To install rocJPEG with minimum requirements, follow the :doc:`quick-start instructions <./quick-start>` -* AMD multimedia packages +* Video Acceleration API - Version `1.5.0+` - `Libva` is an implementation for VA-API .. code:: shell - sudo apt install libva-amdgpu-dev libdrm-amdgpu1 mesa-amdgpu-va-drivers + sudo apt install libva-dev + +* AMD VA Drivers + + .. code:: shell + + sudo apt install mesa-amdgpu-va-drivers * CMake 3.5 or later @@ -63,7 +70,7 @@ Prerequisites sudo apt install cmake -* `pkg-config `_ +* pkg-config .. code:: shell diff --git a/projects/rocjpeg/samples/jpegDecode/jpegdecode.cpp b/projects/rocjpeg/samples/jpegDecode/jpegdecode.cpp index 2d14a9bc3c..93ae38bf38 100644 --- a/projects/rocjpeg/samples/jpegDecode/jpegdecode.cpp +++ b/projects/rocjpeg/samples/jpegDecode/jpegdecode.cpp @@ -91,7 +91,7 @@ int main(int argc, char **argv) { std::cout << "Input file name: " << base_file_name << std::endl; std::cout << "Input image resolution: " << widths[0] << "x" << heights[0] << std::endl; std::cout << "Chroma subsampling: " + chroma_sub_sampling << std::endl; - if (subsampling == ROCJPEG_CSS_440 || subsampling == ROCJPEG_CSS_411) { + if (subsampling == ROCJPEG_CSS_411) { std::cerr << "The chroma sub-sampling is not supported by VCN Hardware" << std::endl; if (is_dir) { std::cout << std::endl; diff --git a/projects/rocjpeg/samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp b/projects/rocjpeg/samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp index aadb2f2cbd..445a23ca09 100644 --- a/projects/rocjpeg/samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp +++ b/projects/rocjpeg/samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp @@ -71,7 +71,7 @@ void ThreadFunction(std::vector& jpegFiles, RocJpegHandle rocjpeg_h CHECK_ROCJPEG(rocJpegStreamParse(reinterpret_cast(file_data.data()), file_size, rocjpeg_stream)); CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, rocjpeg_stream, &num_components, &subsampling, widths, heights)); - if (subsampling == ROCJPEG_CSS_440 || subsampling == ROCJPEG_CSS_411) { + if (subsampling == ROCJPEG_CSS_411) { std::cout << "The chroma sub-sampling is not supported by VCN Hardware" << std::endl; std::cout << "Skipping decoding file " << base_file_name << std::endl; return; diff --git a/projects/rocjpeg/samples/rocjpeg_samples_utils.h b/projects/rocjpeg/samples/rocjpeg_samples_utils.h index db062d8ae8..b34c5cc7ba 100644 --- a/projects/rocjpeg/samples/rocjpeg_samples_utils.h +++ b/projects/rocjpeg/samples/rocjpeg_samples_utils.h @@ -276,6 +276,12 @@ public: output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = widths[0]; channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = output_image.pitch[0] * heights[0]; break; + case ROCJPEG_CSS_440: + num_channels = 3; + output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] = widths[0]; + channel_sizes[0] = output_image.pitch[0] * heights[0]; + channel_sizes[2] = channel_sizes[1] = output_image.pitch[0] * (heights[0] >> 1); + break; case ROCJPEG_CSS_422: num_channels = 1; output_image.pitch[0] = widths[0] * 2; @@ -407,6 +413,11 @@ public: widths[2] = widths[1] = widths[0] = img_width; heights[2] = heights[1] = heights[0] = img_height; break; + case ROCJPEG_CSS_440: + widths[2] = widths[1] = widths[0] = img_width; + heights[0] = img_height; + heights[2] = heights[1] = img_height >> 1; + break; case ROCJPEG_CSS_422: widths[0] = img_width * 2; heights[0] = img_height; @@ -431,6 +442,11 @@ public: widths[2] = widths[1] = widths[0] = img_width; heights[2] = heights[1] = heights[0] = img_height; break; + case ROCJPEG_CSS_440: + widths[2] = widths[1] = widths[0] = img_width; + heights[0] = img_height; + heights[2] = heights[1] = img_height >> 1; + break; case ROCJPEG_CSS_422: widths[0] = img_width; widths[2] = widths[1] = widths[0] >> 1; diff --git a/projects/rocjpeg/src/rocjpeg_decoder.cpp b/projects/rocjpeg/src/rocjpeg_decoder.cpp index 14d6b2d745..0e799b636e 100644 --- a/projects/rocjpeg/src/rocjpeg_decoder.cpp +++ b/projects/rocjpeg/src/rocjpeg_decoder.cpp @@ -128,8 +128,9 @@ RocJpegStatus RocJpegDecoder::Decode(RocJpegStreamHandle jpeg_stream_handle, con if (hip_interop_dev_mem.surface_format == VA_FOURCC_NV12) { // Copy the second channel (UV interleaved) for NV12 CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination)); - } else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P) { - // Copy the second and third channels for YUV444 + } else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P || + hip_interop_dev_mem.surface_format == VA_FOURCC_422V) { + // Copy the second and third channels for YUV444 and YUV440 (i.e., YUV422V) CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination)); CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, destination)); } @@ -194,6 +195,11 @@ RocJpegStatus RocJpegDecoder::GetImageInfo(RocJpegStreamHandle jpeg_stream_handl widths[2] = widths[1] = widths[0]; heights[2] = heights[1] = heights[0]; break; + case CSS_440: + *subsampling = ROCJPEG_CSS_440; + widths[2] = widths[1] = widths[0]; + heights[2] = heights[1] = heights[0] >> 1; + break; case CSS_422: *subsampling = ROCJPEG_CSS_422; widths[2] = widths[1] = widths[0] >> 1; @@ -214,11 +220,6 @@ RocJpegStatus RocJpegDecoder::GetImageInfo(RocJpegStreamHandle jpeg_stream_handl widths[2] = widths[1] = widths[0] >> 2; heights[2] = heights[1] = heights[0]; break; - case CSS_440: - *subsampling = ROCJPEG_CSS_440; - widths[2] = widths[1] = widths[0] >> 1; - heights[2] = heights[1] = heights[0] >> 1; - break; default: *subsampling = ROCJPEG_CSS_UNKNOWN; break; @@ -278,6 +279,9 @@ RocJpegStatus RocJpegDecoder::GetChromaHeight(uint32_t surface_format, uint16_t case ROCJPEG_FOURCC_YUYV: /*YUYV: one-plane packed 8-bit YUV 4:2:2. Four bytes per pair of pixels: Y, U, Y, V*/ chroma_height = picture_height; break; + case VA_FOURCC_422V: /*422V: three-plane 8-bit YUV 4:4:0*/ + chroma_height = picture_height >> 1; + break; default: return ROCJPEG_STATUS_JPEG_NOT_SUPPORTED; } @@ -304,6 +308,10 @@ RocJpegStatus RocJpegDecoder::ColorConvertToRGB(HipInteropDeviceMem& hip_interop ColorConvertYUV444ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0], hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0], hip_interop_dev_mem.offset[1]); break; + case VA_FOURCC_422V: + ColorConvertYUV440ToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0], + hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0], hip_interop_dev_mem.offset[1], hip_interop_dev_mem.offset[2]); + break; case ROCJPEG_FOURCC_YUYV: ColorConvertYUYVToRGB(hip_stream_, picture_width, picture_height, destination->channel[0], destination->pitch[0], hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]); @@ -349,6 +357,10 @@ RocJpegStatus RocJpegDecoder::ColorConvertToRGBPlanar(HipInteropDeviceMem& hip_i ColorConvertYUV444ToRGBPlanar(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2], destination->pitch[0], hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0], hip_interop_dev_mem.offset[1]); break; + case VA_FOURCC_422V: + ColorConvertYUV440ToRGBPlanar(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2], destination->pitch[0], + hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0], hip_interop_dev_mem.offset[1], hip_interop_dev_mem.offset[2]); + break; case ROCJPEG_FOURCC_YUYV: ColorConvertYUYVToRGBPlanar(hip_stream_, picture_width, picture_height, destination->channel[0], destination->channel[1], destination->channel[2], destination->pitch[0], hip_interop_dev_mem.hip_mapped_device_mem, hip_interop_dev_mem.pitch[0]); @@ -404,7 +416,8 @@ RocJpegStatus RocJpegDecoder::GetPlanarYUVOutputFormat(HipInteropDeviceMem& hip_ // Extract the interleaved UV channels and copy them into the second and thrid channels of the destination. ConvertInterleavedUVToPlanarUV(hip_stream_, picture_width >> 1, picture_height >> 1, destination->channel[1], destination->channel[2], destination->pitch[1], hip_interop_dev_mem.hip_mapped_device_mem + hip_interop_dev_mem.offset[1] , hip_interop_dev_mem.pitch[1]); - } else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P) { + } else if (hip_interop_dev_mem.surface_format == VA_FOURCC_444P || + hip_interop_dev_mem.surface_format == VA_FOURCC_422V) { CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 1, destination)); CHECK_ROCJPEG(CopyChannel(hip_interop_dev_mem, chroma_height, 2, destination)); } diff --git a/projects/rocjpeg/src/rocjpeg_hip_kernels.cpp b/projects/rocjpeg/src/rocjpeg_hip_kernels.cpp index 127a7de348..7c20a667a1 100644 --- a/projects/rocjpeg/src/rocjpeg_hip_kernels.cpp +++ b/projects/rocjpeg/src/rocjpeg_hip_kernels.cpp @@ -486,6 +486,437 @@ void ColorConvertYUV444ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint3 dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp); } +__global__ void ColorConvertYUV440ToRGBKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, + uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) { + + int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if ((x < dst_width_comp) && (y < dst_height_comp)) { + uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3); + uint32_t src_y1_idx = src_y0_idx + src_yuv_image_stride_in_bytes; + uint32_t src_chroma_idx = y * src_yuv_image_stride_in_bytes + (x << 3); + + uint2 y0 = *((uint2 *)(&src_y_image[src_y0_idx])); + uint2 y1 = *((uint2 *)(&src_y_image[src_y1_idx])); + + uint2 u0 = *((uint2 *)(&src_u_image[src_chroma_idx])); + uint2 v0 = *((uint2 *)(&src_v_image[src_chroma_idx])); + + uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 24); + uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes; + + float2 cr = make_float2( 0.0000f, 1.5748f); + float2 cg = make_float2(-0.1873f, -0.4681f); + float2 cb = make_float2( 1.8556f, 0.0000f); + float3 yuv; + DUINT6 rgb0, rgb1; + float4 f; + + yuv = make_float3(hipUnpack0(y0.x), hipUnpack0(u0.x), hipUnpack0(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack1(y0.x), hipUnpack1(u0.x), hipUnpack1(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb0.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack2(y0.x), hipUnpack2(u0.x), hipUnpack2(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb0.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack3(y0.x), hipUnpack3(u0.x), hipUnpack3(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb0.data[2] = hipPack(f); + + yuv = make_float3(hipUnpack0(y0.y), hipUnpack0(u0.y), hipUnpack0(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack1(y0.y), hipUnpack1(u0.y), hipUnpack1(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb0.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack2(y0.y), hipUnpack2(u0.y), hipUnpack2(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb0.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack3(y0.y), hipUnpack3(u0.y), hipUnpack3(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb0.data[5] = hipPack(f); + + yuv = make_float3(hipUnpack0(y1.x), hipUnpack0(u0.x), hipUnpack0(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack1(y1.x), hipUnpack1(u0.x), hipUnpack1(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb1.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack2(y1.x), hipUnpack2(u0.x), hipUnpack2(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb1.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack3(y1.x), hipUnpack3(u0.x), hipUnpack3(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb1.data[2] = hipPack(f); + + yuv = make_float3(hipUnpack0(y1.y), hipUnpack0(u0.y), hipUnpack0(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack1(y1.y), hipUnpack1(u0.y), hipUnpack1(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb1.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack2(y1.y), hipUnpack2(u0.y), hipUnpack2(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb1.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack3(y1.y), hipUnpack3(u0.y), hipUnpack3(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb1.data[5] = hipPack(f); + + *((DUINT6 *)(&dst_image[rgb0_idx])) = rgb0; + *((DUINT6 *)(&dst_image[rgb1_idx])) = rgb1; + } +} + +/** + * @brief Converts YUV440 image to RGB image. + * + * This function takes a YUV440 image and converts it to an RGB image using the ColorConvertYUV444ToRGBKernel HIP kernel. + * + * @param stream The HIP stream used for asynchronous execution of the kernel. + * @param dst_width The width of the destination RGB image. + * @param dst_height The height of the destination RGB image. + * @param dst_image Pointer to the destination RGB image buffer. + * @param dst_image_stride_in_bytes The stride (in bytes) of the destination RGB image buffer. + * @param src_yuv_image Pointer to the source YUV440 image buffer. + * @param src_yuv_image_stride_in_bytes The stride (in bytes) of the source YUV440 image buffer. + * @param src_u_image_offset The offset (in bytes) to the U component in the source YUV440 image buffer. + */ +void ColorConvertYUV440ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset, uint32_t src_v_image_offset) { + + int32_t local_threads_x = 16; + int32_t local_threads_y = 4; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = (dst_height + 1) >> 1; + + uint32_t dst_width_comp = (dst_width + 7) / 8; + uint32_t dst_height_comp = (dst_height + 1) / 2; + uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2; + uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2; + + ColorConvertYUV440ToRGBKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image, + dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset, + src_yuv_image + src_v_image_offset, src_yuv_image_stride_in_bytes, + dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp); +} + +__global__ void ColorConvertYUV440ToRGBPlanarKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, + uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_y_image, const uint8_t *src_u_image, const uint8_t *src_v_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t dst_width_comp, uint32_t dst_height_comp, uint32_t src_yuv_image_stride_in_bytes_comp) { + + int32_t x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int32_t y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + if ((x < dst_width_comp) && (y < dst_height_comp)) { + uint32_t src_y0_idx = y * src_yuv_image_stride_in_bytes_comp + (x << 3); + uint32_t src_y1_idx = src_y0_idx + src_yuv_image_stride_in_bytes; + uint32_t src_chroma_idx = y * src_yuv_image_stride_in_bytes + (x << 3); + + uint2 y0 = *((uint2 *)(&src_y_image[src_y0_idx])); + uint2 y1 = *((uint2 *)(&src_y_image[src_y1_idx])); + + uint2 u0 = *((uint2 *)(&src_u_image[src_chroma_idx])); + uint2 v0 = *((uint2 *)(&src_v_image[src_chroma_idx])); + + uint32_t rgb0_idx = y * dst_image_stride_in_bytes_comp + (x * 8); + uint32_t rgb1_idx = rgb0_idx + dst_image_stride_in_bytes; + + float2 cr = make_float2( 0.0000f, 1.5748f); + float2 cg = make_float2(-0.1873f, -0.4681f); + float2 cb = make_float2( 1.8556f, 0.0000f); + float3 yuv; + DUINT6 rgb0, rgb1; + float4 f; + + yuv = make_float3(hipUnpack0(y0.x), hipUnpack0(u0.x), hipUnpack0(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack1(y0.x), hipUnpack1(u0.x), hipUnpack1(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb0.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack2(y0.x), hipUnpack2(u0.x), hipUnpack2(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb0.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack3(y0.x), hipUnpack3(u0.x), hipUnpack3(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb0.data[2] = hipPack(f); + + yuv = make_float3(hipUnpack0(y0.y), hipUnpack0(u0.y), hipUnpack0(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack1(y0.y), hipUnpack1(u0.y), hipUnpack1(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb0.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack2(y0.y), hipUnpack2(u0.y), hipUnpack2(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb0.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack3(y0.y), hipUnpack3(u0.y), hipUnpack3(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb0.data[5] = hipPack(f); + + yuv = make_float3(hipUnpack0(y1.x), hipUnpack0(u0.x), hipUnpack0(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack1(y1.x), hipUnpack1(u0.x), hipUnpack1(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb1.data[0] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack2(y1.x), hipUnpack2(u0.x), hipUnpack2(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb1.data[1] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack3(y1.x), hipUnpack3(u0.x), hipUnpack3(v0.x)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb1.data[2] = hipPack(f); + + yuv = make_float3(hipUnpack0(y1.y), hipUnpack0(u0.y), hipUnpack0(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.x = fmaf(cr.y, yuv.z, yuv.x); + f.y = fmaf(cg.x, yuv.y, yuv.x); + f.y = fmaf(cg.y, yuv.z, f.y); + f.z = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack1(y1.y), hipUnpack1(u0.y), hipUnpack1(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.w = fmaf(cr.y, yuv.z, yuv.x); + rgb1.data[3] = hipPack(f); + + f.x = fmaf(cg.x, yuv.y, yuv.x); + f.x = fmaf(cg.y, yuv.z, f.x); + f.y = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack2(y1.y), hipUnpack2(u0.y), hipUnpack2(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.z = fmaf(cr.y, yuv.z, yuv.x); + f.w = fmaf(cg.x, yuv.y, yuv.x); + f.w = fmaf(cg.y, yuv.z, f.w); + rgb1.data[4] = hipPack(f); + + f.x = fmaf(cb.x, yuv.y, yuv.x); + yuv = make_float3(hipUnpack3(y1.y), hipUnpack3(u0.y), hipUnpack3(v0.y)); + yuv.y -= 128.0f; + yuv.z -= 128.0f; + f.y = fmaf(cr.y, yuv.z, yuv.x); + f.z = fmaf(cg.x, yuv.y, yuv.x); + f.z = fmaf(cg.y, yuv.z, f.z); + f.w = fmaf(cb.x, yuv.y, yuv.x); + rgb1.data[5] = hipPack(f); + + uint2 red0, red1, green0, green1, blue0, blue1; + red0.x = hipPack(make_float4(hipUnpack0(rgb0.data[0]), hipUnpack3(rgb0.data[0]), hipUnpack2(rgb0.data[1]), hipUnpack1(rgb0.data[2]))); + red0.y = hipPack(make_float4(hipUnpack0(rgb0.data[3]), hipUnpack3(rgb0.data[3]), hipUnpack2(rgb0.data[4]), hipUnpack1(rgb0.data[5]))); + red1.x = hipPack(make_float4(hipUnpack0(rgb1.data[0]), hipUnpack3(rgb1.data[0]), hipUnpack2(rgb1.data[1]), hipUnpack1(rgb1.data[2]))); + red1.y = hipPack(make_float4(hipUnpack0(rgb1.data[3]), hipUnpack3(rgb1.data[3]), hipUnpack2(rgb1.data[4]), hipUnpack1(rgb1.data[5]))); + + green0.x = hipPack(make_float4(hipUnpack1(rgb0.data[0]), hipUnpack0(rgb0.data[1]), hipUnpack3(rgb0.data[1]), hipUnpack2(rgb0.data[2]))); + green0.y = hipPack(make_float4(hipUnpack1(rgb0.data[3]), hipUnpack0(rgb0.data[4]), hipUnpack3(rgb0.data[4]), hipUnpack2(rgb0.data[5]))); + green1.x = hipPack(make_float4(hipUnpack1(rgb1.data[0]), hipUnpack0(rgb1.data[1]), hipUnpack3(rgb1.data[1]), hipUnpack2(rgb1.data[2]))); + green1.y = hipPack(make_float4(hipUnpack1(rgb1.data[3]), hipUnpack0(rgb1.data[4]), hipUnpack3(rgb1.data[4]), hipUnpack2(rgb1.data[5]))); + + blue0.x = hipPack(make_float4(hipUnpack2(rgb0.data[0]), hipUnpack1(rgb0.data[1]), hipUnpack0(rgb0.data[2]), hipUnpack3(rgb0.data[2]))); + blue0.y = hipPack(make_float4(hipUnpack2(rgb0.data[3]), hipUnpack1(rgb0.data[4]), hipUnpack0(rgb0.data[5]), hipUnpack3(rgb0.data[5]))); + blue1.x = hipPack(make_float4(hipUnpack2(rgb1.data[0]), hipUnpack1(rgb1.data[1]), hipUnpack0(rgb1.data[2]), hipUnpack3(rgb1.data[2]))); + blue1.y = hipPack(make_float4(hipUnpack2(rgb1.data[3]), hipUnpack1(rgb1.data[4]), hipUnpack0(rgb1.data[5]), hipUnpack3(rgb1.data[5]))); + + *((uint2 *)(&dst_image_r[rgb0_idx])) = red0; + *((uint2 *)(&dst_image_r[rgb1_idx])) = red1; + *((uint2 *)(&dst_image_g[rgb0_idx])) = green0; + *((uint2 *)(&dst_image_g[rgb1_idx])) = green1; + *((uint2 *)(&dst_image_b[rgb0_idx])) = blue0; + *((uint2 *)(&dst_image_b[rgb1_idx])) = blue1; + } +} + + +/** + * @brief Converts YUV440 image to RGB planar format. + * + * This function takes a YUV440 image and converts it to RGB planar format using the ColorConvertYUV444ToRGBPlanarKernel HIP kernel. + * + * @param stream The HIP stream to be used for the kernel launch. + * @param dst_width The width of the destination RGB image. + * @param dst_height The height of the destination RGB image. + * @param dst_image_r Pointer to the destination red channel image. + * @param dst_image_g Pointer to the destination green channel image. + * @param dst_image_b Pointer to the destination blue channel image. + * @param dst_image_stride_in_bytes The stride (in bytes) of the destination image. + * @param src_yuv_image Pointer to the source YUV image. + * @param src_yuv_image_stride_in_bytes The stride (in bytes) of the source YUV image. + * @param src_u_image_offset The offset (in bytes) to the U channel in the source YUV image. + */ +void ColorConvertYUV440ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset, uint32_t src_v_image_offset) { + + int32_t local_threads_x = 16; + int32_t local_threads_y = 4; + int32_t global_threads_x = (dst_width + 7) >> 3; + int32_t global_threads_y = (dst_height + 1) >> 1; + + uint32_t dst_width_comp = (dst_width + 7) / 8; + uint32_t dst_height_comp = (dst_height + 1) / 2; + uint32_t dst_image_stride_in_bytes_comp = dst_image_stride_in_bytes * 2; + uint32_t src_yuv_image_stride_in_bytes_comp = src_yuv_image_stride_in_bytes * 2; + + ColorConvertYUV440ToRGBPlanarKernel<<(global_threads_x) / local_threads_x), ceil(static_cast(global_threads_y) / local_threads_y)), + dim3(local_threads_x, local_threads_y), 0, stream>>>(dst_width, dst_height, dst_image_r, dst_image_g, dst_image_b, + dst_image_stride_in_bytes, dst_image_stride_in_bytes_comp, src_yuv_image, src_yuv_image + src_u_image_offset, + src_yuv_image + src_v_image_offset, src_yuv_image_stride_in_bytes, + dst_width_comp, dst_height_comp, src_yuv_image_stride_in_bytes_comp); +} + __global__ void ColorConvertYUYVToRGBKernel(uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, uint32_t dst_image_stride_in_bytes_comp, const uint8_t *src_image, uint32_t src_image_stride_in_bytes, uint32_t src_image_stride_in_bytes_comp, diff --git a/projects/rocjpeg/src/rocjpeg_hip_kernels.h b/projects/rocjpeg/src/rocjpeg_hip_kernels.h index 388f8293d8..87e637ec39 100644 --- a/projects/rocjpeg/src/rocjpeg_hip_kernels.h +++ b/projects/rocjpeg/src/rocjpeg_hip_kernels.h @@ -45,6 +45,24 @@ void ColorConvertYUV444ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t ds uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image, uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset); +/** + * @brief Converts YUV440 image to RGB image. + * + * This function takes a YUV440 image and converts it to an RGB image. + * + * @param stream The HIP stream to be used for the conversion. + * @param dst_width The width of the destination RGB image. + * @param dst_height The height of the destination RGB image. + * @param dst_image Pointer to the destination RGB image buffer. + * @param dst_image_stride_in_bytes The stride (in bytes) of the destination RGB image buffer. + * @param src_yuv_image Pointer to the source YUV440 image buffer. + * @param src_yuv_image_stride_in_bytes The stride (in bytes) of the source YUV440 image buffer. + * @param src_u_image_offset The offset (in bytes) of the U component in the source YUV440 image buffer. + */ +void ColorConvertYUV440ToRGB(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset, uint32_t src_v_image_offset); + /** * @brief Converts an image in YUYV format to RGB format. * @@ -140,6 +158,43 @@ void ColorConvertYUV444ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint3 uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image, uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset); +/** + * @brief Converts YUV440 image to RGB planar format. + * + * This function takes a YUV440 image and converts it to RGB planar format. + * The resulting RGB image is stored in separate R, G, and B planes. + * + * @param stream The HIP stream to be used for the kernel execution. + * @param dst_width The width of the destination RGB image. + * @param dst_height The height of the destination RGB image. + * @param dst_image_r Pointer to the destination R plane of the RGB image. + * @param dst_image_g Pointer to the destination G plane of the RGB image. + * @param dst_image_b Pointer to the destination B plane of the RGB image. + * @param dst_image_stride_in_bytes The stride (in bytes) of the destination RGB image. + * @param src_yuv_image Pointer to the source YUV440 image. + * @param src_yuv_image_stride_in_bytes The stride (in bytes) of the source YUV440 image. + * @param src_u_image_offset The offset (in bytes) of the U plane in the source YUV440 image. + */ +void ColorConvertYUV440ToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, + uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, const uint8_t *src_yuv_image, + uint32_t src_yuv_image_stride_in_bytes, uint32_t src_u_image_offset, uint32_t src_v_image_offset); + +/** + * Converts a YUYV image to RGB planar format. + * + * This function takes a YUYV image and converts it to RGB planar format. The resulting RGB image + * is stored in separate planes for red, green, and blue channels. + * + * @param stream The HIP stream to use for the conversion. + * @param dst_width The width of the destination RGB image. + * @param dst_height The height of the destination RGB image. + * @param dst_image_r Pointer to the destination red channel plane. + * @param dst_image_g Pointer to the destination green channel plane. + * @param dst_image_b Pointer to the destination blue channel plane. + * @param dst_image_stride_in_bytes The stride (in bytes) between consecutive rows in the destination image planes. + * @param src_image Pointer to the source YUYV image. + * @param src_image_stride_in_bytes The stride (in bytes) between consecutive rows in the source image. + */ void ColorConvertYUYVToRGBPlanar(hipStream_t stream, uint32_t dst_width, uint32_t dst_height, uint8_t *dst_image_r, uint8_t *dst_image_g, uint8_t *dst_image_b, uint32_t dst_image_stride_in_bytes, const uint8_t *src_image, uint32_t src_image_stride_in_bytes); diff --git a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp index a9a7e362cc..174e86114e 100644 --- a/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp +++ b/projects/rocjpeg/src/rocjpeg_vaapi_decoder.cpp @@ -541,6 +541,10 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecode(const JpegStreamParameters *jpeg surface_format = VA_RT_FORMAT_YUV444; surface_attrib.value.value.i = VA_FOURCC_444P; break; + case CSS_440: + surface_format = VA_RT_FORMAT_YUV422; + surface_attrib.value.value.i = VA_FOURCC_422V; + break; case CSS_422: surface_format = VA_RT_FORMAT_YUV422; surface_attrib.value.value.i = ROCJPEG_FOURCC_YUYV;