diff --git a/docs/conf.py b/docs/conf.py index 3e17044e14..cb3eb2c2d0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,8 +9,6 @@ import shutil from rocm_docs import ROCmDocs -shutil.copy2('../README.md','./index.md') - with open('../CMakeLists.txt', encoding='utf-8') as f: match = re.search(r'.*\bset\(VERSION\s+\"?([0-9.]+)[^0-9.]+', f.read()) if not match: diff --git a/docs/how-to/using-rocjpeg.rst b/docs/how-to/using-rocjpeg.rst index 2f32eeb275..89cb8f475d 100644 --- a/docs/how-to/using-rocjpeg.rst +++ b/docs/how-to/using-rocjpeg.rst @@ -17,24 +17,139 @@ this file in the `api` folder in the rocJPEG repository. 2. Create a decoder ==================================================== -``rocJpegCreate()`` creates an instance of the hardware JPEG decoder object and provides you -with a handle upon successful creation. The decoder handle -returned by ``rocJpegCreate()`` must be retained for the entire decode session because the -handle is passed along with the other decoding APIs. +The ``rocJpegCreate()`` function creates a JPEG decoder object and returns a handle upon successful creation. + +Below is the signature of ``rocJpegCreate()`` function: + +.. code:: cpp + + RocJpegStatus rocJpegCreate( + RocJpegBackend backend, + int device_id, + RocJpegHandle *handle); + +The API takes in the following arguments: + +* A ``RocJpegBackend`` type, which specifies the backend to use for creating a decoder handle. + Currently, the rocJPEG library only supports ``ROCJPEG_BACKEND_HARDWARE``, which creates a decoder + for baseline JPEG bitstream using VCN hardware-accelerated JPEG decoder in AMD GPUs. +* The GPU device ID for which a decoder should be created. The GPU device ID is a zero-based index, where 0 is for the first GPU on a system. +* A decoder handle, which is returned by ``rocJpegCreate()`` and must be retained for the entire decode session, + as it is passed along with the other decoding APIs. 3. 
Retrieve the image info ==================================================== -``rocJpegGetImageInfo()`` retrieves the image info, including channel, width and height of each component, and chroma subsampling. +``rocJpegGetImageInfo()`` retrieves the image info, including number of components, width and height of each component, and chroma subsampling. +For each image to be decoded, pass the JPEG data pointer and data length to the ``rocJpegGetImageInfo()`` function. This function is thread safe. + +Below is the signature of ``rocJpegGetImageInfo()`` function: + +.. code:: cpp + + RocJpegStatus rocJpegGetImageInfo( + RocJpegHandle handle, + const uint8_t *data, + size_t length, + uint8_t *num_components, + RocJpegChromaSubsampling *subsampling, + uint32_t *widths, + uint32_t *heights); + +One of the outputs of the ``rocJpegGetImageInfo()`` function is ``RocJpegChromaSubsampling``. This parameter is an enum type, and its enumerator +list is composed of the chroma subsampling property retrieved from the JPEG image. See the ``RocJpegChromaSubsampling`` enum below. + +.. code:: cpp + + typedef enum { + ROCJPEG_CSS_444 = 0, + ROCJPEG_CSS_440 = 1, + ROCJPEG_CSS_422 = 2, + ROCJPEG_CSS_420 = 3, + ROCJPEG_CSS_411 = 4, + ROCJPEG_CSS_400 = 5, + ROCJPEG_CSS_UNKNOWN = -1 + } RocJpegChromaSubsampling; + +.. note:: + + The VCN hardware-accelerated JPEG decoder in AMD GPUs only supports decoding JPEG images with ``ROCJPEG_CSS_444``, ``ROCJPEG_CSS_422``, + ``ROCJPEG_CSS_420``, and ``ROCJPEG_CSS_400`` chroma subsampling. 4. Decode a JPEG stream ==================================================== -``rocJpegDecode()`` decodes single image based on the backend used to create the rocJpeg handle in rocJpegCreate API. -Destination buffers should be large enough to be able to store output of specified format. These buffers should be pre-allocted by the user in the device memories. 
-For each color plane (channel) sizes could be retrieved for image using ``rocJpegGetImageInfo()`` API -and minimum required memory buffer for each plane is plane_height * plane_pitch where plane_pitch >= plane_width for -planar output formats and plane_pitch >= plane_width * num_components for interleaved output format. +``rocJpegDecode()`` decodes a single image based on the backend used to create the rocJpeg handle in rocJpegCreate API. For each image to be decoded, +pass the JPEG data pointer and data length to the ``rocJpegDecode()`` function. This function is thread safe. -5. Destroy the decoder +See the signature of this function below: + +.. code:: cpp + + RocJpegStatus rocJpegDecode( + RocJpegHandle handle, + const uint8_t *data, + size_t length, + RocJpegOutputFormat output_format, + RocJpegImage *destination); + +In the above ``rocJpegDecode()`` function, you can use the parameters ``RocJpegOutputFormat`` and ``RocJpegImage`` to set +the output behavior of the ``rocJpegDecode()`` function. The ``RocJpegImage`` structure is a JPEG image descriptor used to +return the decoded output image. The user must allocate device memories for each channel for this structure and pass it to the +``rocJpegDecode()`` API. This API then copies the decoded image to this struct based on the requested output format ``RocJpegOutputFormat``. +Below is the ``RocJpegImage`` structure. + +.. code:: cpp + + typedef struct { + uint8_t* channel[ROCJPEG_MAX_COMPONENT]; + uint32_t pitch[ROCJPEG_MAX_COMPONENT]; + } RocJpegImage; + +You can set the ``RocJpegOutputFormat`` parameter to one of the ``output_format`` settings below: + +.. csv-table:: + :header: "output_format", "Meaning" + + "ROCJPEG_OUTPUT_NATIVE", "Return native unchanged decoded YUV image from the VCN JPEG decoder." + "ROCJPEG_OUTPUT_YUV_PLANAR", "Return in the YUV planar format." + "ROCJPEG_OUTPUT_Y", "Return the Y component only." + "ROCJPEG_OUTPUT_RGB", "Convert to interleaved RGB." 
+ +For example, if ``output_format`` is set to ``ROCJPEG_OUTPUT_NATIVE``, then based on the chroma subsampling of the input image, the +``rocJpegDecode()`` function does one of the following: + +* For ``ROCJPEG_CSS_444`` write Y, U, and V to first, second, and third channels of ``RocJpegImage``. +* For ``ROCJPEG_CSS_422`` write YUYV (packed) to first channel of ``RocJpegImage``. +* For ``ROCJPEG_CSS_420`` write Y to first channel and UV (interleaved) to second channel of ``RocJpegImage``. +* For ``ROCJPEG_CSS_400`` write Y to first channel of ``RocJpegImage``. + +If ``output_format`` is set to ``ROCJPEG_OUTPUT_Y`` or ``ROCJPEG_OUTPUT_RGB`` then ``rocJpegDecode()`` copies the output to the first channel of ``RocJpegImage``. +Alternatively, in the case of ``ROCJPEG_OUTPUT_YUV_PLANAR``, the data is written to the corresponding channels of the ``RocJpegImage`` destination structure. +The destination buffers should be large enough to be able to store output of the specified format. These buffers should be +pre-allocated by the user in the device memories. For each color plane (channel), sizes could be retrieved for image using +``rocJpegGetImageInfo()`` API and minimum required memory buffer for each plane is plane_height * plane_pitch where +plane_pitch >= plane_width for planar output formats and plane_pitch >= plane_width * num_components for interleaved output format. + +As mentioned above, you can use the retrieved parameters, ``num_components``, ``subsampling``, ``widths``, and ``heights`` from the ``rocJpegGetImageInfo()`` API to calculate +the required size for the output buffers for a single decoded JPEG. To optimally set the destination parameter for the ``rocJpegDecode()`` function, use the following guidelines: + +.. 
csv-table:: + :header: "output_format", "chroma subsampling", "destination.pitch[c] should be at least:", "destination.channel[c] should be at least:" + + "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_444", "destination.pitch[c] = widths[c] for c = 0, 1, 2", "destination.channel[c] = destination.pitch[c] * heights[0] for c = 0, 1, 2" + "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_422", "destination.pitch[0] = widths[0] * 2", "destination.channel[0] = destination.pitch[0] * heights[0]" + "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_420", "destination.pitch[1] = destination.pitch[0] = widths[0]", "destination.channel[0] = destination.pitch[0] * heights[0], destination.channel[1] = destination.pitch[1] * (heights[0] >> 1)" + "ROCJPEG_OUTPUT_NATIVE", "ROCJPEG_CSS_400", "destination.pitch[0] = widths[0]", "destination.channel[0] = destination.pitch[0] * heights[0]" + "ROCJPEG_OUTPUT_YUV_PLANAR", "ROCJPEG_CSS_444, ROCJPEG_CSS_422, ROCJPEG_CSS_420", "destination.pitch[c] = widths[c] for c = 0, 1, 2", "destination.channel[c] = destination.pitch[c] * heights[c] for c = 0, 1, 2" + "ROCJPEG_OUTPUT_YUV_PLANAR", "ROCJPEG_CSS_400", "destination.pitch[0] = widths[0]", "destination.channel[0] = destination.pitch[0] * heights[0]" + "ROCJPEG_OUTPUT_Y", "Any of the supported chroma subsampling", "destination.pitch[0] = widths[0]", "destination.channel[0] = destination.pitch[0] * heights[0]" + "ROCJPEG_OUTPUT_RGB", "Any of the supported chroma subsampling", "destination.pitch[0] = widths[0] * 3", "destination.channel[0] = destination.pitch[0] * heights[0]" + +5. Destroy the decoder ==================================================== -You must call the ``rocJpegDestroy()`` to destroy the session and free up resources. \ No newline at end of file +You must call ``rocJpegDestroy()`` to destroy the session and free up resources. + +6. 
Get the error name +==================================================== + +You can call ``rocJpegGetErrorName()`` to retrieve, in text form, the name of an error code returned by the rocJPEG APIs. \ No newline at end of file diff --git a/docs/install/install.rst b/docs/install/install.rst index b944f6282b..738f521581 100644 --- a/docs/install/install.rst +++ b/docs/install/install.rst @@ -230,9 +230,8 @@ architecture. "gfx908 - MI1xx", "VCN 2.5.0", "2", "1", "4096, 4096" "gfx90a - MI2xx", "VCN 2.6.0", "2", "1", "4096, 4096" - "gfx940 - MI3xx", "VCN 3.0", "3", "8", "4096, 4096" - "gfx941 - MI3xx", "VCN 3.0", "4", "8", "4096, 4096" - "gfx942 - MI3xx", "VCN 3.0", "4", "8", "4096, 4096" + "gfx940, gfx942 - MI300A", "VCN 3.0", "3", "8", "4096, 4096" + "gfx941, gfx942 - MI300X", "VCN 3.0", "4", "8", "4096, 4096" "gfx1030, gfx1031, gfx1032 - Navi2x", "VCN 3.x", "2", "1", "4096, 4096" "gfx1100, gfx1102 - Navi3x", "VCN 4.0", "2", "1", "4096, 4096" "gfx1101 - Navi3x", "VCN 4.0", "1", "1", "4096, 4096"