From 7fafba6e700dffc0b9e4fe10a55329975bc5ebff Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Mon, 22 Jul 2024 12:12:35 -0400 Subject: [PATCH] CMakeLists/README/DOC - updates and fixes (#43) * CMakeLists/README/DOC - updates and fixes * Fix few typos in the sample code in doc * Add a sample code snippet for decoding a batch of jpeg images --- CMakeLists.txt | 14 +- README.md | 4 +- docker/rocJPEG-on-ubuntu20.dockerfile | 2 +- docker/rocJPEG-on-ubuntu22.dockerfile | 2 +- docs/how-to/using-rocjpeg.rst | 218 +++++++++++++++++++++++++- docs/install/install.rst | 21 +-- docs/install/quick-start.rst | 2 +- 7 files changed, 230 insertions(+), 33 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index be7db09116..e875d1cd7c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,11 @@ if(HIP_FOUND AND Libva_FOUND) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LIBVA_LIBRARY}) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${LIBVA_DRM_LIBRARY}) + #filesystem: c++ compilers less than equal to 8.5 need explicit link with stdc++fs + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL "8.5") + set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs) + endif() + # local include files include_directories(api src) # source files @@ -184,12 +189,13 @@ if(HIP_FOUND AND Libva_FOUND) file(READ "/etc/os-release" OS_RELEASE) string(REGEX MATCH "22.04" UBUNTU_22_FOUND ${OS_RELEASE}) string(REGEX MATCH "SLES" SLES_FOUND ${OS_RELEASE}) + string(REGEX MATCH "Mariner" MARINER_FOUND ${OS_RELEASE}) # Set the dependent packages set(rocJPEG_DEBIAN_PACKAGE_LIST "rocm-hip-runtime, libva2, libdrm-amdgpu1, mesa-amdgpu-va-drivers") set(rocJPEG_RPM_PACKAGE_LIST "rocm-hip-runtime, libva, libdrm-amdgpu, mesa-amdgpu-va-drivers, mesa-amdgpu-dri-drivers") - if(SLES_FOUND) - set(rocDecode_RPM_PACKAGE_LIST "rocm-hip-runtime, libva2, libdrm-amdgpu, mesa-amdgpu-va-drivers, mesa-amdgpu-dri-drivers") + if(SLES_FOUND OR MARINER_FOUND) + set(rocJPEG_RPM_PACKAGE_LIST "rocm-hip-runtime, libva2, 
libdrm-amdgpu, mesa-amdgpu-va-drivers, mesa-amdgpu-dri-drivers") endif() set(rocJPEG_DEBIAN_DEV_PACKAGE_LIST "rocm-hip-runtime-dev, libva-dev, pkg-config") if(UBUNTU_22_FOUND) @@ -230,22 +236,18 @@ if(HIP_FOUND AND Libva_FOUND) set(CPACK_RPM_COMPONENT_INSTALL ON) set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}") set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core, ${rocJPEG_RPM_PACKAGE_LIST}") - set(CPACK_RPM_RUNTIME_PACKAGE_PROVIDES "${PROJECT_NAME}") set(CPACK_RPM_RUNTIME_PACKAGE_OBSOLETES "${PROJECT_NAME}") set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel") set(CPACK_RPM_DEV_PACKAGE_REQUIRES "rocm-core, ${PROJECT_NAME}, ${rocJPEG_RPM_DEV_PACKAGE_LIST}") - set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-devel") set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-devel") set(CPACK_RPM_PACKAGE_LICENSE "MIT" ) # RPM package specific variable for ASAN set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" ) set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan, ${rocJPEG_RPM_PACKAGE_LIST}" ) - set(CPACK_RPM_ASAN_PACKAGE_PROVIDES "${PROJECT_NAME}-asan") set(CPACK_RPM_ASAN_PACKAGE_OBSOLETES "${PROJECT_NAME}-asan") # RPM package specific variable for Test set(CPACK_RPM_TEST_PACKAGE_NAME "${PROJECT_NAME}-test" ) set(CPACK_RPM_TEST_PACKAGE_REQUIRES "rocm-core, ${CPACK_RPM_DEV_PACKAGE_NAME}" ) - set(CPACK_RPM_TEST_PACKAGE_PROVIDES "${PROJECT_NAME}-test") set(CPACK_RPM_TEST_PACKAGE_OBSOLETES "${PROJECT_NAME}-test") if(NOT ROCM_DEP_ROCMCORE) diff --git a/README.md b/README.md index 5429c9fb0e..d4cc4baff9 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ rocJPEG is a high performance JPEG decode SDK for AMD GPUs. 
Using the rocJPEG AP > [!IMPORTANT] > `gfx908` or higher GPU required -* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html): Required usecase - rocm +* Install ROCm `6.3.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html): Required usecase - rocm > [!IMPORTANT] > `sudo amdgpu-install --usecase=rocm` @@ -87,7 +87,7 @@ The installation process uses the following steps: * [ROCm-supported hardware](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/reference/system-requirements.html) install verification -* Install ROCm `6.1.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=rocm` +* Install ROCm `6.3.0` or later with [amdgpu-install](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/how-to/amdgpu-install.html) with `--usecase=rocm` * Use either [Package install](#package-install) or [Source install](#source-install) as described below. 
diff --git a/docker/rocJPEG-on-ubuntu20.dockerfile b/docker/rocJPEG-on-ubuntu20.dockerfile index 82d1b26be3..5464705463 100644 --- a/docker/rocJPEG-on-ubuntu20.dockerfile +++ b/docker/rocJPEG-on-ubuntu20.dockerfile @@ -7,7 +7,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config g # install ROCm RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget keyboard-configuration && \ - wget https://repo.radeon.com/amdgpu-install/6.3/ubuntu/focal/amdgpu-install_6.3.60100-1_all.deb && \ - sudo apt-get install ./amdgpu-install_6.3.60100-1_all.deb && \ + wget https://repo.radeon.com/amdgpu-install/6.3/ubuntu/focal/amdgpu-install_6.3.60300-1_all.deb && \ + sudo apt-get install ./amdgpu-install_6.3.60300-1_all.deb && \ sudo amdgpu-install -y --usecase=rocm diff --git a/docker/rocJPEG-on-ubuntu22.dockerfile b/docker/rocJPEG-on-ubuntu22.dockerfile index 3cfab872d0..bc81523890 100644 --- a/docker/rocJPEG-on-ubuntu22.dockerfile +++ b/docker/rocJPEG-on-ubuntu22.dockerfile @@ -7,7 +7,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y install gcc g++ cmake pkg-config g # install ROCm RUN DEBIAN_FRONTEND=noninteractive apt-get -y install initramfs-tools libnuma-dev wget keyboard-configuration && \ - wget https://repo.radeon.com/amdgpu-install/6.3/ubuntu/jammy/amdgpu-install_6.3.60100-1_all.deb && \ - sudo apt-get install ./amdgpu-install_6.3.60100-1_all.deb && \ + wget https://repo.radeon.com/amdgpu-install/6.3/ubuntu/jammy/amdgpu-install_6.3.60300-1_all.deb && \ + sudo apt-get install ./amdgpu-install_6.3.60300-1_all.deb && \ sudo amdgpu-install -y --usecase=rocm diff --git a/docs/how-to/using-rocjpeg.rst b/docs/how-to/using-rocjpeg.rst index c60db2eb1b..01ab2945e4 100644 --- a/docs/how-to/using-rocjpeg.rst +++ b/docs/how-to/using-rocjpeg.rst @@ -204,6 +204,10 @@ The ``rocJpegDecodeBatched()`` function takes the following arguments: To use the ``rocJpegDecodeBatched()`` function, you need to provide the appropriate rocJPEG handles, stream handles, decode parameters, and destination images. 
The function will decode the batch of JPEG images and store the decoded images in the ``destinations`` array. Remember to allocate device memories for each channel of the destination images and pass them to the ``rocJpegDecodeBatched()`` API. The API will then copy the decoded images to the destination images based on the requested output format specified in the ``RocJpegDecodeParams``. +The ``rocJpegDecodeBatched()`` function provides optimal performance on ASICs with multiple JPEG cores, such as the MI300 series. It efficiently submits a batch of JPEG streams for decoding based on the available JPEG cores, resulting in better performance compared +to the single JPEG decode API ``rocJpegDecode``. To achieve the best performance, it is recommended to choose a batch size that is a multiple of the available JPEG cores. For example, the MI300X +has 32 independent JPEG cores, so a batch size that is a multiple of 32 will provide optimal performance. + 8. Destroy the decoder ==================================================== @@ -286,7 +290,7 @@ Finally, the code decodes the JPEG stream by calling the ``rocJpegDecode()`` fun uint32_t widths[ROCJPEG_MAX_COMPONENT] = {}; uint32_t heights[ROCJPEG_MAX_COMPONENT] = {}; - status = rocJpegGetImageInfo(rocjpeg_handle, rocjpeg_stream_handle, &num_components, &subsampling, widths, heights); + status = rocJpegGetImageInfo(handle, rocjpeg_stream_handle, &num_components, &subsampling, widths, heights); if (status != ROCJPEG_STATUS_SUCCESS) { std::cerr << "Failed to get image info with error code: " << rocJpegGetErrorName(status) << std::endl; rocJpegStreamDestroy(rocjpeg_stream_handle); @@ -297,7 +301,7 @@ Finally, the code decodes the JPEG stream by calling the ``rocJpegDecode()`` fun // Allocate device memory for the decoded output image RocJpegImage output_image = {}; RocJpegDecodeParams decode_params = {}; - RocJpegDecodeParams.output_format = ROCJPEG_OUTPUT_NATIVE; + decode_params.output_format = ROCJPEG_OUTPUT_NATIVE; // For 
this sample assuming the input image has a YUV420 chroma subsampling. // For YUV420 subsampling, the native decoded output image would be NV12 (i.e., the rocJPegDecode API copies Y to first channel and UV (interleaved) to second channel of RocJpegImage) @@ -321,7 +325,7 @@ Finally, the code decodes the JPEG stream by calling the ``rocJpegDecode()`` fun } // Decode the JPEG stream - status = rocJpegDecode(rocjpeg_handle, rocjpeg_stream_handle, &decode_params, &output_image); + status = rocJpegDecode(handle, rocjpeg_stream_handle, &decode_params, &output_image); if (status != ROCJPEG_STATUS_SUCCESS) { std::cerr << "Failed to decode JPEG stream with error code: " << rocJpegGetErrorName(status) << std::endl; hipFree((void *)output_image.channel[0]); @@ -342,3 +346,211 @@ Finally, the code decodes the JPEG stream by calling the ``rocJpegDecode()`` fun return EXIT_SUCCESS; } + +12. Sample code snippet for decoding a batch of JPEG streams using the rocJPEG APIs +=================================================================================== + +The code snippet provided demonstrates how to decode a batch of JPEG streams using the rocJPEG library. + +.. code:: cpp + + #include <iostream> + #include <fstream> + #include <vector> + #include <filesystem> + #include "rocjpeg.h" + + int main() { + // the input path of a folder containing JPEG files + // note: replace the "path_to_a_folder_of_JPEG_files" with an actual path of a folder + std::string input_path = "path_to_a_folder_of_JPEG_files"; + + // vector of string to store the paths of the JPEG files + std::vector<std::string> file_paths = {}; + + if (std::filesystem::is_directory(input_path)) { + for (const auto &entry : std::filesystem::recursive_directory_iterator(input_path)) { + if (std::filesystem::is_regular_file(entry)) { + file_paths.push_back(entry.path().string()); + } + } + } else { + std::cerr << "ERROR: the input path is not a directory!" 
<< std::endl; + return EXIT_FAILURE; + } + + // Initialize rocJPEG handle + RocJpegHandle handle; + RocJpegStatus status = rocJpegCreate(ROCJPEG_BACKEND_HARDWARE, 0, &handle); + if (status != ROCJPEG_STATUS_SUCCESS) { + std::cerr << "Failed to create rocJPEG handle with error code: " << rocJpegGetErrorName(status) << std::endl; + return EXIT_FAILURE; + } + + int batch_size = 32; + batch_size = std::min(batch_size, static_cast<int>(file_paths.size())); + + std::vector<RocJpegStreamHandle> rocjpeg_stream_handles; + rocjpeg_stream_handles.resize(batch_size); + // Create stream handles of batch_size + for (auto i = 0; i < batch_size; i++) { + status = rocJpegStreamCreate(&rocjpeg_stream_handles[i]); + if (status != ROCJPEG_STATUS_SUCCESS) { + std::cerr << "Failed to create rocJPEG stream handle with error code: " << rocJpegGetErrorName(status) << std::endl; + rocJpegDestroy(handle); + for (auto j = 0; j < i; j++) { + rocJpegStreamDestroy(rocjpeg_stream_handles[j]); + } + return EXIT_FAILURE; + } + } + + // Vector to store batch of raw JPEG data from files + std::vector<std::vector<char>> batch_images; + batch_images.resize(batch_size); + + // Vector to store widths of JPEG images + std::vector<std::vector<uint32_t>> widths; + widths.resize(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0)); + + // Vector to store heights of JPEG images + std::vector<std::vector<uint32_t>> heights; + heights.resize(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0)); + + // Vector to store chroma subsamplings of JPEG images + std::vector<RocJpegChromaSubsampling> subsamplings; + subsamplings.resize(batch_size); + + // Vector to store output images + std::vector<RocJpegImage> output_images; + output_images.resize(batch_size); + + uint8_t num_components; + RocJpegDecodeParams decode_params = {}; + decode_params.output_format = ROCJPEG_OUTPUT_NATIVE; + + // Start reading images from files and prepare a batch of JPEG streams for decoding + for (int i = 0; i < file_paths.size(); i += batch_size) { + int batch_end = std::min(i + batch_size, static_cast<int>(file_paths.size())); + for (int j = i; j < batch_end; j++) { + int 
index = j - i; + // Read an image from disk + std::ifstream input(file_paths[j].c_str(), std::ios::in | std::ios::binary | std::ios::ate); + if (!(input.is_open())) { + std::cerr << "ERROR: Cannot open image: " << file_paths[j] << std::endl; + rocJpegDestroy(handle); + for (auto& it : rocjpeg_stream_handles) { + rocJpegStreamDestroy(it); + } + return EXIT_FAILURE; + } + // Get the size + std::streamsize file_size = input.tellg(); + input.seekg(0, std::ios::beg); + // Resize if buffer is too small + if (batch_images[index].size() < file_size) { + batch_images[index].resize(file_size); + } + if (!input.read(batch_images[index].data(), file_size)) { + std::cerr << "ERROR: Cannot read from file: " << file_paths[j] << std::endl; + rocJpegDestroy(handle); + for (auto& it : rocjpeg_stream_handles) { + rocJpegStreamDestroy(it); + } + return EXIT_FAILURE; + } + + status = rocJpegStreamParse(reinterpret_cast<uint8_t *>(batch_images[index].data()), file_size, rocjpeg_stream_handles[index]); + if (status != ROCJPEG_STATUS_SUCCESS) { + std::cerr << "Failed to parse a JPEG stream with error code: " << rocJpegGetErrorName(status) << std::endl; + rocJpegDestroy(handle); + for (auto& it : rocjpeg_stream_handles) { + rocJpegStreamDestroy(it); + } + return EXIT_FAILURE; + } + + status = rocJpegGetImageInfo(handle, rocjpeg_stream_handles[index], &num_components, &subsamplings[index], widths[index].data(), heights[index].data()); + if (status != ROCJPEG_STATUS_SUCCESS) { + std::cerr << "Failed to get image info with error code: " << rocJpegGetErrorName(status) << std::endl; + rocJpegDestroy(handle); + for (auto& it : rocjpeg_stream_handles) { + rocJpegStreamDestroy(it); + } + return EXIT_FAILURE; + } + + // Allocate memory for each channel of RocJpegImage + // This sample assumes that all the input images have a YUV420 chroma subsampling (i.e., subsamplings[index] = ROCJPEG_CSS_420) + // For YUV420 subsampling, the native decoded output image would be NV12 (i.e., the rocJpegDecodeBatched API 
copies Y to first channel + // and UV (interleaved) to second channel of RocJpegImage for each image in the batch) + output_images[index].pitch[1] = output_images[index].pitch[0] = widths[index][0]; + hipError_t hip_status; + if (output_images[index].channel[0] != nullptr) { + hipFree((void *)output_images[index].channel[0]); + output_images[index].channel[0] = nullptr; + } + // Allocate device memory for the first channel (Y) + hip_status = hipMalloc(&output_images[index].channel[0], output_images[index].pitch[0] * heights[index][0]); + if (hip_status != hipSuccess) { + std::cerr << "Failed to allocate device memory for the first channel" << std::endl; + for (auto& it : rocjpeg_stream_handles) { + rocJpegStreamDestroy(it); + } + rocJpegDestroy(handle); + return EXIT_FAILURE; + } + + if (output_images[index].channel[1] != nullptr) { + hipFree((void *)output_images[index].channel[1]); + output_images[index].channel[1] = nullptr; + } + // Allocate device memory for the second channel (UV) + hip_status = hipMalloc(&output_images[index].channel[1], output_images[index].pitch[1] * (heights[index][0] >> 1)); + if (hip_status != hipSuccess) { + std::cerr << "Failed to allocate device memory for the second channel" << std::endl; + for (auto& it : rocjpeg_stream_handles) { + rocJpegStreamDestroy(it); + } + rocJpegDestroy(handle); + return EXIT_FAILURE; + } + } + int current_batch_size = batch_end - i; + status = rocJpegDecodeBatched(handle, rocjpeg_stream_handles.data(), current_batch_size, &decode_params, output_images.data()); + if (status != ROCJPEG_STATUS_SUCCESS) { + std::cerr << "Failed to decode a batch of JPEG streams with error code: " << rocJpegGetErrorName(status) << std::endl; + for (int b = 0; b < batch_size; b++) { + hipFree((void *)output_images[b].channel[0]); + hipFree((void *)output_images[b].channel[1]); + } + for (auto& it : rocjpeg_stream_handles) { + rocJpegStreamDestroy(it); + } + rocJpegDestroy(handle); + return EXIT_FAILURE; + } + // Perform 
additional post-processing on the decoded image or optionally save it + // ... + + // Clear the batch_images vector after processing each batch + for (int j = i; j < batch_end; j++) { + batch_images[j - i].clear(); + } + } + + // Clean up resources + for (auto& it : output_images) { + for (int i = 0; i < ROCJPEG_MAX_COMPONENT; i++) { + if (it.channel[i] != nullptr) { + hipFree((void *)it.channel[i]); + it.channel[i] = nullptr; + } + } + } + rocJpegDestroy(handle); + for (auto& it : rocjpeg_stream_handles) { + rocJpegStreamDestroy(it); + } + return EXIT_SUCCESS; + } \ No newline at end of file diff --git a/docs/install/install.rst b/docs/install/install.rst index 3b45a27ee1..e32f048351 100644 --- a/docs/install/install.rst +++ b/docs/install/install.rst @@ -9,22 +9,6 @@ Installation rocJPEG is a high performance JPEG decode SDK for AMD GPUs. Using the rocJPEG API, you can access the JPEG decoding features available on your GPU. -Tested configurations -======================================== - -* Linux - - * Ubuntu: 20.04/22.04 - * RHEL: 8/9 - * SLES: 15-SP5 - -* ROCm - - * rocm-core: 6.3.0.60300-14317 - * amdgpu-core: 6.3.60300-1798298 - -* rocJPEG Setup Script: V1.0 - Supported JPEG chroma subsampling ======================================== @@ -136,7 +120,7 @@ system. Package install auto installs all dependencies. -* Runtime package: ``rocjpeg`` only provides the rocjpeg library ``librocdecode.so`` +* Runtime package: ``rocjpeg`` only provides the rocjpeg library ``librocjpeg.so`` * Development package: ``rocjpeg-dev``or ``rocjpeg-devel`` provides the library, header files, and samples * Test package: ``rocjpeg-test`` provides CTest to verify installation @@ -240,5 +224,4 @@ architecture. 
"gfx940, gfx942 - MI300A", "VCN 3.0", "24", "16384, 16384" "gfx941, gfx942 - MI300X", "VCN 3.0", "32", "16384, 16384" "gfx1030, gfx1031, gfx1032 - Navi2x", "VCN 3.x", "1", "16384, 16384" - "gfx1100, gfx1102 - Navi3x", "VCN 4.0", "1", "16384, 16384" - "gfx1101 - Navi3x", "VCN 4.0", "1", "16384, 16384" + "gfx1100, gfx1101, gfx1102 - Navi3x", "VCN 4.0", "1", "16384, 16384" \ No newline at end of file diff --git a/docs/install/quick-start.rst b/docs/install/quick-start.rst index 42ff0451c4..40837d8756 100644 --- a/docs/install/quick-start.rst +++ b/docs/install/quick-start.rst @@ -8,7 +8,7 @@ rocJPEG quick-start installation To install the rocJPEG runtime with minimum requirements, follow these steps: -1. Install core ROCm components (ROCm 6.1.0 or later) using the +1. Install core ROCm components (ROCm 6.3.0 or later) using the :doc:`native package manager ` installation instructions.