From 9394c3cea9e32a23a6561cf12a4229c94de1d8c7 Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Mon, 25 Nov 2024 20:40:50 -0500 Subject: [PATCH] Update samples (#82) * Update samples * Simplify the arguments of the DecodeImages function by grouping some of them into a struct. * Modify the logic for selecting the valid images per batch * Modify the logic for selecting the valid images per batch for jpegDecodeBatched sample too [ROCm/rocjpeg commit: a4f3daef1e47810527c0700f4799b1fa87cf55a0] --- projects/rocjpeg/CHANGELOG.md | 1 + projects/rocjpeg/CMakeLists.txt | 2 +- projects/rocjpeg/samples/CMakeLists.txt | 10 +- projects/rocjpeg/samples/README.md | 4 +- .../samples/jpegDecodeBatched/README.md | 2 +- .../jpegDecodeBatched/jpegdecodebatched.cpp | 115 +++---- .../jpegdecodemultithreads.cpp | 270 --------------- .../CMakeLists.txt | 4 +- .../README.md | 9 +- .../samples/jpegDecodePerf/jpegdecodeperf.cpp | 311 ++++++++++++++++++ .../rocjpeg/samples/rocjpeg_samples_utils.h | 76 ++++- projects/rocjpeg/test/CMakeLists.txt | 8 +- 12 files changed, 443 insertions(+), 369 deletions(-) delete mode 100644 projects/rocjpeg/samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp rename projects/rocjpeg/samples/{jpegDecodeMultiThreads => jpegDecodePerf}/CMakeLists.txt (96%) rename projects/rocjpeg/samples/{jpegDecodeMultiThreads => jpegDecodePerf}/README.md (65%) create mode 100644 projects/rocjpeg/samples/jpegDecodePerf/jpegdecodeperf.cpp diff --git a/projects/rocjpeg/CHANGELOG.md b/projects/rocjpeg/CHANGELOG.md index b4546e1dca..69e558d708 100644 --- a/projects/rocjpeg/CHANGELOG.md +++ b/projects/rocjpeg/CHANGELOG.md @@ -9,6 +9,7 @@ Documentation for rocJPEG is available at * AMD Clang++ is now the default CXX compiler. * `rocJPEG-setup.py` setup script updates to common package install: Setup no longer installs public compiler package. 
+* The jpegDecodeMultiThreads sample has been renamed to jpegDecodePerf, and batch decoding has been added to this sample instead of single image decoding for improved performance. ### Removed diff --git a/projects/rocjpeg/CMakeLists.txt b/projects/rocjpeg/CMakeLists.txt index 09a4e3237f..193e729f27 100644 --- a/projects/rocjpeg/CMakeLists.txt +++ b/projects/rocjpeg/CMakeLists.txt @@ -165,7 +165,7 @@ if(HIP_FOUND AND Libva_FOUND) install(DIRECTORY cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME} COMPONENT dev) install(FILES samples/jpegDecode/CMakeLists.txt samples/jpegDecode/jpegdecode.cpp samples/jpegDecode/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/jpegDecode COMPONENT dev) - install(FILES samples/jpegDecodeMultiThreads/CMakeLists.txt samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp samples/jpegDecodeMultiThreads/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/jpegDecodeMultiThreads COMPONENT dev) + install(FILES samples/jpegDecodePerf/CMakeLists.txt samples/jpegDecodePerf/jpegdecodeperf.cpp samples/jpegDecodePerf/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/jpegDecodePerf COMPONENT dev) install(FILES samples/jpegDecodeBatched/CMakeLists.txt samples/jpegDecodeBatched/jpegdecodebatched.cpp samples/jpegDecodeBatched/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/jpegDecodeBatched COMPONENT dev) install(FILES samples/rocjpeg_samples_utils.h DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples COMPONENT dev) install(DIRECTORY data/images DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/ COMPONENT dev) diff --git a/projects/rocjpeg/samples/CMakeLists.txt b/projects/rocjpeg/samples/CMakeLists.txt index cf37af05b6..d9c3a09b92 100644 --- a/projects/rocjpeg/samples/CMakeLists.txt +++ b/projects/rocjpeg/samples/CMakeLists.txt @@ -84,14 +84,14 @@ add_test( add_test( NAME - jpeg-decode-threads-fmt-native + jpeg-decode-perf-fmt-native COMMAND 
"${CMAKE_CTEST_COMMAND}" - --build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecodeMultiThreads" - "${CMAKE_CURRENT_BINARY_DIR}/jpegDecodeMultiThreads" + --build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecodePerf" + "${CMAKE_CURRENT_BINARY_DIR}/jpegDecodePerf" --build-generator "${CMAKE_GENERATOR}" - --test-command "jpegdecodemultithreads" - -i ${CMAKE_SOURCE_DIR}/data/images/ -t 2 + --test-command "jpegdecodeperf" + -i ${CMAKE_SOURCE_DIR}/data/images/ ) add_test( diff --git a/projects/rocjpeg/samples/README.md b/projects/rocjpeg/samples/README.md index 8aac29ed1b..a5c56d0bce 100644 --- a/projects/rocjpeg/samples/README.md +++ b/projects/rocjpeg/samples/README.md @@ -10,6 +10,6 @@ The jpeg decode sample illustrates decoding a JPEG images using rocJPEG library The jpeg decode bacthed sample illustrates decoding JPEG images by batches of specified size using rocJPEG library to get the individual decoded images in one of the supported output format (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and optionally able to dump the output to a file. -## [JPEG decode multi-threads](jpegDecodeMultiThreads) +## [JPEG decode perf](jpegDecodePerf) -The jpeg decode multi threads sample illustrates decoding JPEG images using rocJPEG library with multiple threads to get the individual decoded images in one of the supported output format (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and optionally able to dump the output to a file. \ No newline at end of file +The jpeg decode perf sample illustrates decoding JPEG images by batches of specified size with multiple threads using rocJPEG library to achieve optimal performance. The individual decoded images can be retrieved in one of the supported output format (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and optionally able to dump the output to a file. 
\ No newline at end of file diff --git a/projects/rocjpeg/samples/jpegDecodeBatched/README.md b/projects/rocjpeg/samples/jpegDecodeBatched/README.md index 2b97d8f309..3b29852da7 100644 --- a/projects/rocjpeg/samples/jpegDecodeBatched/README.md +++ b/projects/rocjpeg/samples/jpegDecodeBatched/README.md @@ -24,5 +24,5 @@ make -j -o <[output path] - path to an output file or a path to a directory - write decoded images to a file or directory based on selected output format [optional]> -d <[device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on) - [optional - default: 0]> -crop <[crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]> - -b <[batch_size] - decode images from input by batches of a specified size - [optional - default: 2]> + -b <[batch_size] - decode images from input by batches of a specified size - [optional - default: 1]> ``` \ No newline at end of file diff --git a/projects/rocjpeg/samples/jpegDecodeBatched/jpegdecodebatched.cpp b/projects/rocjpeg/samples/jpegDecodeBatched/jpegdecodebatched.cpp index a278a555f9..2d75662595 100644 --- a/projects/rocjpeg/samples/jpegDecodeBatched/jpegdecodebatched.cpp +++ b/projects/rocjpeg/samples/jpegDecodeBatched/jpegdecodebatched.cpp @@ -51,18 +51,16 @@ int main(int argc, char **argv) { RocJpegDecodeParams decode_params = {}; RocJpegUtils rocjpeg_utils; std::vector base_file_names; - std::vector bad_image_indices; - std::vector valid_rocjpeg_stream_handles; - std::vector valid_subsamplings; - std::vector> valid_widths; - std::vector> valid_heights; - std::vector> valid_prior_channel_sizes; - std::vector valid_output_images; - std::vector valid_base_file_names; + std::vector rocjpeg_stream_handles_for_current_batch; + std::vector temp_widths(ROCJPEG_MAX_COMPONENT, 0); + std::vector temp_heights(ROCJPEG_MAX_COMPONENT, 0); + RocJpegChromaSubsampling temp_subsampling; + std::string 
temp_base_file_name; uint64_t num_bad_jpegs = 0; uint64_t num_jpegs_with_411_subsampling = 0; uint64_t num_jpegs_with_unknown_subsampling = 0; uint64_t num_jpegs_with_unsupported_resolution = 0; + int current_batch_size = 0; RocJpegUtils::ParseCommandLine(input_path, output_file_path, save_images, device_id, rocjpeg_backend, decode_params, nullptr, &batch_size, argc, argv); @@ -96,20 +94,14 @@ int main(int argc, char **argv) { heights.resize(batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); subsamplings.resize(batch_size); base_file_names.resize(batch_size); - valid_rocjpeg_stream_handles.resize(batch_size); - valid_output_images.resize(batch_size); - valid_prior_channel_sizes.resize(batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); - valid_widths.resize(batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); - valid_heights.resize(batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); - valid_subsamplings.resize(batch_size); - valid_base_file_names.resize(batch_size); + rocjpeg_stream_handles_for_current_batch.resize(batch_size); std::cout << "Decoding started, please wait! ... " << std::endl; for (int i = 0; i < file_paths.size(); i += batch_size) { int batch_end = std::min(i + batch_size, static_cast(file_paths.size())); for (int j = i; j < batch_end; j++) { int index = j - i; - base_file_names[index] = file_paths[j].substr(file_paths[j].find_last_of("/\\") + 1); + temp_base_file_name = file_paths[j].substr(file_paths[j].find_last_of("/\\") + 1); // Read an image from disk. 
std::ifstream input(file_paths[j].c_str(), std::ios::in | std::ios::binary | std::ios::ate); if (!(input.is_open())) { @@ -131,7 +123,6 @@ int main(int argc, char **argv) { RocJpegStatus rocjpeg_status = rocJpegStreamParse(reinterpret_cast(batch_images[index].data()), file_size, rocjpeg_stream_handles[index]); if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { if (is_dir) { - bad_image_indices.push_back(index); num_bad_jpegs++; std::cerr << "Skipping decoding input file: " << file_paths[j] << std::endl; continue; @@ -141,16 +132,15 @@ int main(int argc, char **argv) { } } - CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, rocjpeg_stream_handles[index], &num_components, &subsamplings[index], widths[index].data(), heights[index].data())); + CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, rocjpeg_stream_handles[index], &num_components, &temp_subsampling, temp_widths.data(), temp_heights.data())); if (roi_width > 0 && roi_height > 0 && roi_width <= widths[index][0] && roi_height <= heights[index][0]) { is_roi_valid = true; } - rocjpeg_utils.GetChromaSubsamplingStr(subsamplings[index], chroma_sub_sampling); - if (widths[index][0] < 64 || heights[index][0] < 64) { + rocjpeg_utils.GetChromaSubsamplingStr(temp_subsampling, chroma_sub_sampling); + if (temp_widths[0] < 64 || temp_heights[0] < 64) { if (is_dir) { - bad_image_indices.push_back(index); num_jpegs_with_unsupported_resolution++; continue; } else { @@ -159,83 +149,56 @@ int main(int argc, char **argv) { } } - if (subsamplings[index] == ROCJPEG_CSS_411 || subsamplings[index] == ROCJPEG_CSS_UNKNOWN) { + if (temp_subsampling == ROCJPEG_CSS_411 || temp_subsampling == ROCJPEG_CSS_UNKNOWN) { if (is_dir) { - bad_image_indices.push_back(index); - if (subsamplings[index] == ROCJPEG_CSS_411) { - num_jpegs_with_411_subsampling++; - } - if (subsamplings[index] == ROCJPEG_CSS_UNKNOWN) { - num_jpegs_with_unknown_subsampling++; - } - continue; + if (temp_subsampling == ROCJPEG_CSS_411) { + num_jpegs_with_411_subsampling++; + } + 
if (temp_subsampling == ROCJPEG_CSS_UNKNOWN) { + num_jpegs_with_unknown_subsampling++; + } + continue; } else { std::cerr << "The chroma sub-sampling is not supported by VCN Hardware" << std::endl; return EXIT_FAILURE; } } - if (rocjpeg_utils.GetChannelPitchAndSizes(decode_params, subsamplings[index], widths[index].data(), heights[index].data(), num_channels, output_images[index], channel_sizes)) { + if (rocjpeg_utils.GetChannelPitchAndSizes(decode_params, temp_subsampling, temp_widths.data(), temp_heights.data(), num_channels, output_images[current_batch_size], channel_sizes)) { std::cerr << "ERROR: Failed to get the channel pitch and sizes" << std::endl; return EXIT_FAILURE; } // allocate memory for each channel and reuse them if the sizes remain unchanged for a new image. for (int n = 0; n < num_channels; n++) { - if (prior_channel_sizes[index][n] != channel_sizes[n]) { - if (output_images[index].channel[n] != nullptr) { - CHECK_HIP(hipFree((void *)output_images[index].channel[n])); - output_images[index].channel[n] = nullptr; + if (prior_channel_sizes[current_batch_size][n] != channel_sizes[n]) { + if (output_images[current_batch_size].channel[n] != nullptr) { + CHECK_HIP(hipFree((void *)output_images[current_batch_size].channel[n])); + output_images[current_batch_size].channel[n] = nullptr; } - CHECK_HIP(hipMalloc(&output_images[index].channel[n], channel_sizes[n])); - prior_channel_sizes[index][n] = channel_sizes[n]; + CHECK_HIP(hipMalloc(&output_images[current_batch_size].channel[n], channel_sizes[n])); + prior_channel_sizes[current_batch_size][n] = channel_sizes[n]; } } - } - int current_batch_size = batch_end - i - bad_image_indices.size(); - - // Select valid images for decoding - if (current_batch_size > 0) { - if (!bad_image_indices.empty()) { - // Iterate through the batch images and select only the valid ones - int valid_idx = 0; - for (int idx = 0; idx < batch_size; idx++) { - // Check if the current image index is not in the list of bad image 
indices - if (std::find(bad_image_indices.begin(), bad_image_indices.end(), idx) == bad_image_indices.end()) { - // Add the valid image index to the corresponding vectors - valid_rocjpeg_stream_handles[valid_idx] = rocjpeg_stream_handles[idx]; - valid_subsamplings[valid_idx] = subsamplings[idx]; - valid_widths[valid_idx] = widths[idx]; - valid_heights[valid_idx] = heights[idx]; - valid_prior_channel_sizes[valid_idx] = prior_channel_sizes[idx]; - valid_output_images[valid_idx] = output_images[idx]; - valid_base_file_names[valid_idx] = base_file_names[idx]; - valid_idx++; - } - } - } else { - // If there are no bad images, select all the batch images - valid_rocjpeg_stream_handles = rocjpeg_stream_handles; - valid_subsamplings = subsamplings; - valid_widths = widths; - valid_heights = heights; - valid_prior_channel_sizes = prior_channel_sizes; - valid_output_images = output_images; - valid_base_file_names = base_file_names; - } + rocjpeg_stream_handles_for_current_batch[current_batch_size] = rocjpeg_stream_handles[index]; + subsamplings[current_batch_size] = temp_subsampling; + widths[current_batch_size] = temp_widths; + heights[current_batch_size] = temp_heights; + base_file_names[current_batch_size] = temp_base_file_name; + current_batch_size++; } double time_per_batch_in_milli_sec = 0; if (current_batch_size > 0) { auto start_time = std::chrono::high_resolution_clock::now(); - CHECK_ROCJPEG(rocJpegDecodeBatched(rocjpeg_handle, valid_rocjpeg_stream_handles.data(), current_batch_size, &decode_params, valid_output_images.data())); + CHECK_ROCJPEG(rocJpegDecodeBatched(rocjpeg_handle, rocjpeg_stream_handles_for_current_batch.data(), current_batch_size, &decode_params, output_images.data())); auto end_time = std::chrono::high_resolution_clock::now(); time_per_batch_in_milli_sec = std::chrono::duration(end_time - start_time).count(); } double image_size_in_mpixels = 0; for (int b = 0; b < current_batch_size; b++) { - image_size_in_mpixels += 
(static_cast(valid_widths[b][0]) * static_cast(valid_heights[b][0]) / 1000000); + image_size_in_mpixels += (static_cast(widths[b][0]) * static_cast(heights[b][0]) / 1000000); } total_images += current_batch_size; @@ -244,12 +207,12 @@ int main(int argc, char **argv) { for (int b = 0; b < current_batch_size; b++) { std::string image_save_path = output_file_path; //if ROI is present, need to pass roi_width and roi_height - uint32_t width = is_roi_valid ? roi_width : valid_widths[b][0]; - uint32_t height = is_roi_valid ? roi_height : valid_heights[b][0]; + uint32_t width = is_roi_valid ? roi_width : widths[b][0]; + uint32_t height = is_roi_valid ? roi_height : heights[b][0]; if (is_dir) { - rocjpeg_utils.GetOutputFileExt(decode_params.output_format, valid_base_file_names[b], width, height, valid_subsamplings[b], image_save_path); + rocjpeg_utils.GetOutputFileExt(decode_params.output_format, base_file_names[b], width, height, subsamplings[b], image_save_path); } - rocjpeg_utils.SaveImage(image_save_path, &valid_output_images[b], width, height, valid_subsamplings[b], decode_params.output_format); + rocjpeg_utils.SaveImage(image_save_path, &output_images[b], width, height, subsamplings[b], decode_params.output_format); } } @@ -258,7 +221,7 @@ int main(int argc, char **argv) { mpixels_all += image_size_in_mpixels; } - bad_image_indices.clear(); + current_batch_size = 0; } if (is_dir) { diff --git a/projects/rocjpeg/samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp b/projects/rocjpeg/samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp deleted file mode 100644 index d0a4a2f442..0000000000 --- a/projects/rocjpeg/samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp +++ /dev/null @@ -1,270 +0,0 @@ -/* -Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "../rocjpeg_samples_utils.h" - -void ThreadFunction(std::vector& jpegFiles, RocJpegHandle rocjpeg_handle, RocJpegStreamHandle rocjpeg_stream, RocJpegUtils rocjpeg_util, RocJpegImage *output_image, std::mutex &mutex, - RocJpegDecodeParams &decode_params, bool save_images, std::string &output_file_path, uint64_t *num_decoded_images, double *image_size_in_mpixels, uint64_t *num_bad_jpegs, uint64_t *num_jpegs_with_411_subsampling, - uint64_t *num_jpegs_with_unknown_subsampling, uint64_t *num_jpegs_with_unsupported_resolution) { - - bool is_roi_valid = false; - uint32_t roi_width; - uint32_t roi_height; - roi_width = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left; - roi_height = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top; - - std::vector file_data; - uint8_t num_components; - uint32_t widths[ROCJPEG_MAX_COMPONENT] = {}; - uint32_t heights[ROCJPEG_MAX_COMPONENT] = {}; - uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {}; - uint32_t prior_channel_sizes[ROCJPEG_MAX_COMPONENT] = {}; - RocJpegChromaSubsampling subsampling; - std::string chroma_sub_sampling = ""; - uint32_t num_channels = 0; - - while (true) { - // Get the next JPEG file to process - std::string file_path; - { - std::lock_guard lock(mutex); - if (!jpegFiles.empty()) { - file_path = jpegFiles.front(); - jpegFiles.erase(jpegFiles.begin()); - } - } - if (file_path.empty()) { - // No more files to process - break; - } - - std::string base_file_name = file_path.substr(file_path.find_last_of("/\\") + 1); - // Read an image from disk. 
- std::ifstream input(file_path.c_str(), std::ios::in | std::ios::binary | std::ios::ate); - if (!(input.is_open())) { - std::cerr << "ERROR: Cannot open image: " << file_path << std::endl; - return; - } - // Get the size - std::streamsize file_size = input.tellg(); - input.seekg(0, std::ios::beg); - // resize if buffer is too small - if (file_data.size() < file_size) { - file_data.resize(file_size); - } - if (!input.read(file_data.data(), file_size)) { - std::cerr << "ERROR: Cannot read from file: " << file_path << std::endl; - return; - } - - RocJpegStatus rocjpeg_status = rocJpegStreamParse(reinterpret_cast(file_data.data()), file_size, rocjpeg_stream); - if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { - std::cerr << "Skipping decoding input file: " << file_path << std::endl; - *num_bad_jpegs += 1; - continue; - } - - CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, rocjpeg_stream, &num_components, &subsampling, widths, heights)); - if (roi_width > 0 && roi_height > 0 && roi_width <= widths[0] && roi_height <= heights[0]) { - is_roi_valid = true; - } - - if (widths[0] < 64 || heights[0] < 64) { - *num_jpegs_with_unsupported_resolution += 1; - continue; - } - - if (subsampling == ROCJPEG_CSS_411 || subsampling == ROCJPEG_CSS_UNKNOWN) { - if (subsampling == ROCJPEG_CSS_411) { - *num_jpegs_with_411_subsampling += 1; - } - if (subsampling == ROCJPEG_CSS_UNKNOWN) { - *num_jpegs_with_unknown_subsampling += 1; - } - - continue; - } - - if (rocjpeg_util.GetChannelPitchAndSizes(decode_params, subsampling, widths, heights, num_channels, *output_image, channel_sizes)) { - std::cerr << "ERROR: Failed to get the channel pitch and sizes" << std::endl; - return; - } - - // allocate memory for each channel - for (int i = 0; i < num_channels; i++) { - if (prior_channel_sizes[i] != channel_sizes[i]) { - if (output_image->channel[i] != nullptr) { - CHECK_HIP(hipFree((void*)output_image->channel[i])); - output_image->channel[i] = nullptr; - } - 
CHECK_HIP(hipMalloc(&output_image->channel[i], channel_sizes[i])); - } - } - - CHECK_ROCJPEG(rocJpegDecode(rocjpeg_handle, rocjpeg_stream, &decode_params, output_image)); - *image_size_in_mpixels += (static_cast(widths[0]) * static_cast(heights[0]) / 1000000); - *num_decoded_images += 1; - - if (save_images) { - std::string image_save_path = output_file_path; - //if ROI is present, need to pass roi_width and roi_height - uint32_t width = is_roi_valid ? roi_width : widths[0]; - uint32_t height = is_roi_valid ? roi_height : heights[0]; - rocjpeg_util.GetOutputFileExt(decode_params.output_format, base_file_name, width, height, subsampling, image_save_path); - rocjpeg_util.SaveImage(image_save_path, output_image, width, height, subsampling, decode_params.output_format); - } - - for (int i = 0; i < ROCJPEG_MAX_COMPONENT; i++) { - prior_channel_sizes[i] = channel_sizes[i]; - } - - } -} - -int main(int argc, char **argv) { - int device_id = 0; - bool save_images = false; - int num_threads = 2; - int total_images_all = 0; - double image_per_sec_all = 0; - std::string input_path, output_file_path; - std::vector file_paths = {}; - bool is_dir = false; - bool is_file = false; - RocJpegChromaSubsampling subsampling; - RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE; - RocJpegDecodeParams decode_params = {}; - std::vector rocjpeg_handles; - std::vector rocjpeg_streams; - std::mutex mutex; - std::vector num_decoded_images_per_thread; - std::vector image_size_in_mpixels_per_thread; - std::vector rocjpeg_images; - RocJpegUtils rocjpeg_utils; - std::vector threads; - std::vector num_bad_jpegs; - std::vector num_jpegs_with_411_subsampling; - std::vector num_jpegs_with_unknown_subsampling; - std::vector num_jpegs_with_unsupported_resolution; - - RocJpegUtils::ParseCommandLine(input_path, output_file_path, save_images, device_id, rocjpeg_backend, decode_params, &num_threads, nullptr, argc, argv); - if (!RocJpegUtils::GetFilePaths(input_path, file_paths, is_dir, is_file)) { - 
std::cerr << "ERROR: Failed to get input file paths!" << std::endl; - return EXIT_FAILURE; - } - if (!RocJpegUtils::InitHipDevice(device_id)) { - std::cerr << "ERROR: Failed to initialize HIP!" << std::endl; - return EXIT_FAILURE; - } - - if (num_threads > file_paths.size()) { - num_threads = file_paths.size(); - } - - std::cout << "Creating decoder objects, please wait!" << std::endl; - for (int i = 0; i < num_threads; i++) { - RocJpegStreamHandle rocjpeg_stream; - RocJpegHandle rocjpeg_handle; - CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id, &rocjpeg_handle)); - rocjpeg_handles.push_back(std::move(rocjpeg_handle)); - CHECK_ROCJPEG(rocJpegStreamCreate(&rocjpeg_stream)); - rocjpeg_streams.push_back(std::move(rocjpeg_stream)); - } - num_decoded_images_per_thread.resize(num_threads, 0); - image_size_in_mpixels_per_thread.resize(num_threads, 0); - rocjpeg_images.resize(num_threads, {0}); - num_bad_jpegs.resize(num_threads, 0); - num_jpegs_with_411_subsampling.resize(num_threads, 0); - num_jpegs_with_unknown_subsampling.resize(num_threads, 0); - num_jpegs_with_unsupported_resolution.resize(num_threads, 0); - - std::cout << "Decoding started with " << num_threads << " threads, please wait!" 
<< std::endl; - auto start_time = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < num_threads; ++i) { - threads.emplace_back(ThreadFunction, std::ref(file_paths), rocjpeg_handles[i], rocjpeg_streams[i], rocjpeg_utils, &rocjpeg_images[i], std::ref(mutex), std::ref(decode_params), save_images, std::ref(output_file_path), - &num_decoded_images_per_thread[i], &image_size_in_mpixels_per_thread[i], &num_bad_jpegs[i], &num_jpegs_with_411_subsampling[i], &num_jpegs_with_unknown_subsampling[i], &num_jpegs_with_unsupported_resolution[i]); - } - for (auto& thread : threads) { - thread.join(); - } - auto end_time = std::chrono::high_resolution_clock::now(); - auto total_time_in_milli_sec = std::chrono::duration(end_time - start_time).count(); - - uint64_t total_decoded_images = 0; - double total_image_size_in_mpixels = 0; - uint64_t total_num_bad_jpegs = 0; - uint64_t total_num_jpegs_with_411_subsampling = 0; - uint64_t total_num_jpegs_with_unknown_subsampling = 0; - uint64_t total_num_jpegs_with_unsupported_resolution = 0; - - for (auto i = 0 ; i < num_threads; i++) { - total_decoded_images += num_decoded_images_per_thread[i]; - total_image_size_in_mpixels += image_size_in_mpixels_per_thread[i]; - total_num_bad_jpegs += num_bad_jpegs[i]; - total_num_jpegs_with_411_subsampling += num_jpegs_with_411_subsampling[i]; - total_num_jpegs_with_unknown_subsampling += num_jpegs_with_unknown_subsampling[i]; - total_num_jpegs_with_unsupported_resolution += num_jpegs_with_unsupported_resolution[i]; - for (int j = 0; j < ROCJPEG_MAX_COMPONENT; j++) { - if (rocjpeg_images[i].channel[j] != nullptr) { - CHECK_HIP(hipFree((void *)rocjpeg_images[i].channel[j])); - rocjpeg_images[i].channel[j] = nullptr; - } - } - } - - double average_decoding_time_in_milli_sec = total_time_in_milli_sec / total_decoded_images; - double avg_images_per_sec = 1000 / average_decoding_time_in_milli_sec; - double avg_image_size_in_mpixels_per_sec = total_image_size_in_mpixels * avg_images_per_sec / 
total_decoded_images; - std::cout << "Total elapsed time (ms): " << total_time_in_milli_sec << std::endl; - std::cout << "Total decoded images: " << total_decoded_images << std::endl; - if (total_num_bad_jpegs || total_num_jpegs_with_411_subsampling || total_num_jpegs_with_unknown_subsampling || total_num_jpegs_with_unsupported_resolution) { - std::cout << "Total skipped images: " << total_num_bad_jpegs + total_num_jpegs_with_411_subsampling + total_num_jpegs_with_unknown_subsampling + total_num_jpegs_with_unsupported_resolution; - if (total_num_bad_jpegs) { - std::cout << " ,total images that cannot be parsed: " << total_num_bad_jpegs; - } - if (total_num_jpegs_with_411_subsampling) { - std::cout << " ,total images with YUV 4:1:1 chroam subsampling: " << total_num_jpegs_with_411_subsampling; - } - if (total_num_jpegs_with_unknown_subsampling) { - std::cout << " ,total images with unknwon chroam subsampling: " << total_num_jpegs_with_unknown_subsampling; - } - if (total_num_jpegs_with_unsupported_resolution) { - std::cout << " ,total images with unsupported_resolution: " << total_num_jpegs_with_unsupported_resolution; - } - std::cout << std::endl; - } - - if (total_decoded_images > 0) { - std::cout << "Average processing time per image (ms): " << average_decoding_time_in_milli_sec << std::endl; - std::cout << "Average decoded images per sec (Images/Sec): " << avg_images_per_sec << std::endl; - std::cout << "Average decoded images size (Mpixels/Sec): " << avg_image_size_in_mpixels_per_sec << std::endl; - } - - for (auto& handle : rocjpeg_handles) { - CHECK_ROCJPEG(rocJpegDestroy(handle)); - } - for (auto& rocjpecg_stream : rocjpeg_streams) { - CHECK_ROCJPEG(rocJpegStreamDestroy(rocjpecg_stream)); - } - std::cout << "Decoding completed!" 
<< std::endl; - return EXIT_SUCCESS; -} \ No newline at end of file diff --git a/projects/rocjpeg/samples/jpegDecodeMultiThreads/CMakeLists.txt b/projects/rocjpeg/samples/jpegDecodePerf/CMakeLists.txt similarity index 96% rename from projects/rocjpeg/samples/jpegDecodeMultiThreads/CMakeLists.txt rename to projects/rocjpeg/samples/jpegDecodePerf/CMakeLists.txt index 512459538a..04e2d860fd 100644 --- a/projects/rocjpeg/samples/jpegDecodeMultiThreads/CMakeLists.txt +++ b/projects/rocjpeg/samples/jpegDecodePerf/CMakeLists.txt @@ -22,7 +22,7 @@ ################################################################################ cmake_minimum_required(VERSION 3.10) -project(jpegdecodemultithreads) +project(jpegdecodeperf) set(CMAKE_CXX_STANDARD 17) # ROCM Path @@ -65,7 +65,7 @@ if(HIP_FOUND AND ROCJPEG_FOUND AND Threads_FOUND) # rocJPEG include_directories (${ROCJPEG_INCLUDE_DIR}) set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCJPEG_LIBRARY}) - list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecodemultithreads.cpp) + list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecodeperf.cpp) add_executable(${PROJECT_NAME} ${SOURCES}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17") target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST}) diff --git a/projects/rocjpeg/samples/jpegDecodeMultiThreads/README.md b/projects/rocjpeg/samples/jpegDecodePerf/README.md similarity index 65% rename from projects/rocjpeg/samples/jpegDecodeMultiThreads/README.md rename to projects/rocjpeg/samples/jpegDecodePerf/README.md index 76a8544cab..4d19731670 100644 --- a/projects/rocjpeg/samples/jpegDecodeMultiThreads/README.md +++ b/projects/rocjpeg/samples/jpegDecodePerf/README.md @@ -1,6 +1,6 @@ # JPEG decode multi-threads sample -The jpeg decode multi-threads sample illustrates decoding JPEG images using rocJPEG library with multiple threads to get the individual decoded images in one of the supported output format (i.e., native, yuv, y, rgb, rgb_planar). 
This sample can be configured with a device ID and optionally able to dump the output to a file. +The jpeg decode perf sample illustrates decoding JPEG images by batches of specified size with multiple threads using the rocJPEG library to achieve optimal performance. The individual decoded images can be retrieved in one of the supported output formats (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and can optionally dump the output to a file. ## Prerequisites: @@ -9,7 +9,7 @@ The jpeg decode multi-threads sample illustrates decoding JPEG images using rocJ ## Build ```shell -mkdir jpeg_decode_threads_sample && cd jpeg_decode_threads_sample +mkdir jpeg_decode_perf_sample && cd jpeg_decode_perf_sample cmake ../ make -j ``` @@ -17,12 +17,13 @@ make -j ## Run ```shell -./jpegdecodemultithreads -i <[input path] - input path to a single JPEG image or a directory containing JPEG images - [required]> +./jpegdecodeperf -i <[input path] - input path to a single JPEG image or a directory containing JPEG images - [required]> -be <[backend] - select rocJPEG backend (0 for hardware-accelerated JPEG decoding using VCN, 1 for hybrid JPEG decoding using CPU and GPU HIP kernels (currently not supported)) [optional - default: 0]> -fmt <[output format] - select rocJPEG output format for decoding, one of the [native, yuv_planar, y, rgb, rgb_planar] [optional - default: native]> -o <[output path] - path to an output file or a path to a directory - write decoded images to a file or directory based on selected output format [optional]> -d <[device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on) [optional - default: 0]> -crop <[crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]> - -t <[threads] - number of threads for parallel JPEG
decoding [optional - default: 1]> + -b <[batch_size] - decode images from input by batches of a specified size - [optional - default: 1]> ``` \ No newline at end of file diff --git a/projects/rocjpeg/samples/jpegDecodePerf/jpegdecodeperf.cpp b/projects/rocjpeg/samples/jpegDecodePerf/jpegdecodeperf.cpp new file mode 100644 index 0000000000..b5fa60a515 --- /dev/null +++ b/projects/rocjpeg/samples/jpegDecodePerf/jpegdecodeperf.cpp @@ -0,0 +1,311 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "../rocjpeg_samples_utils.h" + +struct DecodeInfo { + std::vector file_paths; + RocJpegHandle rocjpeg_handle; + std::vector rocjpeg_stream_handles; + uint64_t num_decoded_images; + double images_per_sec; + double image_size_in_mpixels_per_sec; + uint64_t num_bad_jpegs; + uint64_t num_jpegs_with_411_subsampling; + uint64_t num_jpegs_with_unknown_subsampling; + uint64_t num_jpegs_with_unsupported_resolution; +}; + +/** + * @brief Decodes a batch of JPEG images and optionally saves the decoded images. + * + * @param decode_info parameters info for decoding a batch of jpeg images. + * @param rocjpeg_utils Utility functions for RocJpeg operations. + * @param decode_params Parameters for decoding the JPEG images (output_format, crop_rectangle) + * @param save_images A boolean flag indicating whether to save the decoded images. + * @param output_file_path The file path where the decoded images will be saved. + * @param batch_size The number of images to be processed in each batch. 
+ */ +void DecodeImages(DecodeInfo &decode_info, RocJpegUtils rocjpeg_utils, RocJpegDecodeParams &decode_params, bool save_images, std::string &output_file_path, int batch_size) { + + bool is_roi_valid = false; + uint32_t roi_width; + uint32_t roi_height; + roi_width = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left; + roi_height = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top; + uint8_t num_components; + uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {}; + std::string chroma_sub_sampling = ""; + uint32_t num_channels = 0; + double image_size_in_mpixels_all = 0; + double total_decode_time_in_milli_sec = 0; + int current_batch_size = 0; + std::vector> batch_images(batch_size); + std::vector> widths(batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); + std::vector> heights(batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); + std::vector> prior_channel_sizes(batch_size, std::vector(ROCJPEG_MAX_COMPONENT, 0)); + std::vector subsamplings(batch_size); + std::vector output_images(batch_size); + std::vector base_file_names(batch_size); + std::vector rocjpeg_stream_handles(batch_size); + std::vector temp_widths(ROCJPEG_MAX_COMPONENT, 0); + std::vector temp_heights(ROCJPEG_MAX_COMPONENT, 0); + RocJpegChromaSubsampling temp_subsampling; + std::string temp_base_file_name; + + for (int i = 0; i < decode_info.file_paths.size(); i += batch_size) { + int batch_end = std::min(i + batch_size, static_cast(decode_info.file_paths.size())); + for (int j = i; j < batch_end; j++) { + int index = j - i; + + temp_base_file_name = decode_info.file_paths[j].substr(decode_info.file_paths[j].find_last_of("/\\") + 1); + // Read an image from disk. 
+ std::ifstream input(decode_info.file_paths[j].c_str(), std::ios::in | std::ios::binary | std::ios::ate); + if (!(input.is_open())) { + std::cerr << "ERROR: Cannot open image: " << decode_info.file_paths[j] << std::endl; + return; + } + // Get the size + std::streamsize file_size = input.tellg(); + input.seekg(0, std::ios::beg); + // resize if buffer is too small + if (batch_images[index].size() < file_size) { + batch_images[index].resize(file_size); + } + if (!input.read(batch_images[index].data(), file_size)) { + std::cerr << "ERROR: Cannot read from file: " << decode_info.file_paths[j] << std::endl; + return; + } + + RocJpegStatus rocjpeg_status = rocJpegStreamParse(reinterpret_cast(batch_images[index].data()), file_size, decode_info.rocjpeg_stream_handles[index]); + if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) { + decode_info.num_bad_jpegs++; + std::cerr << "Skipping decoding input file: " << decode_info.file_paths[j] << std::endl; + continue; + } + + CHECK_ROCJPEG(rocJpegGetImageInfo(decode_info.rocjpeg_handle, decode_info.rocjpeg_stream_handles[index], &num_components, &temp_subsampling, temp_widths.data(), temp_heights.data())); + if (roi_width > 0 && roi_height > 0 && roi_width <= temp_widths[0] && roi_height <= temp_heights[0]) { + is_roi_valid = true; + } + + rocjpeg_utils.GetChromaSubsamplingStr(temp_subsampling, chroma_sub_sampling); + if (temp_widths[0] < 64 || temp_heights[0] < 64) { + decode_info.num_jpegs_with_unsupported_resolution++; + continue; + } + + if (temp_subsampling == ROCJPEG_CSS_411 || temp_subsampling == ROCJPEG_CSS_UNKNOWN) { + if (temp_subsampling == ROCJPEG_CSS_411) { + decode_info.num_jpegs_with_411_subsampling++; + } + if (temp_subsampling == ROCJPEG_CSS_UNKNOWN) { + decode_info.num_jpegs_with_unknown_subsampling++; + } + continue; + } + + if (rocjpeg_utils.GetChannelPitchAndSizes(decode_params, temp_subsampling, temp_widths.data(), temp_heights.data(), num_channels, output_images[current_batch_size], channel_sizes)) { + 
std::cerr << "ERROR: Failed to get the channel pitch and sizes" << std::endl; + return; + } + + // allocate memory for each channel and reuse them if the sizes remain unchanged for a new image. + for (int n = 0; n < num_channels; n++) { + if (prior_channel_sizes[current_batch_size][n] != channel_sizes[n]) { + if (output_images[current_batch_size].channel[n] != nullptr) { + CHECK_HIP(hipFree((void *)output_images[current_batch_size].channel[n])); + output_images[current_batch_size].channel[n] = nullptr; + } + CHECK_HIP(hipMalloc(&output_images[current_batch_size].channel[n], channel_sizes[n])); + prior_channel_sizes[current_batch_size][n] = channel_sizes[n]; + } + } + + rocjpeg_stream_handles[current_batch_size] = decode_info.rocjpeg_stream_handles[index]; + subsamplings[current_batch_size] = temp_subsampling; + widths[current_batch_size] = temp_widths; + heights[current_batch_size] = temp_heights; + base_file_names[current_batch_size] = temp_base_file_name; + current_batch_size++; + } + + double time_per_batch_in_milli_sec = 0; + if (current_batch_size > 0) { + auto start_time = std::chrono::high_resolution_clock::now(); + CHECK_ROCJPEG(rocJpegDecodeBatched(decode_info.rocjpeg_handle, rocjpeg_stream_handles.data(), current_batch_size, &decode_params, output_images.data())); + auto end_time = std::chrono::high_resolution_clock::now(); + time_per_batch_in_milli_sec = std::chrono::duration(end_time - start_time).count(); + } + + double image_size_in_mpixels = 0; + for (int b = 0; b < current_batch_size; b++) { + image_size_in_mpixels += (static_cast(widths[b][0]) * static_cast(heights[b][0]) / 1000000); + } + + decode_info.num_decoded_images += current_batch_size; + + if (save_images) { + for (int b = 0; b < current_batch_size; b++) { + std::string image_save_path = output_file_path; + //if ROI is present, need to pass roi_width and roi_height + uint32_t width = is_roi_valid ? roi_width : widths[b][0]; + uint32_t height = is_roi_valid ? 
roi_height : heights[b][0]; + rocjpeg_utils.GetOutputFileExt(decode_params.output_format, base_file_names[b], width, height, subsamplings[b], image_save_path); + rocjpeg_utils.SaveImage(image_save_path, &output_images[b], width, height, subsamplings[b], decode_params.output_format); + } + } + + total_decode_time_in_milli_sec += time_per_batch_in_milli_sec; + image_size_in_mpixels_all += image_size_in_mpixels; + + current_batch_size = 0; + } + + double avg_time_per_image = decode_info.num_decoded_images > 0 ? total_decode_time_in_milli_sec / decode_info.num_decoded_images : 0; + decode_info.images_per_sec = avg_time_per_image > 0 ? 1000 / avg_time_per_image : 0; + decode_info.image_size_in_mpixels_per_sec = decode_info.num_decoded_images > 0 ? decode_info.images_per_sec * image_size_in_mpixels_all / decode_info.num_decoded_images : 0; + + for (auto& it : output_images) { + for (int i = 0; i < ROCJPEG_MAX_COMPONENT; i++) { + if (it.channel[i] != nullptr) { + CHECK_HIP(hipFree((void *)it.channel[i])); + it.channel[i] = nullptr; + } + } + } +} + +int main(int argc, char **argv) { + int device_id = 0; + bool save_images = false; + int num_threads = 1; + int batch_size = 1; + bool is_dir = false; + bool is_file = false; + RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE; + RocJpegDecodeParams decode_params = {}; + RocJpegUtils rocjpeg_utils; + std::string input_path, output_file_path; + std::vector file_paths = {}; + std::vector decode_info_per_thread; + + RocJpegUtils::ParseCommandLine(input_path, output_file_path, save_images, device_id, rocjpeg_backend, decode_params, &num_threads, &batch_size, argc, argv); + if (!RocJpegUtils::GetFilePaths(input_path, file_paths, is_dir, is_file)) { + std::cerr << "ERROR: Failed to get input file paths!" << std::endl; + return EXIT_FAILURE; + } + if (!RocJpegUtils::InitHipDevice(device_id)) { + std::cerr << "ERROR: Failed to initialize HIP!" 
<< std::endl; + return EXIT_FAILURE; + } + + if (num_threads > file_paths.size()) { + num_threads = file_paths.size(); + } + + decode_info_per_thread.resize(num_threads); + + for (int i = 0; i < num_threads; i++) { + CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id, &decode_info_per_thread[i].rocjpeg_handle)); + decode_info_per_thread[i].rocjpeg_stream_handles.resize(batch_size); + for (auto j = 0; j < batch_size; j++) { + CHECK_ROCJPEG(rocJpegStreamCreate(&decode_info_per_thread[i].rocjpeg_stream_handles[j])); + } + decode_info_per_thread[i].num_decoded_images = 0; + decode_info_per_thread[i].images_per_sec = 0; + decode_info_per_thread[i].image_size_in_mpixels_per_sec = 0; + decode_info_per_thread[i].num_bad_jpegs = 0; + decode_info_per_thread[i].num_jpegs_with_411_subsampling = 0; + decode_info_per_thread[i].num_jpegs_with_unknown_subsampling = 0; + decode_info_per_thread[i].num_jpegs_with_unsupported_resolution = 0; + } + + ThreadPool thread_pool(num_threads); + + size_t files_per_thread = file_paths.size() / num_threads; + size_t remaining_files = file_paths.size() % num_threads; + size_t start_index = 0; + for (int i = 0; i < num_threads; i++) { + size_t end_index = start_index + files_per_thread + (i < remaining_files ? 1 : 0); + decode_info_per_thread[i].file_paths.assign(file_paths.begin() + start_index, file_paths.begin() + end_index); + start_index = end_index; + } + + std::cout << "Decoding started with " << num_threads << " threads, please wait!" 
<< std::endl; + for (int i = 0; i < num_threads; ++i) { + thread_pool.ExecuteJob(std::bind(DecodeImages, std::ref(decode_info_per_thread[i]), rocjpeg_utils, std::ref(decode_params), save_images, std::ref(output_file_path), batch_size)); + } + thread_pool.JoinThreads(); + + uint64_t total_decoded_images = 0; + double total_images_per_sec = 0; + double total_image_size_in_mpixels_per_sec = 0; + uint64_t total_num_bad_jpegs = 0; + uint64_t total_num_jpegs_with_411_subsampling = 0; + uint64_t total_num_jpegs_with_unknown_subsampling = 0; + uint64_t total_num_jpegs_with_unsupported_resolution = 0; + + for (auto i = 0; i < num_threads; i++) { + total_decoded_images += decode_info_per_thread[i].num_decoded_images; + total_image_size_in_mpixels_per_sec += decode_info_per_thread[i].image_size_in_mpixels_per_sec; + total_images_per_sec += decode_info_per_thread[i].images_per_sec; + total_num_bad_jpegs += decode_info_per_thread[i].num_bad_jpegs; + total_num_jpegs_with_411_subsampling += decode_info_per_thread[i].num_jpegs_with_411_subsampling; + total_num_jpegs_with_unknown_subsampling += decode_info_per_thread[i].num_jpegs_with_unknown_subsampling; + total_num_jpegs_with_unsupported_resolution += decode_info_per_thread[i].num_jpegs_with_unsupported_resolution; + } + + std::cout << "Total decoded images: " << total_decoded_images << std::endl; + if (total_num_bad_jpegs || total_num_jpegs_with_411_subsampling || total_num_jpegs_with_unknown_subsampling || total_num_jpegs_with_unsupported_resolution) { + std::cout << "Total skipped images: " << total_num_bad_jpegs + total_num_jpegs_with_411_subsampling + total_num_jpegs_with_unknown_subsampling + total_num_jpegs_with_unsupported_resolution; + if (total_num_bad_jpegs) { + std::cout << " ,total images that cannot be parsed: " << total_num_bad_jpegs; + } + if (total_num_jpegs_with_411_subsampling) { + std::cout << " ,total images with YUV 4:1:1 chroam subsampling: " << total_num_jpegs_with_411_subsampling; + } + if 
(total_num_jpegs_with_unknown_subsampling) { + std::cout << " ,total images with unknwon chroam subsampling: " << total_num_jpegs_with_unknown_subsampling; + } + if (total_num_jpegs_with_unsupported_resolution) { + std::cout << " ,total images with unsupported_resolution: " << total_num_jpegs_with_unsupported_resolution; + } + std::cout << std::endl; + } + + if (total_decoded_images > 0) { + std::cout << "Average processing time per image (ms): " << 1000 / total_images_per_sec << std::endl; + std::cout << "Average decoded images per sec (Images/Sec): " << total_images_per_sec << std::endl; + std::cout << "Average decoded images size (Mpixels/Sec): " << total_image_size_in_mpixels_per_sec << std::endl; + } + + for (int i = 0; i < num_threads; i++) { + CHECK_ROCJPEG(rocJpegDestroy(decode_info_per_thread[i].rocjpeg_handle)); + for (auto j = 0; j < batch_size; j++) { + CHECK_ROCJPEG(rocJpegStreamDestroy(decode_info_per_thread[i].rocjpeg_stream_handles[j])); + } + } + + std::cout << "Decoding completed!" << std::endl; + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/projects/rocjpeg/samples/rocjpeg_samples_utils.h b/projects/rocjpeg/samples/rocjpeg_samples_utils.h index 83d6d1ba3f..a3a8f7c3c3 100644 --- a/projects/rocjpeg/samples/rocjpeg_samples_utils.h +++ b/projects/rocjpeg/samples/rocjpeg_samples_utils.h @@ -31,6 +31,9 @@ THE SOFTWARE. #include #include #include +#include +#include +#include #if __cplusplus >= 201703L && __has_include() #include namespace fs = std::filesystem; @@ -145,8 +148,12 @@ public: if (++i == argc) { ShowHelpAndExit("-t", num_threads != nullptr, batch_size != nullptr); } - if (num_threads != nullptr) + if (num_threads != nullptr) { *num_threads = atoi(argv[i]); + if (*num_threads <= 0 || *num_threads > 32) { + ShowHelpAndExit(argv[i], num_threads != nullptr, batch_size != nullptr); + } + } continue; } if (!strcmp(argv[i], "-b")) { @@ -204,6 +211,7 @@ public: * @return True if successful, false otherwise. 
/**
 * @brief Fixed-size pool of worker threads that run queued jobs in FIFO order.
 *
 * Jobs are queued with ExecuteJob() and picked up by whichever worker is idle.
 * JoinThreads() lets the workers finish everything still queued and then waits for them to exit.
 * It may be called more than once, and the destructor calls it as well, so a pool that goes out
 * of scope never destroys a joinable std::thread (which would call std::terminate).
 * Jobs queued after JoinThreads() has returned are never executed.
 */
class ThreadPool {
    public:
        /**
         * @brief Starts the worker threads.
         * @param nthreads Number of workers to create; values <= 0 create an empty pool.
         */
        ThreadPool(int nthreads) : shutdown_(false) {
            if (nthreads > 0) {
                threads_.reserve(static_cast<size_t>(nthreads));
            }
            for (int i = 0; i < nthreads; ++i)
                threads_.emplace_back([this, i] { ThreadEntry(i); });
        }

        // Workers hold a pointer to this object, so the pool must not be copied.
        ThreadPool(const ThreadPool &) = delete;
        ThreadPool &operator=(const ThreadPool &) = delete;

        ~ThreadPool() { JoinThreads(); }

        /**
         * @brief Signals shutdown and blocks until every worker has drained the queue and exited.
         */
        void JoinThreads() {
            {
                // Unblock any threads and tell them to stop
                std::unique_lock<std::mutex> lock(mutex_);
                shutdown_ = true;
                cond_var_.notify_all();
            }

            // Wait for all threads to stop; joinable() makes repeated calls harmless
            for (auto &thread : threads_) {
                if (thread.joinable()) {
                    thread.join();
                }
            }
        }

        /**
         * @brief Queues a job and wakes one waiting worker.
         * @param func Callable to run on a worker thread.
         */
        void ExecuteJob(std::function<void()> func) {
            // Place a job on the queue and unblock a thread
            std::unique_lock<std::mutex> lock(mutex_);
            decode_jobs_queue_.emplace(std::move(func));
            cond_var_.notify_one();
        }

    protected:
        /**
         * @brief Worker loop: waits for a job or for shutdown, runs jobs until the queue is empty.
         * @param i Index of the worker (currently unused).
         */
        void ThreadEntry(int i) {
            (void)i;
            std::function<void()> execute_decode_job;

            while (true) {
                {
                    std::unique_lock<std::mutex> lock(mutex_);
                    cond_var_.wait(lock, [&] { return shutdown_ || !decode_jobs_queue_.empty(); });
                    if (decode_jobs_queue_.empty()) {
                        // No jobs to do; shutting down
                        return;
                    }

                    execute_decode_job = std::move(decode_jobs_queue_.front());
                    decode_jobs_queue_.pop();
                }

                // Execute the decode job without holding any locks
                execute_decode_job();
            }
        }

        std::mutex mutex_;
        std::condition_variable cond_var_;
        bool shutdown_;
        std::queue<std::function<void()>> decode_jobs_queue_;
        std::vector<std::thread> threads_;
};