Update samples (#82)
* Update samples * Simplify the arguments of the DecodeImages function by grouping some of them into a struct. * Modify the logic for selecting the valid images per batch * Modify the logic for selecting the valid images per batch for jpegDecodeBatched sample too
Dieser Commit ist enthalten in:
committet von
GitHub
Ursprung
241f9d731c
Commit
a4f3daef1e
@@ -9,6 +9,7 @@ Documentation for rocJPEG is available at
|
||||
|
||||
* AMD Clang++ is now the default CXX compiler.
|
||||
* `rocJPEG-setup.py` setup script updates to common package install: Setup no longer installs public compiler package.
|
||||
* The jpegDecodeMultiThreads sample has been renamed to jpegDecodePerf, and batch decoding has been added to this sample instead of single image decoding for improved performance.
|
||||
|
||||
### Removed
|
||||
|
||||
|
||||
+1
-1
@@ -165,7 +165,7 @@ if(HIP_FOUND AND Libva_FOUND)
|
||||
install(DIRECTORY cmake DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME} COMPONENT dev)
|
||||
|
||||
install(FILES samples/jpegDecode/CMakeLists.txt samples/jpegDecode/jpegdecode.cpp samples/jpegDecode/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/jpegDecode COMPONENT dev)
|
||||
install(FILES samples/jpegDecodeMultiThreads/CMakeLists.txt samples/jpegDecodeMultiThreads/jpegdecodemultithreads.cpp samples/jpegDecodeMultiThreads/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/jpegDecodeMultiThreads COMPONENT dev)
|
||||
install(FILES samples/jpegDecodePerf/CMakeLists.txt samples/jpegDecodePerf/jpegdecodeperf.cpp samples/jpegDecodePerf/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/jpegDecodePerf COMPONENT dev)
|
||||
install(FILES samples/jpegDecodeBatched/CMakeLists.txt samples/jpegDecodeBatched/jpegdecodebatched.cpp samples/jpegDecodeBatched/README.md DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples/jpegDecodeBatched COMPONENT dev)
|
||||
install(FILES samples/rocjpeg_samples_utils.h DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/samples COMPONENT dev)
|
||||
install(DIRECTORY data/images DESTINATION ${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/ COMPONENT dev)
|
||||
|
||||
@@ -84,14 +84,14 @@ add_test(
|
||||
|
||||
add_test(
|
||||
NAME
|
||||
jpeg-decode-threads-fmt-native
|
||||
jpeg-decode-perf-fmt-native
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecodeMultiThreads"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecodeMultiThreads"
|
||||
--build-and-test "${CMAKE_CURRENT_SOURCE_DIR}/jpegDecodePerf"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecodePerf"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecodemultithreads"
|
||||
-i ${CMAKE_SOURCE_DIR}/data/images/ -t 2
|
||||
--test-command "jpegdecodeperf"
|
||||
-i ${CMAKE_SOURCE_DIR}/data/images/
|
||||
)
|
||||
|
||||
add_test(
|
||||
|
||||
@@ -10,6 +10,6 @@ The jpeg decode sample illustrates decoding a JPEG images using rocJPEG library
|
||||
|
||||
The jpeg decode batched sample illustrates decoding JPEG images in batches of a specified size using the rocJPEG library to get the individual decoded images in one of the supported output formats (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and can optionally dump the output to a file.
|
||||
|
||||
## [JPEG decode multi-threads](jpegDecodeMultiThreads)
|
||||
## [JPEG decode perf](jpegDecodePerf)
|
||||
|
||||
The jpeg decode multi threads sample illustrates decoding JPEG images using rocJPEG library with multiple threads to get the individual decoded images in one of the supported output format (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and optionally able to dump the output to a file.
|
||||
The jpeg decode perf sample illustrates decoding JPEG images in batches of a specified size with multiple threads using the rocJPEG library to achieve optimal performance. The individual decoded images can be retrieved in one of the supported output formats (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and can optionally dump the output to a file.
|
||||
@@ -24,5 +24,5 @@ make -j
|
||||
-o <[output path] - path to an output file or a path to a directory - write decoded images to a file or directory based on selected output format [optional]>
|
||||
-d <[device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on) - [optional - default: 0]>
|
||||
-crop <[crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]>
|
||||
-b <[batch_size] - decode images from input by batches of a specified size - [optional - default: 2]>
|
||||
-b <[batch_size] - decode images from input by batches of a specified size - [optional - default: 1]>
|
||||
```
|
||||
@@ -51,18 +51,16 @@ int main(int argc, char **argv) {
|
||||
RocJpegDecodeParams decode_params = {};
|
||||
RocJpegUtils rocjpeg_utils;
|
||||
std::vector<std::string> base_file_names;
|
||||
std::vector<int> bad_image_indices;
|
||||
std::vector<RocJpegStreamHandle> valid_rocjpeg_stream_handles;
|
||||
std::vector<RocJpegChromaSubsampling> valid_subsamplings;
|
||||
std::vector<std::vector<uint32_t>> valid_widths;
|
||||
std::vector<std::vector<uint32_t>> valid_heights;
|
||||
std::vector<std::vector<uint32_t>> valid_prior_channel_sizes;
|
||||
std::vector<RocJpegImage> valid_output_images;
|
||||
std::vector<std::string> valid_base_file_names;
|
||||
std::vector<RocJpegStreamHandle> rocjpeg_stream_handles_for_current_batch;
|
||||
std::vector<uint32_t> temp_widths(ROCJPEG_MAX_COMPONENT, 0);
|
||||
std::vector<uint32_t> temp_heights(ROCJPEG_MAX_COMPONENT, 0);
|
||||
RocJpegChromaSubsampling temp_subsampling;
|
||||
std::string temp_base_file_name;
|
||||
uint64_t num_bad_jpegs = 0;
|
||||
uint64_t num_jpegs_with_411_subsampling = 0;
|
||||
uint64_t num_jpegs_with_unknown_subsampling = 0;
|
||||
uint64_t num_jpegs_with_unsupported_resolution = 0;
|
||||
int current_batch_size = 0;
|
||||
|
||||
RocJpegUtils::ParseCommandLine(input_path, output_file_path, save_images, device_id, rocjpeg_backend, decode_params, nullptr, &batch_size, argc, argv);
|
||||
|
||||
@@ -96,20 +94,14 @@ int main(int argc, char **argv) {
|
||||
heights.resize(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
subsamplings.resize(batch_size);
|
||||
base_file_names.resize(batch_size);
|
||||
valid_rocjpeg_stream_handles.resize(batch_size);
|
||||
valid_output_images.resize(batch_size);
|
||||
valid_prior_channel_sizes.resize(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
valid_widths.resize(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
valid_heights.resize(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
valid_subsamplings.resize(batch_size);
|
||||
valid_base_file_names.resize(batch_size);
|
||||
rocjpeg_stream_handles_for_current_batch.resize(batch_size);
|
||||
|
||||
std::cout << "Decoding started, please wait! ... " << std::endl;
|
||||
for (int i = 0; i < file_paths.size(); i += batch_size) {
|
||||
int batch_end = std::min(i + batch_size, static_cast<int>(file_paths.size()));
|
||||
for (int j = i; j < batch_end; j++) {
|
||||
int index = j - i;
|
||||
base_file_names[index] = file_paths[j].substr(file_paths[j].find_last_of("/\\") + 1);
|
||||
temp_base_file_name = file_paths[j].substr(file_paths[j].find_last_of("/\\") + 1);
|
||||
// Read an image from disk.
|
||||
std::ifstream input(file_paths[j].c_str(), std::ios::in | std::ios::binary | std::ios::ate);
|
||||
if (!(input.is_open())) {
|
||||
@@ -131,7 +123,6 @@ int main(int argc, char **argv) {
|
||||
RocJpegStatus rocjpeg_status = rocJpegStreamParse(reinterpret_cast<uint8_t*>(batch_images[index].data()), file_size, rocjpeg_stream_handles[index]);
|
||||
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
|
||||
if (is_dir) {
|
||||
bad_image_indices.push_back(index);
|
||||
num_bad_jpegs++;
|
||||
std::cerr << "Skipping decoding input file: " << file_paths[j] << std::endl;
|
||||
continue;
|
||||
@@ -141,16 +132,15 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, rocjpeg_stream_handles[index], &num_components, &subsamplings[index], widths[index].data(), heights[index].data()));
|
||||
CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, rocjpeg_stream_handles[index], &num_components, &temp_subsampling, temp_widths.data(), temp_heights.data()));
|
||||
|
||||
if (roi_width > 0 && roi_height > 0 && roi_width <= widths[index][0] && roi_height <= heights[index][0]) {
|
||||
is_roi_valid = true;
|
||||
}
|
||||
|
||||
rocjpeg_utils.GetChromaSubsamplingStr(subsamplings[index], chroma_sub_sampling);
|
||||
if (widths[index][0] < 64 || heights[index][0] < 64) {
|
||||
rocjpeg_utils.GetChromaSubsamplingStr(temp_subsampling, chroma_sub_sampling);
|
||||
if (temp_widths[0] < 64 || temp_heights[0] < 64) {
|
||||
if (is_dir) {
|
||||
bad_image_indices.push_back(index);
|
||||
num_jpegs_with_unsupported_resolution++;
|
||||
continue;
|
||||
} else {
|
||||
@@ -159,83 +149,56 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
if (subsamplings[index] == ROCJPEG_CSS_411 || subsamplings[index] == ROCJPEG_CSS_UNKNOWN) {
|
||||
if (temp_subsampling == ROCJPEG_CSS_411 || temp_subsampling == ROCJPEG_CSS_UNKNOWN) {
|
||||
if (is_dir) {
|
||||
bad_image_indices.push_back(index);
|
||||
if (subsamplings[index] == ROCJPEG_CSS_411) {
|
||||
num_jpegs_with_411_subsampling++;
|
||||
}
|
||||
if (subsamplings[index] == ROCJPEG_CSS_UNKNOWN) {
|
||||
num_jpegs_with_unknown_subsampling++;
|
||||
}
|
||||
continue;
|
||||
if (temp_subsampling == ROCJPEG_CSS_411) {
|
||||
num_jpegs_with_411_subsampling++;
|
||||
}
|
||||
if (temp_subsampling == ROCJPEG_CSS_UNKNOWN) {
|
||||
num_jpegs_with_unknown_subsampling++;
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
std::cerr << "The chroma sub-sampling is not supported by VCN Hardware" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
if (rocjpeg_utils.GetChannelPitchAndSizes(decode_params, subsamplings[index], widths[index].data(), heights[index].data(), num_channels, output_images[index], channel_sizes)) {
|
||||
if (rocjpeg_utils.GetChannelPitchAndSizes(decode_params, temp_subsampling, temp_widths.data(), temp_heights.data(), num_channels, output_images[current_batch_size], channel_sizes)) {
|
||||
std::cerr << "ERROR: Failed to get the channel pitch and sizes" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// allocate memory for each channel and reuse them if the sizes remain unchanged for a new image.
|
||||
for (int n = 0; n < num_channels; n++) {
|
||||
if (prior_channel_sizes[index][n] != channel_sizes[n]) {
|
||||
if (output_images[index].channel[n] != nullptr) {
|
||||
CHECK_HIP(hipFree((void *)output_images[index].channel[n]));
|
||||
output_images[index].channel[n] = nullptr;
|
||||
if (prior_channel_sizes[current_batch_size][n] != channel_sizes[n]) {
|
||||
if (output_images[current_batch_size].channel[n] != nullptr) {
|
||||
CHECK_HIP(hipFree((void *)output_images[current_batch_size].channel[n]));
|
||||
output_images[current_batch_size].channel[n] = nullptr;
|
||||
}
|
||||
CHECK_HIP(hipMalloc(&output_images[index].channel[n], channel_sizes[n]));
|
||||
prior_channel_sizes[index][n] = channel_sizes[n];
|
||||
CHECK_HIP(hipMalloc(&output_images[current_batch_size].channel[n], channel_sizes[n]));
|
||||
prior_channel_sizes[current_batch_size][n] = channel_sizes[n];
|
||||
}
|
||||
}
|
||||
}
|
||||
int current_batch_size = batch_end - i - bad_image_indices.size();
|
||||
|
||||
// Select valid images for decoding
|
||||
if (current_batch_size > 0) {
|
||||
if (!bad_image_indices.empty()) {
|
||||
// Iterate through the batch images and select only the valid ones
|
||||
int valid_idx = 0;
|
||||
for (int idx = 0; idx < batch_size; idx++) {
|
||||
// Check if the current image index is not in the list of bad image indices
|
||||
if (std::find(bad_image_indices.begin(), bad_image_indices.end(), idx) == bad_image_indices.end()) {
|
||||
// Add the valid image index to the corresponding vectors
|
||||
valid_rocjpeg_stream_handles[valid_idx] = rocjpeg_stream_handles[idx];
|
||||
valid_subsamplings[valid_idx] = subsamplings[idx];
|
||||
valid_widths[valid_idx] = widths[idx];
|
||||
valid_heights[valid_idx] = heights[idx];
|
||||
valid_prior_channel_sizes[valid_idx] = prior_channel_sizes[idx];
|
||||
valid_output_images[valid_idx] = output_images[idx];
|
||||
valid_base_file_names[valid_idx] = base_file_names[idx];
|
||||
valid_idx++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If there are no bad images, select all the batch images
|
||||
valid_rocjpeg_stream_handles = rocjpeg_stream_handles;
|
||||
valid_subsamplings = subsamplings;
|
||||
valid_widths = widths;
|
||||
valid_heights = heights;
|
||||
valid_prior_channel_sizes = prior_channel_sizes;
|
||||
valid_output_images = output_images;
|
||||
valid_base_file_names = base_file_names;
|
||||
}
|
||||
rocjpeg_stream_handles_for_current_batch[current_batch_size] = rocjpeg_stream_handles[index];
|
||||
subsamplings[current_batch_size] = temp_subsampling;
|
||||
widths[current_batch_size] = temp_widths;
|
||||
heights[current_batch_size] = temp_heights;
|
||||
base_file_names[current_batch_size] = temp_base_file_name;
|
||||
current_batch_size++;
|
||||
}
|
||||
|
||||
double time_per_batch_in_milli_sec = 0;
|
||||
if (current_batch_size > 0) {
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
CHECK_ROCJPEG(rocJpegDecodeBatched(rocjpeg_handle, valid_rocjpeg_stream_handles.data(), current_batch_size, &decode_params, valid_output_images.data()));
|
||||
CHECK_ROCJPEG(rocJpegDecodeBatched(rocjpeg_handle, rocjpeg_stream_handles_for_current_batch.data(), current_batch_size, &decode_params, output_images.data()));
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
time_per_batch_in_milli_sec = std::chrono::duration<double, std::milli>(end_time - start_time).count();
|
||||
}
|
||||
|
||||
double image_size_in_mpixels = 0;
|
||||
for (int b = 0; b < current_batch_size; b++) {
|
||||
image_size_in_mpixels += (static_cast<double>(valid_widths[b][0]) * static_cast<double>(valid_heights[b][0]) / 1000000);
|
||||
image_size_in_mpixels += (static_cast<double>(widths[b][0]) * static_cast<double>(heights[b][0]) / 1000000);
|
||||
}
|
||||
|
||||
total_images += current_batch_size;
|
||||
@@ -244,12 +207,12 @@ int main(int argc, char **argv) {
|
||||
for (int b = 0; b < current_batch_size; b++) {
|
||||
std::string image_save_path = output_file_path;
|
||||
//if ROI is present, need to pass roi_width and roi_height
|
||||
uint32_t width = is_roi_valid ? roi_width : valid_widths[b][0];
|
||||
uint32_t height = is_roi_valid ? roi_height : valid_heights[b][0];
|
||||
uint32_t width = is_roi_valid ? roi_width : widths[b][0];
|
||||
uint32_t height = is_roi_valid ? roi_height : heights[b][0];
|
||||
if (is_dir) {
|
||||
rocjpeg_utils.GetOutputFileExt(decode_params.output_format, valid_base_file_names[b], width, height, valid_subsamplings[b], image_save_path);
|
||||
rocjpeg_utils.GetOutputFileExt(decode_params.output_format, base_file_names[b], width, height, subsamplings[b], image_save_path);
|
||||
}
|
||||
rocjpeg_utils.SaveImage(image_save_path, &valid_output_images[b], width, height, valid_subsamplings[b], decode_params.output_format);
|
||||
rocjpeg_utils.SaveImage(image_save_path, &output_images[b], width, height, subsamplings[b], decode_params.output_format);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -258,7 +221,7 @@ int main(int argc, char **argv) {
|
||||
mpixels_all += image_size_in_mpixels;
|
||||
}
|
||||
|
||||
bad_image_indices.clear();
|
||||
current_batch_size = 0;
|
||||
}
|
||||
|
||||
if (is_dir) {
|
||||
|
||||
@@ -1,270 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "../rocjpeg_samples_utils.h"
|
||||
|
||||
void ThreadFunction(std::vector<std::string>& jpegFiles, RocJpegHandle rocjpeg_handle, RocJpegStreamHandle rocjpeg_stream, RocJpegUtils rocjpeg_util, RocJpegImage *output_image, std::mutex &mutex,
|
||||
RocJpegDecodeParams &decode_params, bool save_images, std::string &output_file_path, uint64_t *num_decoded_images, double *image_size_in_mpixels, uint64_t *num_bad_jpegs, uint64_t *num_jpegs_with_411_subsampling,
|
||||
uint64_t *num_jpegs_with_unknown_subsampling, uint64_t *num_jpegs_with_unsupported_resolution) {
|
||||
|
||||
bool is_roi_valid = false;
|
||||
uint32_t roi_width;
|
||||
uint32_t roi_height;
|
||||
roi_width = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left;
|
||||
roi_height = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top;
|
||||
|
||||
std::vector<char> file_data;
|
||||
uint8_t num_components;
|
||||
uint32_t widths[ROCJPEG_MAX_COMPONENT] = {};
|
||||
uint32_t heights[ROCJPEG_MAX_COMPONENT] = {};
|
||||
uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {};
|
||||
uint32_t prior_channel_sizes[ROCJPEG_MAX_COMPONENT] = {};
|
||||
RocJpegChromaSubsampling subsampling;
|
||||
std::string chroma_sub_sampling = "";
|
||||
uint32_t num_channels = 0;
|
||||
|
||||
while (true) {
|
||||
// Get the next JPEG file to process
|
||||
std::string file_path;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
if (!jpegFiles.empty()) {
|
||||
file_path = jpegFiles.front();
|
||||
jpegFiles.erase(jpegFiles.begin());
|
||||
}
|
||||
}
|
||||
if (file_path.empty()) {
|
||||
// No more files to process
|
||||
break;
|
||||
}
|
||||
|
||||
std::string base_file_name = file_path.substr(file_path.find_last_of("/\\") + 1);
|
||||
// Read an image from disk.
|
||||
std::ifstream input(file_path.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
|
||||
if (!(input.is_open())) {
|
||||
std::cerr << "ERROR: Cannot open image: " << file_path << std::endl;
|
||||
return;
|
||||
}
|
||||
// Get the size
|
||||
std::streamsize file_size = input.tellg();
|
||||
input.seekg(0, std::ios::beg);
|
||||
// resize if buffer is too small
|
||||
if (file_data.size() < file_size) {
|
||||
file_data.resize(file_size);
|
||||
}
|
||||
if (!input.read(file_data.data(), file_size)) {
|
||||
std::cerr << "ERROR: Cannot read from file: " << file_path << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
RocJpegStatus rocjpeg_status = rocJpegStreamParse(reinterpret_cast<uint8_t *>(file_data.data()), file_size, rocjpeg_stream);
|
||||
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
|
||||
std::cerr << "Skipping decoding input file: " << file_path << std::endl;
|
||||
*num_bad_jpegs += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
CHECK_ROCJPEG(rocJpegGetImageInfo(rocjpeg_handle, rocjpeg_stream, &num_components, &subsampling, widths, heights));
|
||||
if (roi_width > 0 && roi_height > 0 && roi_width <= widths[0] && roi_height <= heights[0]) {
|
||||
is_roi_valid = true;
|
||||
}
|
||||
|
||||
if (widths[0] < 64 || heights[0] < 64) {
|
||||
*num_jpegs_with_unsupported_resolution += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (subsampling == ROCJPEG_CSS_411 || subsampling == ROCJPEG_CSS_UNKNOWN) {
|
||||
if (subsampling == ROCJPEG_CSS_411) {
|
||||
*num_jpegs_with_411_subsampling += 1;
|
||||
}
|
||||
if (subsampling == ROCJPEG_CSS_UNKNOWN) {
|
||||
*num_jpegs_with_unknown_subsampling += 1;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rocjpeg_util.GetChannelPitchAndSizes(decode_params, subsampling, widths, heights, num_channels, *output_image, channel_sizes)) {
|
||||
std::cerr << "ERROR: Failed to get the channel pitch and sizes" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// allocate memory for each channel
|
||||
for (int i = 0; i < num_channels; i++) {
|
||||
if (prior_channel_sizes[i] != channel_sizes[i]) {
|
||||
if (output_image->channel[i] != nullptr) {
|
||||
CHECK_HIP(hipFree((void*)output_image->channel[i]));
|
||||
output_image->channel[i] = nullptr;
|
||||
}
|
||||
CHECK_HIP(hipMalloc(&output_image->channel[i], channel_sizes[i]));
|
||||
}
|
||||
}
|
||||
|
||||
CHECK_ROCJPEG(rocJpegDecode(rocjpeg_handle, rocjpeg_stream, &decode_params, output_image));
|
||||
*image_size_in_mpixels += (static_cast<double>(widths[0]) * static_cast<double>(heights[0]) / 1000000);
|
||||
*num_decoded_images += 1;
|
||||
|
||||
if (save_images) {
|
||||
std::string image_save_path = output_file_path;
|
||||
//if ROI is present, need to pass roi_width and roi_height
|
||||
uint32_t width = is_roi_valid ? roi_width : widths[0];
|
||||
uint32_t height = is_roi_valid ? roi_height : heights[0];
|
||||
rocjpeg_util.GetOutputFileExt(decode_params.output_format, base_file_name, width, height, subsampling, image_save_path);
|
||||
rocjpeg_util.SaveImage(image_save_path, output_image, width, height, subsampling, decode_params.output_format);
|
||||
}
|
||||
|
||||
for (int i = 0; i < ROCJPEG_MAX_COMPONENT; i++) {
|
||||
prior_channel_sizes[i] = channel_sizes[i];
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int device_id = 0;
|
||||
bool save_images = false;
|
||||
int num_threads = 2;
|
||||
int total_images_all = 0;
|
||||
double image_per_sec_all = 0;
|
||||
std::string input_path, output_file_path;
|
||||
std::vector<std::string> file_paths = {};
|
||||
bool is_dir = false;
|
||||
bool is_file = false;
|
||||
RocJpegChromaSubsampling subsampling;
|
||||
RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE;
|
||||
RocJpegDecodeParams decode_params = {};
|
||||
std::vector<RocJpegHandle> rocjpeg_handles;
|
||||
std::vector<RocJpegStreamHandle> rocjpeg_streams;
|
||||
std::mutex mutex;
|
||||
std::vector<uint64_t> num_decoded_images_per_thread;
|
||||
std::vector<double> image_size_in_mpixels_per_thread;
|
||||
std::vector<RocJpegImage> rocjpeg_images;
|
||||
RocJpegUtils rocjpeg_utils;
|
||||
std::vector<std::thread> threads;
|
||||
std::vector<uint64_t> num_bad_jpegs;
|
||||
std::vector<uint64_t> num_jpegs_with_411_subsampling;
|
||||
std::vector<uint64_t> num_jpegs_with_unknown_subsampling;
|
||||
std::vector<uint64_t> num_jpegs_with_unsupported_resolution;
|
||||
|
||||
RocJpegUtils::ParseCommandLine(input_path, output_file_path, save_images, device_id, rocjpeg_backend, decode_params, &num_threads, nullptr, argc, argv);
|
||||
if (!RocJpegUtils::GetFilePaths(input_path, file_paths, is_dir, is_file)) {
|
||||
std::cerr << "ERROR: Failed to get input file paths!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (!RocJpegUtils::InitHipDevice(device_id)) {
|
||||
std::cerr << "ERROR: Failed to initialize HIP!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (num_threads > file_paths.size()) {
|
||||
num_threads = file_paths.size();
|
||||
}
|
||||
|
||||
std::cout << "Creating decoder objects, please wait!" << std::endl;
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
RocJpegStreamHandle rocjpeg_stream;
|
||||
RocJpegHandle rocjpeg_handle;
|
||||
CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id, &rocjpeg_handle));
|
||||
rocjpeg_handles.push_back(std::move(rocjpeg_handle));
|
||||
CHECK_ROCJPEG(rocJpegStreamCreate(&rocjpeg_stream));
|
||||
rocjpeg_streams.push_back(std::move(rocjpeg_stream));
|
||||
}
|
||||
num_decoded_images_per_thread.resize(num_threads, 0);
|
||||
image_size_in_mpixels_per_thread.resize(num_threads, 0);
|
||||
rocjpeg_images.resize(num_threads, {0});
|
||||
num_bad_jpegs.resize(num_threads, 0);
|
||||
num_jpegs_with_411_subsampling.resize(num_threads, 0);
|
||||
num_jpegs_with_unknown_subsampling.resize(num_threads, 0);
|
||||
num_jpegs_with_unsupported_resolution.resize(num_threads, 0);
|
||||
|
||||
std::cout << "Decoding started with " << num_threads << " threads, please wait!" << std::endl;
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
threads.emplace_back(ThreadFunction, std::ref(file_paths), rocjpeg_handles[i], rocjpeg_streams[i], rocjpeg_utils, &rocjpeg_images[i], std::ref(mutex), std::ref(decode_params), save_images, std::ref(output_file_path),
|
||||
&num_decoded_images_per_thread[i], &image_size_in_mpixels_per_thread[i], &num_bad_jpegs[i], &num_jpegs_with_411_subsampling[i], &num_jpegs_with_unknown_subsampling[i], &num_jpegs_with_unsupported_resolution[i]);
|
||||
}
|
||||
for (auto& thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto total_time_in_milli_sec = std::chrono::duration<double, std::milli>(end_time - start_time).count();
|
||||
|
||||
uint64_t total_decoded_images = 0;
|
||||
double total_image_size_in_mpixels = 0;
|
||||
uint64_t total_num_bad_jpegs = 0;
|
||||
uint64_t total_num_jpegs_with_411_subsampling = 0;
|
||||
uint64_t total_num_jpegs_with_unknown_subsampling = 0;
|
||||
uint64_t total_num_jpegs_with_unsupported_resolution = 0;
|
||||
|
||||
for (auto i = 0 ; i < num_threads; i++) {
|
||||
total_decoded_images += num_decoded_images_per_thread[i];
|
||||
total_image_size_in_mpixels += image_size_in_mpixels_per_thread[i];
|
||||
total_num_bad_jpegs += num_bad_jpegs[i];
|
||||
total_num_jpegs_with_411_subsampling += num_jpegs_with_411_subsampling[i];
|
||||
total_num_jpegs_with_unknown_subsampling += num_jpegs_with_unknown_subsampling[i];
|
||||
total_num_jpegs_with_unsupported_resolution += num_jpegs_with_unsupported_resolution[i];
|
||||
for (int j = 0; j < ROCJPEG_MAX_COMPONENT; j++) {
|
||||
if (rocjpeg_images[i].channel[j] != nullptr) {
|
||||
CHECK_HIP(hipFree((void *)rocjpeg_images[i].channel[j]));
|
||||
rocjpeg_images[i].channel[j] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double average_decoding_time_in_milli_sec = total_time_in_milli_sec / total_decoded_images;
|
||||
double avg_images_per_sec = 1000 / average_decoding_time_in_milli_sec;
|
||||
double avg_image_size_in_mpixels_per_sec = total_image_size_in_mpixels * avg_images_per_sec / total_decoded_images;
|
||||
std::cout << "Total elapsed time (ms): " << total_time_in_milli_sec << std::endl;
|
||||
std::cout << "Total decoded images: " << total_decoded_images << std::endl;
|
||||
if (total_num_bad_jpegs || total_num_jpegs_with_411_subsampling || total_num_jpegs_with_unknown_subsampling || total_num_jpegs_with_unsupported_resolution) {
|
||||
std::cout << "Total skipped images: " << total_num_bad_jpegs + total_num_jpegs_with_411_subsampling + total_num_jpegs_with_unknown_subsampling + total_num_jpegs_with_unsupported_resolution;
|
||||
if (total_num_bad_jpegs) {
|
||||
std::cout << " ,total images that cannot be parsed: " << total_num_bad_jpegs;
|
||||
}
|
||||
if (total_num_jpegs_with_411_subsampling) {
|
||||
std::cout << " ,total images with YUV 4:1:1 chroam subsampling: " << total_num_jpegs_with_411_subsampling;
|
||||
}
|
||||
if (total_num_jpegs_with_unknown_subsampling) {
|
||||
std::cout << " ,total images with unknwon chroam subsampling: " << total_num_jpegs_with_unknown_subsampling;
|
||||
}
|
||||
if (total_num_jpegs_with_unsupported_resolution) {
|
||||
std::cout << " ,total images with unsupported_resolution: " << total_num_jpegs_with_unsupported_resolution;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
if (total_decoded_images > 0) {
|
||||
std::cout << "Average processing time per image (ms): " << average_decoding_time_in_milli_sec << std::endl;
|
||||
std::cout << "Average decoded images per sec (Images/Sec): " << avg_images_per_sec << std::endl;
|
||||
std::cout << "Average decoded images size (Mpixels/Sec): " << avg_image_size_in_mpixels_per_sec << std::endl;
|
||||
}
|
||||
|
||||
for (auto& handle : rocjpeg_handles) {
|
||||
CHECK_ROCJPEG(rocJpegDestroy(handle));
|
||||
}
|
||||
for (auto& rocjpecg_stream : rocjpeg_streams) {
|
||||
CHECK_ROCJPEG(rocJpegStreamDestroy(rocjpecg_stream));
|
||||
}
|
||||
std::cout << "Decoding completed!" << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
+2
-2
@@ -22,7 +22,7 @@
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
project(jpegdecodemultithreads)
|
||||
project(jpegdecodeperf)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
# ROCM Path
|
||||
@@ -65,7 +65,7 @@ if(HIP_FOUND AND ROCJPEG_FOUND AND Threads_FOUND)
|
||||
# rocJPEG
|
||||
include_directories (${ROCJPEG_INCLUDE_DIR})
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCJPEG_LIBRARY})
|
||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecodemultithreads.cpp)
|
||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecodeperf.cpp)
|
||||
add_executable(${PROJECT_NAME} ${SOURCES})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
|
||||
target_link_libraries(${PROJECT_NAME} ${LINK_LIBRARY_LIST})
|
||||
@@ -1,6 +1,6 @@
|
||||
# JPEG decode perf sample
|
||||
|
||||
The jpeg decode multi-threads sample illustrates decoding JPEG images using rocJPEG library with multiple threads to get the individual decoded images in one of the supported output format (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and optionally able to dump the output to a file.
|
||||
The jpeg decode perf sample illustrates decoding JPEG images in batches of a specified size with multiple threads using the rocJPEG library to achieve optimal performance. The individual decoded images can be retrieved in one of the supported output formats (i.e., native, yuv, y, rgb, rgb_planar). This sample can be configured with a device ID and can optionally dump the output to a file.
|
||||
|
||||
## Prerequisites:
|
||||
|
||||
@@ -9,7 +9,7 @@ The jpeg decode multi-threads sample illustrates decoding JPEG images using rocJ
|
||||
## Build
|
||||
|
||||
```shell
|
||||
mkdir jpeg_decode_threads_sample && cd jpeg_decode_threads_sample
|
||||
mkdir jpeg_decode_perf_sample && cd jpeg_decode_perf_sample
|
||||
cmake ../
|
||||
make -j
|
||||
```
|
||||
@@ -17,12 +17,13 @@ make -j
|
||||
## Run
|
||||
|
||||
```shell
|
||||
./jpegdecodemultithreads -i <[input path] - input path to a single JPEG image or a directory containing JPEG images - [required]>
|
||||
./jpegdecodeperf -i <[input path] - input path to a single JPEG image or a directory containing JPEG images - [required]>
|
||||
-be <[backend] - select rocJPEG backend (0 for hardware-accelerated JPEG decoding using VCN,
|
||||
1 for hybrid JPEG decoding using CPU and GPU HIP kernels (currently not supported)) [optional - default: 0]>
|
||||
-fmt <[output format] - select rocJPEG output format for decoding, one of the [native, yuv_planar, y, rgb, rgb_planar] [optional - default: native]>
|
||||
-o <[output path] - path to an output file or a path to a directory - write decoded images to a file or directory based on selected output format [optional]>
|
||||
-d <[device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on) [optional - default: 0]>
|
||||
-crop <[crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]>
|
||||
-t <[threads] - number of threads for parallel JPEG decoding [optional - default: 2]>
|
||||
-t <[threads] - number of threads for parallel JPEG decoding [optional - default: 1]>
|
||||
-b <[batch_size] - decode images from input by batches of a specified size - [optional - default: 1]>
|
||||
```
|
||||
@@ -0,0 +1,311 @@
|
||||
/*
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "../rocjpeg_samples_utils.h"
|
||||
|
||||
struct DecodeInfo {
|
||||
std::vector<std::string> file_paths;
|
||||
RocJpegHandle rocjpeg_handle;
|
||||
std::vector<RocJpegStreamHandle> rocjpeg_stream_handles;
|
||||
uint64_t num_decoded_images;
|
||||
double images_per_sec;
|
||||
double image_size_in_mpixels_per_sec;
|
||||
uint64_t num_bad_jpegs;
|
||||
uint64_t num_jpegs_with_411_subsampling;
|
||||
uint64_t num_jpegs_with_unknown_subsampling;
|
||||
uint64_t num_jpegs_with_unsupported_resolution;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Decodes a batch of JPEG images and optionally saves the decoded images.
|
||||
*
|
||||
* @param decode_info parameters info for decoding a batch of jpeg images.
|
||||
* @param rocjpeg_utils Utility functions for RocJpeg operations.
|
||||
* @param decode_params Parameters for decoding the JPEG images (output_format, crop_rectangle)
|
||||
* @param save_images A boolean flag indicating whether to save the decoded images.
|
||||
* @param output_file_path The file path where the decoded images will be saved.
|
||||
* @param batch_size The number of images to be processed in each batch.
|
||||
*/
|
||||
void DecodeImages(DecodeInfo &decode_info, RocJpegUtils rocjpeg_utils, RocJpegDecodeParams &decode_params, bool save_images, std::string &output_file_path, int batch_size) {
|
||||
|
||||
bool is_roi_valid = false;
|
||||
uint32_t roi_width;
|
||||
uint32_t roi_height;
|
||||
roi_width = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left;
|
||||
roi_height = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top;
|
||||
uint8_t num_components;
|
||||
uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {};
|
||||
std::string chroma_sub_sampling = "";
|
||||
uint32_t num_channels = 0;
|
||||
double image_size_in_mpixels_all = 0;
|
||||
double total_decode_time_in_milli_sec = 0;
|
||||
int current_batch_size = 0;
|
||||
std::vector<std::vector<char>> batch_images(batch_size);
|
||||
std::vector<std::vector<uint32_t>> widths(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
std::vector<std::vector<uint32_t>> heights(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
std::vector<std::vector<uint32_t>> prior_channel_sizes(batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
std::vector<RocJpegChromaSubsampling> subsamplings(batch_size);
|
||||
std::vector<RocJpegImage> output_images(batch_size);
|
||||
std::vector<std::string> base_file_names(batch_size);
|
||||
std::vector<RocJpegStreamHandle> rocjpeg_stream_handles(batch_size);
|
||||
std::vector<uint32_t> temp_widths(ROCJPEG_MAX_COMPONENT, 0);
|
||||
std::vector<uint32_t> temp_heights(ROCJPEG_MAX_COMPONENT, 0);
|
||||
RocJpegChromaSubsampling temp_subsampling;
|
||||
std::string temp_base_file_name;
|
||||
|
||||
for (int i = 0; i < decode_info.file_paths.size(); i += batch_size) {
|
||||
int batch_end = std::min(i + batch_size, static_cast<int>(decode_info.file_paths.size()));
|
||||
for (int j = i; j < batch_end; j++) {
|
||||
int index = j - i;
|
||||
|
||||
temp_base_file_name = decode_info.file_paths[j].substr(decode_info.file_paths[j].find_last_of("/\\") + 1);
|
||||
// Read an image from disk.
|
||||
std::ifstream input(decode_info.file_paths[j].c_str(), std::ios::in | std::ios::binary | std::ios::ate);
|
||||
if (!(input.is_open())) {
|
||||
std::cerr << "ERROR: Cannot open image: " << decode_info.file_paths[j] << std::endl;
|
||||
return;
|
||||
}
|
||||
// Get the size
|
||||
std::streamsize file_size = input.tellg();
|
||||
input.seekg(0, std::ios::beg);
|
||||
// resize if buffer is too small
|
||||
if (batch_images[index].size() < file_size) {
|
||||
batch_images[index].resize(file_size);
|
||||
}
|
||||
if (!input.read(batch_images[index].data(), file_size)) {
|
||||
std::cerr << "ERROR: Cannot read from file: " << decode_info.file_paths[j] << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
RocJpegStatus rocjpeg_status = rocJpegStreamParse(reinterpret_cast<uint8_t*>(batch_images[index].data()), file_size, decode_info.rocjpeg_stream_handles[index]);
|
||||
if (rocjpeg_status != ROCJPEG_STATUS_SUCCESS) {
|
||||
decode_info.num_bad_jpegs++;
|
||||
std::cerr << "Skipping decoding input file: " << decode_info.file_paths[j] << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
CHECK_ROCJPEG(rocJpegGetImageInfo(decode_info.rocjpeg_handle, decode_info.rocjpeg_stream_handles[index], &num_components, &temp_subsampling, temp_widths.data(), temp_heights.data()));
|
||||
if (roi_width > 0 && roi_height > 0 && roi_width <= temp_widths[0] && roi_height <= temp_heights[0]) {
|
||||
is_roi_valid = true;
|
||||
}
|
||||
|
||||
rocjpeg_utils.GetChromaSubsamplingStr(temp_subsampling, chroma_sub_sampling);
|
||||
if (temp_widths[0] < 64 || temp_heights[0] < 64) {
|
||||
decode_info.num_jpegs_with_unsupported_resolution++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (temp_subsampling == ROCJPEG_CSS_411 || temp_subsampling == ROCJPEG_CSS_UNKNOWN) {
|
||||
if (temp_subsampling == ROCJPEG_CSS_411) {
|
||||
decode_info.num_jpegs_with_411_subsampling++;
|
||||
}
|
||||
if (temp_subsampling == ROCJPEG_CSS_UNKNOWN) {
|
||||
decode_info.num_jpegs_with_unknown_subsampling++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (rocjpeg_utils.GetChannelPitchAndSizes(decode_params, temp_subsampling, temp_widths.data(), temp_heights.data(), num_channels, output_images[current_batch_size], channel_sizes)) {
|
||||
std::cerr << "ERROR: Failed to get the channel pitch and sizes" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// allocate memory for each channel and reuse them if the sizes remain unchanged for a new image.
|
||||
for (int n = 0; n < num_channels; n++) {
|
||||
if (prior_channel_sizes[current_batch_size][n] != channel_sizes[n]) {
|
||||
if (output_images[current_batch_size].channel[n] != nullptr) {
|
||||
CHECK_HIP(hipFree((void *)output_images[current_batch_size].channel[n]));
|
||||
output_images[current_batch_size].channel[n] = nullptr;
|
||||
}
|
||||
CHECK_HIP(hipMalloc(&output_images[current_batch_size].channel[n], channel_sizes[n]));
|
||||
prior_channel_sizes[current_batch_size][n] = channel_sizes[n];
|
||||
}
|
||||
}
|
||||
|
||||
rocjpeg_stream_handles[current_batch_size] = decode_info.rocjpeg_stream_handles[index];
|
||||
subsamplings[current_batch_size] = temp_subsampling;
|
||||
widths[current_batch_size] = temp_widths;
|
||||
heights[current_batch_size] = temp_heights;
|
||||
base_file_names[current_batch_size] = temp_base_file_name;
|
||||
current_batch_size++;
|
||||
}
|
||||
|
||||
double time_per_batch_in_milli_sec = 0;
|
||||
if (current_batch_size > 0) {
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
CHECK_ROCJPEG(rocJpegDecodeBatched(decode_info.rocjpeg_handle, rocjpeg_stream_handles.data(), current_batch_size, &decode_params, output_images.data()));
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
time_per_batch_in_milli_sec = std::chrono::duration<double, std::milli>(end_time - start_time).count();
|
||||
}
|
||||
|
||||
double image_size_in_mpixels = 0;
|
||||
for (int b = 0; b < current_batch_size; b++) {
|
||||
image_size_in_mpixels += (static_cast<double>(widths[b][0]) * static_cast<double>(heights[b][0]) / 1000000);
|
||||
}
|
||||
|
||||
decode_info.num_decoded_images += current_batch_size;
|
||||
|
||||
if (save_images) {
|
||||
for (int b = 0; b < current_batch_size; b++) {
|
||||
std::string image_save_path = output_file_path;
|
||||
//if ROI is present, need to pass roi_width and roi_height
|
||||
uint32_t width = is_roi_valid ? roi_width : widths[b][0];
|
||||
uint32_t height = is_roi_valid ? roi_height : heights[b][0];
|
||||
rocjpeg_utils.GetOutputFileExt(decode_params.output_format, base_file_names[b], width, height, subsamplings[b], image_save_path);
|
||||
rocjpeg_utils.SaveImage(image_save_path, &output_images[b], width, height, subsamplings[b], decode_params.output_format);
|
||||
}
|
||||
}
|
||||
|
||||
total_decode_time_in_milli_sec += time_per_batch_in_milli_sec;
|
||||
image_size_in_mpixels_all += image_size_in_mpixels;
|
||||
|
||||
current_batch_size = 0;
|
||||
}
|
||||
|
||||
double avg_time_per_image = decode_info.num_decoded_images > 0 ? total_decode_time_in_milli_sec / decode_info.num_decoded_images : 0;
|
||||
decode_info.images_per_sec = avg_time_per_image > 0 ? 1000 / avg_time_per_image : 0;
|
||||
decode_info.image_size_in_mpixels_per_sec = decode_info.num_decoded_images > 0 ? decode_info.images_per_sec * image_size_in_mpixels_all / decode_info.num_decoded_images : 0;
|
||||
|
||||
for (auto& it : output_images) {
|
||||
for (int i = 0; i < ROCJPEG_MAX_COMPONENT; i++) {
|
||||
if (it.channel[i] != nullptr) {
|
||||
CHECK_HIP(hipFree((void *)it.channel[i]));
|
||||
it.channel[i] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int device_id = 0;
|
||||
bool save_images = false;
|
||||
int num_threads = 1;
|
||||
int batch_size = 1;
|
||||
bool is_dir = false;
|
||||
bool is_file = false;
|
||||
RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE;
|
||||
RocJpegDecodeParams decode_params = {};
|
||||
RocJpegUtils rocjpeg_utils;
|
||||
std::string input_path, output_file_path;
|
||||
std::vector<std::string> file_paths = {};
|
||||
std::vector<DecodeInfo> decode_info_per_thread;
|
||||
|
||||
RocJpegUtils::ParseCommandLine(input_path, output_file_path, save_images, device_id, rocjpeg_backend, decode_params, &num_threads, &batch_size, argc, argv);
|
||||
if (!RocJpegUtils::GetFilePaths(input_path, file_paths, is_dir, is_file)) {
|
||||
std::cerr << "ERROR: Failed to get input file paths!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (!RocJpegUtils::InitHipDevice(device_id)) {
|
||||
std::cerr << "ERROR: Failed to initialize HIP!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (num_threads > file_paths.size()) {
|
||||
num_threads = file_paths.size();
|
||||
}
|
||||
|
||||
decode_info_per_thread.resize(num_threads);
|
||||
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id, &decode_info_per_thread[i].rocjpeg_handle));
|
||||
decode_info_per_thread[i].rocjpeg_stream_handles.resize(batch_size);
|
||||
for (auto j = 0; j < batch_size; j++) {
|
||||
CHECK_ROCJPEG(rocJpegStreamCreate(&decode_info_per_thread[i].rocjpeg_stream_handles[j]));
|
||||
}
|
||||
decode_info_per_thread[i].num_decoded_images = 0;
|
||||
decode_info_per_thread[i].images_per_sec = 0;
|
||||
decode_info_per_thread[i].image_size_in_mpixels_per_sec = 0;
|
||||
decode_info_per_thread[i].num_bad_jpegs = 0;
|
||||
decode_info_per_thread[i].num_jpegs_with_411_subsampling = 0;
|
||||
decode_info_per_thread[i].num_jpegs_with_unknown_subsampling = 0;
|
||||
decode_info_per_thread[i].num_jpegs_with_unsupported_resolution = 0;
|
||||
}
|
||||
|
||||
ThreadPool thread_pool(num_threads);
|
||||
|
||||
size_t files_per_thread = file_paths.size() / num_threads;
|
||||
size_t remaining_files = file_paths.size() % num_threads;
|
||||
size_t start_index = 0;
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
size_t end_index = start_index + files_per_thread + (i < remaining_files ? 1 : 0);
|
||||
decode_info_per_thread[i].file_paths.assign(file_paths.begin() + start_index, file_paths.begin() + end_index);
|
||||
start_index = end_index;
|
||||
}
|
||||
|
||||
std::cout << "Decoding started with " << num_threads << " threads, please wait!" << std::endl;
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
thread_pool.ExecuteJob(std::bind(DecodeImages, std::ref(decode_info_per_thread[i]), rocjpeg_utils, std::ref(decode_params), save_images, std::ref(output_file_path), batch_size));
|
||||
}
|
||||
thread_pool.JoinThreads();
|
||||
|
||||
uint64_t total_decoded_images = 0;
|
||||
double total_images_per_sec = 0;
|
||||
double total_image_size_in_mpixels_per_sec = 0;
|
||||
uint64_t total_num_bad_jpegs = 0;
|
||||
uint64_t total_num_jpegs_with_411_subsampling = 0;
|
||||
uint64_t total_num_jpegs_with_unknown_subsampling = 0;
|
||||
uint64_t total_num_jpegs_with_unsupported_resolution = 0;
|
||||
|
||||
for (auto i = 0; i < num_threads; i++) {
|
||||
total_decoded_images += decode_info_per_thread[i].num_decoded_images;
|
||||
total_image_size_in_mpixels_per_sec += decode_info_per_thread[i].image_size_in_mpixels_per_sec;
|
||||
total_images_per_sec += decode_info_per_thread[i].images_per_sec;
|
||||
total_num_bad_jpegs += decode_info_per_thread[i].num_bad_jpegs;
|
||||
total_num_jpegs_with_411_subsampling += decode_info_per_thread[i].num_jpegs_with_411_subsampling;
|
||||
total_num_jpegs_with_unknown_subsampling += decode_info_per_thread[i].num_jpegs_with_unknown_subsampling;
|
||||
total_num_jpegs_with_unsupported_resolution += decode_info_per_thread[i].num_jpegs_with_unsupported_resolution;
|
||||
}
|
||||
|
||||
std::cout << "Total decoded images: " << total_decoded_images << std::endl;
|
||||
if (total_num_bad_jpegs || total_num_jpegs_with_411_subsampling || total_num_jpegs_with_unknown_subsampling || total_num_jpegs_with_unsupported_resolution) {
|
||||
std::cout << "Total skipped images: " << total_num_bad_jpegs + total_num_jpegs_with_411_subsampling + total_num_jpegs_with_unknown_subsampling + total_num_jpegs_with_unsupported_resolution;
|
||||
if (total_num_bad_jpegs) {
|
||||
std::cout << " ,total images that cannot be parsed: " << total_num_bad_jpegs;
|
||||
}
|
||||
if (total_num_jpegs_with_411_subsampling) {
|
||||
std::cout << " ,total images with YUV 4:1:1 chroam subsampling: " << total_num_jpegs_with_411_subsampling;
|
||||
}
|
||||
if (total_num_jpegs_with_unknown_subsampling) {
|
||||
std::cout << " ,total images with unknwon chroam subsampling: " << total_num_jpegs_with_unknown_subsampling;
|
||||
}
|
||||
if (total_num_jpegs_with_unsupported_resolution) {
|
||||
std::cout << " ,total images with unsupported_resolution: " << total_num_jpegs_with_unsupported_resolution;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
if (total_decoded_images > 0) {
|
||||
std::cout << "Average processing time per image (ms): " << 1000 / total_images_per_sec << std::endl;
|
||||
std::cout << "Average decoded images per sec (Images/Sec): " << total_images_per_sec << std::endl;
|
||||
std::cout << "Average decoded images size (Mpixels/Sec): " << total_image_size_in_mpixels_per_sec << std::endl;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_threads; i++) {
|
||||
CHECK_ROCJPEG(rocJpegDestroy(decode_info_per_thread[i].rocjpeg_handle));
|
||||
for (auto j = 0; j < batch_size; j++) {
|
||||
CHECK_ROCJPEG(rocJpegStreamDestroy(decode_info_per_thread[i].rocjpeg_stream_handles[j]));
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Decoding completed!" << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
@@ -31,6 +31,9 @@ THE SOFTWARE.
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <condition_variable>
|
||||
#include <queue>
|
||||
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
||||
#include <filesystem>
|
||||
namespace fs = std::filesystem;
|
||||
@@ -145,8 +148,12 @@ public:
|
||||
if (++i == argc) {
|
||||
ShowHelpAndExit("-t", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
if (num_threads != nullptr)
|
||||
if (num_threads != nullptr) {
|
||||
*num_threads = atoi(argv[i]);
|
||||
if (*num_threads <= 0 || *num_threads > 32) {
|
||||
ShowHelpAndExit(argv[i], num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "-b")) {
|
||||
@@ -204,6 +211,7 @@ public:
|
||||
* @return True if successful, false otherwise.
|
||||
*/
|
||||
static bool GetFilePaths(std::string &input_path, std::vector<std::string> &file_paths, bool &is_dir, bool &is_file) {
|
||||
std::cout << "Reading images from disk, please wait!" << std::endl;
|
||||
if (!fs::exists(input_path)) {
|
||||
std::cerr << "ERROR: the input path does not exist!" << std::endl;
|
||||
return false;
|
||||
@@ -639,10 +647,10 @@ private:
|
||||
"-crop [crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]\n"
|
||||
"-d [device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on) [optional - default: 0]\n";
|
||||
if (show_threads) {
|
||||
std::cout << "-t [threads] - number of threads for parallel JPEG decoding - [optional - default: 2]\n";
|
||||
std::cout << "-t [threads] - number of threads (<= 32) for parallel JPEG decoding - [optional - default: 1]\n";
|
||||
}
|
||||
if (show_batch_size) {
|
||||
std::cout << "-b [batch_size] - decode images from input by batches of a specified size - [optional - default: 2]\n";
|
||||
std::cout << "-b [batch_size] - decode images from input by batches of a specified size - [optional - default: 1]\n";
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
@@ -659,4 +667,64 @@ private:
|
||||
return (value + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
};
|
||||
#endif //ROC_JPEG_SAMPLES_COMMON
|
||||
|
||||
/**
 * @brief Fixed-size pool of worker threads that execute queued jobs in FIFO order.
 *
 * Jobs are queued with ExecuteJob() and picked up by the first idle worker. JoinThreads()
 * lets the workers drain the queue and then stops them; jobs queued after the workers have
 * stopped are never executed. The destructor joins the workers if JoinThreads() has not been
 * called, so destroying the pool never terminates the program because of a joinable thread.
 *
 * A job must not let an exception escape: it would propagate out of the worker thread.
 */
class ThreadPool {
    public:
        /**
         * @brief Starts the worker threads.
         * @param nthreads number of worker threads to create; values <= 0 create no workers.
         */
        ThreadPool(int nthreads) : shutdown_(false) {
            if (nthreads <= 0) {
                return;
            }
            // Create the specified number of threads
            threads_.reserve(nthreads);
            for (int i = 0; i < nthreads; ++i) {
                threads_.emplace_back([this, i] { ThreadEntry(i); });
            }
        }

        // Workers capture `this`, so the pool must be neither copied nor destroyed while they run.
        ThreadPool(const ThreadPool &) = delete;
        ThreadPool &operator=(const ThreadPool &) = delete;

        ~ThreadPool() { JoinThreads(); }

        /**
         * @brief Signals shutdown, waits until every queued job has run and joins all workers.
         *
         * Safe to call more than once; later calls find no joinable thread and return immediately.
         */
        void JoinThreads() {
            {
                // Unblock any threads and tell them to stop
                std::lock_guard<std::mutex> lock(mutex_);
                shutdown_ = true;
            }
            cond_var_.notify_all();

            // Wait for all threads to stop
            for (auto &thread : threads_) {
                if (thread.joinable()) {
                    thread.join();
                }
            }
        }

        /**
         * @brief Queues a job and wakes one worker.
         * @param func callable to execute on a worker thread.
         */
        void ExecuteJob(std::function<void()> func) {
            {
                // Place a job on the queue
                std::lock_guard<std::mutex> lock(mutex_);
                decode_jobs_queue_.emplace(std::move(func));
            }
            // Unblock a thread; notifying after the lock is released avoids waking it into a held mutex.
            cond_var_.notify_one();
        }

    protected:
        /**
         * @brief Worker loop: runs queued jobs until shutdown is requested and the queue is empty.
         */
        void ThreadEntry(int /*thread_index*/) {
            std::function<void()> execute_decode_job;

            while (true) {
                {
                    std::unique_lock<std::mutex> lock(mutex_);
                    cond_var_.wait(lock, [this] { return shutdown_ || !decode_jobs_queue_.empty(); });
                    if (decode_jobs_queue_.empty()) {
                        // No jobs to do; shutting down
                        return;
                    }

                    execute_decode_job = std::move(decode_jobs_queue_.front());
                    decode_jobs_queue_.pop();
                }

                // Execute the decode job without holding any locks
                execute_decode_job();
            }
        }

        std::mutex mutex_;
        std::condition_variable cond_var_;
        bool shutdown_;
        std::queue<std::function<void()>> decode_jobs_queue_;
        std::vector<std::thread> threads_;
};
|
||||
#endif //ROC_JPEG_SAMPLES_COMMON
|
||||
@@ -115,11 +115,11 @@ add_test(
|
||||
jpeg-decode-threads-fmt-native
|
||||
COMMAND
|
||||
"${CMAKE_CTEST_COMMAND}"
|
||||
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecodeMultiThreads"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecodeMultiThreads"
|
||||
--build-and-test "${ROCM_PATH}/share/rocjpeg/samples/jpegDecodePerf"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/jpegDecodePerf"
|
||||
--build-generator "${CMAKE_GENERATOR}"
|
||||
--test-command "jpegdecodemultithreads"
|
||||
-i ${ROCM_PATH}/share/rocjpeg/images/ -t 2
|
||||
--test-command "jpegdecodeperf"
|
||||
-i ${ROCM_PATH}/share/rocjpeg/images/
|
||||
)
|
||||
|
||||
add_test(
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren