JPEG Activity tracing in Perfetto (#108)
- Add JPEG activity track in perfetto trace - Add JPEG decode tests to the examples - Change existing videodecode test to include JPEG testing - Rename videodecode test file to decode to include jpeg tests too - Fix a bug in the test which checks for total activity of 0 - Disable rocDecode and rocJPEG samples from the github image files
Šī revīzija ir iekļauta:
@@ -102,7 +102,7 @@ jobs:
|
||||
-DROCPROFSYS_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10;py3.11"
|
||||
-DROCPROFSYS_CI_MPI_RUN_AS_ROOT=ON
|
||||
-DROCPROFSYS_MAX_THREADS=64
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode"
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode;jpegdecode"
|
||||
-DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
|
||||
|
||||
- name: Install
|
||||
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl"
|
||||
-DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
|
||||
--
|
||||
-LE "transpose|rccl|videodecode"
|
||||
-LE "transpose|rccl|videodecode|jpegdecode"
|
||||
|
||||
- name: Install
|
||||
timeout-minutes: 10
|
||||
|
||||
@@ -157,7 +157,7 @@ jobs:
|
||||
-DROCPROFSYS_PYTHON_PREFIX=/opt/conda/envs
|
||||
-DROCPROFSYS_PYTHON_ENVS="py3.6;py3.7;py3.8;py3.9;py3.10;py3.11"
|
||||
-DROCPROFSYS_MAX_THREADS=64
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode"
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode;jpegdecode"
|
||||
-DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
|
||||
-DMPI_HEADERS_ALLOW_MPICH=OFF
|
||||
|
||||
@@ -248,7 +248,7 @@ jobs:
|
||||
rocm-version: ['6.3']
|
||||
mpi-headers: ['OFF']
|
||||
build-jobs: ['3']
|
||||
ctest-exclude: ['-LE "transpose|videodecode"']
|
||||
ctest-exclude: ['-LE "transpose|videodecode|jpegdecode"']
|
||||
|
||||
env:
|
||||
BUILD_TYPE: MinSizeRel
|
||||
@@ -489,7 +489,7 @@ jobs:
|
||||
-DDYNINST_BUILD_STATIC_LIBS=OFF
|
||||
-DDYNINST_ELFUTILS_DOWNLOAD_VERSION=${{ env.ELFUTILS_DOWNLOAD_VERSION }}
|
||||
-DROCPROFSYS_MAX_THREADS=64
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode"
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode;jpegdecode"
|
||||
-DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
|
||||
-DMPI_HEADERS_ALLOW_MPICH=ON
|
||||
|
||||
@@ -628,5 +628,5 @@ jobs:
|
||||
-DROCPROFSYS_USE_ROCM=OFF
|
||||
-DROCPROFSYS_USE_RCCL=OFF
|
||||
-DROCPROFSYS_MAX_THREADS=64
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode"
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode;jpegdecode"
|
||||
-DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
|
||||
|
||||
@@ -190,7 +190,7 @@ jobs:
|
||||
-DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
|
||||
-DUSE_CLANG_OMP=OFF
|
||||
--
|
||||
-LE "transpose|rccl|videodecode"
|
||||
-LE "transpose|rccl|videodecode|jpegdecode"
|
||||
|
||||
- name: Install
|
||||
timeout-minutes: 10
|
||||
|
||||
@@ -173,6 +173,7 @@ if(ROCPROFSYS_USE_MPI)
|
||||
endif()
|
||||
if(ROCPROFSYS_BUILD_TESTING)
|
||||
list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocdecode-test")
|
||||
list(APPEND _DEBIAN_PACKAGE_DEPENDS "rocjpeg-test")
|
||||
endif()
|
||||
string(REPLACE ";" ", " _DEBIAN_PACKAGE_DEPENDS "${_DEBIAN_PACKAGE_DEPENDS}")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS
|
||||
|
||||
@@ -57,3 +57,4 @@ add_subdirectory(causal)
|
||||
add_subdirectory(trace-time-window)
|
||||
add_subdirectory(fork)
|
||||
add_subdirectory(videodecode)
|
||||
add_subdirectory(jpegdecode)
|
||||
|
||||
@@ -0,0 +1,132 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.18.4 FATAL_ERROR)
|
||||
|
||||
# This example requires hip and rocjpeg.
|
||||
find_package(HIP QUIET)
|
||||
|
||||
if(NOT HIP_FOUND)
|
||||
message(WARNING "hip is not found. Skip jpegdecode example.")
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Set AMD Clang as default compiler
|
||||
if(NOT DEFINED CMAKE_CXX_COMPILER)
|
||||
set(CMAKE_C_COMPILER ${ROCmVersion_DIR}/bin/amdclang)
|
||||
set(CMAKE_CXX_COMPILER ${ROCmVersion_DIR}/bin/amdclang++)
|
||||
endif()
|
||||
|
||||
project(rocprofiler-systems-jpegdecode-example)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED On)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/../../cmake)
|
||||
list(APPEND CMAKE_PREFIX_PATH ${ROCmVersion_DIR}/hip ${ROCmVersion_DIR})
|
||||
list(APPEND CMAKE_MODULE_PATH ${ROCmVersion_DIR}/share/rocjpeg/cmake)
|
||||
|
||||
set(CMAKE_BUILD_TYPE "RelWithDebInfo")
|
||||
string(REPLACE " " ";" _FLAGS "${CMAKE_CXX_FLAGS_DEBUG}")
|
||||
|
||||
if(ROCPROFSYS_DISABLE_EXAMPLES)
|
||||
get_filename_component(_DIR ${CMAKE_CURRENT_LIST_DIR} NAME)
|
||||
|
||||
if(${PROJECT_NAME} IN_LIST ROCPROFSYS_DISABLE_EXAMPLES OR ${_DIR} IN_LIST
|
||||
ROCPROFSYS_DISABLE_EXAMPLES)
|
||||
return()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Copy image files to build directory
|
||||
if(EXISTS "${ROCmVersion_DIR}/share/rocjpeg/images")
|
||||
if(NOT EXISTS "${CMAKE_BINARY_DIR}/images")
|
||||
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/images")
|
||||
endif()
|
||||
|
||||
file(GLOB_RECURSE image_files "${ROCmVersion_DIR}/share/rocjpeg/images/*")
|
||||
file(COPY ${image_files} DESTINATION ${CMAKE_BINARY_DIR}/images)
|
||||
else()
|
||||
message(
|
||||
AUTHOR_WARNING
|
||||
"Source directory ${ROCmVersion_DIR}/share/rocjpeg/images does not exist")
|
||||
endif()
|
||||
|
||||
find_package(rocJPEG QUIET)
|
||||
find_package(rocprofiler-register QUIET)
|
||||
|
||||
# threads
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
if(HIP_FOUND
|
||||
AND ROCJPEG_FOUND
|
||||
AND Threads_FOUND
|
||||
AND rocprofiler-register_FOUND)
|
||||
# HIP
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} hip::host)
|
||||
# threads
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} Threads::Threads)
|
||||
# std filesystem
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs)
|
||||
# rocprofiler-register
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} rocprofiler-register::rocprofiler-register)
|
||||
|
||||
# rocJPEG
|
||||
include_directories(${ROCJPEG_INCLUDE_DIR})
|
||||
set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} ${ROCJPEG_LIBRARY})
|
||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR} jpegdecodeperf.cpp)
|
||||
add_executable(jpegdecode ${SOURCES})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
|
||||
target_link_libraries(jpegdecode ${LINK_LIBRARY_LIST})
|
||||
target_compile_options(jpegdecode PRIVATE ${_FLAGS})
|
||||
|
||||
if(ROCPROFSYS_INSTALL_EXAMPLES)
|
||||
install(
|
||||
TARGETS jpegdecode
|
||||
DESTINATION bin
|
||||
COMPONENT rocprofiler-systems-examples)
|
||||
install(
|
||||
FILES ${CMAKE_BINARY_DIR}/images
|
||||
DESTINATION share/rocprofiler-systems/tests/images
|
||||
COMPONENT rocprofiler-systems-examples)
|
||||
endif()
|
||||
else()
|
||||
message(
|
||||
"-- ERROR!: ${PROJECT_NAME} excluded! please install all the dependencies and try again!"
|
||||
)
|
||||
if(NOT HIP_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: HIP Not Found! - please install ROCm and HIP!")
|
||||
endif()
|
||||
if(NOT ROCJPEG_FOUND)
|
||||
message(WARNING "-- ERROR!: rocJPEG Not Found! - please install rocJPEG!")
|
||||
endif()
|
||||
if(NOT Threads_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: Threads Not Found! - please insatll Threads!")
|
||||
endif()
|
||||
if(NOT rocprofiler-register_FOUND)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"-- ERROR!: rocprofiler-register Not Found! - please install rocprofiler-register!"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
@@ -0,0 +1,420 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "rocjpeg_samples_utils.h"
|
||||
|
||||
struct DecodeInfo
|
||||
{
|
||||
std::vector<std::string> file_paths;
|
||||
RocJpegHandle rocjpeg_handle;
|
||||
std::vector<RocJpegStreamHandle> rocjpeg_stream_handles;
|
||||
uint64_t num_decoded_images;
|
||||
double images_per_sec;
|
||||
double image_size_in_mpixels_per_sec;
|
||||
uint64_t num_bad_jpegs;
|
||||
uint64_t num_jpegs_with_411_subsampling;
|
||||
uint64_t num_jpegs_with_unknown_subsampling;
|
||||
uint64_t num_jpegs_with_unsupported_resolution;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Decodes a batch of JPEG images and optionally saves the decoded images.
|
||||
*
|
||||
* @param decode_info parameters info for decoding a batch of jpeg images.
|
||||
* @param rocjpeg_utils Utility functions for RocJpeg operations.
|
||||
* @param decode_params Parameters for decoding the JPEG images (output_format,
|
||||
* crop_rectangle)
|
||||
* @param save_images A boolean flag indicating whether to save the decoded images.
|
||||
* @param output_file_path The file path where the decoded images will be saved.
|
||||
* @param batch_size The number of images to be processed in each batch.
|
||||
*/
|
||||
void
|
||||
DecodeImages(DecodeInfo& decode_info, RocJpegUtils rocjpeg_utils,
|
||||
RocJpegDecodeParams& decode_params, bool save_images,
|
||||
std::string& output_file_path, int batch_size, int device_id)
|
||||
{
|
||||
bool is_roi_valid = false;
|
||||
uint32_t roi_width;
|
||||
uint32_t roi_height;
|
||||
roi_width = decode_params.crop_rectangle.right - decode_params.crop_rectangle.left;
|
||||
roi_height = decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top;
|
||||
uint8_t num_components;
|
||||
uint32_t channel_sizes[ROCJPEG_MAX_COMPONENT] = {};
|
||||
std::string chroma_sub_sampling = "";
|
||||
uint32_t num_channels = 0;
|
||||
double image_size_in_mpixels_all = 0;
|
||||
double total_decode_time_in_milli_sec = 0;
|
||||
int current_batch_size = 0;
|
||||
std::vector<std::vector<char>> batch_images(batch_size);
|
||||
std::vector<std::vector<uint32_t>> widths(
|
||||
batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
std::vector<std::vector<uint32_t>> heights(
|
||||
batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
std::vector<std::vector<uint32_t>> prior_channel_sizes(
|
||||
batch_size, std::vector<uint32_t>(ROCJPEG_MAX_COMPONENT, 0));
|
||||
std::vector<RocJpegChromaSubsampling> subsamplings(batch_size);
|
||||
std::vector<RocJpegImage> output_images(batch_size);
|
||||
std::vector<std::string> base_file_names(batch_size);
|
||||
std::vector<RocJpegStreamHandle> rocjpeg_stream_handles(batch_size);
|
||||
std::vector<uint32_t> temp_widths(ROCJPEG_MAX_COMPONENT, 0);
|
||||
std::vector<uint32_t> temp_heights(ROCJPEG_MAX_COMPONENT, 0);
|
||||
RocJpegChromaSubsampling temp_subsampling;
|
||||
std::string temp_base_file_name;
|
||||
|
||||
CHECK_HIP(hipSetDevice(device_id));
|
||||
for(int i = 0; i < decode_info.file_paths.size(); i += batch_size)
|
||||
{
|
||||
int batch_end =
|
||||
std::min(i + batch_size, static_cast<int>(decode_info.file_paths.size()));
|
||||
for(int j = i; j < batch_end; j++)
|
||||
{
|
||||
int index = j - i;
|
||||
|
||||
temp_base_file_name = decode_info.file_paths[j].substr(
|
||||
decode_info.file_paths[j].find_last_of("/\\") + 1);
|
||||
// Read an image from disk.
|
||||
std::ifstream input(decode_info.file_paths[j].c_str(),
|
||||
std::ios::in | std::ios::binary | std::ios::ate);
|
||||
if(!(input.is_open()))
|
||||
{
|
||||
std::cerr << "ERROR: Cannot open image: " << decode_info.file_paths[j]
|
||||
<< std::endl;
|
||||
return;
|
||||
}
|
||||
// Get the size
|
||||
std::streamsize file_size = input.tellg();
|
||||
input.seekg(0, std::ios::beg);
|
||||
// resize if buffer is too small
|
||||
if(batch_images[index].size() < file_size)
|
||||
{
|
||||
batch_images[index].resize(file_size);
|
||||
}
|
||||
if(!input.read(batch_images[index].data(), file_size))
|
||||
{
|
||||
std::cerr << "ERROR: Cannot read from file: " << decode_info.file_paths[j]
|
||||
<< std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
RocJpegStatus rocjpeg_status =
|
||||
rocJpegStreamParse(reinterpret_cast<uint8_t*>(batch_images[index].data()),
|
||||
file_size, decode_info.rocjpeg_stream_handles[index]);
|
||||
if(rocjpeg_status != ROCJPEG_STATUS_SUCCESS)
|
||||
{
|
||||
decode_info.num_bad_jpegs++;
|
||||
std::cerr << "Skipping decoding input file: " << decode_info.file_paths[j]
|
||||
<< std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
CHECK_ROCJPEG(rocJpegGetImageInfo(decode_info.rocjpeg_handle,
|
||||
decode_info.rocjpeg_stream_handles[index],
|
||||
&num_components, &temp_subsampling,
|
||||
temp_widths.data(), temp_heights.data()));
|
||||
if(roi_width > 0 && roi_height > 0 && roi_width <= temp_widths[0] &&
|
||||
roi_height <= temp_heights[0])
|
||||
{
|
||||
is_roi_valid = true;
|
||||
}
|
||||
|
||||
rocjpeg_utils.GetChromaSubsamplingStr(temp_subsampling, chroma_sub_sampling);
|
||||
if(temp_widths[0] < 64 || temp_heights[0] < 64)
|
||||
{
|
||||
decode_info.num_jpegs_with_unsupported_resolution++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(temp_subsampling == ROCJPEG_CSS_411 ||
|
||||
temp_subsampling == ROCJPEG_CSS_UNKNOWN)
|
||||
{
|
||||
if(temp_subsampling == ROCJPEG_CSS_411)
|
||||
{
|
||||
decode_info.num_jpegs_with_411_subsampling++;
|
||||
}
|
||||
if(temp_subsampling == ROCJPEG_CSS_UNKNOWN)
|
||||
{
|
||||
decode_info.num_jpegs_with_unknown_subsampling++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if(rocjpeg_utils.GetChannelPitchAndSizes(
|
||||
decode_params, temp_subsampling, temp_widths.data(),
|
||||
temp_heights.data(), num_channels, output_images[current_batch_size],
|
||||
channel_sizes))
|
||||
{
|
||||
std::cerr << "ERROR: Failed to get the channel pitch and sizes"
|
||||
<< std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// allocate memory for each channel and reuse them if the sizes remain
|
||||
// unchanged for a new image.
|
||||
for(int n = 0; n < num_channels; n++)
|
||||
{
|
||||
if(prior_channel_sizes[current_batch_size][n] != channel_sizes[n])
|
||||
{
|
||||
if(output_images[current_batch_size].channel[n] != nullptr)
|
||||
{
|
||||
CHECK_HIP(hipFree(
|
||||
(void*) output_images[current_batch_size].channel[n]));
|
||||
output_images[current_batch_size].channel[n] = nullptr;
|
||||
}
|
||||
CHECK_HIP(hipMalloc(&output_images[current_batch_size].channel[n],
|
||||
channel_sizes[n]));
|
||||
prior_channel_sizes[current_batch_size][n] = channel_sizes[n];
|
||||
}
|
||||
}
|
||||
|
||||
rocjpeg_stream_handles[current_batch_size] =
|
||||
decode_info.rocjpeg_stream_handles[index];
|
||||
subsamplings[current_batch_size] = temp_subsampling;
|
||||
widths[current_batch_size] = temp_widths;
|
||||
heights[current_batch_size] = temp_heights;
|
||||
base_file_names[current_batch_size] = temp_base_file_name;
|
||||
current_batch_size++;
|
||||
}
|
||||
|
||||
double time_per_batch_in_milli_sec = 0;
|
||||
if(current_batch_size > 0)
|
||||
{
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
CHECK_ROCJPEG(rocJpegDecodeBatched(
|
||||
decode_info.rocjpeg_handle, rocjpeg_stream_handles.data(),
|
||||
current_batch_size, &decode_params, output_images.data()));
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
time_per_batch_in_milli_sec =
|
||||
std::chrono::duration<double, std::milli>(end_time - start_time).count();
|
||||
}
|
||||
|
||||
double image_size_in_mpixels = 0;
|
||||
for(int b = 0; b < current_batch_size; b++)
|
||||
{
|
||||
image_size_in_mpixels += (static_cast<double>(widths[b][0]) *
|
||||
static_cast<double>(heights[b][0]) / 1000000);
|
||||
}
|
||||
|
||||
decode_info.num_decoded_images += current_batch_size;
|
||||
|
||||
if(save_images)
|
||||
{
|
||||
for(int b = 0; b < current_batch_size; b++)
|
||||
{
|
||||
std::string image_save_path = output_file_path;
|
||||
// if ROI is present, need to pass roi_width and roi_height
|
||||
uint32_t width = is_roi_valid ? roi_width : widths[b][0];
|
||||
uint32_t height = is_roi_valid ? roi_height : heights[b][0];
|
||||
rocjpeg_utils.GetOutputFileExt(decode_params.output_format,
|
||||
base_file_names[b], width, height,
|
||||
subsamplings[b], image_save_path);
|
||||
rocjpeg_utils.SaveImage(image_save_path, &output_images[b], width, height,
|
||||
subsamplings[b], decode_params.output_format);
|
||||
}
|
||||
}
|
||||
|
||||
total_decode_time_in_milli_sec += time_per_batch_in_milli_sec;
|
||||
image_size_in_mpixels_all += image_size_in_mpixels;
|
||||
|
||||
current_batch_size = 0;
|
||||
}
|
||||
|
||||
double avg_time_per_image =
|
||||
decode_info.num_decoded_images > 0
|
||||
? total_decode_time_in_milli_sec / decode_info.num_decoded_images
|
||||
: 0;
|
||||
decode_info.images_per_sec = avg_time_per_image > 0 ? 1000 / avg_time_per_image : 0;
|
||||
decode_info.image_size_in_mpixels_per_sec = decode_info.num_decoded_images > 0
|
||||
? decode_info.images_per_sec *
|
||||
image_size_in_mpixels_all /
|
||||
decode_info.num_decoded_images
|
||||
: 0;
|
||||
|
||||
for(auto& it : output_images)
|
||||
{
|
||||
for(int i = 0; i < ROCJPEG_MAX_COMPONENT; i++)
|
||||
{
|
||||
if(it.channel[i] != nullptr)
|
||||
{
|
||||
CHECK_HIP(hipFree((void*) it.channel[i]));
|
||||
it.channel[i] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
int device_id = 0;
|
||||
bool save_images = false;
|
||||
int num_threads = 1;
|
||||
int batch_size = 1;
|
||||
bool is_dir = false;
|
||||
bool is_file = false;
|
||||
RocJpegBackend rocjpeg_backend = ROCJPEG_BACKEND_HARDWARE;
|
||||
RocJpegDecodeParams decode_params = {};
|
||||
RocJpegUtils rocjpeg_utils;
|
||||
std::string input_path, output_file_path;
|
||||
std::vector<std::string> file_paths = {};
|
||||
std::vector<DecodeInfo> decode_info_per_thread;
|
||||
|
||||
RocJpegUtils::ParseCommandLine(input_path, output_file_path, save_images, device_id,
|
||||
rocjpeg_backend, decode_params, &num_threads,
|
||||
&batch_size, argc, argv);
|
||||
if(!RocJpegUtils::GetFilePaths(input_path, file_paths, is_dir, is_file))
|
||||
{
|
||||
std::cerr << "ERROR: Failed to get input file paths!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if(!RocJpegUtils::InitHipDevice(device_id))
|
||||
{
|
||||
std::cerr << "ERROR: Failed to initialize HIP!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if(num_threads > file_paths.size())
|
||||
{
|
||||
num_threads = file_paths.size();
|
||||
}
|
||||
|
||||
decode_info_per_thread.resize(num_threads);
|
||||
|
||||
for(int i = 0; i < num_threads; i++)
|
||||
{
|
||||
CHECK_ROCJPEG(rocJpegCreate(rocjpeg_backend, device_id,
|
||||
&decode_info_per_thread[i].rocjpeg_handle));
|
||||
decode_info_per_thread[i].rocjpeg_stream_handles.resize(batch_size);
|
||||
for(auto j = 0; j < batch_size; j++)
|
||||
{
|
||||
CHECK_ROCJPEG(rocJpegStreamCreate(
|
||||
&decode_info_per_thread[i].rocjpeg_stream_handles[j]));
|
||||
}
|
||||
decode_info_per_thread[i].num_decoded_images = 0;
|
||||
decode_info_per_thread[i].images_per_sec = 0;
|
||||
decode_info_per_thread[i].image_size_in_mpixels_per_sec = 0;
|
||||
decode_info_per_thread[i].num_bad_jpegs = 0;
|
||||
decode_info_per_thread[i].num_jpegs_with_411_subsampling = 0;
|
||||
decode_info_per_thread[i].num_jpegs_with_unknown_subsampling = 0;
|
||||
decode_info_per_thread[i].num_jpegs_with_unsupported_resolution = 0;
|
||||
}
|
||||
|
||||
ThreadPool thread_pool(num_threads);
|
||||
|
||||
size_t files_per_thread = file_paths.size() / num_threads;
|
||||
size_t remaining_files = file_paths.size() % num_threads;
|
||||
size_t start_index = 0;
|
||||
for(int i = 0; i < num_threads; i++)
|
||||
{
|
||||
size_t end_index = start_index + files_per_thread + (i < remaining_files ? 1 : 0);
|
||||
decode_info_per_thread[i].file_paths.assign(file_paths.begin() + start_index,
|
||||
file_paths.begin() + end_index);
|
||||
start_index = end_index;
|
||||
}
|
||||
|
||||
std::cout << "Decoding started with " << num_threads << " threads, please wait!"
|
||||
<< std::endl;
|
||||
for(int i = 0; i < num_threads; ++i)
|
||||
{
|
||||
thread_pool.ExecuteJob(
|
||||
std::bind(DecodeImages, std::ref(decode_info_per_thread[i]), rocjpeg_utils,
|
||||
std::ref(decode_params), save_images, std::ref(output_file_path),
|
||||
batch_size, device_id));
|
||||
}
|
||||
thread_pool.JoinThreads();
|
||||
|
||||
uint64_t total_decoded_images = 0;
|
||||
double total_images_per_sec = 0;
|
||||
double total_image_size_in_mpixels_per_sec = 0;
|
||||
uint64_t total_num_bad_jpegs = 0;
|
||||
uint64_t total_num_jpegs_with_411_subsampling = 0;
|
||||
uint64_t total_num_jpegs_with_unknown_subsampling = 0;
|
||||
uint64_t total_num_jpegs_with_unsupported_resolution = 0;
|
||||
|
||||
for(auto i = 0; i < num_threads; i++)
|
||||
{
|
||||
total_decoded_images += decode_info_per_thread[i].num_decoded_images;
|
||||
total_image_size_in_mpixels_per_sec +=
|
||||
decode_info_per_thread[i].image_size_in_mpixels_per_sec;
|
||||
total_images_per_sec += decode_info_per_thread[i].images_per_sec;
|
||||
total_num_bad_jpegs += decode_info_per_thread[i].num_bad_jpegs;
|
||||
total_num_jpegs_with_411_subsampling +=
|
||||
decode_info_per_thread[i].num_jpegs_with_411_subsampling;
|
||||
total_num_jpegs_with_unknown_subsampling +=
|
||||
decode_info_per_thread[i].num_jpegs_with_unknown_subsampling;
|
||||
total_num_jpegs_with_unsupported_resolution +=
|
||||
decode_info_per_thread[i].num_jpegs_with_unsupported_resolution;
|
||||
}
|
||||
|
||||
std::cout << "Total decoded images: " << total_decoded_images << std::endl;
|
||||
if(total_num_bad_jpegs || total_num_jpegs_with_411_subsampling ||
|
||||
total_num_jpegs_with_unknown_subsampling ||
|
||||
total_num_jpegs_with_unsupported_resolution)
|
||||
{
|
||||
std::cout << "Total skipped images: "
|
||||
<< total_num_bad_jpegs + total_num_jpegs_with_411_subsampling +
|
||||
total_num_jpegs_with_unknown_subsampling +
|
||||
total_num_jpegs_with_unsupported_resolution;
|
||||
if(total_num_bad_jpegs)
|
||||
{
|
||||
std::cout << " ,total images that cannot be parsed: " << total_num_bad_jpegs;
|
||||
}
|
||||
if(total_num_jpegs_with_411_subsampling)
|
||||
{
|
||||
std::cout << " ,total images with YUV 4:1:1 chroam subsampling: "
|
||||
<< total_num_jpegs_with_411_subsampling;
|
||||
}
|
||||
if(total_num_jpegs_with_unknown_subsampling)
|
||||
{
|
||||
std::cout << " ,total images with unknwon chroam subsampling: "
|
||||
<< total_num_jpegs_with_unknown_subsampling;
|
||||
}
|
||||
if(total_num_jpegs_with_unsupported_resolution)
|
||||
{
|
||||
std::cout << " ,total images with unsupported_resolution: "
|
||||
<< total_num_jpegs_with_unsupported_resolution;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
if(total_decoded_images > 0)
|
||||
{
|
||||
std::cout << "Average processing time per image (ms): "
|
||||
<< 1000 / total_images_per_sec << std::endl;
|
||||
std::cout << "Average decoded images per sec (Images/Sec): "
|
||||
<< total_images_per_sec << std::endl;
|
||||
std::cout << "Average decoded images size (Mpixels/Sec): "
|
||||
<< total_image_size_in_mpixels_per_sec << std::endl;
|
||||
}
|
||||
|
||||
for(int i = 0; i < num_threads; i++)
|
||||
{
|
||||
CHECK_ROCJPEG(rocJpegDestroy(decode_info_per_thread[i].rocjpeg_handle));
|
||||
for(auto j = 0; j < batch_size; j++)
|
||||
{
|
||||
CHECK_ROCJPEG(rocJpegStreamDestroy(
|
||||
decode_info_per_thread[i].rocjpeg_stream_handles[j]));
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Decoding completed!" << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,903 @@
|
||||
/*
|
||||
Copyright (c) 2024 - 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef ROC_JPEG_SAMPLES_COMMON
|
||||
#define ROC_JPEG_SAMPLES_COMMON
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <condition_variable>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#if __cplusplus >= 201703L && __has_include(<filesystem>)
|
||||
# include <filesystem>
|
||||
namespace fs = std::filesystem;
|
||||
#else
|
||||
# include <experimental/filesystem>
|
||||
namespace fs = std::experimental::filesystem;
|
||||
#endif
|
||||
#include "rocjpeg.h"
|
||||
#include <chrono>
|
||||
|
||||
#define CHECK_ROCJPEG(call) \
|
||||
{ \
|
||||
RocJpegStatus rocjpeg_status = (call); \
|
||||
if(rocjpeg_status != ROCJPEG_STATUS_SUCCESS) \
|
||||
{ \
|
||||
std::cerr << #call << " returned " << rocJpegGetErrorName(rocjpeg_status) \
|
||||
<< " at " << __FILE__ << ":" << __LINE__ << std::endl; \
|
||||
exit(1); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHECK_HIP(call) \
|
||||
{ \
|
||||
hipError_t hip_status = (call); \
|
||||
if(hip_status != hipSuccess) \
|
||||
{ \
|
||||
std::cout << "rocJPEG failure: '#" << hip_status << "' at " << __FILE__ \
|
||||
<< ":" << __LINE__ << std::endl; \
|
||||
exit(1); \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
* @class RocJpegUtils
|
||||
* @brief Utility class for rocJPEG samples.
|
||||
*
|
||||
* This class provides utility functions for rocJPEG samples, such as parsing command line
|
||||
* arguments, getting file paths, initializing HIP device, getting chroma subsampling
|
||||
* string, getting channel pitch and sizes, getting output file extension, and saving
|
||||
* images.
|
||||
*/
|
||||
class RocJpegUtils
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* @brief Parses the command line arguments.
|
||||
*
|
||||
* This function parses the command line arguments and sets the corresponding
|
||||
* variables.
|
||||
*
|
||||
* @param input_path The input path.
|
||||
* @param output_file_path The output file path.
|
||||
* @param save_images Flag indicating whether to save images.
|
||||
* @param device_id The device ID.
|
||||
* @param rocjpeg_backend The rocJPEG backend.
|
||||
* @param decode_params The rocJPEG decode parameters.
|
||||
* @param num_threads The number of threads.
|
||||
* @param crop The crop rectangle.
|
||||
* @param argc The number of command line arguments.
|
||||
* @param argv The command line arguments.
|
||||
*/
|
||||
static void ParseCommandLine(std::string& input_path, std::string& output_file_path,
|
||||
bool& save_images, int& device_id,
|
||||
RocJpegBackend& rocjpeg_backend,
|
||||
RocJpegDecodeParams& decode_params, int* num_threads,
|
||||
int* batch_size, int argc, char* argv[])
|
||||
{
|
||||
if(argc <= 1)
|
||||
{
|
||||
ShowHelpAndExit("", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
for(int i = 1; i < argc; i++)
|
||||
{
|
||||
if(!strcmp(argv[i], "-h"))
|
||||
{
|
||||
ShowHelpAndExit("", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
if(!strcmp(argv[i], "-i"))
|
||||
{
|
||||
if(++i == argc)
|
||||
{
|
||||
ShowHelpAndExit("-i", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
input_path = argv[i];
|
||||
continue;
|
||||
}
|
||||
if(!strcmp(argv[i], "-o"))
|
||||
{
|
||||
if(++i == argc)
|
||||
{
|
||||
ShowHelpAndExit("-o", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
output_file_path = argv[i];
|
||||
save_images = true;
|
||||
continue;
|
||||
}
|
||||
if(!strcmp(argv[i], "-d"))
|
||||
{
|
||||
if(++i == argc)
|
||||
{
|
||||
ShowHelpAndExit("-d", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
device_id = atoi(argv[i]);
|
||||
continue;
|
||||
}
|
||||
if(!strcmp(argv[i], "-be"))
|
||||
{
|
||||
if(++i == argc)
|
||||
{
|
||||
ShowHelpAndExit("-be", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
rocjpeg_backend = static_cast<RocJpegBackend>(atoi(argv[i]));
|
||||
continue;
|
||||
}
|
||||
if(!strcmp(argv[i], "-fmt"))
|
||||
{
|
||||
if(++i == argc)
|
||||
{
|
||||
ShowHelpAndExit("-fmt", num_threads != nullptr,
|
||||
batch_size != nullptr);
|
||||
}
|
||||
std::string selected_output_format = argv[i];
|
||||
if(selected_output_format == "native")
|
||||
{
|
||||
decode_params.output_format = ROCJPEG_OUTPUT_NATIVE;
|
||||
}
|
||||
else if(selected_output_format == "yuv_planar")
|
||||
{
|
||||
decode_params.output_format = ROCJPEG_OUTPUT_YUV_PLANAR;
|
||||
}
|
||||
else if(selected_output_format == "y")
|
||||
{
|
||||
decode_params.output_format = ROCJPEG_OUTPUT_Y;
|
||||
}
|
||||
else if(selected_output_format == "rgb")
|
||||
{
|
||||
decode_params.output_format = ROCJPEG_OUTPUT_RGB;
|
||||
}
|
||||
else if(selected_output_format == "rgb_planar")
|
||||
{
|
||||
decode_params.output_format = ROCJPEG_OUTPUT_RGB_PLANAR;
|
||||
}
|
||||
else
|
||||
{
|
||||
ShowHelpAndExit(argv[i], num_threads != nullptr);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if(!strcmp(argv[i], "-t"))
|
||||
{
|
||||
if(++i == argc)
|
||||
{
|
||||
ShowHelpAndExit("-t", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
if(num_threads != nullptr)
|
||||
{
|
||||
*num_threads = atoi(argv[i]);
|
||||
if(*num_threads <= 0 || *num_threads > 32)
|
||||
{
|
||||
ShowHelpAndExit(argv[i], num_threads != nullptr,
|
||||
batch_size != nullptr);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if(!strcmp(argv[i], "-b"))
|
||||
{
|
||||
if(++i == argc)
|
||||
{
|
||||
ShowHelpAndExit("-b", num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
if(batch_size != nullptr) *batch_size = atoi(argv[i]);
|
||||
continue;
|
||||
}
|
||||
if(!strcmp(argv[i], "-crop"))
|
||||
{
|
||||
if(++i == argc || 4 != sscanf(argv[i], "%hd,%hd,%hd,%hd",
|
||||
&decode_params.crop_rectangle.left,
|
||||
&decode_params.crop_rectangle.top,
|
||||
&decode_params.crop_rectangle.right,
|
||||
&decode_params.crop_rectangle.bottom))
|
||||
{
|
||||
ShowHelpAndExit("-crop");
|
||||
}
|
||||
if((&decode_params.crop_rectangle.right -
|
||||
&decode_params.crop_rectangle.left) %
|
||||
2 ==
|
||||
1 ||
|
||||
(&decode_params.crop_rectangle.bottom -
|
||||
&decode_params.crop_rectangle.top) %
|
||||
2 ==
|
||||
1)
|
||||
{
|
||||
std::cout << "output crop rectangle must have width and height of "
|
||||
"even numbers"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
ShowHelpAndExit(argv[i], num_threads != nullptr, batch_size != nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a file is a JPEG file.
|
||||
*
|
||||
* @param filePath The path to the file to be checked.
|
||||
* @return True if the file is a JPEG file, false otherwise.
|
||||
*/
|
||||
static bool IsJPEG(const std::string& filePath)
|
||||
{
|
||||
std::ifstream file(filePath, std::ios::binary);
|
||||
if(!file.is_open())
|
||||
{
|
||||
std::cerr << "Failed to open file: " << filePath << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned char buffer[2];
|
||||
file.read(reinterpret_cast<char*>(buffer), 2);
|
||||
file.close();
|
||||
|
||||
// The first two bytes of every JPEG stream are always 0xFFD8, which represents
|
||||
// the Start of Image (SOI) marker.
|
||||
return buffer[0] == 0xFF && buffer[1] == 0xD8;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets the file paths.
|
||||
*
|
||||
* This function gets the file paths based on the input path and sets the
|
||||
* corresponding variables.
|
||||
*
|
||||
* @param input_path The input path.
|
||||
* @param file_paths The vector to store the file paths.
|
||||
* @param is_dir Flag indicating whether the input path is a directory.
|
||||
* @param is_file Flag indicating whether the input path is a file.
|
||||
* @return True if successful, false otherwise.
|
||||
*/
|
||||
static bool GetFilePaths(std::string& input_path,
|
||||
std::vector<std::string>& file_paths, bool& is_dir,
|
||||
bool& is_file)
|
||||
{
|
||||
std::cout << "Reading images from disk, please wait!" << std::endl;
|
||||
if(!fs::exists(input_path))
|
||||
{
|
||||
std::cerr << "ERROR: the input path does not exist!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
is_dir = fs::is_directory(input_path);
|
||||
is_file = fs::is_regular_file(input_path);
|
||||
if(is_dir)
|
||||
{
|
||||
for(const auto& entry : fs::recursive_directory_iterator(input_path))
|
||||
{
|
||||
if(fs::is_regular_file(entry) && IsJPEG(entry.path().string()))
|
||||
{
|
||||
file_paths.push_back(entry.path().string());
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(is_file && IsJPEG(input_path))
|
||||
{
|
||||
file_paths.push_back(input_path);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << "ERROR: the input path does not contain JPEG files!"
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Initializes the HIP device.
|
||||
*
|
||||
* This function initializes the HIP device with the specified device ID.
|
||||
*
|
||||
* @param device_id The device ID.
|
||||
* @return True if successful, false otherwise.
|
||||
*/
|
||||
static bool InitHipDevice(int device_id)
|
||||
{
|
||||
int num_devices;
|
||||
hipDeviceProp_t hip_dev_prop;
|
||||
CHECK_HIP(hipGetDeviceCount(&num_devices));
|
||||
if(num_devices < 1)
|
||||
{
|
||||
std::cerr << "ERROR: didn't find any GPU!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
if(device_id >= num_devices)
|
||||
{
|
||||
std::cerr << "ERROR: the requested device_id is not found!" << std::endl;
|
||||
return false;
|
||||
}
|
||||
CHECK_HIP(hipSetDevice(device_id));
|
||||
CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop, device_id));
|
||||
|
||||
std::cout << "Using GPU device " << device_id << ": " << hip_dev_prop.name << "["
|
||||
<< hip_dev_prop.gcnArchName << "] on PCI bus " << std::setfill('0')
|
||||
<< std::setw(2) << std::right << std::hex << hip_dev_prop.pciBusID
|
||||
<< ":" << std::setfill('0') << std::setw(2) << std::right << std::hex
|
||||
<< hip_dev_prop.pciDomainID << "." << hip_dev_prop.pciDeviceID
|
||||
<< std::dec << std::endl;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets the chroma subsampling string.
|
||||
*
|
||||
* This function gets the chroma subsampling string based on the specified subsampling
|
||||
* value.
|
||||
*
|
||||
* @param subsampling The chroma subsampling value.
|
||||
* @param chroma_sub_sampling The string to store the chroma subsampling.
|
||||
*/
|
||||
void GetChromaSubsamplingStr(RocJpegChromaSubsampling subsampling,
|
||||
std::string& chroma_sub_sampling)
|
||||
{
|
||||
switch(subsampling)
|
||||
{
|
||||
case ROCJPEG_CSS_444: chroma_sub_sampling = "YUV 4:4:4"; break;
|
||||
case ROCJPEG_CSS_440: chroma_sub_sampling = "YUV 4:4:0"; break;
|
||||
case ROCJPEG_CSS_422: chroma_sub_sampling = "YUV 4:2:2"; break;
|
||||
case ROCJPEG_CSS_420: chroma_sub_sampling = "YUV 4:2:0"; break;
|
||||
case ROCJPEG_CSS_411: chroma_sub_sampling = "YUV 4:1:1"; break;
|
||||
case ROCJPEG_CSS_400: chroma_sub_sampling = "YUV 4:0:0"; break;
|
||||
case ROCJPEG_CSS_UNKNOWN: chroma_sub_sampling = "UNKNOWN"; break;
|
||||
default: chroma_sub_sampling = ""; break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets the channel pitch and sizes.
|
||||
*
|
||||
* This function gets the channel pitch and sizes based on the specified output
|
||||
* format, chroma subsampling, output image, and channel sizes.
|
||||
*
|
||||
* @param decode_params The decode parameters that specify the output format and crop
|
||||
* rectangle.
|
||||
* @param subsampling The chroma subsampling.
|
||||
* @param widths The array to store the channel widths.
|
||||
* @param heights The array to store the channel heights.
|
||||
* @param num_channels The number of channels.
|
||||
* @param output_image The output image.
|
||||
* @param channel_sizes The array to store the channel sizes.
|
||||
* @return The channel pitch.
|
||||
*/
|
||||
int GetChannelPitchAndSizes(RocJpegDecodeParams decode_params,
|
||||
RocJpegChromaSubsampling subsampling, uint32_t* widths,
|
||||
uint32_t* heights, uint32_t& num_channels,
|
||||
RocJpegImage& output_image, uint32_t* channel_sizes)
|
||||
{
|
||||
bool is_roi_valid = false;
|
||||
uint32_t roi_width;
|
||||
uint32_t roi_height;
|
||||
roi_width =
|
||||
decode_params.crop_rectangle.right - decode_params.crop_rectangle.left;
|
||||
roi_height =
|
||||
decode_params.crop_rectangle.bottom - decode_params.crop_rectangle.top;
|
||||
if(roi_width > 0 && roi_height > 0 && roi_width <= widths[0] &&
|
||||
roi_height <= heights[0])
|
||||
{
|
||||
is_roi_valid = true;
|
||||
}
|
||||
switch(decode_params.output_format)
|
||||
{
|
||||
case ROCJPEG_OUTPUT_NATIVE:
|
||||
switch(subsampling)
|
||||
{
|
||||
case ROCJPEG_CSS_444:
|
||||
num_channels = 3;
|
||||
output_image.pitch[2] = output_image.pitch[1] =
|
||||
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
|
||||
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] =
|
||||
align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
break;
|
||||
case ROCJPEG_CSS_440:
|
||||
num_channels = 3;
|
||||
output_image.pitch[2] = output_image.pitch[1] =
|
||||
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
|
||||
channel_sizes[0] =
|
||||
align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
channel_sizes[2] = channel_sizes[1] =
|
||||
align(output_image.pitch[0] *
|
||||
((is_roi_valid ? roi_height : heights[0]) >> 1),
|
||||
mem_alignment);
|
||||
break;
|
||||
case ROCJPEG_CSS_422:
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] =
|
||||
(is_roi_valid ? roi_width : widths[0]) * 2;
|
||||
channel_sizes[0] =
|
||||
align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
break;
|
||||
case ROCJPEG_CSS_420:
|
||||
num_channels = 2;
|
||||
output_image.pitch[1] = output_image.pitch[0] =
|
||||
is_roi_valid ? roi_width : widths[0];
|
||||
channel_sizes[0] =
|
||||
align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
channel_sizes[1] =
|
||||
align(output_image.pitch[1] *
|
||||
((is_roi_valid ? roi_height : heights[0]) >> 1),
|
||||
mem_alignment);
|
||||
break;
|
||||
case ROCJPEG_CSS_400:
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
|
||||
channel_sizes[0] =
|
||||
align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown chroma subsampling!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_YUV_PLANAR:
|
||||
if(subsampling == ROCJPEG_CSS_400)
|
||||
{
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
|
||||
channel_sizes[0] = align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
}
|
||||
else
|
||||
{
|
||||
num_channels = 3;
|
||||
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
|
||||
output_image.pitch[1] = is_roi_valid ? roi_width : widths[1];
|
||||
output_image.pitch[2] = is_roi_valid ? roi_width : widths[2];
|
||||
channel_sizes[0] = align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
channel_sizes[1] = align(output_image.pitch[1] *
|
||||
(is_roi_valid ? roi_height : heights[1]),
|
||||
mem_alignment);
|
||||
channel_sizes[2] = align(output_image.pitch[2] *
|
||||
(is_roi_valid ? roi_height : heights[2]),
|
||||
mem_alignment);
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_Y:
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = is_roi_valid ? roi_width : widths[0];
|
||||
channel_sizes[0] = align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB:
|
||||
num_channels = 1;
|
||||
output_image.pitch[0] = (is_roi_valid ? roi_width : widths[0]) * 3;
|
||||
channel_sizes[0] = align(output_image.pitch[0] *
|
||||
(is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB_PLANAR:
|
||||
num_channels = 3;
|
||||
output_image.pitch[2] = output_image.pitch[1] = output_image.pitch[0] =
|
||||
is_roi_valid ? roi_width : widths[0];
|
||||
channel_sizes[2] = channel_sizes[1] = channel_sizes[0] = align(
|
||||
output_image.pitch[0] * (is_roi_valid ? roi_height : heights[0]),
|
||||
mem_alignment);
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown output format!" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets the output file extension.
|
||||
*
|
||||
* This function gets the output file extension based on the specified output format,
|
||||
* base file name, image width, image height, and file name for saving.
|
||||
*
|
||||
* @param output_format The output format.
|
||||
* @param base_file_name The base file name.
|
||||
* @param image_width The image width.
|
||||
* @param image_height The image height.
|
||||
* @param file_name_for_saving The string to store the file name for saving.
|
||||
*/
|
||||
void GetOutputFileExt(RocJpegOutputFormat output_format, std::string& base_file_name,
|
||||
uint32_t image_width, uint32_t image_height,
|
||||
RocJpegChromaSubsampling subsampling,
|
||||
std::string& file_name_for_saving)
|
||||
{
|
||||
std::string file_extension;
|
||||
std::string::size_type const p(base_file_name.find_last_of('.'));
|
||||
std::string file_name_no_ext = base_file_name.substr(0, p);
|
||||
std::string format_description = "";
|
||||
switch(output_format)
|
||||
{
|
||||
case ROCJPEG_OUTPUT_NATIVE:
|
||||
file_extension = "yuv";
|
||||
switch(subsampling)
|
||||
{
|
||||
case ROCJPEG_CSS_444: format_description = "444"; break;
|
||||
case ROCJPEG_CSS_440: format_description = "440"; break;
|
||||
case ROCJPEG_CSS_422: format_description = "422_yuyv"; break;
|
||||
case ROCJPEG_CSS_420: format_description = "nv12"; break;
|
||||
case ROCJPEG_CSS_400: format_description = "400"; break;
|
||||
default:
|
||||
std::cout << "Unknown chroma subsampling!" << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_YUV_PLANAR:
|
||||
file_extension = "yuv";
|
||||
format_description = "planar";
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_Y:
|
||||
file_extension = "yuv";
|
||||
format_description = "400";
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB:
|
||||
file_extension = "rgb";
|
||||
format_description = "packed";
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB_PLANAR:
|
||||
file_extension = "rgb";
|
||||
format_description = "planar";
|
||||
break;
|
||||
default: file_extension = ""; break;
|
||||
}
|
||||
file_name_for_saving += "//" + file_name_no_ext + "_" +
|
||||
std::to_string(image_width) + "x" +
|
||||
std::to_string(image_height) + "_" + format_description +
|
||||
"." + file_extension;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Saves the image.
|
||||
*
|
||||
* This function saves the image to the specified output file name based on the output
|
||||
* image, image width, image height, chroma subsampling, and output format.
|
||||
*
|
||||
* @param output_file_name The output file name.
|
||||
* @param output_image The output image.
|
||||
* @param img_width The image width.
|
||||
* @param img_height The image height.
|
||||
* @param subsampling The chroma subsampling.
|
||||
* @param output_format The output format.
|
||||
*/
|
||||
void SaveImage(std::string output_file_name, RocJpegImage* output_image,
|
||||
uint32_t img_width, uint32_t img_height,
|
||||
RocJpegChromaSubsampling subsampling,
|
||||
RocJpegOutputFormat output_format)
|
||||
{
|
||||
uint8_t* hst_ptr = nullptr;
|
||||
FILE* fp;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
if(output_image == nullptr || output_image->channel[0] == nullptr ||
|
||||
output_image->pitch[0] == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t widths[ROCJPEG_MAX_COMPONENT] = {};
|
||||
uint32_t heights[ROCJPEG_MAX_COMPONENT] = {};
|
||||
|
||||
switch(output_format)
|
||||
{
|
||||
case ROCJPEG_OUTPUT_NATIVE:
|
||||
switch(subsampling)
|
||||
{
|
||||
case ROCJPEG_CSS_444:
|
||||
widths[2] = widths[1] = widths[0] = img_width;
|
||||
heights[2] = heights[1] = heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_CSS_440:
|
||||
widths[2] = widths[1] = widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
heights[2] = heights[1] = img_height >> 1;
|
||||
break;
|
||||
case ROCJPEG_CSS_422:
|
||||
widths[0] = img_width * 2;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_CSS_420:
|
||||
widths[1] = widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
heights[1] = img_height >> 1;
|
||||
break;
|
||||
case ROCJPEG_CSS_400:
|
||||
widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown chroma subsampling!" << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_YUV_PLANAR:
|
||||
switch(subsampling)
|
||||
{
|
||||
case ROCJPEG_CSS_444:
|
||||
widths[2] = widths[1] = widths[0] = img_width;
|
||||
heights[2] = heights[1] = heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_CSS_440:
|
||||
widths[2] = widths[1] = widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
heights[2] = heights[1] = img_height >> 1;
|
||||
break;
|
||||
case ROCJPEG_CSS_422:
|
||||
widths[0] = img_width;
|
||||
widths[2] = widths[1] = widths[0] >> 1;
|
||||
heights[2] = heights[1] = heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_CSS_420:
|
||||
widths[0] = img_width;
|
||||
widths[2] = widths[1] = widths[0] >> 1;
|
||||
heights[0] = img_height;
|
||||
heights[2] = heights[1] = img_height >> 1;
|
||||
break;
|
||||
case ROCJPEG_CSS_400:
|
||||
widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unknown chroma subsampling!" << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_Y:
|
||||
widths[0] = img_width;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB:
|
||||
widths[0] = img_width * 3;
|
||||
heights[0] = img_height;
|
||||
break;
|
||||
case ROCJPEG_OUTPUT_RGB_PLANAR:
|
||||
widths[2] = widths[1] = widths[0] = img_width;
|
||||
heights[2] = heights[1] = heights[0] = img_height;
|
||||
break;
|
||||
default: std::cout << "Unknown output format!" << std::endl; return;
|
||||
}
|
||||
|
||||
uint32_t channel0_size = output_image->pitch[0] * heights[0];
|
||||
uint32_t channel1_size = output_image->pitch[1] * heights[1];
|
||||
uint32_t channel2_size = output_image->pitch[2] * heights[2];
|
||||
|
||||
uint32_t output_image_size = channel0_size + channel1_size + channel2_size;
|
||||
|
||||
if(hst_ptr == nullptr)
|
||||
{
|
||||
hst_ptr = new uint8_t[output_image_size];
|
||||
}
|
||||
|
||||
CHECK_HIP(
|
||||
hipMemcpyDtoH((void*) hst_ptr, output_image->channel[0], channel0_size));
|
||||
|
||||
uint8_t* tmp_hst_ptr = hst_ptr;
|
||||
fp = fopen(output_file_name.c_str(), "wb");
|
||||
if(fp)
|
||||
{
|
||||
// write channel0
|
||||
if(widths[0] == output_image->pitch[0])
|
||||
{
|
||||
fwrite(hst_ptr, 1, channel0_size, fp);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < heights[0]; i++)
|
||||
{
|
||||
fwrite(tmp_hst_ptr, 1, widths[0], fp);
|
||||
tmp_hst_ptr += output_image->pitch[0];
|
||||
}
|
||||
}
|
||||
// write channel1
|
||||
if(channel1_size != 0 && output_image->channel[1] != nullptr)
|
||||
{
|
||||
uint8_t* channel1_hst_ptr = hst_ptr + channel0_size;
|
||||
CHECK_HIP(hipMemcpyDtoH((void*) channel1_hst_ptr,
|
||||
output_image->channel[1], channel1_size));
|
||||
if(widths[1] == output_image->pitch[1])
|
||||
{
|
||||
fwrite(channel1_hst_ptr, 1, channel1_size, fp);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < heights[1]; i++)
|
||||
{
|
||||
fwrite(channel1_hst_ptr, 1, widths[1], fp);
|
||||
channel1_hst_ptr += output_image->pitch[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
// write channel2
|
||||
if(channel2_size != 0 && output_image->channel[2] != nullptr)
|
||||
{
|
||||
uint8_t* channel2_hst_ptr = hst_ptr + channel0_size + channel1_size;
|
||||
CHECK_HIP(hipMemcpyDtoH((void*) channel2_hst_ptr,
|
||||
output_image->channel[2], channel2_size));
|
||||
if(widths[2] == output_image->pitch[2])
|
||||
{
|
||||
fwrite(channel2_hst_ptr, 1, channel2_size, fp);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < heights[2]; i++)
|
||||
{
|
||||
fwrite(channel2_hst_ptr, 1, widths[2], fp);
|
||||
channel2_hst_ptr += output_image->pitch[2];
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
if(hst_ptr != nullptr)
|
||||
{
|
||||
delete[] hst_ptr;
|
||||
hst_ptr = nullptr;
|
||||
tmp_hst_ptr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static const int mem_alignment = 4 * 1024 * 1024;
|
||||
/**
|
||||
* @brief Shows the help message and exits.
|
||||
*
|
||||
* This function shows the help message and exits the program.
|
||||
*
|
||||
* @param option The option to display in the help message (optional).
|
||||
* @param show_threads Flag indicating whether to show the number of threads in the
|
||||
* help message.
|
||||
*/
|
||||
static void ShowHelpAndExit(const char* option = nullptr, bool show_threads = false,
|
||||
bool show_batch_size = false)
|
||||
{
|
||||
std::cout << "Options:\n"
|
||||
"-i [input path] - input path to a single JPEG image or a "
|
||||
"directory containing JPEG images - [required]\n"
|
||||
"-be [backend] - select rocJPEG backend (0 for "
|
||||
"hardware-accelerated JPEG decoding using VCN,\n"
|
||||
" 1 for hybrid JPEG "
|
||||
"decoding using CPU and GPU HIP kernels (currently not supported)) "
|
||||
"[optional - default: 0]\n"
|
||||
"-fmt [output format] - select rocJPEG output format for "
|
||||
"decoding, one of the [native, yuv_planar, y, rgb, rgb_planar] - "
|
||||
"[optional - default: native]\n"
|
||||
"-o [output path] - path to an output file or a path to an "
|
||||
"existing directory - write decoded images to a file or an existing "
|
||||
"directory based on selected output format - [optional]\n"
|
||||
"-crop [crop rectangle] - crop rectangle for output in a "
|
||||
"comma-separated format: left,top,right,bottom - [optional]\n"
|
||||
"-d [device id] - specify the GPU device id for the desired "
|
||||
"device (use 0 for the first device, 1 for the second device, and "
|
||||
"so on) [optional - default: 0]\n";
|
||||
if(show_threads)
|
||||
{
|
||||
std::cout << "-t [threads] - number of threads (<= 32) for parallel JPEG "
|
||||
"decoding - [optional - default: 1]\n";
|
||||
}
|
||||
if(show_batch_size)
|
||||
{
|
||||
std::cout << "-b [batch_size] - decode images from input by batches of a "
|
||||
"specified size - [optional - default: 1]\n";
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
/**
|
||||
* @brief Aligns a value to a specified alignment.
|
||||
*
|
||||
* This function takes a value and aligns it to the specified alignment. It returns
|
||||
* the aligned value.
|
||||
*
|
||||
* @param value The value to be aligned.
|
||||
* @param alignment The alignment value.
|
||||
* @return The aligned value.
|
||||
*/
|
||||
static inline int align(int value, int alignment)
|
||||
{
|
||||
return (value + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
};
|
||||
|
||||
class ThreadPool
|
||||
{
|
||||
public:
|
||||
ThreadPool(int nthreads)
|
||||
: shutdown_(false)
|
||||
{
|
||||
// Create the specified number of threads
|
||||
threads_.reserve(nthreads);
|
||||
for(int i = 0; i < nthreads; ++i)
|
||||
threads_.emplace_back(std::bind(&ThreadPool::ThreadEntry, this, i));
|
||||
}
|
||||
|
||||
~ThreadPool() {}
|
||||
|
||||
void JoinThreads()
|
||||
{
|
||||
{
|
||||
// Unblock any threads and tell them to stop
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
shutdown_ = true;
|
||||
cond_var_.notify_all();
|
||||
}
|
||||
|
||||
// Wait for all threads to stop
|
||||
for(auto& thread : threads_)
|
||||
thread.join();
|
||||
}
|
||||
|
||||
void ExecuteJob(std::function<void()> func)
|
||||
{
|
||||
// Place a job on the queue and unblock a thread
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
decode_jobs_queue_.emplace(std::move(func));
|
||||
cond_var_.notify_one();
|
||||
}
|
||||
|
||||
protected:
|
||||
void ThreadEntry(int i)
|
||||
{
|
||||
std::function<void()> execute_decode_job;
|
||||
|
||||
while(true)
|
||||
{
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
cond_var_.wait(lock,
|
||||
[&] { return shutdown_ || !decode_jobs_queue_.empty(); });
|
||||
if(decode_jobs_queue_.empty())
|
||||
{
|
||||
// No jobs to do; shutting down
|
||||
return;
|
||||
}
|
||||
|
||||
execute_decode_job = std::move(decode_jobs_queue_.front());
|
||||
decode_jobs_queue_.pop();
|
||||
}
|
||||
|
||||
// Execute the decode job without holding any locks
|
||||
execute_decode_job();
|
||||
}
|
||||
}
|
||||
|
||||
std::mutex mutex_;
|
||||
std::condition_variable cond_var_;
|
||||
bool shutdown_;
|
||||
std::queue<std::function<void()>> decode_jobs_queue_;
|
||||
std::vector<std::thread> threads_;
|
||||
};
|
||||
#endif // ROC_JPEG_SAMPLES_COMMON
|
||||
@@ -170,9 +170,9 @@ else()
|
||||
"-- ERROR!: videodecode excluded! please install all the dependencies and try again!"
|
||||
)
|
||||
if(NOT FFMPEG_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: FFMPEG Not Found! - please install FFMPEG!")
|
||||
message(WARNING "-- ERROR!: FFMPEG Not Found! - please install FFMPEG!")
|
||||
endif()
|
||||
if(NOT ROCDECODE_FOUND)
|
||||
message(FATAL_ERROR "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
|
||||
message(WARNING "-- ERROR!: rocDecode Not Found! - please install rocDecode!")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -106,6 +106,7 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_temp, ROCPROFSYS_CATEGORY_ROCM_SMI
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_power, ROCPROFSYS_CATEGORY_ROCM_SMI_POWER, "device_power", "Power consumption of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_memory_usage, ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE, "device_memory_usage", "Memory usage of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_vcn_activity, ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY, "device_vcn_activity", "VCN Activity of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_smi_jpeg_activity, ROCPROFSYS_CATEGORY_ROCM_SMI_JPEG_ACTIVITY, "device_jpeg_activity", "JPEG Activity of a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rocdecode_api, ROCPROFSYS_CATEGORY_ROCM_ROCDECODE_API, "rocm_rocdecode_api", "ROCm ROCDecode API")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl, ROCPROFSYS_CATEGORY_ROCM_RCCL, "rccl", "ROCm Communication Collectives Library (RCCL) regions")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, pthread, ROCPROFSYS_CATEGORY_PTHREAD, "pthread", "POSIX threading functions")
|
||||
@@ -171,6 +172,7 @@ using name = perfetto_category<Tp...>;
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_power), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_memory_usage), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_vcn_activity), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_smi_jpeg_activity), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rocdecode_api), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::pthread), \
|
||||
|
||||
@@ -82,6 +82,8 @@ struct backtrace_gpu_memory
|
||||
{};
|
||||
struct backtrace_gpu_vcn
|
||||
{};
|
||||
struct backtrace_gpu_jpeg
|
||||
{};
|
||||
using sampling_wall_clock = data_tracker<double, backtrace_wall_clock>;
|
||||
using sampling_cpu_clock = data_tracker<double, backtrace_cpu_clock>;
|
||||
using sampling_percent = data_tracker<double, backtrace_fraction>;
|
||||
@@ -90,6 +92,7 @@ using sampling_gpu_temp = data_tracker<double, backtrace_gpu_temp>;
|
||||
using sampling_gpu_power = data_tracker<double, backtrace_gpu_power>;
|
||||
using sampling_gpu_memory = data_tracker<double, backtrace_gpu_memory>;
|
||||
using sampling_gpu_vcn = data_tracker<double, backtrace_gpu_vcn>;
|
||||
using sampling_gpu_jpeg = data_tracker<double, backtrace_gpu_jpeg>;
|
||||
|
||||
template <typename ApiT, typename StartFuncT = default_functor_t,
|
||||
typename StopFuncT = default_functor_t>
|
||||
@@ -123,6 +126,7 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_temp, fal
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_power, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_memory, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_vcn, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(is_available, component::sampling_gpu_jpeg, false_type)
|
||||
#endif
|
||||
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_wall_clock,
|
||||
@@ -151,6 +155,9 @@ TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_temp, project::ro
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_vcn, project::rocprofsys,
|
||||
tpls::rocm, device::gpu, os::supports_linux,
|
||||
category::sampling, category::process_sampling)
|
||||
TIMEMORY_SET_COMPONENT_API(rocprofsys::component::sampling_gpu_jpeg, project::rocprofsys,
|
||||
tpls::rocm, device::gpu, os::supports_linux,
|
||||
category::sampling, category::process_sampling)
|
||||
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_wall_clock,
|
||||
"sampling_wall_clock", "Wall-clock timing",
|
||||
@@ -179,6 +186,10 @@ TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_vcn,
|
||||
"sampling_gpu_vcn",
|
||||
"GPU VCN Utilization (% activity) via ROCm-SMI",
|
||||
"Derived from sampling")
|
||||
TIMEMORY_METADATA_SPECIALIZATION(rocprofsys::component::sampling_gpu_jpeg,
|
||||
"sampling_gpu_jpeg",
|
||||
"GPU JPEG Utilization (% activity) via ROCm-SMI",
|
||||
"Derived from sampling")
|
||||
|
||||
// statistics type
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_wall_clock, double)
|
||||
@@ -188,6 +199,7 @@ TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_temp, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_power, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_memory, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_vcn, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::sampling_gpu_jpeg, double)
|
||||
TIMEMORY_STATISTICS_TYPE(rocprofsys::component::comm_data_tracker_t, float)
|
||||
|
||||
// enable timing units
|
||||
@@ -220,6 +232,7 @@ ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_temp, false
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_power, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_memory, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_vcn, false_type)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_sum, component::sampling_gpu_jpeg, false_type)
|
||||
|
||||
// reporting categories (mean)
|
||||
ROCPROFSYS_DEFINE_CONCRETE_TRAIT(report_mean, component::sampling_percent, false_type)
|
||||
|
||||
@@ -315,10 +315,10 @@ configure_settings(bool _init)
|
||||
"Enable ROCm API and kernel tracing", true, "backend",
|
||||
"rocm");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(
|
||||
bool, "ROCPROFSYS_USE_ROCM_SMI",
|
||||
"Enable sampling GPU power, temp, utilization, vcn_activity and memory usage",
|
||||
true, "backend", "rocm_smi", "rocm", "process_sampling");
|
||||
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_ROCM_SMI",
|
||||
"Enable sampling GPU power, temp, utilization, "
|
||||
"vcn_activity, jpeg_activity and memory usage",
|
||||
true, "backend", "rocm_smi", "rocm", "process_sampling");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_SAMPLING",
|
||||
"Enable statistical sampling of call-stack", false,
|
||||
@@ -626,11 +626,11 @@ configure_settings(bool _init)
|
||||
|
||||
rocprofiler_sdk::config_settings(_config);
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(
|
||||
std::string, "ROCPROFSYS_ROCM_SMI_METRICS",
|
||||
"rocm-smi metrics to collect: busy, temp, power, vcn_activity, mem_usage",
|
||||
"busy,temp,power,vcn_activity,mem_usage", "backend", "rocm_smi", "rocm",
|
||||
"process_sampling", "advanced");
|
||||
ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_ROCM_SMI_METRICS",
|
||||
"rocm-smi metrics to collect: busy, temp, power, "
|
||||
"vcn_activity, jpeg_activity, mem_usage",
|
||||
"busy,temp,power,mem_usage", "backend", "rocm_smi", "rocm",
|
||||
"process_sampling", "advanced");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(size_t, "ROCPROFSYS_PERFETTO_SHMEM_SIZE_HINT_KB",
|
||||
"Hint for shared-memory buffer size in perfetto (in KB)",
|
||||
|
||||
@@ -57,8 +57,9 @@ extern "C"
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_TEMP,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_POWER,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_MEMORY_USAGE,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY,
|
||||
ROCPROFSYS_CATEGORY_ROCM_ROCDECODE_API,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_VCN_ACTIVITY,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SMI_JPEG_ACTIVITY,
|
||||
ROCPROFSYS_CATEGORY_ROCM_RCCL,
|
||||
ROCPROFSYS_CATEGORY_SAMPLING,
|
||||
ROCPROFSYS_CATEGORY_PTHREAD,
|
||||
|
||||
@@ -160,12 +160,15 @@ data::sample(uint32_t _dev_id)
|
||||
&m_power, &power_type)
|
||||
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).mem_usage, rsmi_dev_memory_usage_get,
|
||||
_dev_id, RSMI_MEM_TYPE_VRAM, &m_mem_usage);
|
||||
ROCPROFSYS_RSMI_GET(get_settings(m_dev_id).vcn_activity,
|
||||
rsmi_dev_gpu_metrics_info_get, _dev_id, &_gpu_metrics);
|
||||
ROCPROFSYS_ROCM_SMI_CALL(rsmi_dev_gpu_metrics_info_get(_dev_id, &_gpu_metrics));
|
||||
|
||||
for(const auto& activity : _gpu_metrics.vcn_activity)
|
||||
for(const auto& v_activity : _gpu_metrics.vcn_activity)
|
||||
{
|
||||
if(activity != UINT16_MAX) m_vcn_metrics.push_back(activity);
|
||||
if(v_activity != UINT16_MAX) m_vcn_metrics[_dev_id].push_back(v_activity);
|
||||
}
|
||||
for(const auto& j_activity : _gpu_metrics.jpeg_activity)
|
||||
{
|
||||
if(j_activity != UINT16_MAX) m_jpeg_metrics[_dev_id].push_back(j_activity);
|
||||
}
|
||||
|
||||
#undef ROCPROFSYS_RSMI_GET
|
||||
@@ -262,6 +265,7 @@ void
|
||||
data::post_process(uint32_t _dev_id)
|
||||
{
|
||||
using component::sampling_gpu_busy;
|
||||
using component::sampling_gpu_jpeg;
|
||||
using component::sampling_gpu_memory;
|
||||
using component::sampling_gpu_power;
|
||||
using component::sampling_gpu_temp;
|
||||
@@ -282,7 +286,7 @@ data::post_process(uint32_t _dev_id)
|
||||
auto _settings = get_settings(_dev_id);
|
||||
|
||||
auto _process_perfetto = [&]() {
|
||||
auto _idx = std::array<uint64_t, 5>{};
|
||||
auto _idx = std::array<uint64_t, 6>{};
|
||||
{
|
||||
_idx.fill(_idx.size());
|
||||
uint64_t nidx = 0;
|
||||
@@ -291,6 +295,7 @@ data::post_process(uint32_t _dev_id)
|
||||
if(_settings.power) _idx.at(2) = nidx++;
|
||||
if(_settings.mem_usage) _idx.at(3) = nidx++;
|
||||
if(_settings.vcn_activity) _idx.at(4) = nidx++;
|
||||
if(_settings.jpeg_activity) _idx.at(5) = nidx++;
|
||||
}
|
||||
|
||||
for(auto& itr : _rocm_smi)
|
||||
@@ -302,6 +307,18 @@ data::post_process(uint32_t _dev_id)
|
||||
auto addendum = [&](const char* _v) {
|
||||
return JOIN(" ", "GPU", _v, JOIN("", '[', _dev_id, ']'), "(S)");
|
||||
};
|
||||
auto addendum_blk = [&](std::size_t _i, const char* _metric) {
|
||||
if(_i < 10)
|
||||
{
|
||||
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
|
||||
JOIN("", "[0", _i, ']'), "(S)");
|
||||
}
|
||||
else
|
||||
{
|
||||
return JOIN(" ", "GPU", JOIN("", '[', _dev_id, ']'), _metric,
|
||||
JOIN("", '[', _i, ']'), "(S)");
|
||||
}
|
||||
};
|
||||
|
||||
if(_settings.busy) counter_track::emplace(_dev_id, addendum("Busy"), "%");
|
||||
if(_settings.temp)
|
||||
@@ -313,11 +330,25 @@ data::post_process(uint32_t _dev_id)
|
||||
"megabytes");
|
||||
if(_settings.vcn_activity)
|
||||
{
|
||||
for(std::size_t i = 0; i < std::size(itr.m_vcn_metrics); ++i)
|
||||
counter_track::emplace(
|
||||
_dev_id,
|
||||
addendum(("VCN Activity on " + std::to_string(i)).c_str()),
|
||||
"%");
|
||||
for(const auto& [dev_id, metrics] : itr.m_vcn_metrics)
|
||||
{
|
||||
for(std::size_t i = 0; i < std::size(metrics); ++i)
|
||||
{
|
||||
counter_track::emplace(
|
||||
_dev_id, addendum_blk(i, " VCN Activity"), "%");
|
||||
}
|
||||
}
|
||||
}
|
||||
if(_settings.jpeg_activity)
|
||||
{
|
||||
for(const auto& [dev_id, metrics] : itr.m_jpeg_metrics)
|
||||
{
|
||||
for(std::size_t i = 0; i < std::size(metrics); ++i)
|
||||
{
|
||||
counter_track::emplace(_dev_id,
|
||||
addendum_blk(i, "JPEG Activity"), "%");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
uint64_t _ts = itr.m_ts;
|
||||
@@ -342,12 +373,28 @@ data::post_process(uint32_t _dev_id)
|
||||
counter_track::at(_dev_id, _idx.at(3)), _ts, _usage);
|
||||
if(_settings.vcn_activity)
|
||||
{
|
||||
uint64_t idx = _idx.at(4);
|
||||
for(const auto& temp : itr.m_vcn_metrics)
|
||||
for(const auto& [dev_id, metrics] : itr.m_vcn_metrics)
|
||||
{
|
||||
TRACE_COUNTER("device_vcn_activity", counter_track::at(_dev_id, idx),
|
||||
_ts, temp);
|
||||
++idx;
|
||||
for(std::size_t i = 0; i < std::size(metrics); ++i)
|
||||
{
|
||||
double _vcn_activity = metrics[i];
|
||||
TRACE_COUNTER("device_vcn_activity",
|
||||
counter_track::at(_dev_id, _idx.at(4) + i), _ts,
|
||||
_vcn_activity);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(_settings.jpeg_activity)
|
||||
{
|
||||
for(const auto& [dev_id, metrics] : itr.m_jpeg_metrics)
|
||||
{
|
||||
for(std::size_t i = 0; i < std::size(metrics); ++i)
|
||||
{
|
||||
double _jpeg_activity = metrics[i];
|
||||
TRACE_COUNTER("device_jpeg_activity",
|
||||
counter_track::at(_dev_id, _idx.at(5) + i), _ts,
|
||||
_jpeg_activity);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -440,9 +487,10 @@ setup()
|
||||
key_pair_t{ "power", get_settings(dev_id).power },
|
||||
key_pair_t{ "mem_usage", get_settings(dev_id).mem_usage },
|
||||
key_pair_t{ "vcn_activity", get_settings(dev_id).vcn_activity },
|
||||
key_pair_t{ "jpeg_activity", get_settings(dev_id).jpeg_activity },
|
||||
};
|
||||
|
||||
get_settings(dev_id) = { false, false, false, false };
|
||||
get_settings(dev_id) = { false, false, false, false, false, false };
|
||||
for(const auto& metric : tim::delimit(*_metrics, ",;:\t\n "))
|
||||
{
|
||||
auto iitr = supported.find(metric);
|
||||
@@ -524,3 +572,7 @@ ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
|
||||
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_vcn>), true,
|
||||
double)
|
||||
|
||||
ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_jpeg>), true,
|
||||
double)
|
||||
|
||||
@@ -71,11 +71,12 @@ device_count();
|
||||
|
||||
struct settings
|
||||
{
|
||||
bool busy = true;
|
||||
bool temp = true;
|
||||
bool power = true;
|
||||
bool mem_usage = true;
|
||||
bool vcn_activity = true;
|
||||
bool busy = true;
|
||||
bool temp = true;
|
||||
bool power = true;
|
||||
bool mem_usage = true;
|
||||
bool vcn_activity = true;
|
||||
bool jpeg_activity = true;
|
||||
};
|
||||
|
||||
struct data
|
||||
@@ -100,13 +101,14 @@ struct data
|
||||
|
||||
static void post_process(uint32_t _dev_id);
|
||||
|
||||
uint32_t m_dev_id = std::numeric_limits<uint32_t>::max();
|
||||
timestamp_t m_ts = 0;
|
||||
busy_perc_t m_busy_perc = 0;
|
||||
temp_t m_temp = 0;
|
||||
power_t m_power = 0;
|
||||
mem_usage_t m_mem_usage = 0;
|
||||
std::vector<uint16_t> m_vcn_metrics = {};
|
||||
uint32_t m_dev_id = std::numeric_limits<uint32_t>::max();
|
||||
timestamp_t m_ts = 0;
|
||||
busy_perc_t m_busy_perc = 0;
|
||||
temp_t m_temp = 0;
|
||||
power_t m_power = 0;
|
||||
mem_usage_t m_mem_usage = 0;
|
||||
std::unordered_map<uint32_t, std::vector<double>> m_vcn_metrics = {};
|
||||
std::unordered_map<uint32_t, std::vector<double>> m_jpeg_metrics = {};
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& _os, const data& _v)
|
||||
{
|
||||
@@ -185,5 +187,9 @@ ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_vcn>), true,
|
||||
double)
|
||||
|
||||
ROCPROFSYS_DECLARE_EXTERN_COMPONENT(
|
||||
TIMEMORY_ESC(data_tracker<double, rocprofsys::component::backtrace_gpu_jpeg>), true,
|
||||
double)
|
||||
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@@ -126,6 +126,7 @@ using component::backtrace_wall_clock; // NOLINT
|
||||
using component::callchain;
|
||||
using component::sampling_cpu_clock;
|
||||
using component::sampling_gpu_busy;
|
||||
using component::sampling_gpu_jpeg;
|
||||
using component::sampling_gpu_memory;
|
||||
using component::sampling_gpu_power;
|
||||
using component::sampling_gpu_temp;
|
||||
@@ -1575,10 +1576,16 @@ struct sampling_initialization
|
||||
sampling_gpu_temp::set_format_flags(sampling_gpu_temp::get_format_flags());
|
||||
|
||||
sampling_gpu_vcn::label() = "sampling_gpu_vcn_percent";
|
||||
sampling_gpu_vcn::description() = "Utilization of VCN(s)";
|
||||
sampling_gpu_vcn::description() = "VCN instance(s) activity";
|
||||
sampling_gpu_vcn::set_precision(0);
|
||||
sampling_gpu_vcn::set_format_flags(sampling_gpu_vcn::get_format_flags() &
|
||||
std::ios_base::showpoint);
|
||||
|
||||
sampling_gpu_jpeg::label() = "sampling_gpu_jpeg_percent";
|
||||
sampling_gpu_jpeg::description() = "JPEG instance(s) activity";
|
||||
sampling_gpu_jpeg::set_precision(0);
|
||||
sampling_gpu_jpeg::set_format_flags(sampling_gpu_jpeg::get_format_flags() &
|
||||
std::ios_base::showpoint);
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
@@ -23,6 +23,6 @@ include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-overflow-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-annotate-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-causal-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-python-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-videodecode-tests.cmake)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/rocprof-sys-decode-tests.cmake)
|
||||
|
||||
add_subdirectory(source)
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
#
|
||||
# video decode tests
|
||||
#
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
|
||||
set(_decode_environment
|
||||
"${_base_environment}"
|
||||
"ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,kernel_dispatch,memory_copy,rocdecode_api"
|
||||
"ROCPROFSYS_ROCM_SMI_METRICS=busy,temp,power,vcn_activity,jpeg_activity,mem_usage"
|
||||
"ROCPROFSYS_SAMPLING_CPUS=none")
|
||||
|
||||
rocprofiler_systems_add_test(
|
||||
SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE
|
||||
NAME video-decode
|
||||
TARGET videodecode
|
||||
GPU ON
|
||||
ENVIRONMENT "${_decode_environment}"
|
||||
RUN_ARGS -i ${PROJECT_BINARY_DIR}/videos -t 1
|
||||
LABELS "decode")
|
||||
|
||||
rocprofiler_systems_add_validation_test(
|
||||
NAME video-decode-sampling
|
||||
PERFETTO_METRIC "rocm_rocdecode_api"
|
||||
PERFETTO_FILE "perfetto-trace.proto"
|
||||
LABELS "decode"
|
||||
ARGS -l rocDecCreateVideoParser -c 2 -d 1 --counter-names "VCN Activity")
|
||||
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
#
|
||||
# image decode tests
|
||||
#
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
|
||||
rocprofiler_systems_add_test(
|
||||
SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE
|
||||
NAME image-decode
|
||||
TARGET jpegdecode
|
||||
GPU ON
|
||||
ENVIRONMENT "${_decode_environment}"
|
||||
RUN_ARGS -i ${PROJECT_BINARY_DIR}/images -b 32
|
||||
LABELS "decode")
|
||||
|
||||
rocprofiler_systems_add_validation_test(
|
||||
NAME image-decode-sampling
|
||||
PERFETTO_METRIC "host"
|
||||
PERFETTO_FILE "perfetto-trace.proto"
|
||||
LABELS "decode"
|
||||
ARGS -l jpegdecode -c 1 -d 0 --counter-names "JPEG Activity")
|
||||
@@ -1,22 +0,0 @@
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
#
|
||||
# video decode tests
|
||||
#
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
|
||||
rocprofiler_systems_add_test(
|
||||
SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE
|
||||
NAME videodecode
|
||||
TARGET videodecode
|
||||
GPU ON
|
||||
ENVIRONMENT
|
||||
"${_base_environment};ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,kernel_dispatch,memory_copy,rocdecode_api"
|
||||
RUN_ARGS -i ${PROJECT_BINARY_DIR}/videos -t 1
|
||||
LABELS "videodecode")
|
||||
|
||||
rocprofiler_systems_add_validation_test(
|
||||
NAME videodecode-sampling
|
||||
PERFETTO_METRIC "rocm_rocdecode_api"
|
||||
PERFETTO_FILE "perfetto-trace.proto"
|
||||
LABELS "videodecode"
|
||||
ARGS -l rocDecCreateVideoParser -c 2 -d 1 --counter-names "GPU VCN Activity")
|
||||
@@ -186,7 +186,7 @@ if __name__ == "__main__":
|
||||
sum_counter_values = tp.query(
|
||||
f"""SELECT SUM(counter.value) AS total_value FROM counter_track JOIN counter ON
|
||||
counter.track_id = counter_track.id WHERE counter_track.name LIKE
|
||||
'{counter_name}%'"""
|
||||
'%{counter_name}%'"""
|
||||
)
|
||||
total_value = 0
|
||||
for row in sum_counter_values:
|
||||
|
||||
Atsaukties uz šo jaunā problēmā
Block a user