From 2fc0e4ecd4bde4b0beee43880285d65f9be06580 Mon Sep 17 00:00:00 2001 From: "Trowbridge, Ian" Date: Mon, 27 Jan 2025 11:50:12 -0600 Subject: [PATCH] Support for rocJPEG Tracing (#5) * Add initial support for rocJPEG API Trace * Added tests for rocjpeg * Formatting [ROCm/rocprofiler-register commit: 366716af06806caa6f8a834e20bceabc0f752403] --- .../rocprofiler_register.cpp | 6 + .../rocprofiler-register/tests/CMakeLists.txt | 1 + .../rocprofiler-register/tests/common/fwd.h | 12 ++ .../rocprofiler-register/tests/common/fwd.hpp | 10 ++ .../tests/rocjpeg/CMakeLists.txt | 33 ++++ .../tests/rocjpeg/rocjpeg.cpp | 149 ++++++++++++++++++ .../tests/rocjpeg/rocjpeg.hpp | 49 ++++++ .../tests/rocprofiler/rocprofiler.cpp | 14 ++ 8 files changed, 274 insertions(+) create mode 100644 projects/rocprofiler-register/tests/rocjpeg/CMakeLists.txt create mode 100644 projects/rocprofiler-register/tests/rocjpeg/rocjpeg.cpp create mode 100644 projects/rocprofiler-register/tests/rocjpeg/rocjpeg.hpp diff --git a/projects/rocprofiler-register/source/lib/rocprofiler-register/rocprofiler_register.cpp b/projects/rocprofiler-register/source/lib/rocprofiler-register/rocprofiler_register.cpp index 619deba9b8..0905eddd66 100644 --- a/projects/rocprofiler-register/source/lib/rocprofiler-register/rocprofiler_register.cpp +++ b/projects/rocprofiler-register/source/lib/rocprofiler-register/rocprofiler_register.cpp @@ -107,6 +107,7 @@ enum rocp_reg_supported_library // NOLINT(performance-enum-size) ROCP_REG_HIP_COMPILER, ROCP_REG_RCCL, ROCP_REG_ROCDECODE, + ROCP_REG_ROCJPEG, ROCP_REG_LAST, }; @@ -171,6 +172,11 @@ ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCDECODE, "rocprofiler_register_import_rocdecode", "librocdecode.so.[0-9]($|\\.[0-9\\.]+)") +ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCJPEG, + "rocjpeg", + "rocprofiler_register_import_rocjpeg", + "librocjpeg.so.[0-9]($|\\.[0-9\\.]+)") + ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_SUCCESS, "Success") ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_NO_TOOLS, 
"rocprofiler-register found no tools") ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_DEADLOCK, "rocprofiler-register deadlocked") diff --git a/projects/rocprofiler-register/tests/CMakeLists.txt b/projects/rocprofiler-register/tests/CMakeLists.txt index 3482ca7005..d0da1bcc22 100644 --- a/projects/rocprofiler-register/tests/CMakeLists.txt +++ b/projects/rocprofiler-register/tests/CMakeLists.txt @@ -73,6 +73,7 @@ add_subdirectory(amdhip) add_subdirectory(roctx) add_subdirectory(rccl) add_subdirectory(rocdecode) +add_subdirectory(rocjpeg) add_subdirectory(rocprofiler) # diff --git a/projects/rocprofiler-register/tests/common/fwd.h b/projects/rocprofiler-register/tests/common/fwd.h index 057b3e1b70..bd7be6030a 100644 --- a/projects/rocprofiler-register/tests/common/fwd.h +++ b/projects/rocprofiler-register/tests/common/fwd.h @@ -14,6 +14,7 @@ extern "C" { # pragma weak roctxRangePop # pragma weak ncclGetVersion # pragma weak rocDecCreateDecoder +# pragma weak rocJpegStreamCreate #endif extern void @@ -51,6 +52,17 @@ extern rocDecStatus rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle, RocDecoderCreateInfo* decoder_create_info); +enum RocJpegStatus +{ +}; + +enum RocJpegStreamHandle +{ +}; + +extern RocJpegStatus +rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle); + #ifdef __cplusplus } #endif diff --git a/projects/rocprofiler-register/tests/common/fwd.hpp b/projects/rocprofiler-register/tests/common/fwd.hpp index 91e30c35ef..d740658dea 100644 --- a/projects/rocprofiler-register/tests/common/fwd.hpp +++ b/projects/rocprofiler-register/tests/common/fwd.hpp @@ -22,6 +22,7 @@ decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr; decltype(roctxRangePush)* roctxRangePush_fn = nullptr; decltype(roctxRangePush)* roctxRangePop_fn = nullptr; decltype(rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr; +decltype(rocJpegStreamCreate)* rocJpegStreamCreate_fn = nullptr; enum rocp_reg_test_modes : uint8_t { @@ -31,6 +32,7 @@ enum rocp_reg_test_modes : uint8_t 
ROCP_REG_TEST_ROCTX = (1 << 2), ROCP_REG_TEST_RCCL = (1 << 3), ROCP_REG_TEST_ROCDECODE = (1 << 4), + ROCP_REG_TEST_ROCJPEG = (1 << 5), }; template @@ -79,6 +81,7 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY) void* roctx_handle = nullptr; void* rccl_handle = nullptr; void* rocdecode_handle = nullptr; + void* rocjpeg_handle = nullptr; if constexpr((Idx & ROCP_REG_TEST_HIP) == ROCP_REG_TEST_HIP) { @@ -117,5 +120,12 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY) if(!rocDecCreateDecoder_fn) _resolve_dlopen(rocdecode_handle, "librocdecode.so"); _resolve_dlsym(rocDecCreateDecoder_fn, rocdecode_handle, "rocDecCreateDecoder"); } + + if constexpr((Idx & ROCP_REG_TEST_ROCJPEG) == ROCP_REG_TEST_ROCJPEG) + { + rocJpegStreamCreate_fn = rocJpegStreamCreate; + if(!rocJpegStreamCreate_fn) _resolve_dlopen(rocjpeg_handle, "librocjpeg.so"); + _resolve_dlsym(rocJpegStreamCreate_fn, rocjpeg_handle, "rocJpegStreamCreate"); + } } } // namespace diff --git a/projects/rocprofiler-register/tests/rocjpeg/CMakeLists.txt b/projects/rocprofiler-register/tests/rocjpeg/CMakeLists.txt new file mode 100644 index 0000000000..ffa306ab3b --- /dev/null +++ b/projects/rocprofiler-register/tests/rocjpeg/CMakeLists.txt @@ -0,0 +1,33 @@ +# +# +# + +if(NOT TARGET rocprofiler-register::rocprofiler-register) + # find_package(rocprofiler-register REQUIRED) +endif() + +add_library(rocjpeg SHARED) +add_library(rocjpeg::rocjpeg ALIAS rocjpeg) +target_sources(rocjpeg PRIVATE rocjpeg.cpp rocjpeg.hpp) +target_include_directories(rocjpeg PUBLIC $) +target_link_libraries(rocjpeg PRIVATE rocprofiler-register::rocprofiler-register) +set_target_properties( + rocjpeg + PROPERTIES OUTPUT_NAME rocjpeg + SOVERSION 1 + VERSION 1.0) +rocp_register_strip_target(rocjpeg) + +add_library(rocjpeg-invalid SHARED) +add_library(rocjpeg::rocjpeg-invalid ALIAS rocjpeg-invalid) +target_sources(rocjpeg-invalid PRIVATE rocjpeg.cpp rocjpeg.hpp) +target_include_directories(rocjpeg-invalid + PUBLIC $) 
+target_link_libraries(rocjpeg-invalid PRIVATE rocprofiler-register::rocprofiler-register) +set_target_properties( + rocjpeg-invalid + PROPERTIES OUTPUT_NAME rocjpeg + SOVERSION 1 + VERSION 1.0 + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/invalid) +rocp_register_strip_target(rocjpeg-invalid) diff --git a/projects/rocprofiler-register/tests/rocjpeg/rocjpeg.cpp b/projects/rocprofiler-register/tests/rocjpeg/rocjpeg.cpp new file mode 100644 index 0000000000..e77944439a --- /dev/null +++ b/projects/rocprofiler-register/tests/rocjpeg/rocjpeg.cpp @@ -0,0 +1,149 @@ +#include "rocjpeg.hpp" + +#include + +#include +#include +#include +#include + +#define ROCP_REG_VERSION \ + ROCPROFILER_REGISTER_COMPUTE_VERSION_2(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION, \ + ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION) + +ROCPROFILER_REGISTER_DEFINE_IMPORT(rocjpeg, ROCP_REG_VERSION) + +#ifndef ROCP_REG_FILE_NAME +# define ROCP_REG_FILE_NAME \ + ::std::string{ __FILE__ } \ + .substr(::std::string_view{ __FILE__ }.find_last_of('/') + 1) \ + .c_str() +#endif + +namespace rocjpeg +{ +namespace +{ +auto& +get_rocjpeg_api_table_impl() +{ + static auto _table = std::atomic{ nullptr }; + return _table; +} + +void +register_profiler_impl() +{ + static auto _const_api_table = rocjpegApiFuncTable{}; + initialize_rocjpeg_api_table(&_const_api_table); + + // set this before any recursive opportunity arises + get_rocjpeg_api_table_impl().exchange(&_const_api_table); + + // create a copy of the api table for modification by registration + static auto _profiler_api_table = rocjpegApiFuncTable{}; + copy_rocjpeg_api_table(&_profiler_api_table, &_const_api_table); + + void* _profiler_api_table_v = static_cast(&_profiler_api_table); + + auto lib_id = rocprofiler_register_library_indentifier_t{}; + auto success = + rocprofiler_register_library_api_table("rocjpeg", + &ROCPROFILER_REGISTER_IMPORT_FUNC(rocjpeg), + ROCP_REG_VERSION, + &_profiler_api_table_v, + 1, + &lib_id); + + if(success == 0) + { + 
printf("[%s] rocjpeg identifier %lu\n", ROCP_REG_FILE_NAME, lib_id.handle); + auto* _api_table = &_const_api_table; + if(!get_rocjpeg_api_table_impl().compare_exchange_strong(_api_table, + &_profiler_api_table)) + { + // with the current impl, if we ever get here, someone is calling one of the + // functions in this anonymous namespace that they shouldn't + std::cerr + << "register_profiler_impl expected the API table to be the internal " + "implementation and yet it is not. something went wrong.\n"; + abort(); + } + } + else if(success != ROCP_REG_NO_TOOLS) + { + std::cerr << "rocjpeg library failed to register with rocprofiler-register: " + << rocprofiler_register_error_string(success) << "\n"; + exit(EXIT_FAILURE); + } +} + +void +register_profiler() +{ + // this registration scheme is designed to minimize overhead once + // registered (only pay cost of checking atomic boolean) + // once the profiler is registered. If the library has not + // been registered and two or more threads try to register concurrently + // the first thread to acquire the lock below, will block the + // threads until registration is complete. However, + // if the same thread performing the registration re-enters this function + // i.e. this library's API is called during registration, this function + // will prevent a deadlock by not attempting to re-enter + // the call-once and not releasing any waiting threads by flipping + // the _is_registered field to true. + static auto _is_registered = std::atomic{ false }; + + if(!_is_registered.load(std::memory_order_acquire)) + { + using mutex_t = std::recursive_mutex; + using auto_lock_t = std::unique_lock; + static auto _once = std::once_flag{}; + static auto _mutex = mutex_t{}; + + // defer the lock so we can check for recursion + auto _lk = auto_lock_t{ _mutex, std::defer_lock }; + + // this will be true if the same thread currently executing the call_once invokes + // the library's API while registering the profiler (e.g.
tool which wants to + // instrument rocjpeg API invokes a rocjpeg function while registering with + // the profiler) we allow this thread to proceed and access the "const" API table + // but return so it does not flip _is_registered to true, which would result in + // any subsequent threads not waiting until the library is fully registered, + // resulting in missed callbacks for the tools + if(_lk.owns_lock()) return; + + // ensures any subsequent threads wait until the first thread + // finishes registration + _lk.lock(); + // call_once to ensure that we only register once + std::call_once(_once, register_profiler_impl); + // the first thread has completed registration and all + // threads waiting on lock will be released and this + // block will not be entered again + _is_registered.exchange(true, std::memory_order_release); + } +} +} // namespace + +rocjpegApiFuncTable* +get_rocjpeg_api_table() +{ + register_profiler(); + return get_rocjpeg_api_table_impl().load(std::memory_order_relaxed); +} + +void +rocjpeg_init() +{ + printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__); +} +} // namespace rocjpeg + +extern "C" { +void +rocjpeg_init(void) +{ + rocjpeg::get_rocjpeg_api_table()->rocJpegStreamCreate_fn({}); +} +} diff --git a/projects/rocprofiler-register/tests/rocjpeg/rocjpeg.hpp b/projects/rocprofiler-register/tests/rocjpeg/rocjpeg.hpp new file mode 100644 index 0000000000..dbd053b2ef --- /dev/null +++ b/projects/rocprofiler-register/tests/rocjpeg/rocjpeg.hpp @@ -0,0 +1,49 @@ +#pragma once + +#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0 +#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1 + +#include +#include + +extern "C" { +// fake rocjpeg function +enum RocJpegStatus +{ +}; + +enum RocJpegStreamHandle +{ +}; + +RocJpegStatus +rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle) + __attribute__((visibility("default"))); +} + +namespace rocjpeg +{ +struct rocjpegApiFuncTable +{ + uint64_t size = 0; + decltype(::rocJpegStreamCreate)*
rocJpegStreamCreate_fn = nullptr; +}; + +RocJpegStatus +rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle); + +// populates rocjpeg api table with function pointers +inline void +initialize_rocjpeg_api_table(rocjpegApiFuncTable* dst) +{ + dst->size = sizeof(rocjpegApiFuncTable); + dst->rocJpegStreamCreate_fn = &::rocjpeg::rocJpegStreamCreate; +} + +// copies the api table from src to dst +inline void +copy_rocjpeg_api_table(rocjpegApiFuncTable* dst, const rocjpegApiFuncTable* src) +{ + *dst = *src; +} +} // namespace rocjpeg diff --git a/projects/rocprofiler-register/tests/rocprofiler/rocprofiler.cpp b/projects/rocprofiler-register/tests/rocprofiler/rocprofiler.cpp index 63418badd6..eac61cadff 100644 --- a/projects/rocprofiler-register/tests/rocprofiler/rocprofiler.cpp +++ b/projects/rocprofiler-register/tests/rocprofiler/rocprofiler.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,13 @@ rocDecCreateDecoder(rocDecDecoderHandle*, RocDecoderCreateInfo*) return {}; } +RocJpegStatus +rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle) +{ + printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__); + return {}; +} + void roctx_range_push(const char* name) { @@ -96,6 +104,7 @@ rocprofiler_set_api_table(const char* name, using roctx_table_t = roctx::ROCTxApiTable; using rccl_table_t = rccl::rcclApiFuncTable; using rocdecode_table_t = rocdecode::rocdecodeApiFuncTable; + using rocjpeg_table_t = rocjpeg::rocjpegApiFuncTable; auto* _wrap_v = std::getenv("ROCP_REG_TEST_WRAP"); bool _wrap = (_wrap_v != nullptr && std::stoi(_wrap_v) != 0); @@ -135,6 +144,11 @@ rocprofiler_set_api_table(const char* name, rocdecode_table_t* _table = static_cast(tables[0]); _table->rocDecCreateDecoder_fn = &rocprofiler::rocDecCreateDecoder; } + else if(std::string_view{ name } == "rocjpeg") + { + rocjpeg_table_t* _table = static_cast(tables[0]); + _table->rocJpegStreamCreate_fn = &rocprofiler::rocJpegStreamCreate; + } } return 0;