Support for rocJPEG Tracing (#5)

* Add initial support for rocJPEG API Trace

* Added tests for rocjpeg

* Formatting
This commit is contained in:
Trowbridge, Ian
2025-01-27 11:50:12 -06:00
committed by GitHub
vanhempi de3a874b1a
commit 366716af06
8 muutettua tiedostoa jossa 274 lisäystä ja 0 poistoa
@@ -107,6 +107,7 @@ enum rocp_reg_supported_library // NOLINT(performance-enum-size)
ROCP_REG_HIP_COMPILER,
ROCP_REG_RCCL,
ROCP_REG_ROCDECODE,
ROCP_REG_ROCJPEG,
ROCP_REG_LAST,
};
@@ -171,6 +172,11 @@ ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCDECODE,
"rocprofiler_register_import_rocdecode",
"librocdecode.so.[0-9]($|\\.[0-9\\.]+)")
// Library traits for rocJPEG: associates the ROCP_REG_ROCJPEG enumerator with the
// library name "rocjpeg", the import symbol name
// "rocprofiler_register_import_rocjpeg", and a pattern for the shared-library
// file name (librocjpeg.so.<digit>, optionally followed by further ".<digits>").
// NOTE(review): presumably the last argument is used as a regular expression; if
// so, the '.' characters in "librocjpeg.so." are unescaped and match any
// character -- confirm this is acceptable.
ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCJPEG,
"rocjpeg",
"rocprofiler_register_import_rocjpeg",
"librocjpeg.so.[0-9]($|\\.[0-9\\.]+)")
// Human-readable message strings paired with rocprofiler-register status codes.
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_SUCCESS, "Success")
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_NO_TOOLS, "rocprofiler-register found no tools")
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_DEADLOCK, "rocprofiler-register deadlocked")
+1
Näytä tiedosto
@@ -73,6 +73,7 @@ add_subdirectory(amdhip)
# Each subdirectory below is added to the build; rocjpeg is registered right after
# rocdecode and before rocprofiler.
add_subdirectory(roctx)
add_subdirectory(rccl)
add_subdirectory(rocdecode)
add_subdirectory(rocjpeg)
add_subdirectory(rocprofiler)
#
+12
Näytä tiedosto
@@ -14,6 +14,7 @@ extern "C" {
# pragma weak roctxRangePop
# pragma weak ncclGetVersion
# pragma weak rocDecCreateDecoder
# pragma weak rocJpegStreamCreate
#endif
extern void
@@ -51,6 +52,17 @@ extern rocDecStatus
rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle,
RocDecoderCreateInfo* decoder_create_info);
// Local stand-ins for the rocJPEG types: both are empty enums declared here so
// that rocJpegStreamCreate can be declared without a rocJPEG header.
enum RocJpegStatus
{
};
enum RocJpegStreamHandle
{
};
// Declaration of the rocJPEG entry point used by the tests. The symbol is marked
// with "#pragma weak rocJpegStreamCreate" earlier in this header.
// NOTE(review): presumably the weak marking lets the address compare equal to
// null when the library is not linked -- confirm against the enclosing #if.
extern RocJpegStatus
rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle);
#ifdef __cplusplus
}
#endif
+10
Näytä tiedosto
@@ -22,6 +22,7 @@ decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr;
// Function pointers for the library entry points exercised by the tests; each
// starts out as nullptr. rocDecCreateDecoder_fn and rocJpegStreamCreate_fn are
// assigned inside resolve_symbols().
// NOTE(review): roctxRangePop_fn is typed with decltype(roctxRangePush); confirm
// this is intentional rather than decltype(roctxRangePop).
decltype(roctxRangePush)* roctxRangePush_fn = nullptr;
decltype(roctxRangePush)* roctxRangePop_fn = nullptr;
decltype(rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr;
decltype(rocJpegStreamCreate)* rocJpegStreamCreate_fn = nullptr;
enum rocp_reg_test_modes : uint8_t
{
@@ -31,6 +32,7 @@ enum rocp_reg_test_modes : uint8_t
ROCP_REG_TEST_ROCTX = (1 << 2),
ROCP_REG_TEST_RCCL = (1 << 3),
ROCP_REG_TEST_ROCDECODE = (1 << 4),
ROCP_REG_TEST_ROCJPEG = (1 << 5),
};
template <uint8_t Idx = ROCP_REG_TEST_NONE>
@@ -79,6 +81,7 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY)
void* roctx_handle = nullptr;
void* rccl_handle = nullptr;
void* rocdecode_handle = nullptr;
void* rocjpeg_handle = nullptr;
if constexpr((Idx & ROCP_REG_TEST_HIP) == ROCP_REG_TEST_HIP)
{
@@ -117,5 +120,12 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY)
if(!rocDecCreateDecoder_fn) _resolve_dlopen(rocdecode_handle, "librocdecode.so");
_resolve_dlsym(rocDecCreateDecoder_fn, rocdecode_handle, "rocDecCreateDecoder");
}
if constexpr((Idx & ROCP_REG_TEST_ROCJPEG) == ROCP_REG_TEST_ROCJPEG)
{
rocJpegStreamCreate_fn = rocJpegStreamCreate;
if(!rocJpegStreamCreate_fn) _resolve_dlopen(rocjpeg_handle, "librocjpeg.so");
_resolve_dlsym(rocJpegStreamCreate_fn, rocjpeg_handle, "rocJpegStreamCreate");
}
}
} // namespace
+33
Näytä tiedosto
@@ -0,0 +1,33 @@
#
#
#
# NOTE(review): the find_package() call is commented out, so this guard currently
# has an empty body and does nothing when the rocprofiler-register target is
# missing. Either restore the call or remove the guard -- confirm which is wanted.
if(NOT TARGET rocprofiler-register::rocprofiler-register)
# find_package(rocprofiler-register REQUIRED)
endif()
# Shared library "rocjpeg" built from rocjpeg.cpp/rocjpeg.hpp, with a namespaced
# alias. This directory is exported as a public include directory for in-tree
# consumers and rocprofiler-register is linked privately.
add_library(rocjpeg SHARED rocjpeg.cpp rocjpeg.hpp)
add_library(rocjpeg::rocjpeg ALIAS rocjpeg)

target_include_directories(rocjpeg PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>)
target_link_libraries(rocjpeg PRIVATE rocprofiler-register::rocprofiler-register)

# produces librocjpeg.so.1.0 with soname librocjpeg.so.1
set_target_properties(
    rocjpeg
    PROPERTIES VERSION 1.0
               SOVERSION 1
               OUTPUT_NAME rocjpeg)

rocp_register_strip_target(rocjpeg)
# Second build of the same sources under the target name "rocjpeg-invalid". It
# keeps the output name "rocjpeg" but is written to the "invalid" subdirectory of
# the library output directory.
# NOTE(review): what makes this copy "invalid" is not visible here; presumably its
# location is what the tests rely on -- confirm.
add_library(rocjpeg-invalid SHARED rocjpeg.cpp rocjpeg.hpp)
add_library(rocjpeg::rocjpeg-invalid ALIAS rocjpeg-invalid)

target_include_directories(rocjpeg-invalid
                           PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>)
target_link_libraries(rocjpeg-invalid PRIVATE rocprofiler-register::rocprofiler-register)

set_target_properties(
    rocjpeg-invalid
    PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/invalid
               VERSION 1.0
               SOVERSION 1
               OUTPUT_NAME rocjpeg)

rocp_register_strip_target(rocjpeg-invalid)
+149
Näytä tiedosto
@@ -0,0 +1,149 @@
#include "rocjpeg.hpp"
#include <rocprofiler-register/rocprofiler-register.h>
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <mutex>
#include <string>
#include <string_view>
// API table version handed to rocprofiler-register, computed from a major and a
// step component.
// NOTE(review): both component macros carry a ROCDECODE_ prefix although this is
// the rocjpeg library -- this looks like a copy/paste leftover; confirm and
// consider renaming them together with their definitions in rocjpeg.hpp.
#define ROCP_REG_VERSION \
ROCPROFILER_REGISTER_COMPUTE_VERSION_2(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION, \
ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION)
// Provides the "rocjpeg" import entity whose address is later passed to
// rocprofiler_register_library_api_table via ROCPROFILER_REGISTER_IMPORT_FUNC.
ROCPROFILER_REGISTER_DEFINE_IMPORT(rocjpeg, ROCP_REG_VERSION)
// Base name of this source file as a C string: the part of __FILE__ after the
// last '/' (the whole of __FILE__ when it contains no '/'). Only defined here if
// not already provided.
// The pointer comes from a temporary std::string, so it is valid only until the
// end of the full expression in which the macro is expanded. Use it directly as
// a function argument (as the printf calls in this file do); never store it.
#ifndef ROCP_REG_FILE_NAME
# define ROCP_REG_FILE_NAME \
::std::string{ __FILE__ } \
.substr(::std::string_view{ __FILE__ }.find_last_of('/') + 1) \
.c_str()
#endif
namespace rocjpeg
{
namespace
{
auto&
get_rocjpeg_api_table_impl()
{
static auto _table = std::atomic<rocjpegApiFuncTable*>{ nullptr };
return _table;
}
// Builds the rocjpeg API tables and offers a modifiable copy to
// rocprofiler-register.
//
// Steps, in order:
//   1. fill the internal table with this library's own implementations and
//      publish it as the active table,
//   2. copy it into a second table and pass that copy to
//      rocprofiler_register_library_api_table,
//   3. on success (status 0) switch the active table to the copy; if the active
//      table is no longer the internal one, report it and abort(),
//   4. on ROCP_REG_NO_TOOLS do nothing further; on any other status print the
//      error string and exit(EXIT_FAILURE).
void
register_profiler_impl()
{
    // table holding the library's own implementations
    static auto _internal_table = rocjpegApiFuncTable{};
    initialize_rocjpeg_api_table(&_internal_table);

    // publish the internal table before anything else can call back into the
    // library, so a recursive API call always finds a usable table
    auto& _active_table = get_rocjpeg_api_table_impl();
    _active_table.exchange(&_internal_table);

    // separate copy that registration is allowed to modify
    static auto _tool_table = rocjpegApiFuncTable{};
    copy_rocjpeg_api_table(&_tool_table, &_internal_table);

    void*      _tool_table_ptr = static_cast<void*>(&_tool_table);
    auto       _library_id     = rocprofiler_register_library_indentifier_t{};
    const auto _status =
        rocprofiler_register_library_api_table("rocjpeg",
                                               &ROCPROFILER_REGISTER_IMPORT_FUNC(rocjpeg),
                                               ROCP_REG_VERSION,
                                               &_tool_table_ptr,
                                               1,
                                               &_library_id);

    if(_status != 0)
    {
        // "no tools" leaves the internal table active; anything else is fatal
        if(_status != ROCP_REG_NO_TOOLS)
        {
            std::cerr << "rocjpeg library failed to register with rocprofiler-register: "
                      << rocprofiler_register_error_string(_status) << "\n";
            exit(EXIT_FAILURE);
        }
        return;
    }

    printf("[%s] rocjpeg identifier %lu\n", ROCP_REG_FILE_NAME, _library_id.handle);

    // swap in the (possibly modified) copy, but only if the active table is
    // still the internal one published above
    auto* _expected = &_internal_table;
    if(_active_table.compare_exchange_strong(_expected, &_tool_table)) return;

    // reaching this point means something other than this function replaced the
    // active table, which the current implementation never does
    std::cerr << "register_profiler_impl expected the API table to be the internal "
                 "implementation and yet it is not. something went wrong.\n";
    abort();
}
// Ensures register_profiler_impl() has run exactly once before the caller
// proceeds.
//
// - Once registration has completed, the only cost is one atomic load.
// - Threads that arrive while another thread is registering block on the mutex
//   until that thread has finished.
// - If the registering thread itself re-enters this function (i.e. this
//   library's API is called during registration, e.g. by a tool that invokes a
//   rocjpeg function while registering), the call returns immediately. It does
//   not lock, does not re-enter the call_once, and does not flip _is_registered,
//   so waiting threads are not released early and the re-entrant caller works
//   with the internal table published at the start of registration.
//
// The re-entrancy check uses a thread-local flag. The previous check called
// owns_lock() on a unique_lock that had just been constructed with
// std::defer_lock; that is always false, so the re-entrant path went on to
// invoke std::call_once recursively on the same flag.
void
register_profiler()
{
    static auto _is_registered = std::atomic<bool>{ false };
    if(_is_registered.load(std::memory_order_acquire)) return;

    using mutex_t     = std::recursive_mutex;
    using auto_lock_t = std::unique_lock<mutex_t>;

    static auto _once  = std::once_flag{};
    static auto _mutex = mutex_t{};

    // true only on the thread that is currently inside the call_once below
    static thread_local bool _is_registering = false;

    // same thread re-entering during registration: let it proceed without
    // touching the lock, the once-flag, or _is_registered
    if(_is_registering) return;

    // sets the flag for the duration of the registration and always clears it,
    // even if registration exits via an exception
    struct registering_scope
    {
        explicit registering_scope(bool& _v)
        : m_flag{ _v }
        {
            m_flag = true;
        }
        ~registering_scope() { m_flag = false; }
        registering_scope(const registering_scope&) = delete;
        registering_scope& operator=(const registering_scope&) = delete;

        bool& m_flag;
    };

    // ensures any other thread waits until the first thread finishes registration
    auto _lk = auto_lock_t{ _mutex };

    {
        auto _scope = registering_scope{ _is_registering };
        // call_once to ensure that we only register once
        std::call_once(_once, register_profiler_impl);
    }

    // registration is complete: waiting threads are released when _lk goes out
    // of scope and later calls take the fast path above
    _is_registered.store(true, std::memory_order_release);
}
} // namespace
// Returns the API table currently in use, running the profiler registration
// first if needed.
rocjpegApiFuncTable*
get_rocjpeg_api_table()
{
    register_profiler();
    auto& _active_table = get_rocjpeg_api_table_impl();
    return _active_table.load(std::memory_order_relaxed);
}
// Prints "[<file name>] <function name>" to stdout.
// NOTE(review): rocjpeg.hpp declares rocjpeg::rocJpegStreamCreate and
// initialize_rocjpeg_api_table() stores its address in the API table, but no
// definition of that function is visible in this file. This printing stub may
// have been meant to be that function -- confirm, otherwise the library has an
// unresolved symbol.
void
rocjpeg_init()
{
printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__);
}
} // namespace rocjpeg
extern "C" {
// C-linkage entry point: fetches the active API table (which triggers profiler
// registration on first use) and calls its rocJpegStreamCreate_fn entry with a
// value-initialized (null) handle pointer. The returned status is discarded.
// NOTE(review): rocjpeg.hpp declares an exported extern "C"
// rocJpegStreamCreate(RocJpegStreamHandle*) and the test code looks that symbol
// up by name, but it is not defined in this file. Confirm whether this function
// was meant to be rocJpegStreamCreate, forwarding its argument and returning the
// status.
void
rocjpeg_init(void)
{
rocjpeg::get_rocjpeg_api_table()->rocJpegStreamCreate_fn({});
}
}
+49
Näytä tiedosto
@@ -0,0 +1,49 @@
#pragma once
// Major (0) and step (1) components of the rocjpeg API table version.
// NOTE(review): the names carry a ROCDECODE_ prefix although this is the rocjpeg
// header -- likely a copy/paste leftover; confirm and consider renaming together
// with their use in rocjpeg.cpp.
#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0
#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1
#include <cstddef>
#include <cstdint>
extern "C" {
// fake rocjpeg API: empty-enum stand-ins for the status and stream-handle types,
// plus the one public entry point, declared with default symbol visibility
enum RocJpegStatus
{
};
enum RocJpegStreamHandle
{
};
RocJpegStatus
rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle)
__attribute__((visibility("default")));
}
namespace rocjpeg
{
// Table of function pointers for the rocjpeg API; one entry per API function.
struct rocjpegApiFuncTable
{
// size of the table in bytes; 0 until initialize_rocjpeg_api_table() sets it to
// sizeof(rocjpegApiFuncTable)
uint64_t size = 0;
// pointer with the signature of ::rocJpegStreamCreate; nullptr until populated
decltype(::rocJpegStreamCreate)* rocJpegStreamCreate_fn = nullptr;
};
// Library-internal implementation behind the public ::rocJpegStreamCreate; its
// address is what initialize_rocjpeg_api_table() stores in the API table.
RocJpegStatus
rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle);
// Fills the table pointed to by dst: every entry is set to the library's own
// implementation and the size field to the size of the table type.
inline void
initialize_rocjpeg_api_table(rocjpegApiFuncTable* dst)
{
    auto& _table                  = *dst;
    _table.rocJpegStreamCreate_fn = &::rocjpeg::rocJpegStreamCreate;
    _table.size                   = sizeof(_table);
}
// Overwrites the table at dst with a member-for-member copy of the table at src.
inline void
copy_rocjpeg_api_table(rocjpegApiFuncTable* dst, const rocjpegApiFuncTable* src)
{
    const auto& _from = *src;
    auto&       _to   = *dst;
    _to               = _from;
}
} // namespace rocjpeg
+14
Näytä tiedosto
@@ -3,6 +3,7 @@
#include <hsa-runtime/hsa-runtime.hpp>
#include <rccl/rccl.hpp>
#include <rocdecode/rocdecode.hpp>
#include <rocjpeg/rocjpeg.hpp>
#include <roctx/roctx.hpp>
#include <dlfcn.h>
@@ -45,6 +46,13 @@ rocDecCreateDecoder(rocDecDecoderHandle*, RocDecoderCreateInfo*)
return {};
}
// Tool-side replacement for rocJpegStreamCreate that rocprofiler_set_api_table
// installs into the rocjpeg API table. It prints "[<file name>] <function name>"
// and returns a value-initialized status; the stream handle is not used.
// The parameter is left unnamed (as in the rocDecCreateDecoder wrapper above) so
// that the unused argument does not trigger an unused-parameter warning.
RocJpegStatus
rocJpegStreamCreate(RocJpegStreamHandle*)
{
    printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__);
    return {};
}
void
roctx_range_push(const char* name)
{
@@ -96,6 +104,7 @@ rocprofiler_set_api_table(const char* name,
using roctx_table_t = roctx::ROCTxApiTable;
using rccl_table_t = rccl::rcclApiFuncTable;
using rocdecode_table_t = rocdecode::rocdecodeApiFuncTable;
using rocjpeg_table_t = rocjpeg::rocjpegApiFuncTable;
auto* _wrap_v = std::getenv("ROCP_REG_TEST_WRAP");
bool _wrap = (_wrap_v != nullptr && std::stoi(_wrap_v) != 0);
@@ -135,6 +144,11 @@ rocprofiler_set_api_table(const char* name,
rocdecode_table_t* _table = static_cast<rocdecode_table_t*>(tables[0]);
_table->rocDecCreateDecoder_fn = &rocprofiler::rocDecCreateDecoder;
}
else if(std::string_view{ name } == "rocjpeg")
{
rocjpeg_table_t* _table = static_cast<rocjpeg_table_t*>(tables[0]);
_table->rocJpegStreamCreate_fn = &rocprofiler::rocJpegStreamCreate;
}
}
return 0;