Support for rocJPEG Tracing (#5)
* Add initial support for rocJPEG API Trace
* Added tests for rocjpeg
* Formatting
[ROCm/rocprofiler-register commit: 366716af06]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
515f4957f4
Коммит
2fc0e4ecd4
@@ -107,6 +107,7 @@ enum rocp_reg_supported_library // NOLINT(performance-enum-size)
|
||||
ROCP_REG_HIP_COMPILER,
|
||||
ROCP_REG_RCCL,
|
||||
ROCP_REG_ROCDECODE,
|
||||
ROCP_REG_ROCJPEG,
|
||||
ROCP_REG_LAST,
|
||||
};
|
||||
|
||||
@@ -171,6 +172,11 @@ ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCDECODE,
|
||||
"rocprofiler_register_import_rocdecode",
|
||||
"librocdecode.so.[0-9]($|\\.[0-9\\.]+)")
|
||||
|
||||
ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCJPEG,
|
||||
"rocjpeg",
|
||||
"rocprofiler_register_import_rocjpeg",
|
||||
"librocjpeg.so.[0-9]($|\\.[0-9\\.]+)")
|
||||
|
||||
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_SUCCESS, "Success")
|
||||
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_NO_TOOLS, "rocprofiler-register found no tools")
|
||||
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_DEADLOCK, "rocprofiler-register deadlocked")
|
||||
|
||||
@@ -73,6 +73,7 @@ add_subdirectory(amdhip)
|
||||
add_subdirectory(roctx)
|
||||
add_subdirectory(rccl)
|
||||
add_subdirectory(rocdecode)
|
||||
add_subdirectory(rocjpeg)
|
||||
add_subdirectory(rocprofiler)
|
||||
|
||||
#
|
||||
|
||||
@@ -14,6 +14,7 @@ extern "C" {
|
||||
# pragma weak roctxRangePop
|
||||
# pragma weak ncclGetVersion
|
||||
# pragma weak rocDecCreateDecoder
|
||||
# pragma weak rocJpegStreamCreate
|
||||
#endif
|
||||
|
||||
extern void
|
||||
@@ -51,6 +52,17 @@ extern rocDecStatus
|
||||
rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle,
|
||||
RocDecoderCreateInfo* decoder_create_info);
|
||||
|
||||
enum RocJpegStatus
|
||||
{
|
||||
};
|
||||
|
||||
enum RocJpegStreamHandle
|
||||
{
|
||||
};
|
||||
|
||||
extern RocJpegStatus
|
||||
rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -22,6 +22,7 @@ decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr;
|
||||
decltype(roctxRangePush)* roctxRangePush_fn = nullptr;
|
||||
decltype(roctxRangePush)* roctxRangePop_fn = nullptr;
|
||||
decltype(rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr;
|
||||
decltype(rocJpegStreamCreate)* rocJpegStreamCreate_fn = nullptr;
|
||||
|
||||
enum rocp_reg_test_modes : uint8_t
|
||||
{
|
||||
@@ -31,6 +32,7 @@ enum rocp_reg_test_modes : uint8_t
|
||||
ROCP_REG_TEST_ROCTX = (1 << 2),
|
||||
ROCP_REG_TEST_RCCL = (1 << 3),
|
||||
ROCP_REG_TEST_ROCDECODE = (1 << 4),
|
||||
ROCP_REG_TEST_ROCJPEG = (1 << 5),
|
||||
};
|
||||
|
||||
template <uint8_t Idx = ROCP_REG_TEST_NONE>
|
||||
@@ -79,6 +81,7 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY)
|
||||
void* roctx_handle = nullptr;
|
||||
void* rccl_handle = nullptr;
|
||||
void* rocdecode_handle = nullptr;
|
||||
void* rocjpeg_handle = nullptr;
|
||||
|
||||
if constexpr((Idx & ROCP_REG_TEST_HIP) == ROCP_REG_TEST_HIP)
|
||||
{
|
||||
@@ -117,5 +120,12 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY)
|
||||
if(!rocDecCreateDecoder_fn) _resolve_dlopen(rocdecode_handle, "librocdecode.so");
|
||||
_resolve_dlsym(rocDecCreateDecoder_fn, rocdecode_handle, "rocDecCreateDecoder");
|
||||
}
|
||||
|
||||
if constexpr((Idx & ROCP_REG_TEST_ROCJPEG) == ROCP_REG_TEST_ROCJPEG)
|
||||
{
|
||||
rocJpegStreamCreate_fn = rocJpegStreamCreate;
|
||||
if(!rocJpegStreamCreate_fn) _resolve_dlopen(rocjpeg_handle, "librocjpeg.so");
|
||||
_resolve_dlsym(rocJpegStreamCreate_fn, rocjpeg_handle, "rocJpegStreamCreate");
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
if(NOT TARGET rocprofiler-register::rocprofiler-register)
|
||||
# find_package(rocprofiler-register REQUIRED)
|
||||
endif()
|
||||
|
||||
add_library(rocjpeg SHARED)
|
||||
add_library(rocjpeg::rocjpeg ALIAS rocjpeg)
|
||||
target_sources(rocjpeg PRIVATE rocjpeg.cpp rocjpeg.hpp)
|
||||
target_include_directories(rocjpeg PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>)
|
||||
target_link_libraries(rocjpeg PRIVATE rocprofiler-register::rocprofiler-register)
|
||||
set_target_properties(
|
||||
rocjpeg
|
||||
PROPERTIES OUTPUT_NAME rocjpeg
|
||||
SOVERSION 1
|
||||
VERSION 1.0)
|
||||
rocp_register_strip_target(rocjpeg)
|
||||
|
||||
add_library(rocjpeg-invalid SHARED)
|
||||
add_library(rocjpeg::rocjpeg-invalid ALIAS rocjpeg-invalid)
|
||||
target_sources(rocjpeg-invalid PRIVATE rocjpeg.cpp rocjpeg.hpp)
|
||||
target_include_directories(rocjpeg-invalid
|
||||
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>)
|
||||
target_link_libraries(rocjpeg-invalid PRIVATE rocprofiler-register::rocprofiler-register)
|
||||
set_target_properties(
|
||||
rocjpeg-invalid
|
||||
PROPERTIES OUTPUT_NAME rocjpeg
|
||||
SOVERSION 1
|
||||
VERSION 1.0
|
||||
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/invalid)
|
||||
rocp_register_strip_target(rocjpeg-invalid)
|
||||
@@ -0,0 +1,149 @@
|
||||
#include "rocjpeg.hpp"
|
||||
|
||||
#include <rocprofiler-register/rocprofiler-register.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string_view>
|
||||
|
||||
#define ROCP_REG_VERSION \
|
||||
ROCPROFILER_REGISTER_COMPUTE_VERSION_2(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION, \
|
||||
ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION)
|
||||
|
||||
ROCPROFILER_REGISTER_DEFINE_IMPORT(rocjpeg, ROCP_REG_VERSION)
|
||||
|
||||
#ifndef ROCP_REG_FILE_NAME
|
||||
# define ROCP_REG_FILE_NAME \
|
||||
::std::string{ __FILE__ } \
|
||||
.substr(::std::string_view{ __FILE__ }.find_last_of('/') + 1) \
|
||||
.c_str()
|
||||
#endif
|
||||
|
||||
namespace rocjpeg
|
||||
{
|
||||
namespace
|
||||
{
|
||||
auto&
|
||||
get_rocjpeg_api_table_impl()
|
||||
{
|
||||
static auto _table = std::atomic<rocjpegApiFuncTable*>{ nullptr };
|
||||
return _table;
|
||||
}
|
||||
|
||||
void
|
||||
register_profiler_impl()
|
||||
{
|
||||
static auto _const_api_table = rocjpegApiFuncTable{};
|
||||
initialize_rocjpeg_api_table(&_const_api_table);
|
||||
|
||||
// set this before any recursive opportunity arises
|
||||
get_rocjpeg_api_table_impl().exchange(&_const_api_table);
|
||||
|
||||
// create a copy of the api table for modification by registration
|
||||
static auto _profiler_api_table = rocjpegApiFuncTable{};
|
||||
copy_rocjpeg_api_table(&_profiler_api_table, &_const_api_table);
|
||||
|
||||
void* _profiler_api_table_v = static_cast<void*>(&_profiler_api_table);
|
||||
|
||||
auto lib_id = rocprofiler_register_library_indentifier_t{};
|
||||
auto success =
|
||||
rocprofiler_register_library_api_table("rocjpeg",
|
||||
&ROCPROFILER_REGISTER_IMPORT_FUNC(rocjpeg),
|
||||
ROCP_REG_VERSION,
|
||||
&_profiler_api_table_v,
|
||||
1,
|
||||
&lib_id);
|
||||
|
||||
if(success == 0)
|
||||
{
|
||||
printf("[%s] rocjpeg identifier %lu\n", ROCP_REG_FILE_NAME, lib_id.handle);
|
||||
auto* _api_table = &_const_api_table;
|
||||
if(!get_rocjpeg_api_table_impl().compare_exchange_strong(_api_table,
|
||||
&_profiler_api_table))
|
||||
{
|
||||
// with the current impl, if we ever get here, someone is calling one the
|
||||
// functions in this anonymous namespace that shouldn't
|
||||
std::cerr
|
||||
<< "register_profiler_impl expected the API table to be the internal "
|
||||
"implementation and yet it is not. something went wrong.\n";
|
||||
abort();
|
||||
}
|
||||
}
|
||||
else if(success != ROCP_REG_NO_TOOLS)
|
||||
{
|
||||
std::cerr << "rocjpeg library failed to register with rocprofiler-register: "
|
||||
<< rocprofiler_register_error_string(success) << "\n";
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
register_profiler()
|
||||
{
|
||||
// this registration scheme is designed to minimize overhead once
|
||||
// registered (only pay cost of checking atomic boolean)
|
||||
// once the profiler is registered. If the library has not
|
||||
// been registered and two or more threads try to register concurrently
|
||||
// the first thread to acquire the lock below, will block the
|
||||
// threads until registration is complete. However,
|
||||
// if the same thread performing the registration re-enters this function
|
||||
// i.e. this library's API is called during registration, this function
|
||||
// will prevent a deadlock by not attempting to re-enter the
|
||||
// the call-once and not releasing any waiting threads by flipping
|
||||
// the _is_registered field to true.
|
||||
static auto _is_registered = std::atomic<bool>{ false };
|
||||
|
||||
if(!_is_registered.load(std::memory_order_acquire))
|
||||
{
|
||||
using mutex_t = std::recursive_mutex;
|
||||
using auto_lock_t = std::unique_lock<mutex_t>;
|
||||
static auto _once = std::once_flag{};
|
||||
static auto _mutex = mutex_t{};
|
||||
|
||||
// defer the lock so we can check for recursion
|
||||
auto _lk = auto_lock_t{ _mutex, std::defer_lock };
|
||||
|
||||
// this will be true if the same thread currently executing the call_once invokes
|
||||
// the library's API while registering the profiler (e.g. tool which wants to
|
||||
// instrument rocjpeg API invokes a rocjpeg function while registering with
|
||||
// the profiler) we allow this thread to proceed and access the "const" API table
|
||||
// but return so it does not flip _is_registered to true, which would result in
|
||||
// any subsequent threads not waiting until the library is fully registered,
|
||||
// resulting in missed callbacks for the tools
|
||||
if(_lk.owns_lock()) return;
|
||||
|
||||
// ensures any subsequent threads wait until the first thread
|
||||
// finishes registration
|
||||
_lk.lock();
|
||||
// call_once to ensure that we only register once
|
||||
std::call_once(_once, register_profiler_impl);
|
||||
// the first thread has completed registration and all
|
||||
// threads waiting on lock will be released and this
|
||||
// block will not be entered again
|
||||
_is_registered.exchange(true, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Returns the currently active rocjpeg API table. register_profiler() is invoked
// first, then the pointer is read from the slot returned by
// get_rocjpeg_api_table_impl().
rocjpegApiFuncTable*
get_rocjpeg_api_table()
{
    register_profiler();

    auto& _active_slot = get_rocjpeg_api_table_impl();
    return _active_slot.load(std::memory_order_relaxed);
}
|
||||
|
||||
void
|
||||
rocjpeg_init()
|
||||
{
|
||||
printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__);
|
||||
}
|
||||
} // namespace rocjpeg
|
||||
|
||||
extern "C" {
|
||||
void
|
||||
rocjpeg_init(void)
|
||||
{
|
||||
rocjpeg::get_rocjpeg_api_table()->rocJpegStreamCreate_fn({});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0
|
||||
#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
extern "C" {
|
||||
// fake rocjpeg function
|
||||
enum RocJpegStatus
|
||||
{
|
||||
};
|
||||
|
||||
enum RocJpegStreamHandle
|
||||
{
|
||||
};
|
||||
|
||||
RocJpegStatus
|
||||
rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle)
|
||||
__attribute__((visibility("default")));
|
||||
}
|
||||
|
||||
namespace rocjpeg
|
||||
{
|
||||
struct rocjpegApiFuncTable
|
||||
{
|
||||
uint64_t size = 0;
|
||||
decltype(::rocJpegStreamCreate)* rocJpegStreamCreate_fn = nullptr;
|
||||
};
|
||||
|
||||
RocJpegStatus
|
||||
rocJpegStreamCreate(RocJpegStreamHandle* jpeg_stream_handle);
|
||||
|
||||
// populates rocjpeg api table with function pointers
|
||||
inline void
|
||||
initialize_rocjpeg_api_table(rocjpegApiFuncTable* dst)
|
||||
{
|
||||
dst->size = sizeof(rocjpegApiFuncTable);
|
||||
dst->rocJpegStreamCreate_fn = &::rocjpeg::rocJpegStreamCreate;
|
||||
}
|
||||
|
||||
// copies the api table from src to dst
|
||||
inline void
|
||||
copy_rocjpeg_api_table(rocjpegApiFuncTable* dst, const rocjpegApiFuncTable* src)
|
||||
{
|
||||
*dst = *src;
|
||||
}
|
||||
} // namespace rocjpeg
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <hsa-runtime/hsa-runtime.hpp>
|
||||
#include <rccl/rccl.hpp>
|
||||
#include <rocdecode/rocdecode.hpp>
|
||||
#include <rocjpeg/rocjpeg.hpp>
|
||||
#include <roctx/roctx.hpp>
|
||||
|
||||
#include <dlfcn.h>
|
||||
@@ -45,6 +46,13 @@ rocDecCreateDecoder(rocDecDecoderHandle*, RocDecoderCreateInfo*)
|
||||
return {};
|
||||
}
|
||||
|
||||
// Test stand-in for rocJpegStreamCreate: prints "[<source file name>] <function name>"
// to stdout and returns a value-initialized RocJpegStatus. The stream handle
// argument is intentionally ignored, so the parameter is left unnamed to avoid an
// unused-parameter warning (matching the rocDecCreateDecoder stub in this file).
RocJpegStatus
rocJpegStreamCreate(RocJpegStreamHandle* /*jpeg_stream_handle*/)
{
    printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__);
    return {};
}
|
||||
|
||||
void
|
||||
roctx_range_push(const char* name)
|
||||
{
|
||||
@@ -96,6 +104,7 @@ rocprofiler_set_api_table(const char* name,
|
||||
using roctx_table_t = roctx::ROCTxApiTable;
|
||||
using rccl_table_t = rccl::rcclApiFuncTable;
|
||||
using rocdecode_table_t = rocdecode::rocdecodeApiFuncTable;
|
||||
using rocjpeg_table_t = rocjpeg::rocjpegApiFuncTable;
|
||||
|
||||
auto* _wrap_v = std::getenv("ROCP_REG_TEST_WRAP");
|
||||
bool _wrap = (_wrap_v != nullptr && std::stoi(_wrap_v) != 0);
|
||||
@@ -135,6 +144,11 @@ rocprofiler_set_api_table(const char* name,
|
||||
rocdecode_table_t* _table = static_cast<rocdecode_table_t*>(tables[0]);
|
||||
_table->rocDecCreateDecoder_fn = &rocprofiler::rocDecCreateDecoder;
|
||||
}
|
||||
else if(std::string_view{ name } == "rocjpeg")
|
||||
{
|
||||
rocjpeg_table_t* _table = static_cast<rocjpeg_table_t*>(tables[0]);
|
||||
_table->rocJpegStreamCreate_fn = &rocprofiler::rocJpegStreamCreate;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
Ссылка в новой задаче
Block a user