Add rocDecode API support (#4)

* Add rocDecode API support

* Update CI

* CI update: Sanitizers run on ubuntu 22.04

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
This commit is contained in:
Trowbridge, Ian
2025-01-21 21:00:43 -06:00
کامیت شده توسط GitHub
والد 164eda5231
کامیت de3a874b1a
10 فایلهای تغییر یافته به همراه 313 افزوده شده و 23 حذف شده
@@ -20,7 +20,7 @@ jobs:
fail-fast: false
matrix:
runner: ['ubuntu-latest']
compiler: ['clang-13', 'clang-14', 'clang-15', 'gcc-11', 'gcc-12']
compiler: ['clang-14', 'clang-15', 'gcc-11', 'gcc-12']
ci-args: ['']
ci-tag: ['']
include:
@@ -40,15 +40,19 @@ jobs:
compiler: 'clang-10'
ci-args: ''
ci-tag: ''
- runner: 'ubuntu-latest'
- runner: 'ubuntu-22.04'
compiler: 'clang-13'
ci-args: ''
ci-tag: ''
- runner: 'ubuntu-22.04'
compiler: 'gcc-12'
ci-args: '--memcheck ThreadSanitizer'
ci-tag: '-thread-sanitizer'
- runner: 'ubuntu-latest'
- runner: 'ubuntu-22.04'
compiler: 'gcc-12'
ci-args: '--memcheck AddressSanitizer'
ci-tag: '-address-sanitizer'
- runner: 'ubuntu-latest'
- runner: 'ubuntu-22.04'
compiler: 'gcc-12'
ci-args: '--memcheck LeakSanitizer'
ci-tag: '-leak-sanitizer'
+1 -1
مشاهده پرونده
@@ -13,7 +13,7 @@ import multiprocessing
# this constant is used to define CTEST_PROJECT_NAME
# and default value for CTEST_SUBMIT_URL
_PROJECT_NAME = "rocprofiler-register"
_BASE_URL = "10.194.116.31/cdash"
_BASE_URL = "cdash.rocprofiler.amd.com"
def which(cmd, require):
@@ -106,6 +106,7 @@ enum rocp_reg_supported_library // NOLINT(performance-enum-size)
ROCP_REG_ROCTX,
ROCP_REG_HIP_COMPILER,
ROCP_REG_RCCL,
ROCP_REG_ROCDECODE,
ROCP_REG_LAST,
};
@@ -165,6 +166,11 @@ ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_RCCL,
"rocprofiler_register_import_rccl",
"librccl.so.[6-9]($|\\.[0-9\\.]+)")
// Library traits entry for rocDecode, parallel to the RCCL entry above.
// NOTE(review): the macro definition is not visible here; by analogy with the
// other entries the arguments appear to be (enum value, library name, import
// symbol name, shared-library file-name regex) -- confirm.
// The pattern accepts "librocdecode.so." followed by a single digit, then
// either end-of-string or a '.'-introduced run of digits and dots.
ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCDECODE,
"rocdecode",
"rocprofiler_register_import_rocdecode",
"librocdecode.so.[0-9]($|\\.[0-9\\.]+)")
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_SUCCESS, "Success")
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_NO_TOOLS, "rocprofiler-register found no tools")
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_DEADLOCK, "rocprofiler-register deadlocked")
+1
مشاهده پرونده
@@ -72,6 +72,7 @@ add_subdirectory(hsa-runtime)
add_subdirectory(amdhip)
add_subdirectory(roctx)
add_subdirectory(rccl)
add_subdirectory(rocdecode)
add_subdirectory(rocprofiler)
#
+17
مشاهده پرونده
@@ -13,6 +13,7 @@ extern "C" {
# pragma weak roctxRangePush
# pragma weak roctxRangePop
# pragma weak ncclGetVersion
# pragma weak rocDecCreateDecoder
#endif
extern void
@@ -34,6 +35,22 @@ enum ncclResult_t
extern ncclResult_t
ncclGetVersion(int* version);
// Minimal stand-in declarations for the rocDecode API so this header does not
// need the rocDecode headers. The three types are declared as empty enums.
// NOTE(review): presumably only the symbol name matters to the test (it is
// listed in the `#pragma weak` block above and resolved by name) -- confirm.
enum rocDecStatus
{
};
enum rocDecDecoderHandle
{
};
enum RocDecoderCreateInfo
{
};
// Weakly-referenced decoder-creation entry point.
extern rocDecStatus
rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle,
RocDecoderCreateInfo* decoder_create_info);
#ifdef __cplusplus
}
#endif
+24 -14
مشاهده پرونده
@@ -16,19 +16,21 @@
namespace
{
decltype(hip_init)* hip_init_fn = nullptr;
decltype(hsa_init)* hsa_init_fn = nullptr;
decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr;
decltype(roctxRangePush)* roctxRangePush_fn = nullptr;
decltype(roctxRangePush)* roctxRangePop_fn = nullptr;
decltype(hip_init)* hip_init_fn = nullptr;
decltype(hsa_init)* hsa_init_fn = nullptr;
decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr;
decltype(roctxRangePush)* roctxRangePush_fn = nullptr;
decltype(roctxRangePush)* roctxRangePop_fn = nullptr;
decltype(rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr;
enum rocp_reg_test_modes : uint8_t
{
ROCP_REG_TEST_NONE = 0x0,
ROCP_REG_TEST_HIP = (1 << 0),
ROCP_REG_TEST_HSA = (1 << 1),
ROCP_REG_TEST_ROCTX = (1 << 2),
ROCP_REG_TEST_RCCL = (1 << 3),
ROCP_REG_TEST_NONE = 0x0,
ROCP_REG_TEST_HIP = (1 << 0),
ROCP_REG_TEST_HSA = (1 << 1),
ROCP_REG_TEST_ROCTX = (1 << 2),
ROCP_REG_TEST_RCCL = (1 << 3),
ROCP_REG_TEST_ROCDECODE = (1 << 4),
};
template <uint8_t Idx = ROCP_REG_TEST_NONE>
@@ -72,10 +74,11 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY)
}
};
void* amdhip_handle = nullptr;
void* hsart_handle = nullptr;
void* roctx_handle = nullptr;
void* rccl_handle = nullptr;
void* amdhip_handle = nullptr;
void* hsart_handle = nullptr;
void* roctx_handle = nullptr;
void* rccl_handle = nullptr;
void* rocdecode_handle = nullptr;
if constexpr((Idx & ROCP_REG_TEST_HIP) == ROCP_REG_TEST_HIP)
{
@@ -107,5 +110,12 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY)
if(!ncclGetVersion_fn) _resolve_dlopen(rccl_handle, "librccl.so");
_resolve_dlsym(ncclGetVersion_fn, rccl_handle, "ncclGetVersion");
}
if constexpr((Idx & ROCP_REG_TEST_ROCDECODE) == ROCP_REG_TEST_ROCDECODE)
{
rocDecCreateDecoder_fn = rocDecCreateDecoder;
if(!rocDecCreateDecoder_fn) _resolve_dlopen(rocdecode_handle, "librocdecode.so");
_resolve_dlsym(rocDecCreateDecoder_fn, rocdecode_handle, "rocDecCreateDecoder");
}
}
} // namespace
@@ -0,0 +1,34 @@
#
#
#
if(NOT TARGET rocprofiler-register::rocprofiler-register)
# find_package(rocprofiler-register REQUIRED)
endif()
# Mock rocdecode shared library: built from rocdecode.cpp/rocdecode.hpp and
# linked privately against rocprofiler-register.
add_library(rocdecode SHARED)
add_library(rocdecode::rocdecode ALIAS rocdecode)
target_sources(rocdecode PRIVATE rocdecode.cpp rocdecode.hpp)
# Consumers inside the build tree can include headers from this directory.
target_include_directories(rocdecode PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>)
target_link_libraries(rocdecode PRIVATE rocprofiler-register::rocprofiler-register)
# Produces librocdecode.so with SOVERSION 1 / VERSION 1.0.
set_target_properties(
rocdecode
PROPERTIES OUTPUT_NAME rocdecode
SOVERSION 1
VERSION 1.0)
# Project helper defined elsewhere (presumably strips symbols from the target
# -- confirm against its definition).
rocp_register_strip_target(rocdecode)
# Second build of the same sources under a different target name. It keeps the
# same output name (librocdecode.so, SOVERSION 1 / VERSION 1.0) but is written
# to the "invalid" subdirectory of the library output directory.
# NOTE(review): presumably used by tests that exercise loading the library from
# an unexpected location -- confirm against the test definitions.
add_library(rocdecode-invalid SHARED)
add_library(rocdecode::rocdecode-invalid ALIAS rocdecode-invalid)
target_sources(rocdecode-invalid PRIVATE rocdecode.cpp rocdecode.hpp)
target_include_directories(rocdecode-invalid
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>)
target_link_libraries(rocdecode-invalid
PRIVATE rocprofiler-register::rocprofiler-register)
set_target_properties(
rocdecode-invalid
PROPERTIES OUTPUT_NAME rocdecode
SOVERSION 1
VERSION 1.0
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/invalid)
# Project helper defined elsewhere (presumably strips symbols from the target
# -- confirm against its definition).
rocp_register_strip_target(rocdecode-invalid)
@@ -0,0 +1,149 @@
#include "rocdecode.hpp"
#include <rocprofiler-register/rocprofiler-register.h>
#include <atomic>
#include <iostream>
#include <mutex>
#include <string_view>
#define ROCP_REG_VERSION \
ROCPROFILER_REGISTER_COMPUTE_VERSION_2(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION, \
ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION)
ROCPROFILER_REGISTER_DEFINE_IMPORT(rocdecode, ROCP_REG_VERSION)
// ROCP_REG_FILE_NAME: the base name of this source file (the part of __FILE__
// after the last '/', or all of __FILE__ when it contains no '/') as a C
// string. It is only defined here when not already provided by the build.
//
// WARNING: the expansion calls .c_str() on a temporary std::string, so the
// returned pointer is valid only until the end of the full expression that
// uses the macro. Pass it directly as a function argument (as the printf calls
// in this file do); never store it in a variable.
#ifndef ROCP_REG_FILE_NAME
# define ROCP_REG_FILE_NAME \
::std::string{ __FILE__ } \
.substr(::std::string_view{ __FILE__ }.find_last_of('/') + 1) \
.c_str()
#endif
namespace rocdecode
{
namespace
{
auto&
get_rocdecode_api_table_impl()
{
static auto _table = std::atomic<rocdecodeApiFuncTable*>{ nullptr };
return _table;
}
void
register_profiler_impl()
{
static auto _const_api_table = rocdecodeApiFuncTable{};
initialize_rocdecode_api_table(&_const_api_table);
// set this before any recursive opportunity arises
get_rocdecode_api_table_impl().exchange(&_const_api_table);
// create a copy of the api table for modification by registration
static auto _profiler_api_table = rocdecodeApiFuncTable{};
copy_rocdecode_api_table(&_profiler_api_table, &_const_api_table);
void* _profiler_api_table_v = static_cast<void*>(&_profiler_api_table);
auto lib_id = rocprofiler_register_library_indentifier_t{};
auto success = rocprofiler_register_library_api_table(
"rocdecode",
&ROCPROFILER_REGISTER_IMPORT_FUNC(rocdecode),
ROCP_REG_VERSION,
&_profiler_api_table_v,
1,
&lib_id);
if(success == 0)
{
printf("[%s] rocdecode identifier %lu\n", ROCP_REG_FILE_NAME, lib_id.handle);
auto* _api_table = &_const_api_table;
if(!get_rocdecode_api_table_impl().compare_exchange_strong(_api_table,
&_profiler_api_table))
{
// with the current impl, if we ever get here, someone is calling one the
// functions in this anonymous namespace that shouldn't
std::cerr
<< "register_profiler_impl expected the API table to be the internal "
"implementation and yet it is not. something went wrong.\n";
abort();
}
}
else if(success != ROCP_REG_NO_TOOLS)
{
std::cerr << "rocdecode library failed to register with rocprofiler-register: "
<< rocprofiler_register_error_string(success) << "\n";
exit(EXIT_FAILURE);
}
}
// Ensures the library is registered with the profiler exactly once.
//
// Cost after registration: a single atomic load.
//
// Concurrency: if several threads arrive before registration has completed,
// the first one to take the mutex performs the registration and the others
// block on the mutex until it is done.
//
// Re-entrancy: if the thread performing the registration calls back into this
// function (e.g. a tool invokes a rocdecode function while it is being
// registered), the call returns immediately. That thread then sees the
// internal API table, and _is_registered is NOT flipped, so other threads keep
// waiting until registration has really finished.
//
// The re-entrancy check uses a per-thread flag. The previous implementation
// tested owns_lock() on a std::unique_lock that had just been constructed with
// std::defer_lock; that is always false, so a re-entrant call went on to
// re-lock the recursive mutex and re-enter std::call_once from inside its own
// callable.
void
register_profiler()
{
    static auto _is_registered = std::atomic<bool>{ false };
    if(_is_registered.load(std::memory_order_acquire)) return;

    // true only on the thread that is currently inside the call_once below
    static thread_local bool _is_registering = false;

    // same thread re-entering during registration: do not touch the mutex or
    // the once-flag, and do not mark the library as registered
    if(_is_registering) return;

    static auto _once  = std::once_flag{};
    static auto _mutex = std::mutex{};

    // ensures any subsequent threads wait until the first thread finishes
    auto _lk = std::unique_lock<std::mutex>{ _mutex };

    // resets the per-thread flag on every exit path, including exceptions
    struct registering_scope
    {
        explicit registering_scope(bool& _v)
        : m_value{ _v }
        {
            m_value = true;
        }
        ~registering_scope() { m_value = false; }
        registering_scope(const registering_scope&) = delete;
        registering_scope& operator=(const registering_scope&) = delete;

        bool& m_value;
    };

    {
        auto _scope = registering_scope{ _is_registering };
        // call_once to ensure that we only register once
        std::call_once(_once, register_profiler_impl);
    }

    // registration is complete: waiting threads are released when _lk unlocks
    // and the fast path above is taken from now on
    _is_registered.store(true, std::memory_order_release);
}
} // namespace
// Returns the currently active rocdecode API table, triggering profiler
// registration first if it has not happened yet.
rocdecodeApiFuncTable*
get_rocdecode_api_table()
{
    register_profiler();
    auto& _active_table = get_rocdecode_api_table_impl();
    return _active_table.load(std::memory_order_relaxed);
}
// Prints "[<source file name>] <function name>" to stdout.
// NOTE(review): rocdecode.hpp declares rocdecode::rocDecCreateDecoder and
// initialize_rocdecode_api_table() stores its address in the API table, but no
// definition of that function is visible in this file -- confirm whether this
// function was meant to be that implementation.
void
rocdecode_init()
{
// ROCP_REG_FILE_NAME yields a pointer into a temporary std::string, so it
// must be used directly as a call argument like this.
printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__);
}
} // namespace rocdecode
extern "C" {
// C-linkage entry point: fetches the active API table (registering with the
// profiler on first use) and invokes its rocDecCreateDecoder_fn entry with
// null arguments. The returned status is ignored.
void
rocdecode_init(void)
{
    auto* _active_table = rocdecode::get_rocdecode_api_table();
    _active_table->rocDecCreateDecoder_fn(nullptr, nullptr);
}
}
@@ -0,0 +1,55 @@
#pragma once
#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0
#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1
#include <cstddef>
#include <cstdint>
extern "C" {
// Fake rocdecode API: placeholder types (declared as empty enums) and the one
// function this mock library exposes.
enum rocDecStatus
{
};

enum rocDecDecoderHandle
{
};

enum RocDecoderCreateInfo
{
};

// Public C entry point, exported with default symbol visibility.
rocDecStatus
rocDecCreateDecoder(rocDecDecoderHandle*  decoder_handle,
                    RocDecoderCreateInfo* decoder_create_info)
    __attribute__((visibility("default")));
}

namespace rocdecode
{
// Dispatch table for the rocdecode API.
struct rocdecodeApiFuncTable
{
    // size of the table in bytes; set by initialize_rocdecode_api_table()
    uint64_t size = 0;
    // pointer used to dispatch rocDecCreateDecoder calls
    decltype(::rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr;
};

// Library-internal implementation that the table points at by default.
rocDecStatus
rocDecCreateDecoder(rocDecDecoderHandle*  decoder_handle,
                    RocDecoderCreateInfo* decoder_create_info);

// Fills `dst` with the table size and the library's own implementations.
inline void
initialize_rocdecode_api_table(rocdecodeApiFuncTable* dst)
{
    *dst = rocdecodeApiFuncTable{ sizeof(rocdecodeApiFuncTable),
                                  &::rocdecode::rocDecCreateDecoder };
}

// Overwrites `dst` with a member-wise copy of `src`.
inline void
copy_rocdecode_api_table(rocdecodeApiFuncTable* dst, const rocdecodeApiFuncTable* src)
{
    const rocdecodeApiFuncTable& _from = *src;
    *dst                               = _from;
}
}  // namespace rocdecode
@@ -2,6 +2,7 @@
#include <amdhip/amdhip.hpp>
#include <hsa-runtime/hsa-runtime.hpp>
#include <rccl/rccl.hpp>
#include <rocdecode/rocdecode.hpp>
#include <roctx/roctx.hpp>
#include <dlfcn.h>
@@ -37,6 +38,13 @@ ncclGetVersion(int*)
return {};
}
// Tool-side replacement for rocDecCreateDecoder; rocprofiler_set_api_table()
// installs it into the rocdecode API table. Both arguments are ignored: it
// prints "[<source file name>] <function name>" to stdout and returns a
// value-initialized rocDecStatus.
rocDecStatus
rocDecCreateDecoder(rocDecDecoderHandle*, RocDecoderCreateInfo*)
{
printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__);
return {};
}
void
roctx_range_push(const char* name)
{
@@ -83,10 +91,11 @@ rocprofiler_set_api_table(const char* name,
" did not contain rocprofiler_configure symbol" };
}
using hip_table_t = hip::HipApiTable;
using hsa_table_t = hsa::HsaApiTable;
using roctx_table_t = roctx::ROCTxApiTable;
using rccl_table_t = rccl::rcclApiFuncTable;
using hip_table_t = hip::HipApiTable;
using hsa_table_t = hsa::HsaApiTable;
using roctx_table_t = roctx::ROCTxApiTable;
using rccl_table_t = rccl::rcclApiFuncTable;
using rocdecode_table_t = rocdecode::rocdecodeApiFuncTable;
auto* _wrap_v = std::getenv("ROCP_REG_TEST_WRAP");
bool _wrap = (_wrap_v != nullptr && std::stoi(_wrap_v) != 0);
@@ -121,6 +130,11 @@ rocprofiler_set_api_table(const char* name,
rccl_table_t* _table = static_cast<rccl_table_t*>(tables[0]);
_table->ncclGetVersion_fn = &rocprofiler::ncclGetVersion;
}
else if(std::string_view{ name } == "rocdecode")
{
rocdecode_table_t* _table = static_cast<rocdecode_table_t*>(tables[0]);
_table->rocDecCreateDecoder_fn = &rocprofiler::rocDecCreateDecoder;
}
}
return 0;