Add rocDecode API support (#4)
* Add rocDecode API support * Update CI * CI update: Sanitizers run on ubuntu 22.04 --------- Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
This commit is contained in:
@@ -20,7 +20,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: ['ubuntu-latest']
|
||||
compiler: ['clang-13', 'clang-14', 'clang-15', 'gcc-11', 'gcc-12']
|
||||
compiler: ['clang-14', 'clang-15', 'gcc-11', 'gcc-12']
|
||||
ci-args: ['']
|
||||
ci-tag: ['']
|
||||
include:
|
||||
@@ -40,15 +40,19 @@ jobs:
|
||||
compiler: 'clang-10'
|
||||
ci-args: ''
|
||||
ci-tag: ''
|
||||
- runner: 'ubuntu-latest'
|
||||
- runner: 'ubuntu-22.04'
|
||||
compiler: 'clang-13'
|
||||
ci-args: ''
|
||||
ci-tag: ''
|
||||
- runner: 'ubuntu-22.04'
|
||||
compiler: 'gcc-12'
|
||||
ci-args: '--memcheck ThreadSanitizer'
|
||||
ci-tag: '-thread-sanitizer'
|
||||
- runner: 'ubuntu-latest'
|
||||
- runner: 'ubuntu-22.04'
|
||||
compiler: 'gcc-12'
|
||||
ci-args: '--memcheck AddressSanitizer'
|
||||
ci-tag: '-address-sanitizer'
|
||||
- runner: 'ubuntu-latest'
|
||||
- runner: 'ubuntu-22.04'
|
||||
compiler: 'gcc-12'
|
||||
ci-args: '--memcheck LeakSanitizer'
|
||||
ci-tag: '-leak-sanitizer'
|
||||
|
||||
@@ -13,7 +13,7 @@ import multiprocessing
|
||||
# this constant is used to define CTEST_PROJECT_NAME
|
||||
# and default value for CTEST_SUBMIT_URL
|
||||
_PROJECT_NAME = "rocprofiler-register"
|
||||
_BASE_URL = "10.194.116.31/cdash"
|
||||
_BASE_URL = "cdash.rocprofiler.amd.com"
|
||||
|
||||
|
||||
def which(cmd, require):
|
||||
|
||||
@@ -106,6 +106,7 @@ enum rocp_reg_supported_library // NOLINT(performance-enum-size)
|
||||
ROCP_REG_ROCTX,
|
||||
ROCP_REG_HIP_COMPILER,
|
||||
ROCP_REG_RCCL,
|
||||
ROCP_REG_ROCDECODE,
|
||||
ROCP_REG_LAST,
|
||||
};
|
||||
|
||||
@@ -165,6 +166,11 @@ ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_RCCL,
|
||||
"rocprofiler_register_import_rccl",
|
||||
"librccl.so.[6-9]($|\\.[0-9\\.]+)")
|
||||
|
||||
ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCDECODE,
|
||||
"rocdecode",
|
||||
"rocprofiler_register_import_rocdecode",
|
||||
"librocdecode.so.[0-9]($|\\.[0-9\\.]+)")
|
||||
|
||||
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_SUCCESS, "Success")
|
||||
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_NO_TOOLS, "rocprofiler-register found no tools")
|
||||
ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_DEADLOCK, "rocprofiler-register deadlocked")
|
||||
|
||||
@@ -72,6 +72,7 @@ add_subdirectory(hsa-runtime)
|
||||
add_subdirectory(amdhip)
|
||||
add_subdirectory(roctx)
|
||||
add_subdirectory(rccl)
|
||||
add_subdirectory(rocdecode)
|
||||
add_subdirectory(rocprofiler)
|
||||
|
||||
#
|
||||
|
||||
@@ -13,6 +13,7 @@ extern "C" {
|
||||
# pragma weak roctxRangePush
|
||||
# pragma weak roctxRangePop
|
||||
# pragma weak ncclGetVersion
|
||||
# pragma weak rocDecCreateDecoder
|
||||
#endif
|
||||
|
||||
extern void
|
||||
@@ -34,6 +35,22 @@ enum ncclResult_t
|
||||
extern ncclResult_t
|
||||
ncclGetVersion(int* version);
|
||||
|
||||
enum rocDecStatus
|
||||
{
|
||||
};
|
||||
|
||||
enum rocDecDecoderHandle
|
||||
{
|
||||
};
|
||||
|
||||
enum RocDecoderCreateInfo
|
||||
{
|
||||
};
|
||||
|
||||
extern rocDecStatus
|
||||
rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle,
|
||||
RocDecoderCreateInfo* decoder_create_info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
+24
-14
@@ -16,19 +16,21 @@
|
||||
|
||||
namespace
|
||||
{
|
||||
decltype(hip_init)* hip_init_fn = nullptr;
|
||||
decltype(hsa_init)* hsa_init_fn = nullptr;
|
||||
decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr;
|
||||
decltype(roctxRangePush)* roctxRangePush_fn = nullptr;
|
||||
decltype(roctxRangePush)* roctxRangePop_fn = nullptr;
|
||||
decltype(hip_init)* hip_init_fn = nullptr;
|
||||
decltype(hsa_init)* hsa_init_fn = nullptr;
|
||||
decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr;
|
||||
decltype(roctxRangePush)* roctxRangePush_fn = nullptr;
|
||||
decltype(roctxRangePush)* roctxRangePop_fn = nullptr;
|
||||
decltype(rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr;
|
||||
|
||||
enum rocp_reg_test_modes : uint8_t
|
||||
{
|
||||
ROCP_REG_TEST_NONE = 0x0,
|
||||
ROCP_REG_TEST_HIP = (1 << 0),
|
||||
ROCP_REG_TEST_HSA = (1 << 1),
|
||||
ROCP_REG_TEST_ROCTX = (1 << 2),
|
||||
ROCP_REG_TEST_RCCL = (1 << 3),
|
||||
ROCP_REG_TEST_NONE = 0x0,
|
||||
ROCP_REG_TEST_HIP = (1 << 0),
|
||||
ROCP_REG_TEST_HSA = (1 << 1),
|
||||
ROCP_REG_TEST_ROCTX = (1 << 2),
|
||||
ROCP_REG_TEST_RCCL = (1 << 3),
|
||||
ROCP_REG_TEST_ROCDECODE = (1 << 4),
|
||||
};
|
||||
|
||||
template <uint8_t Idx = ROCP_REG_TEST_NONE>
|
||||
@@ -72,10 +74,11 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY)
|
||||
}
|
||||
};
|
||||
|
||||
void* amdhip_handle = nullptr;
|
||||
void* hsart_handle = nullptr;
|
||||
void* roctx_handle = nullptr;
|
||||
void* rccl_handle = nullptr;
|
||||
void* amdhip_handle = nullptr;
|
||||
void* hsart_handle = nullptr;
|
||||
void* roctx_handle = nullptr;
|
||||
void* rccl_handle = nullptr;
|
||||
void* rocdecode_handle = nullptr;
|
||||
|
||||
if constexpr((Idx & ROCP_REG_TEST_HIP) == ROCP_REG_TEST_HIP)
|
||||
{
|
||||
@@ -107,5 +110,12 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY)
|
||||
if(!ncclGetVersion_fn) _resolve_dlopen(rccl_handle, "librccl.so");
|
||||
_resolve_dlsym(ncclGetVersion_fn, rccl_handle, "ncclGetVersion");
|
||||
}
|
||||
|
||||
if constexpr((Idx & ROCP_REG_TEST_ROCDECODE) == ROCP_REG_TEST_ROCDECODE)
|
||||
{
|
||||
rocDecCreateDecoder_fn = rocDecCreateDecoder;
|
||||
if(!rocDecCreateDecoder_fn) _resolve_dlopen(rocdecode_handle, "librocdecode.so");
|
||||
_resolve_dlsym(rocDecCreateDecoder_fn, rocdecode_handle, "rocDecCreateDecoder");
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
if(NOT TARGET rocprofiler-register::rocprofiler-register)
    # find_package(rocprofiler-register REQUIRED)
endif()

# Two shared libraries are built from the same sources. Both use the output name
# "rocdecode" with SOVERSION 1 / VERSION 1.0; they differ only in the directory
# the library is written to (see the rocdecode-invalid property below).
foreach(_ROCDECODE_TARGET rocdecode rocdecode-invalid)
    add_library(${_ROCDECODE_TARGET} SHARED)
    add_library(rocdecode::${_ROCDECODE_TARGET} ALIAS ${_ROCDECODE_TARGET})
    target_sources(${_ROCDECODE_TARGET} PRIVATE rocdecode.cpp rocdecode.hpp)
    target_include_directories(${_ROCDECODE_TARGET}
                               PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}>)
    target_link_libraries(${_ROCDECODE_TARGET}
                          PRIVATE rocprofiler-register::rocprofiler-register)
    set_target_properties(
        ${_ROCDECODE_TARGET}
        PROPERTIES OUTPUT_NAME rocdecode
                   SOVERSION 1
                   VERSION 1.0)
endforeach()

# the "invalid" variant is emitted into an "invalid" subdirectory of the regular
# library output directory
set_target_properties(
    rocdecode-invalid PROPERTIES LIBRARY_OUTPUT_DIRECTORY
                                 ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/invalid)

# strip only after every property of the respective target has been set
rocp_register_strip_target(rocdecode)
rocp_register_strip_target(rocdecode-invalid)
|
||||
@@ -0,0 +1,149 @@
|
||||
#include "rocdecode.hpp"
|
||||
|
||||
#include <rocprofiler-register/rocprofiler-register.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <string_view>
|
||||
|
||||
#define ROCP_REG_VERSION \
|
||||
ROCPROFILER_REGISTER_COMPUTE_VERSION_2(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION, \
|
||||
ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION)
|
||||
|
||||
ROCPROFILER_REGISTER_DEFINE_IMPORT(rocdecode, ROCP_REG_VERSION)
|
||||
|
||||
#ifndef ROCP_REG_FILE_NAME
|
||||
# define ROCP_REG_FILE_NAME \
|
||||
::std::string{ __FILE__ } \
|
||||
.substr(::std::string_view{ __FILE__ }.find_last_of('/') + 1) \
|
||||
.c_str()
|
||||
#endif
|
||||
|
||||
namespace rocdecode
|
||||
{
|
||||
namespace
|
||||
{
|
||||
auto&
|
||||
get_rocdecode_api_table_impl()
|
||||
{
|
||||
static auto _table = std::atomic<rocdecodeApiFuncTable*>{ nullptr };
|
||||
return _table;
|
||||
}
|
||||
|
||||
void
|
||||
register_profiler_impl()
|
||||
{
|
||||
static auto _const_api_table = rocdecodeApiFuncTable{};
|
||||
initialize_rocdecode_api_table(&_const_api_table);
|
||||
|
||||
// set this before any recursive opportunity arises
|
||||
get_rocdecode_api_table_impl().exchange(&_const_api_table);
|
||||
|
||||
// create a copy of the api table for modification by registration
|
||||
static auto _profiler_api_table = rocdecodeApiFuncTable{};
|
||||
copy_rocdecode_api_table(&_profiler_api_table, &_const_api_table);
|
||||
|
||||
void* _profiler_api_table_v = static_cast<void*>(&_profiler_api_table);
|
||||
|
||||
auto lib_id = rocprofiler_register_library_indentifier_t{};
|
||||
auto success = rocprofiler_register_library_api_table(
|
||||
"rocdecode",
|
||||
&ROCPROFILER_REGISTER_IMPORT_FUNC(rocdecode),
|
||||
ROCP_REG_VERSION,
|
||||
&_profiler_api_table_v,
|
||||
1,
|
||||
&lib_id);
|
||||
|
||||
if(success == 0)
|
||||
{
|
||||
printf("[%s] rocdecode identifier %lu\n", ROCP_REG_FILE_NAME, lib_id.handle);
|
||||
auto* _api_table = &_const_api_table;
|
||||
if(!get_rocdecode_api_table_impl().compare_exchange_strong(_api_table,
|
||||
&_profiler_api_table))
|
||||
{
|
||||
// with the current impl, if we ever get here, someone is calling one the
|
||||
// functions in this anonymous namespace that shouldn't
|
||||
std::cerr
|
||||
<< "register_profiler_impl expected the API table to be the internal "
|
||||
"implementation and yet it is not. something went wrong.\n";
|
||||
abort();
|
||||
}
|
||||
}
|
||||
else if(success != ROCP_REG_NO_TOOLS)
|
||||
{
|
||||
std::cerr << "rocdecode library failed to register with rocprofiler-register: "
|
||||
<< rocprofiler_register_error_string(success) << "\n";
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
register_profiler()
|
||||
{
|
||||
// this registration scheme is designed to minimize overhead once
|
||||
// registered (only pay cost of checking atomic boolean)
|
||||
// once the profiler is registered. If the library has not
|
||||
// been registered and two or more threads try to register concurrently
|
||||
// the first thread to acquire the lock below, will block the
|
||||
// threads until registration is complete. However,
|
||||
// if the same thread performing the registration re-enters this function
|
||||
// i.e. this library's API is called during registration, this function
|
||||
// will prevent a deadlock by not attempting to re-enter the
|
||||
// the call-once and not releasing any waiting threads by flipping
|
||||
// the _is_registered field to true.
|
||||
static auto _is_registered = std::atomic<bool>{ false };
|
||||
|
||||
if(!_is_registered.load(std::memory_order_acquire))
|
||||
{
|
||||
using mutex_t = std::recursive_mutex;
|
||||
using auto_lock_t = std::unique_lock<mutex_t>;
|
||||
static auto _once = std::once_flag{};
|
||||
static auto _mutex = mutex_t{};
|
||||
|
||||
// defer the lock so we can check for recursion
|
||||
auto _lk = auto_lock_t{ _mutex, std::defer_lock };
|
||||
|
||||
// this will be true if the same thread currently executing the call_once invokes
|
||||
// the library's API while registering the profiler (e.g. tool which wants to
|
||||
// instrument rocdecode API invokes a rocdecode function while registering with
|
||||
// the profiler) we allow this thread to proceed and access the "const" API table
|
||||
// but return so it does not flip _is_registered to true, which would result in
|
||||
// any subsequent threads not waiting until the library is fully registered,
|
||||
// resulting in missed callbacks for the tools
|
||||
if(_lk.owns_lock()) return;
|
||||
|
||||
// ensures any subsequent threads wait until the first thread
|
||||
// finishes registration
|
||||
_lk.lock();
|
||||
// call_once to ensure that we only register once
|
||||
std::call_once(_once, register_profiler_impl);
|
||||
// the first thread has completed registration and all
|
||||
// threads waiting on lock will be released and this
|
||||
// block will not be entered again
|
||||
_is_registered.exchange(true, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Returns the rocdecode API table that is currently in effect. Profiler
// registration is triggered first via register_profiler(); the table pointer is
// then read from the atomic slot with relaxed ordering.
rocdecodeApiFuncTable*
get_rocdecode_api_table()
{
    register_profiler();

    auto& _table_slot = get_rocdecode_api_table_impl();
    return _table_slot.load(std::memory_order_relaxed);
}
|
||||
|
||||
void
|
||||
rocdecode_init()
|
||||
{
|
||||
printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__);
|
||||
}
|
||||
} // namespace rocdecode
|
||||
|
||||
extern "C" {
|
||||
void
|
||||
rocdecode_init(void)
|
||||
{
|
||||
rocdecode::get_rocdecode_api_table()->rocDecCreateDecoder_fn({}, {});
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
#pragma once
|
||||
|
||||
#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0
|
||||
#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
extern "C" {
|
||||
// fake rocdecode function and the placeholder types used in its signature
|
||||
// Empty enum used as the return type of the fake rocDecCreateDecoder.
enum rocDecStatus
|
||||
{
|
||||
};
|
||||
|
||||
// Empty enum; a pointer to it is the first parameter of the fake
// rocDecCreateDecoder.
enum rocDecDecoderHandle
|
||||
{
|
||||
};
|
||||
|
||||
// Empty enum; a pointer to it is the second parameter of the fake
// rocDecCreateDecoder.
enum RocDecoderCreateInfo
|
||||
{
|
||||
};
|
||||
|
||||
rocDecStatus
|
||||
rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle,
|
||||
RocDecoderCreateInfo* decoder_create_info)
|
||||
__attribute__((visibility("default")));
|
||||
}
|
||||
|
||||
namespace rocdecode
|
||||
{
|
||||
struct rocdecodeApiFuncTable
|
||||
{
|
||||
uint64_t size = 0;
|
||||
decltype(::rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr;
|
||||
};
|
||||
|
||||
rocDecStatus
|
||||
rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle,
|
||||
RocDecoderCreateInfo* decoder_create_info);
|
||||
|
||||
// populates rocdecode api table with function pointers
|
||||
inline void
|
||||
initialize_rocdecode_api_table(rocdecodeApiFuncTable* dst)
|
||||
{
|
||||
dst->size = sizeof(rocdecodeApiFuncTable);
|
||||
dst->rocDecCreateDecoder_fn = &::rocdecode::rocDecCreateDecoder;
|
||||
}
|
||||
|
||||
// copies the api table from src to dst
|
||||
inline void
|
||||
copy_rocdecode_api_table(rocdecodeApiFuncTable* dst, const rocdecodeApiFuncTable* src)
|
||||
{
|
||||
*dst = *src;
|
||||
}
|
||||
} // namespace rocdecode
|
||||
@@ -2,6 +2,7 @@
|
||||
#include <amdhip/amdhip.hpp>
|
||||
#include <hsa-runtime/hsa-runtime.hpp>
|
||||
#include <rccl/rccl.hpp>
|
||||
#include <rocdecode/rocdecode.hpp>
|
||||
#include <roctx/roctx.hpp>
|
||||
|
||||
#include <dlfcn.h>
|
||||
@@ -37,6 +38,13 @@ ncclGetVersion(int*)
|
||||
return {};
|
||||
}
|
||||
|
||||
// Replacement for rocDecCreateDecoder used by the test tool: prints
// "[<source file name>] <function name>" to stdout, ignores both arguments and
// returns a value-initialized rocDecStatus.
rocDecStatus
rocDecCreateDecoder(rocDecDecoderHandle*, RocDecoderCreateInfo*)
{
    const char* const _this_function = __FUNCTION__;
    printf("[%s] %s\n", ROCP_REG_FILE_NAME, _this_function);
    return rocDecStatus{};
}
|
||||
|
||||
void
|
||||
roctx_range_push(const char* name)
|
||||
{
|
||||
@@ -83,10 +91,11 @@ rocprofiler_set_api_table(const char* name,
|
||||
" did not contain rocprofiler_configure symbol" };
|
||||
}
|
||||
|
||||
using hip_table_t = hip::HipApiTable;
|
||||
using hsa_table_t = hsa::HsaApiTable;
|
||||
using roctx_table_t = roctx::ROCTxApiTable;
|
||||
using rccl_table_t = rccl::rcclApiFuncTable;
|
||||
using hip_table_t = hip::HipApiTable;
|
||||
using hsa_table_t = hsa::HsaApiTable;
|
||||
using roctx_table_t = roctx::ROCTxApiTable;
|
||||
using rccl_table_t = rccl::rcclApiFuncTable;
|
||||
using rocdecode_table_t = rocdecode::rocdecodeApiFuncTable;
|
||||
|
||||
auto* _wrap_v = std::getenv("ROCP_REG_TEST_WRAP");
|
||||
bool _wrap = (_wrap_v != nullptr && std::stoi(_wrap_v) != 0);
|
||||
@@ -121,6 +130,11 @@ rocprofiler_set_api_table(const char* name,
|
||||
rccl_table_t* _table = static_cast<rccl_table_t*>(tables[0]);
|
||||
_table->ncclGetVersion_fn = &rocprofiler::ncclGetVersion;
|
||||
}
|
||||
else if(std::string_view{ name } == "rocdecode")
|
||||
{
|
||||
rocdecode_table_t* _table = static_cast<rocdecode_table_t*>(tables[0]);
|
||||
_table->rocDecCreateDecoder_fn = &rocprofiler::rocDecCreateDecoder;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
مرجع در شماره جدید
Block a user