From de3a874b1a9b2d5ba26f9728333f00e24695aa6d Mon Sep 17 00:00:00 2001 From: "Trowbridge, Ian" Date: Tue, 21 Jan 2025 21:00:43 -0600 Subject: [PATCH] Add rocDecode API support (#4) * Add rocDecode API support * Update CI * CI update: Sanitizers run on ubuntu 22.04 --------- Co-authored-by: Jonathan R. Madsen --- .github/workflows/continuous-integration.yml | 12 +- scripts/run-ci.py | 2 +- .../rocprofiler_register.cpp | 6 + tests/CMakeLists.txt | 1 + tests/common/fwd.h | 17 ++ tests/common/fwd.hpp | 38 +++-- tests/rocdecode/CMakeLists.txt | 34 ++++ tests/rocdecode/rocdecode.cpp | 149 ++++++++++++++++++ tests/rocdecode/rocdecode.hpp | 55 +++++++ tests/rocprofiler/rocprofiler.cpp | 22 ++- 10 files changed, 313 insertions(+), 23 deletions(-) create mode 100644 tests/rocdecode/CMakeLists.txt create mode 100644 tests/rocdecode/rocdecode.cpp create mode 100644 tests/rocdecode/rocdecode.hpp diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index f2a14a77c6..7c1220f87c 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -20,7 +20,7 @@ jobs: fail-fast: false matrix: runner: ['ubuntu-latest'] - compiler: ['clang-13', 'clang-14', 'clang-15', 'gcc-11', 'gcc-12'] + compiler: ['clang-14', 'clang-15', 'gcc-11', 'gcc-12'] ci-args: [''] ci-tag: [''] include: @@ -40,15 +40,19 @@ jobs: compiler: 'clang-10' ci-args: '' ci-tag: '' - - runner: 'ubuntu-latest' + - runner: 'ubuntu-22.04' + compiler: 'clang-13' + ci-args: '' + ci-tag: '' + - runner: 'ubuntu-22.04' compiler: 'gcc-12' ci-args: '--memcheck ThreadSanitizer' ci-tag: '-thread-sanitizer' - - runner: 'ubuntu-latest' + - runner: 'ubuntu-22.04' compiler: 'gcc-12' ci-args: '--memcheck AddressSanitizer' ci-tag: '-address-sanitizer' - - runner: 'ubuntu-latest' + - runner: 'ubuntu-22.04' compiler: 'gcc-12' ci-args: '--memcheck LeakSanitizer' ci-tag: '-leak-sanitizer' diff --git a/scripts/run-ci.py b/scripts/run-ci.py index c4d9b4eb13..7e2309fcf4 100755 --- a/scripts/run-ci.py +++ b/scripts/run-ci.py @@ -13,7 +13,7 @@ import multiprocessing # this constant is used to define CTEST_PROJECT_NAME # and default value for CTEST_SUBMIT_URL _PROJECT_NAME = "rocprofiler-register" -_BASE_URL = "10.194.116.31/cdash" +_BASE_URL = "cdash.rocprofiler.amd.com" def which(cmd, require): diff --git a/source/lib/rocprofiler-register/rocprofiler_register.cpp b/source/lib/rocprofiler-register/rocprofiler_register.cpp index 3cabb87ab0..619deba9b8 100644 --- a/source/lib/rocprofiler-register/rocprofiler_register.cpp +++ b/source/lib/rocprofiler-register/rocprofiler_register.cpp @@ -106,6 +106,7 @@ enum rocp_reg_supported_library // NOLINT(performance-enum-size) ROCP_REG_ROCTX, ROCP_REG_HIP_COMPILER, ROCP_REG_RCCL, + ROCP_REG_ROCDECODE, ROCP_REG_LAST, }; @@ -165,6 +166,11 @@ ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_RCCL, "rocprofiler_register_import_rccl", "librccl.so.[6-9]($|\\.[0-9\\.]+)") +ROCP_REG_DEFINE_LIBRARY_TRAITS(ROCP_REG_ROCDECODE, + "rocdecode", + "rocprofiler_register_import_rocdecode", + "librocdecode.so.[0-9]($|\\.[0-9\\.]+)") + ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_SUCCESS, "Success") ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_NO_TOOLS, "rocprofiler-register found no tools") ROCP_REG_DEFINE_ERROR_MESSAGE(ROCP_REG_DEADLOCK, "rocprofiler-register deadlocked") diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 36aca52be9..3482ca7005 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -72,6 +72,7 @@ add_subdirectory(hsa-runtime) add_subdirectory(amdhip) add_subdirectory(roctx) add_subdirectory(rccl) +add_subdirectory(rocdecode) add_subdirectory(rocprofiler) # diff --git a/tests/common/fwd.h b/tests/common/fwd.h index 5d6dcb21c1..057b3e1b70 100644 --- a/tests/common/fwd.h +++ b/tests/common/fwd.h @@ -13,6 +13,7 @@ extern "C" { # pragma weak roctxRangePush # pragma weak roctxRangePop # pragma weak ncclGetVersion +# pragma weak rocDecCreateDecoder #endif extern void @@ -34,6 +35,22 @@ enum ncclResult_t extern ncclResult_t ncclGetVersion(int* version); +enum rocDecStatus +{ +}; + +enum rocDecDecoderHandle +{ +}; + +enum RocDecoderCreateInfo +{ +}; + +extern rocDecStatus +rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle, + RocDecoderCreateInfo* decoder_create_info); + #ifdef __cplusplus } #endif diff --git a/tests/common/fwd.hpp b/tests/common/fwd.hpp index c9956568e2..91e30c35ef 100644 --- a/tests/common/fwd.hpp +++ b/tests/common/fwd.hpp @@ -16,19 +16,21 @@ namespace { -decltype(hip_init)* hip_init_fn = nullptr; -decltype(hsa_init)* hsa_init_fn = nullptr; -decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr; -decltype(roctxRangePush)* roctxRangePush_fn = nullptr; -decltype(roctxRangePush)* roctxRangePop_fn = nullptr; +decltype(hip_init)* hip_init_fn = nullptr; +decltype(hsa_init)* hsa_init_fn = nullptr; +decltype(ncclGetVersion)* ncclGetVersion_fn = nullptr; +decltype(roctxRangePush)* roctxRangePush_fn = nullptr; +decltype(roctxRangePush)* roctxRangePop_fn = nullptr; +decltype(rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr; enum rocp_reg_test_modes : uint8_t { - ROCP_REG_TEST_NONE = 0x0, - ROCP_REG_TEST_HIP = (1 << 0), - ROCP_REG_TEST_HSA = (1 << 1), - ROCP_REG_TEST_ROCTX = (1 << 2), - ROCP_REG_TEST_RCCL = (1 << 3), + ROCP_REG_TEST_NONE = 0x0, + ROCP_REG_TEST_HIP = (1 << 0), + ROCP_REG_TEST_HSA = (1 << 1), + ROCP_REG_TEST_ROCTX = (1 << 2), + ROCP_REG_TEST_RCCL = (1 << 3), + ROCP_REG_TEST_ROCDECODE = (1 << 4), }; template @@ -72,10 +74,11 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY) } }; - void* amdhip_handle = nullptr; - void* hsart_handle = nullptr; - void* roctx_handle = nullptr; - void* rccl_handle = nullptr; + void* amdhip_handle = nullptr; + void* hsart_handle = nullptr; + void* roctx_handle = nullptr; + void* rccl_handle = nullptr; + void* rocdecode_handle = nullptr; if constexpr((Idx & ROCP_REG_TEST_HIP) == ROCP_REG_TEST_HIP) { @@ -107,5 +110,12 @@ resolve_symbols(int _open_mode = RTLD_LOCAL | RTLD_LAZY) if(!ncclGetVersion_fn) _resolve_dlopen(rccl_handle, "librccl.so"); _resolve_dlsym(ncclGetVersion_fn, rccl_handle, "ncclGetVersion"); } + + if constexpr((Idx & ROCP_REG_TEST_ROCDECODE) == ROCP_REG_TEST_ROCDECODE) + { + rocDecCreateDecoder_fn = rocDecCreateDecoder; + if(!rocDecCreateDecoder_fn) _resolve_dlopen(rocdecode_handle, "librocdecode.so"); + _resolve_dlsym(rocDecCreateDecoder_fn, rocdecode_handle, "rocDecCreateDecoder"); + } } } // namespace diff --git a/tests/rocdecode/CMakeLists.txt b/tests/rocdecode/CMakeLists.txt new file mode 100644 index 0000000000..5f5e4112c7 --- /dev/null +++ b/tests/rocdecode/CMakeLists.txt @@ -0,0 +1,34 @@ +# +# +# + +if(NOT TARGET rocprofiler-register::rocprofiler-register) + # find_package(rocprofiler-register REQUIRED) +endif() + +add_library(rocdecode SHARED) +add_library(rocdecode::rocdecode ALIAS rocdecode) +target_sources(rocdecode PRIVATE rocdecode.cpp rocdecode.hpp) +target_include_directories(rocdecode PUBLIC $) +target_link_libraries(rocdecode PRIVATE rocprofiler-register::rocprofiler-register) +set_target_properties( + rocdecode + PROPERTIES OUTPUT_NAME rocdecode + SOVERSION 1 + VERSION 1.0) +rocp_register_strip_target(rocdecode) + +add_library(rocdecode-invalid SHARED) +add_library(rocdecode::rocdecode-invalid ALIAS rocdecode-invalid) +target_sources(rocdecode-invalid PRIVATE rocdecode.cpp rocdecode.hpp) +target_include_directories(rocdecode-invalid + PUBLIC $) +target_link_libraries(rocdecode-invalid + PRIVATE rocprofiler-register::rocprofiler-register) +set_target_properties( + rocdecode-invalid + PROPERTIES OUTPUT_NAME rocdecode + SOVERSION 1 + VERSION 1.0 + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/invalid) +rocp_register_strip_target(rocdecode-invalid) diff --git a/tests/rocdecode/rocdecode.cpp b/tests/rocdecode/rocdecode.cpp new file mode 100644 index 0000000000..3a99fe04ff --- /dev/null +++ b/tests/rocdecode/rocdecode.cpp @@ -0,0 +1,149 @@ +#include "rocdecode.hpp" + +#include + +#include +#include +#include +#include + +#define ROCP_REG_VERSION \ + ROCPROFILER_REGISTER_COMPUTE_VERSION_2(ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION, \ + ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION) + +ROCPROFILER_REGISTER_DEFINE_IMPORT(rocdecode, ROCP_REG_VERSION) + +#ifndef ROCP_REG_FILE_NAME +# define ROCP_REG_FILE_NAME \ + ::std::string{ __FILE__ } \ + .substr(::std::string_view{ __FILE__ }.find_last_of('/') + 1) \ + .c_str() +#endif + +namespace rocdecode +{ +namespace +{ +auto& +get_rocdecode_api_table_impl() +{ + static auto _table = std::atomic{ nullptr }; + return _table; +} + +void +register_profiler_impl() +{ + static auto _const_api_table = rocdecodeApiFuncTable{}; + initialize_rocdecode_api_table(&_const_api_table); + + // set this before any recursive opportunity arises + get_rocdecode_api_table_impl().exchange(&_const_api_table); + + // create a copy of the api table for modification by registration + static auto _profiler_api_table = rocdecodeApiFuncTable{}; + copy_rocdecode_api_table(&_profiler_api_table, &_const_api_table); + + void* _profiler_api_table_v = static_cast(&_profiler_api_table); + + auto lib_id = rocprofiler_register_library_indentifier_t{}; + auto success = rocprofiler_register_library_api_table( + "rocdecode", + &ROCPROFILER_REGISTER_IMPORT_FUNC(rocdecode), + ROCP_REG_VERSION, + &_profiler_api_table_v, + 1, + &lib_id); + + if(success == 0) + { + printf("[%s] rocdecode identifier %lu\n", ROCP_REG_FILE_NAME, lib_id.handle); + auto* _api_table = &_const_api_table; + if(!get_rocdecode_api_table_impl().compare_exchange_strong(_api_table, + &_profiler_api_table)) + { + // with the current impl, if we ever get here, someone is calling one the + // functions in this anonymous namespace that shouldn't + std::cerr + << "register_profiler_impl expected the API table to be the internal " + "implementation and yet it is not. something went wrong.\n"; + abort(); + } + } + else if(success != ROCP_REG_NO_TOOLS) + { + std::cerr << "rocdecode library failed to register with rocprofiler-register: " + << rocprofiler_register_error_string(success) << "\n"; + exit(EXIT_FAILURE); + } +} + +void +register_profiler() +{ + // this registration scheme is designed to minimize overhead once + // registered (only pay cost of checking atomic boolean) + // once the profiler is registered. If the library has not + // been registered and two or more threads try to register concurrently + // the first thread to acquire the lock below, will block the + // threads until registration is complete. However, + // if the same thread performing the registration re-enters this function + // i.e. this library's API is called during registration, this function + // will prevent a deadlock by not attempting to re-enter the + // the call-once and not releasing any waiting threads by flipping + // the _is_registered field to true. + static auto _is_registered = std::atomic{ false }; + + if(!_is_registered.load(std::memory_order_acquire)) + { + using mutex_t = std::recursive_mutex; + using auto_lock_t = std::unique_lock; + static auto _once = std::once_flag{}; + static auto _mutex = mutex_t{}; + + // defer the lock so we can check for recursion + auto _lk = auto_lock_t{ _mutex, std::defer_lock }; + + // this will be true if the same thread currently executing the call_once invokes + // the library's API while registering the profiler (e.g. tool which wants to + // instrument rocdecode API invokes a rocdecode function while registering with + // the profiler) we allow this thread to proceed and access the "const" API table + // but return so it does not flip _is_registered to true, which would result in + // any subsequent threads not waiting until the library is fully registered, + // resulting in missed callbacks for the tools + if(_lk.owns_lock()) return; + + // ensures any subsequent threads wait until the first thread + // finishes registration + _lk.lock(); + // call_once to ensure that we only register once + std::call_once(_once, register_profiler_impl); + // the first thread has completed registration and all + // threads waiting on lock will be released and this + // block will not be entered again + _is_registered.exchange(true, std::memory_order_release); + } +} +} // namespace + +rocdecodeApiFuncTable* +get_rocdecode_api_table() +{ + register_profiler(); + return get_rocdecode_api_table_impl().load(std::memory_order_relaxed); +} + +void +rocdecode_init() +{ + printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__); +} +} // namespace rocdecode + +extern "C" { +void +rocdecode_init(void) +{ + rocdecode::get_rocdecode_api_table()->rocDecCreateDecoder_fn({}, {}); +} +} diff --git a/tests/rocdecode/rocdecode.hpp b/tests/rocdecode/rocdecode.hpp new file mode 100644 index 0000000000..e1706e939d --- /dev/null +++ b/tests/rocdecode/rocdecode.hpp @@ -0,0 +1,55 @@ +#pragma once + +#define ROCDECODE_RUNTIME_API_TABLE_MAJOR_VERSION 0 +#define ROCDECODE_RUNTIME_API_TABLE_STEP_VERSION 1 + +#include +#include + +extern "C" { +// fake rccl function +enum rocDecStatus +{ +}; + +enum rocDecDecoderHandle +{ +}; + +enum RocDecoderCreateInfo +{ +}; + +rocDecStatus +rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle, + RocDecoderCreateInfo* decoder_create_info) + __attribute__((visibility("default"))); +} + +namespace rocdecode +{ +struct rocdecodeApiFuncTable +{ + uint64_t size = 0; + decltype(::rocDecCreateDecoder)* rocDecCreateDecoder_fn = nullptr; +}; + +rocDecStatus +rocDecCreateDecoder(rocDecDecoderHandle* decoder_handle, + RocDecoderCreateInfo* decoder_create_info); + +// populates rocdecode api table with function pointers +inline void +initialize_rocdecode_api_table(rocdecodeApiFuncTable* dst) +{ + dst->size = sizeof(rocdecodeApiFuncTable); + dst->rocDecCreateDecoder_fn = &::rocdecode::rocDecCreateDecoder; +} + +// copies the api table from src to dst +inline void +copy_rocdecode_api_table(rocdecodeApiFuncTable* dst, const rocdecodeApiFuncTable* src) +{ + *dst = *src; +} +} // namespace rocdecode diff --git a/tests/rocprofiler/rocprofiler.cpp b/tests/rocprofiler/rocprofiler.cpp index 087c7ead30..63418badd6 100644 --- a/tests/rocprofiler/rocprofiler.cpp +++ b/tests/rocprofiler/rocprofiler.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -37,6 +38,13 @@ ncclGetVersion(int*) return {}; } +rocDecStatus +rocDecCreateDecoder(rocDecDecoderHandle*, RocDecoderCreateInfo*) +{ + printf("[%s] %s\n", ROCP_REG_FILE_NAME, __FUNCTION__); + return {}; +} + void roctx_range_push(const char* name) { @@ -83,10 +91,11 @@ rocprofiler_set_api_table(const char* name, " did not contain rocprofiler_configure symbol" }; } - using hip_table_t = hip::HipApiTable; - using hsa_table_t = hsa::HsaApiTable; - using roctx_table_t = roctx::ROCTxApiTable; - using rccl_table_t = rccl::rcclApiFuncTable; + using hip_table_t = hip::HipApiTable; + using hsa_table_t = hsa::HsaApiTable; + using roctx_table_t = roctx::ROCTxApiTable; + using rccl_table_t = rccl::rcclApiFuncTable; + using rocdecode_table_t = rocdecode::rocdecodeApiFuncTable; auto* _wrap_v = std::getenv("ROCP_REG_TEST_WRAP"); bool _wrap = (_wrap_v != nullptr && std::stoi(_wrap_v) != 0); @@ -121,6 +130,11 @@ rocprofiler_set_api_table(const char* name, rccl_table_t* _table = static_cast(tables[0]); _table->ncclGetVersion_fn = &rocprofiler::ncclGetVersion; } + else if(std::string_view{ name } == "rocdecode") + { + rocdecode_table_t* _table = static_cast(tables[0]); + _table->rocDecCreateDecoder_fn = &rocprofiler::rocDecCreateDecoder; + } } return 0;