d3eaacd610
* Update scripts/update-doxygen.sh
- ensure build-docs folder exists
* Update scripts/run-ci.py
- exclude files in details subdirectory from code coverage
* Update scripts/thread-sanitizer-suppr.txt
- exclude races in glog
* Update docs/rocprofiler.dox.in
- exclude defines in include/rocprofiler/defines.h from doxygen
- Tweak EXCLUDE_PATTERNS and EXAMPLE_PATTERNS
* Update docs workflow
- trigger workflow whenever there is a change to the public headers (which may be doxygen comments)
* Update include/rocprofiler (reorg and overhaul)
- rocprofiler_status_t additions
- CONTEXT_NOT_FOUND
- CONTEXT_ERROR
- INVALID_CONTEXT_ID
- INVALID_CONTEXT
- BUFFER_BUSY
- rocprofiler_context_is_active func
- rocprofiler_context_is_valid func
- rocprofiler_service_callback_tracing_kind_t update
- remove ROCPROFILER_SERVICE_CALLBACK_TRACING_HELPER_THREAD
- Remove rocprofiler_tracing_helper_thread_operation_t
- Remove rocprofiler_helper_thread_callback_tracer_data_t
- Added rocprofiler_internal_thread_library_t
- Added rocprofiler_at_internal_thread_create
- split rocprofiler.h into several smaller headers
- reworked rocprofiler_status_t values
- added doxygen comments for enums
- replaced rocprofiler_trace_record_operation_kind_t with rocprofiler_trace_operation_t
- use @ instead of / in doxygen comment in rocprofiler_plugin.h
- fix ref to ROCPROFILER_SERVICE_CALLBACK_TRACING_MARKER_API
- end group in fwd.h
- remove PROFILE_COUNTING group in dispatch_profile.h
- remove premature group close in callback_tracing.h
- hsa.h: remove rocprofiler_hsa_trace_data_t
- fwd.h: remove rocprofiler_tracer_callback_data_t
- rename rocprofiler_correlation_id_t.handle to rocprofiler_correlation_id_t.id (consistency)
- fwd.h: add rocprofiler_callback_tracing_record_t
- callback_tracing.h: update rocprofiler_hsa_api_callback_tracer_data_t
- callback_tracing.h: add size fields
- simplify rocprofiler_tracer_callback_t
- removed ROCPROFILER_NONNULL from rocprofiler_get_version
- added rocprofiler_get_timestamp
- ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED in rocprofiler_status_t
- add ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND rocprofiler_status_t
- add rocprofiler_buffer_category_t
- rocprofiler_trace_operation_t -> rocprofiler_tracing_operation_t
- rocprofiler_user_data_t union
- tweak rocprofiler_callback_tracing_record_t
- make external_correlation_id non-pointer
- add rocprofiler_user_data_t data field
- tweak rocprofiler_record_header_t
- instead of single uint64_t kind field, have union for category + kind (two u32) with u64 hash
- API extensions for kind id <-> kind string
- API extensions for operation id <-> operation string
- rocprofiler_callback_trace_kind_name_cb_t
- rocprofiler_callback_trace_operation_name_cb_t
- rocprofiler_iterate_callback_trace_kind_names
- rocprofiler_iterate_callback_trace_kind_operation_names
- modify rocprofiler_hsa_api_callback_tracer_data_t data members (remove pointers)
- add rocprofiler_callback_trace_operation_args_cb_t function pointer typedef
- add rocprofiler_iterate_callback_trace_operation_args function
- fixed inconsistent use of *_trace_* vs. *_tracing_* (opting for tracing)
- removed rocprofiler_query_callback_trace_kind_name
- removed rocprofiler_query_callback_kind_operation_name
- Add include/rocprofiler/registration.h
- header dedicated to registering a tool/client with rocprofiler
- this header is not intended to be included by rocprofiler.h
- rocprofiler_client_id_t
- identifier for client tool
- rocprofiler_client_finalize_t
- function pointer prototype for tool-initiated finalization
- rocprofiler_tool_initialize_t
- function pointer prototype for tool initialization (i.e. configuration)
- rocprofiler_tool_finalize_t
- function pointer prototype for tool finalization
- rocprofiler_tool_configure_result_t
- struct returned by tool/client to rocprofiler
- rocprofiler_is_initialized
- function for querying whether tool-induced initialization is possible
- rocprofiler_is_finalized
- function for querying whether rocprofiler has been finalized
- rocprofiler_configure prototype
- this is the function tools implement
- prototype is always marked as having default visibility
- no implementation in rocprofiler
- added typedef for rocprofiler_configure function pointer
- added rocprofiler_force_configure to explicitly invoke rocprofiler_configure instead of relying on lazy init
- made callback typedef names more consistent (_cb_t suffix)
- typedef for rocprofiler_internal_thread_library_cb_t function pointer
- added rocprofiler_at_internal_thread_create function
- added rocprofiler_callback_thread_t struct
- added rocprofiler_create_callback_thread function
- added rocprofiler_assign_callback_thread function
- removed rocprofiler_buffer_tracing_record_header_t in favor of kind and correlation id in each record type
- added rocprofiler_buffer_tracing_kind_name_cb_t typedef
- added rocprofiler_buffer_tracing_operation_name_cb_t typedef
- added rocprofiler_iterate_buffer_tracing_kind_names function
- added rocprofiler_iterate_buffer_tracing_kind_operation_names function
- removed rocprofiler_query_buffer_trace_kind_name function
- removed rocprofiler_query_buffer_kind_operation_name function
* Update lib/common/container/stable_vector.hpp
- include limits header
- reserve_size struct
- overload stable_vector constructor to support reserving as part of construction
* Update lib/common/container/record_header_buffer.{hpp,cpp}
- add emplace member function accepting category and kind (two u32 variables) instead of one u64 kind
- use std::shared_mutex to prevent data-race when reading m_headers
- record_header_buffer is now multiple writer, single reader
- add read_lock member function (shared)
- add read_unlock member function (shared)
- lock member function gets exclusive lock
- unlock member function releases exclusive lock
* Rename "config" to "context" + restructure + implement
- Restructure config files + license
- move config files into lib/rocprofiler/config subfolder
- rename some files
- add license to some files which were missing it
- Rename config/helpers.hpp
- rename to allocator.hpp
- remove get_domain_max_ops
- Create config/domain.{hpp,cpp}
- structures for handling tracing domains and ops
- Update config/config.{hpp,cpp}
- buffer_instance struct
- callback_tracing_service struct
- buffer_tracing_service struct
- config struct
- allocate_{config,buffer} func
- {validate,start,stop}_config funcs
- get_registered_configs func
- get_active_configs func
- get_buffers func
- Update rocprofiler.cpp
- Implement rocprofiler_create_context
- Implement rocprofiler_start_context
- Implement rocprofiler_stop_context
- Implement rocprofiler_context_is_active
- Implement rocprofiler_context_is_valid
- Implement rocprofiler_flush_buffer
- Implement rocprofiler_destroy_buffer
- Implement rocprofiler_create_buffer
- Update lib/rocprofiler/hsa
- use rocprofiler_tracer_activity_domain_t instead of rocprofiler_tracer_activity_domain_t
- remove ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API from HSA_API_INFO_DEFINITION_* macros
- Update lib/rocprofiler/context/domain.*
- fixes for domain_info (i.e. use correct enums)
- update rocprofiler_status_t codes
- fix template instantiations
- Update lib/rocprofiler/context/context.*
- use rocprofiler_service_callback_tracing_kind_t instead of rocprofiler_tracer_activity_domain_t
- rename correlation_context to correlation_tracing_service
- fix domains in callback_tracing_service and buffer_tracing_service
- unique_ptr for callback_tracer and buffered_tracer in context
- Update lib/rocprofiler/rocprofiler.cpp
- implement rocprofiler_configure_callback_tracing_service
- Update lib/rocprofiler/hsa/ostream.hpp
- include rocprofiler.h instead of tracer.hpp
- Update lib/rocprofiler/hsa
- migration to use rocprofiler_hsa_api_callback_tracer_data_t instead of rocprofiler_hsa_trace_data_t
- restructure hsa_api_impl<Idx>
- remove phase_enter and phase_exit
- add set_data_args (partial replacement for phase_enter)
- functor handles the contexts
- Update lib/rocprofiler/rocprofiler.cpp
- implement rocprofiler_get_version
- Update lib/rocprofiler/hsa/hsa.{hpp,cpp}
- remove hsa_api_ prefix for functions already in hsa namespace
- Update lib/rocprofiler/context/context.{hpp,cpp}
- add client_idx to context struct (tool identifier)
- add push_client function to set client_idx before context is allocated
- add pop_client function to remove client identifier from future context creations
- implemented {registered,active}_contexts and buffers to use new container::reserve_size overload to stable_vector
- fix implementation of start_context
- fix implementation of stop_context
- Update lib/rocprofiler/rocprofiler.cpp
- prevent context creation, buffer creation, pc sampling config, etc. after initialization
- add nullptr checks to rocprofiler_context_is_valid
- fix rocprofiler_configure_callback_tracing_service
- was checking size of buffers, not registered context
- implement rocprofiler_iterate_callback_trace_kind_names
- implement rocprofiler_iterate_callback_trace_kind_operation_names
- Update lib/rocprofiler/CMakeLists.txt
- add registration.{hpp,cpp} to rocprofiler-library target sources
- Update lib/rocprofiler/hsa/utils.hpp
- fix using fmt::format with const char* strings
- remove join functions (no longer used)
- Update lib/rocprofiler/hsa/hsa.{hpp,cpp}
- remove args_string function
- remove named_args_string function
- update iterate_args function
- change callback type
- accept user data
- rework the hsa_api_impl<Idx>::functor function
- save the rocprofiler_callback_tracing_record_t between callbacks
- update update_table function
- check buffered_tracer domains
- remove comments
- Update lib/rocprofiler/hsa/defines.hpp
- remove MEMBER_<N> macros
- add ADDR_MEMBER_<N> macros
- remove doxygen comments for GET_MEMBER_FIELDS
- add GET_ADDR_MEMBER_FIELDS
- update HSA_API_INFO_DEFINITION_{0,V}
- rename domain_idx to callback_domain_idx
- add buffered_domain_idx
- add as_arg_addr function
- Update lib/rocprofiler/rocprofiler.cpp
- implement rocprofiler_iterate_callback_trace_operation_args
- Remove lib/rocprofiler/tracing.{hpp,cpp} and lib/rocprofiler/CMakeLists.txt
- unused
- Update lib/rocprofiler/hsa/hsa.{hpp,cpp}
- support buffered tracing in hsa_api_impl<Idx>::functor
- rocprofiler_callback_trace_operation_args_cb_t -> rocprofiler_callback_tracing_operation_args_cb_t
- i.e. trace -> tracing
- Update lib/rocprofiler/context/context.{hpp,cpp}
- removed buffer_instance struct
- removed allocate_buffer function
- removed get_buffers function
- changed buffer_tracing_service::buffer_array_t
- Update lib/rocprofiler/hsa: hsa.cpp, ostream.hpp, details folder
- move ostream.hpp into details folder to prevent from contributing to code coverage
- update cmake build system for new directory
* Add lib/rocprofiler/registration.{hpp,cpp}
- implements rocprofiler_set_api_table (called by rocprofiler-register)
- miscellaneous functions for client configure/initialize/finalize
- functions for querying the init/fini status
- relocated OnLoad HSA workaround to this file
- at present, this is used to workaround ROCr not having rocprofiler-register integration yet
- implement rocprofiler_force_configure function
- implement rocprofiler_is_initialized function
- implement rocprofiler_is_finalized function
- ensure configure functions only invoked once
- ensure internal thread creation notification functions are invoked
- get_status is pair of atomics
- fix heap-use-after-free in init_logging
- update finalize
- invoke hsa_shut_down
- set all active contexts to null pointers
* Add lib/rocprofiler/buffer_tracing.cpp
- contains implementations of buffer_tracing (i.e. rocprofiler/buffer_tracing.h)
- previous implementation may have been moved out of lib/rocprofiler/rocprofiler.cpp
* Add lib/rocprofiler/buffer.{hpp,cpp}
- contains implementations of buffer (i.e. rocprofiler/buffer.h) and misc internal access functions
- previous implementation may have been moved out of lib/rocprofiler/rocprofiler.cpp and lib/rocprofiler/context/context.{hpp,cpp}
* Add lib/rocprofiler/callback_tracing.cpp
- contains implementations of callback_tracing (i.e. rocprofiler/callback_tracing.h)
- previous implementation may have been moved out of lib/rocprofiler/rocprofiler.cpp
* Add lib/rocprofiler/context.cpp
- contains implementations of context public API functions (i.e. rocprofiler/context.h)
- previous implementation may have been moved out of lib/rocprofiler/rocprofiler.cpp
* Add lib/rocprofiler/internal_threading.{hpp,cpp}
- contains implementations of internal_threading (i.e. rocprofiler/internal_threading.h)
- also contains implementations of internal access functions
- update finalize function
- join all task groups and destroy all thread pools first, then reset unique_ptr
* Update lib/rocprofiler/rocprofiler.cpp
- rocprofiler_get_version returns status
- implement rocprofiler_get_timestamp
- remove misc implementations that were split into other files
* Update lib/rocprofiler/CMakeLists.txt
- compile new implementation files
- buffer.cpp
- buffer_tracing.cpp
- callback_tracing.cpp
- context.cpp
- internal_threading.cpp
* Update lib/tests/buffering/buffering-*.cpp
- update to reflect changes to rocprofiler_record_header_t
* Update CMakeLists.txt
- increase minimum cmake version to 3.21 which added HIP support as a language
* Add samples/apps/transpose
- simple HIP application for testing
* Add samples/api_callback_tracing
- HIP application and tool library
- This effectively demos how to setup HSA API tracing
- For each function called in tool, it stores the func/file/line and prints it during finalization
- client.hpp and client.cpp are the tool library
- Implement use of rocprofiler_iterate_callback_trace_operation_args
- add demo of using rocprofiler_get_version
- add_test
- remove PASS_REGULAR_EXPRESSION
- causing false passes during memcheck
- add ROCPROFILER_MEMCHECK_PRELOAD_ENV to environment
- check if rocprofiler is initialized before stopping context
* Add samples/api_buffered_tracing
- Sample demonstrating tracing the HSA API via buffering
- demo rocprofiler_record_header_compute_hash
- throw exceptions for unexpected buffer data
- add_test
- remove PASS_REGULAR_EXPRESSION
- causing false passes during memcheck
- add ROCPROFILER_MEMCHECK_PRELOAD_ENV to environment
* Update samples/CMakeLists.txt
- add subdirectory for api_callback_tracing
- add subdirectory api_buffered_tracing
* Update samples/pc_sampling/common.h
- fix processing of headers
* Update lib/rocprofiler/hsa/details/ostream.hpp
- fix data race on HSA_depth_max_cnt and recursion
- HSA_depth_max_cnt and recursion are now thread-local static instead of global static
- replace std::string usage with std::string_view
* Actions update
- add dependabot.yml
- use actions/checkout@v4
- install latest libasan and libtsan in sanitizer containers
* Add PTL (Parallel Tasking Library) submodule
557 rivejä
16 KiB
C++
557 rivejä
16 KiB
C++
// MIT License
|
|
//
|
|
// Copyright (c) 2023 ROCm Developer Tools
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
|
|
#include "lib/rocprofiler/registration.hpp"
|
|
#include "lib/rocprofiler/context/context.hpp"
|
|
#include "lib/rocprofiler/hsa/hsa.hpp"
|
|
#include "lib/rocprofiler/internal_threading.hpp"
|
|
|
|
#include <rocprofiler/context.h>
|
|
#include <rocprofiler/fwd.h>
|
|
#include <rocprofiler/hsa.h>
|
|
#include <rocprofiler/version.h>
|
|
|
|
#include <fmt/format.h>
|
|
#include <glog/logging.h>
|
|
|
|
#include <dlfcn.h>
|
|
#include <link.h>
|
|
#include <unistd.h>
|
|
#include <atomic>
|
|
#include <cstdint>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
#include <string_view>
|
|
#include <thread>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
extern "C" {
|
|
#pragma weak rocprofiler_configure
|
|
|
|
extern rocprofiler_tool_configure_result_t*
|
|
rocprofiler_configure(uint32_t, const char*, uint32_t, rocprofiler_client_id_t*);
|
|
}
|
|
|
|
namespace rocprofiler
|
|
{
|
|
namespace registration
|
|
{
|
|
namespace
|
|
{
|
|
// Returns the process-wide pair of status flags, both starting at zero:
//   .first  is read/written by get_init_status()/set_init_status()
//   .second is read/written by get_fini_status()/set_fini_status()
auto&
get_status()
{
    using status_pair_t = std::pair<std::atomic<int>, std::atomic<int>>;

    static status_pair_t statuses{0, 0};
    return statuses;
}
|
|
|
|
auto&
|
|
get_invoked_configures()
|
|
{
|
|
static auto _v = std::unordered_set<rocprofiler_configure_func_t>{};
|
|
return _v;
|
|
}
|
|
|
|
auto&
|
|
get_forced_configure()
|
|
{
|
|
static rocprofiler_configure_func_t _v = nullptr;
|
|
return _v;
|
|
}
|
|
|
|
void
|
|
init_logging()
|
|
{
|
|
static auto _once = std::once_flag{};
|
|
std::call_once(_once, []() {
|
|
auto get_argv0 = []() {
|
|
auto ifs = std::ifstream{"/proc/self/cmdline"};
|
|
auto sarg = std::string{};
|
|
while(ifs && !ifs.eof())
|
|
{
|
|
ifs >> sarg;
|
|
if(!sarg.empty()) break;
|
|
}
|
|
return sarg;
|
|
};
|
|
|
|
static auto argv0 = get_argv0();
|
|
google::InitGoogleLogging(argv0.c_str());
|
|
LOG(INFO) << "logging initialized";
|
|
});
|
|
}
|
|
|
|
// Returns the names of the shared objects in the link map of the running
// program. The first link-map entry is skipped and entries with a null or
// empty name are omitted. An empty vector is returned when the link map cannot
// be queried.
std::vector<std::string>
get_link_map()
{
    auto chain = std::vector<std::string>{};

    // a null filename requests a handle for the main program
    void* handle = dlopen(nullptr, RTLD_LAZY | RTLD_NOLOAD);
    if(handle == nullptr) return chain;

    // dlinfo reports failure through its return value; never dereference the
    // link map unless the query succeeded and produced a non-null pointer
    struct link_map* link_map_v = nullptr;
    if(dlinfo(handle, RTLD_DI_LINKMAP, &link_map_v) != 0 || link_map_v == nullptr) return chain;

    for(const auto* itr = link_map_v->l_next; itr != nullptr; itr = itr->l_next)
    {
        if(itr->l_name != nullptr && itr->l_name[0] != '\0') chain.emplace_back(itr->l_name);
    }

    return chain;
}
|
|
|
|
struct client_library
|
|
{
|
|
std::string name = {};
|
|
void* dlhandle = nullptr;
|
|
decltype(::rocprofiler_configure)* configure_func = nullptr;
|
|
std::unique_ptr<rocprofiler_tool_configure_result_t> configure_result = {};
|
|
rocprofiler_client_id_t internal_client_id = {};
|
|
rocprofiler_client_id_t mutable_client_id = {};
|
|
};
|
|
|
|
std::vector<client_library>
|
|
find_clients()
|
|
{
|
|
auto data = std::vector<client_library>{};
|
|
|
|
if(get_forced_configure())
|
|
{
|
|
data.emplace_back(client_library{"(forced)", nullptr, get_forced_configure()});
|
|
}
|
|
|
|
if(!rocprofiler_configure && !get_forced_configure())
|
|
{
|
|
LOG(ERROR) << "no rocprofiler_configure function found";
|
|
return data;
|
|
}
|
|
|
|
if(rocprofiler_configure != &rocprofiler_configure)
|
|
throw std::runtime_error("rocprofiler_configure != &rocprofiler_configure");
|
|
|
|
if(&rocprofiler_configure != get_forced_configure())
|
|
data.emplace_back(client_library{"unknown", nullptr, &rocprofiler_configure});
|
|
|
|
for(const auto& itr : get_link_map())
|
|
{
|
|
LOG(INFO) << "searching " << itr << " for rocprofiler_configure";
|
|
|
|
void* handle = dlopen(itr.c_str(), RTLD_LAZY | RTLD_NOLOAD);
|
|
LOG_IF(ERROR, handle == nullptr) << "error dlopening " << itr;
|
|
|
|
decltype(::rocprofiler_configure)* _sym = nullptr;
|
|
*(void**) (&_sym) = dlsym(handle, "rocprofiler_configure");
|
|
|
|
// skip the configure function that was forced
|
|
if(_sym == get_forced_configure())
|
|
{
|
|
data.front().name = itr;
|
|
data.front().dlhandle = handle;
|
|
data.front().internal_client_id.name = "(forced)";
|
|
continue;
|
|
}
|
|
|
|
if(!_sym)
|
|
{
|
|
LOG(INFO) << "|_" << itr << " did not contain rocprofiler_configure symbol";
|
|
continue;
|
|
}
|
|
|
|
if(_sym == &rocprofiler_configure && data.size() == 1)
|
|
{
|
|
data.front().name = itr;
|
|
data.front().dlhandle = handle;
|
|
data.front().internal_client_id.name = "default";
|
|
}
|
|
else
|
|
{
|
|
uint32_t _prio = data.size();
|
|
auto& entry =
|
|
data.emplace_back(client_library{itr,
|
|
handle,
|
|
_sym,
|
|
nullptr,
|
|
rocprofiler_client_id_t{nullptr, _prio},
|
|
rocprofiler_client_id_t{nullptr, _prio}});
|
|
entry.internal_client_id.name = entry.name.c_str();
|
|
}
|
|
}
|
|
|
|
LOG(ERROR) << __FUNCTION__ << " found " << data.size() << " clients";
|
|
|
|
return data;
|
|
}
|
|
|
|
std::vector<client_library>&
|
|
get_clients()
|
|
{
|
|
static auto _v = find_clients();
|
|
return _v;
|
|
}
|
|
|
|
// Lock types used by the registration functions. The mutex is recursive, so a
// thread which already holds it can lock it again without blocking.
using mutex_t       = std::recursive_mutex;
using scoped_lock_t = std::unique_lock<mutex_t>;

// Returns the single mutex shared by the registration functions.
mutex_t&
get_registration_mutex()
{
    static mutex_t registration_mutex{};
    return registration_mutex;
}
|
|
} // namespace
|
|
|
|
int
|
|
get_init_status()
|
|
{
|
|
return get_status().first.load(std::memory_order_acquire);
|
|
}
|
|
|
|
int
|
|
get_fini_status()
|
|
{
|
|
return get_status().second.load(std::memory_order_acquire);
|
|
}
|
|
|
|
void
|
|
set_init_status(int v)
|
|
{
|
|
get_status().first.store(v, std::memory_order_release);
|
|
}
|
|
|
|
void
|
|
set_fini_status(int v)
|
|
{
|
|
get_status().second.store(v, std::memory_order_release);
|
|
}
|
|
|
|
bool
|
|
invoke_client_configures()
|
|
{
|
|
if(get_init_status() > 0) return false;
|
|
|
|
auto _lk = scoped_lock_t{get_registration_mutex(), std::defer_lock};
|
|
if(_lk.owns_lock()) return false;
|
|
_lk.lock();
|
|
|
|
LOG(ERROR) << __FUNCTION__;
|
|
|
|
size_t prio = 0;
|
|
for(auto& itr : get_clients())
|
|
{
|
|
if(get_invoked_configures().find(itr.configure_func) != get_invoked_configures().end())
|
|
{
|
|
LOG(ERROR) << "rocprofiler::registration::invoke_client_configures() attempted to "
|
|
"invoke configure function from "
|
|
<< itr.name << " (addr="
|
|
<< fmt::format("{:#018x}", reinterpret_cast<uint64_t>(itr.configure_func))
|
|
<< ") more than once";
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
LOG(INFO) << "rocprofiler::registration::invoke_client_configures() invoking configure "
|
|
"function from "
|
|
<< itr.name << " (addr="
|
|
<< fmt::format("{:#018x}", reinterpret_cast<uint64_t>(itr.configure_func))
|
|
<< ")";
|
|
}
|
|
|
|
auto* _result = itr.configure_func(
|
|
ROCPROFILER_VERSION, ROCPROFILER_VERSION_STRING, prio++, &itr.mutable_client_id);
|
|
if(_result)
|
|
itr.configure_result = std::make_unique<rocprofiler_tool_configure_result_t>(*_result);
|
|
|
|
get_invoked_configures().emplace(itr.configure_func);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
invoke_client_initializers()
|
|
{
|
|
if(get_init_status() > 0) return false;
|
|
|
|
auto _lk = scoped_lock_t{get_registration_mutex(), std::defer_lock};
|
|
if(_lk.owns_lock()) return false;
|
|
_lk.lock();
|
|
|
|
LOG(ERROR) << __FUNCTION__;
|
|
|
|
set_init_status(-1);
|
|
for(auto& itr : get_clients())
|
|
{
|
|
if(itr.configure_result && itr.configure_result->initialize)
|
|
{
|
|
context::push_client(itr.internal_client_id.handle);
|
|
itr.configure_result->initialize(&invoke_client_finalizer,
|
|
itr.configure_result->tool_data);
|
|
context::pop_client(itr.internal_client_id.handle);
|
|
// set to nullptr so initialize only gets called once
|
|
itr.configure_result->initialize = nullptr;
|
|
}
|
|
}
|
|
|
|
// initialization is no longer available
|
|
set_init_status(1);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
invoke_client_finalizers()
|
|
{
|
|
if(get_fini_status() > 0) return false;
|
|
|
|
auto _lk = scoped_lock_t{get_registration_mutex(), std::defer_lock};
|
|
if(_lk.owns_lock()) return false;
|
|
_lk.lock();
|
|
|
|
set_fini_status(-1);
|
|
for(auto& itr : get_clients())
|
|
{
|
|
if(itr.configure_result && itr.configure_result->finalize)
|
|
{
|
|
itr.configure_result->finalize(itr.configure_result->tool_data);
|
|
// set to nullptr so finalize only gets called once
|
|
itr.configure_result->finalize = nullptr;
|
|
}
|
|
}
|
|
|
|
set_fini_status(1);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
invoke_client_initializer(rocprofiler_client_id_t client_id)
|
|
{
|
|
if(get_init_status() > 0) return false;
|
|
|
|
auto _lk = scoped_lock_t{get_registration_mutex(), std::defer_lock};
|
|
if(_lk.owns_lock()) return false;
|
|
_lk.lock();
|
|
|
|
// save the original status
|
|
auto _restore_status = get_init_status();
|
|
set_init_status(-1);
|
|
for(auto& itr : get_clients())
|
|
{
|
|
if(itr.internal_client_id.handle == client_id.handle &&
|
|
itr.mutable_client_id.handle == client_id.handle)
|
|
{
|
|
if(itr.configure_result && itr.configure_result->initialize)
|
|
{
|
|
context::push_client(itr.internal_client_id.handle);
|
|
itr.configure_result->initialize(&invoke_client_finalizer,
|
|
itr.configure_result->tool_data);
|
|
context::pop_client(itr.internal_client_id.handle);
|
|
// set to nullptr so initialize only gets called once
|
|
itr.configure_result->initialize = nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
// we don't want the explicit client initialization to set the init status to 1
|
|
// we just want to restore what it previously was
|
|
set_init_status(_restore_status);
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
invoke_client_finalizer(rocprofiler_client_id_t client_id)
|
|
{
|
|
auto _lk = scoped_lock_t{get_registration_mutex(), std::defer_lock};
|
|
if(_lk.owns_lock()) return;
|
|
_lk.lock();
|
|
|
|
for(auto& itr : get_clients())
|
|
{
|
|
if(itr.internal_client_id.handle == client_id.handle &&
|
|
itr.mutable_client_id.handle == client_id.handle)
|
|
{
|
|
if(itr.configure_result && itr.configure_result->finalize)
|
|
{
|
|
itr.configure_result->finalize(itr.configure_result->tool_data);
|
|
// set to nullptr so finalize only gets called once
|
|
itr.configure_result->finalize = nullptr;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
initialize()
|
|
{
|
|
static auto _once = std::once_flag{};
|
|
static auto _ready = std::atomic<bool>{false};
|
|
|
|
std::call_once(_once, []() {
|
|
init_logging();
|
|
invoke_client_configures();
|
|
invoke_client_initializers();
|
|
internal_threading::initialize();
|
|
std::atexit(&finalize);
|
|
_ready.store(true, std::memory_order_release);
|
|
});
|
|
|
|
if(!_ready.load(std::memory_order_acquire))
|
|
{
|
|
while(!_ready.load(std::memory_order_acquire))
|
|
std::this_thread::yield();
|
|
}
|
|
}
|
|
|
|
void
|
|
finalize()
|
|
{
|
|
hsa_shut_down();
|
|
invoke_client_finalizers();
|
|
for(auto& itr : rocprofiler::context::get_active_contexts())
|
|
itr.store(nullptr, std::memory_order_seq_cst);
|
|
internal_threading::finalize();
|
|
}
|
|
} // namespace registration
|
|
} // namespace rocprofiler
|
|
|
|
extern "C" {
|
|
// Writes the current initialization status to *status and reports success.
// The pointer is dereferenced without a null check.
rocprofiler_status_t
rocprofiler_is_initialized(int* status)
{
    const int init_state = rocprofiler::registration::get_init_status();
    *status              = init_state;
    return ROCPROFILER_STATUS_SUCCESS;
}
|
|
|
|
// Writes the current finalization status to *status and reports success.
// The pointer is dereferenced without a null check.
rocprofiler_status_t
rocprofiler_is_finalized(int* status)
{
    const int fini_state = rocprofiler::registration::get_fini_status();
    *status              = fini_state;
    return ROCPROFILER_STATUS_SUCCESS;
}
|
|
|
|
// Registers configure_func as the forced configure function and immediately
// runs rocprofiler's initialization.
//
// Returns ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED when initialization has
// already started or finished, or when a forced configure function has already
// been registered; returns ROCPROFILER_STATUS_SUCCESS otherwise.
rocprofiler_status_t
rocprofiler_force_configure(rocprofiler_configure_func_t configure_func)
{
    namespace registration = rocprofiler::registration;

    auto& forced_config = registration::get_forced_configure();

    // The init status is -1 while initializing and 1 once initialized; in both
    // cases the request is rejected. If another tool already forced a configure
    // function the status should be 1, but the stored pointer is also checked
    // to make sure it is still null.
    const bool init_started = (registration::get_init_status() != 0);
    if(init_started || forced_config) return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;

    forced_config = configure_func;
    registration::initialize();

    return ROCPROFILER_STATUS_SUCCESS;
}
|
|
|
|
int
|
|
rocprofiler_set_api_table(const char* name,
|
|
uint64_t lib_version,
|
|
uint64_t lib_instance,
|
|
void** tables,
|
|
uint64_t num_tables)
|
|
{
|
|
static auto _once = std::once_flag{};
|
|
std::call_once(_once, rocprofiler::registration::initialize);
|
|
|
|
// pass to roctx init
|
|
LOG_IF(ERROR, num_tables == 0) << " rocprofiler expected " << name
|
|
<< " library to pass at least one table, not " << num_tables;
|
|
LOG_IF(ERROR, tables == nullptr) << " rocprofiler expected pointer to array of tables from "
|
|
<< name << " library, not a nullptr";
|
|
|
|
if(std::string_view{name} == "hip")
|
|
{
|
|
// pass to hip init
|
|
LOG_IF(ERROR, num_tables > 1)
|
|
<< " rocprofiler expected HIP library to pass 1 API table, not " << num_tables;
|
|
}
|
|
else if(std::string_view{name} == "hsa")
|
|
{
|
|
// pass to hsa init
|
|
LOG_IF(ERROR, num_tables > 1)
|
|
<< " rocprofiler expected HSA library to pass 1 API table, not " << num_tables;
|
|
|
|
auto* hsa_api_table = static_cast<HsaApiTable*>(*tables);
|
|
auto& saved_hsa_api_table = rocprofiler::hsa::get_table();
|
|
::copyTables(hsa_api_table, &saved_hsa_api_table);
|
|
|
|
rocprofiler::hsa::update_table(hsa_api_table);
|
|
}
|
|
else if(std::string_view{name} == "roctx")
|
|
{
|
|
// pass to roctx init
|
|
LOG_IF(ERROR, num_tables > 1)
|
|
<< " rocprofiler expected ROCTX library to pass 1 API table, not " << num_tables;
|
|
}
|
|
else
|
|
{
|
|
LOG(ERROR) << "rocprofiler does not accept API tables from " << name;
|
|
LOG_ASSERT(false) << " rocprofiler does not accept API tables from " << name;
|
|
}
|
|
|
|
(void) lib_version;
|
|
(void) lib_instance;
|
|
(void) tables;
|
|
(void) num_tables;
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool
|
|
OnLoad(HsaApiTable* table,
|
|
uint64_t runtime_version,
|
|
uint64_t failed_tool_count,
|
|
const char* const* failed_tool_names)
|
|
{
|
|
rocprofiler::registration::init_logging();
|
|
|
|
(void) runtime_version;
|
|
(void) failed_tool_count;
|
|
(void) failed_tool_names;
|
|
|
|
fprintf(stderr, "[%s:%i] %s\n", __FILE__, __LINE__, __FUNCTION__);
|
|
|
|
void* table_v = static_cast<void*>(table);
|
|
rocprofiler_set_api_table("hsa", runtime_version, 0, &table_v, 1);
|
|
|
|
return true;
|
|
}
|
|
}
|