diff --git a/projects/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-sdk/CMakeLists.txt index 961a324b97..18bb821715 100644 --- a/projects/rocprofiler-sdk/CMakeLists.txt +++ b/projects/rocprofiler-sdk/CMakeLists.txt @@ -27,8 +27,8 @@ project( rocprofiler LANGUAGES C CXX VERSION ${ROCPROFILER_VERSION} - DESCRIPTION "ROCm GPU performance analysis" - HOMEPAGE_URL "https://github.com/ROCm-Developer-Tools/rocprofiler-v2-internal") + DESCRIPTION "ROCm GPU performance analysis SDK" + HOMEPAGE_URL "https://github.com/ROCm/rocprofiler-v2-internal") set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "core") diff --git a/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp b/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp index 0def999330..4514918928 100644 --- a/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp +++ b/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp @@ -448,7 +448,12 @@ tool_fini(void* tool_data) void setup() { - ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure), "force configuration"); + if(int status = 0; + rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0) + { + ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure), + "force configuration"); + } } void diff --git a/projects/rocprofiler-sdk/source/docs/about.md b/projects/rocprofiler-sdk/source/docs/about.md index 4fb8d2dc06..2de952a8c8 100644 --- a/projects/rocprofiler-sdk/source/docs/about.md +++ b/projects/rocprofiler-sdk/source/docs/about.md @@ -8,7 +8,7 @@ ## Important Changes -[Roctracer](https://github.com/ROCm-Developer-Tools/roctracer) and [rocprofiler (v1)](https://github.com/ROCm-Developer-Tools/rocprofiler) +[Roctracer](https://github.com/ROCm/roctracer) and [rocprofiler (v1)](https://github.com/ROCm/rocprofiler) have been combined into a single rocprofiler SDK and re-designed from scratch. The new rocprofiler API has been designed with some new restrictions to avoid problems that plagued the former implementations. These restrictions enable more efficient implementations and much better thread-safety. The most important restriction is the window for tools to inform rocprofiler about which services diff --git a/projects/rocprofiler-sdk/source/docs/index.md b/projects/rocprofiler-sdk/source/docs/index.md index 40630f5963..69303562a9 100644 --- a/projects/rocprofiler-sdk/source/docs/index.md +++ b/projects/rocprofiler-sdk/source/docs/index.md @@ -1,4 +1,4 @@ -# Welcome to the [ROCprofiler](https://github.com/ROCm-Developer-Tools/rocprofiler-v2-internal) Documentation! +# Welcome to the [ROCprofiler](https://github.com/ROCm/rocprofiler-v2-internal) Documentation! ```eval_rst .. toctree:: diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/internal_threading.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/internal_threading.h index 234bc7cc0c..cdf4accf6a 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/internal_threading.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/internal_threading.h @@ -98,6 +98,8 @@ typedef struct * @param [in] cb_thread_id User-provided pointer to a @ref rocprofiler_callback_thread_t * @return ::rocprofiler_status_t * @retval ::ROCPROFILER_STATUS_SUCCESS Successful thread creation + * @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Thread creation is no longer available + * post-initialization * @retval ::ROCPROFILER_STATUS_ERROR Failed to create thread */ rocprofiler_status_t ROCPROFILER_API @@ -114,6 +116,8 @@ rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id) * @return ::rocprofiler_status_t * @retval ::ROCPROFILER_STATUS_SUCCESS Successful assignment of the delivery thread for the given * buffer + * @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Thread assignment is no longer available + * post-initialization * @retval ::ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND Thread identifier did not match any of the * threads created by rocprofiler * @retval ::ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND Buffer identifier did not match any of the diff --git a/projects/rocprofiler-sdk/source/lib/common/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/common/CMakeLists.txt index e85cc81061..017463d7e7 100644 --- a/projects/rocprofiler-sdk/source/lib/common/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/common/CMakeLists.txt @@ -3,7 +3,8 @@ # rocprofiler_activate_clang_tidy() -set(common_sources config.cpp environment.cpp demangle.cpp utility.cpp xml.cpp) +set(common_sources config.cpp environment.cpp demangle.cpp static_object.cpp utility.cpp + xml.cpp) set(common_headers config.hpp defines.hpp @@ -11,6 +12,7 @@ set(common_headers demangle.hpp mpl.hpp scope_destructor.hpp + static_object.hpp synchronized.hpp utility.hpp xml.hpp) diff --git a/projects/rocprofiler-sdk/source/lib/common/static_object.cpp b/projects/rocprofiler-sdk/source/lib/common/static_object.cpp new file mode 100644 index 0000000000..e59c169a0f --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/common/static_object.cpp @@ -0,0 +1,79 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "lib/common/static_object.hpp" + +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace common +{ +namespace +{ +auto*& +get_static_object_stack() +{ + static auto* _v = new std::stack{}; + return _v; +} +} // namespace + +void +destroy_static_objects() +{ + static auto _sync = std::mutex{}; + auto _lk = std::unique_lock{_sync}; + + auto*& _stack = get_static_object_stack(); + if(_stack) + { + while(!_stack->empty()) + { + auto& itr = _stack->top(); + if(itr) itr(); + _stack->pop(); + } + + delete _stack; + _stack = nullptr; + } +} + +void +register_static_dtor(static_dtor_func_t&& _func) +{ + static auto _sync = std::mutex{}; + auto _lk = std::unique_lock{_sync}; + + auto*& _stack = get_static_object_stack(); + if(_stack) + { + _stack->push(_func); + } +} +} // namespace common +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/common/static_object.hpp b/projects/rocprofiler-sdk/source/lib/common/static_object.hpp new file mode 100644 index 0000000000..27b8b1fa04 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/common/static_object.hpp @@ -0,0 +1,121 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "lib/common/defines.hpp" + +#include + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace common +{ +using static_dtor_func_t = void (*)(); + +void +destroy_static_objects(); + +void +register_static_dtor(static_dtor_func_t&&); + +namespace +{ +struct anonymous +{}; +} // namespace + +template +constexpr size_t +static_buffer_size() +{ + return sizeof(Tp); +} + +/** + * @brief This struct is used to create static singleton objects which have the properties of a + * heap-allocated static object without a memory leak. + * + * @tparam Tp Data type of singleton + * @tparam ContextT Use to differentiate singletons in different translation units (if using default + * parameter) or ensure the singleton can be accessed in different translation units (not + * recommended) as long as this type is not in an anonymous namespace + * + * This template works by creating a buffer of at least `sizeof(Tp)` bytes in the binary and does a + * placement new into that buffer. The object created is NOT heap allocated, the address of the + * object is an address in between the library load address and the load address + size of library. + */ +template +struct static_object +{ + static_object() = delete; + ~static_object() = delete; + static_object(const static_object&) = delete; + static_object(static_object&&) noexcept = delete; + static_object& operator=(const static_object&) = delete; + static_object& operator=(static_object&&) noexcept = delete; + + template + static Tp*& construct(Args&&... args); + + static Tp* get() { return m_object; } + +private: + static Tp* m_object; + static std::array()> m_buffer; +}; + +template +Tp* static_object::m_object = nullptr; + +template +std::array()> static_object::m_buffer = {}; + +template +template +Tp*& +static_object::construct(Args&&... args) +{ + static auto _once = std::once_flag{}; + std::call_once(_once, []() { + register_static_dtor([]() { + if(static_object::m_object) + { + static_object::m_object->~Tp(); + static_object::m_object = nullptr; + } + }); + }); + + LOG_IF(FATAL, m_object) + << "reconstructing static object. Use get() function to retrieve pointer"; + + m_object = new(m_buffer.data()) Tp{std::forward(args)...}; + return m_object; +} +} // namespace common +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.cpp index c0da352b20..a737615e9e 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.cpp @@ -25,6 +25,8 @@ #include #include "lib/common/filesystem.hpp" +#include "lib/common/scope_destructor.hpp" +#include "lib/common/static_object.hpp" #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" @@ -37,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +54,36 @@ namespace agent { namespace { +using name_array_t = std::vector>>; + +name_array_t* +get_string_array() +{ + static auto*& _v = common::static_object::construct(); + return _v; +} + +std::string* +get_string_entry(std::string_view name) +{ + auto _hash_v = std::hash{}(name); + static auto _sync = std::shared_mutex{}; + if(!get_string_array()) return nullptr; + + { + auto _unlock = common::scope_destructor{[]() { _sync.unlock_shared(); }}; + _sync.lock_shared(); + for(const auto& itr : *get_string_array()) + if(itr.first == _hash_v) return itr.second.get(); + } + + auto _unlock = common::scope_destructor{[]() { _sync.unlock(); }}; + _sync.lock(); + return get_string_array() + ->emplace_back(std::make_pair(_hash_v, std::make_unique(name))) + .second.get(); +} + struct cpu_info { long processor = -1; @@ -371,7 +404,7 @@ read_topology() agent_info.node_id = nodecount++; if(!name_prop.empty()) - agent_info.model_name = strdup(name_prop.front().c_str()); + agent_info.model_name = get_string_entry(name_prop.front())->c_str(); else agent_info.model_name = ""; @@ -449,9 +482,10 @@ read_topology() auto step = (agent_info.gfx_target_version % 100); agent_info.name = - strdup(fmt::format("gfx{}{}{:x}", major, minor, step).c_str()); - agent_info.product_name = strdup(amdgpu_get_marketing_name(device_handle)); - agent_info.vendor_name = strdup("AMD"); + get_string_entry(fmt::format("gfx{}{}{:x}", major, minor, step))->c_str(); + agent_info.product_name = + get_string_entry(amdgpu_get_marketing_name(device_handle))->c_str(); + agent_info.vendor_name = get_string_entry("AMD")->c_str(); amdgpu_gpu_info gpu_info = {}; if(amdgpu_query_gpu_info(device_handle, &gpu_info) == 0) @@ -478,13 +512,13 @@ read_topology() else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU) { agent_info.cu_count = agent_info.cpu_cores_count; - agent_info.vendor_name = strdup("CPU"); + agent_info.vendor_name = get_string_entry("CPU")->c_str(); for(const auto& itr : cpu_info_v) { if(agent_info.cpu_core_id_base == itr.apicid) { - agent_info.name = strdup(itr.model_name.c_str()); - agent_info.product_name = strdup(agent_info.name); + agent_info.name = get_string_entry(itr.model_name)->c_str(); + agent_info.product_name = get_string_entry(agent_info.name)->c_str(); agent_info.family_id = itr.family; break; } @@ -585,18 +619,9 @@ read_topology() data.emplace_back(new rocprofiler_agent_t{agent_info}, [](rocprofiler_agent_t* ptr) { if(ptr) { - auto free_cstring = [](const char*& val) { - if(val && ::strnlen(val, 1) > 0) ::free(const_cast(val)); - val = ""; - }; - delete[] ptr->mem_banks; delete[] ptr->caches; delete[] ptr->io_links; - free_cstring(ptr->name); - free_cstring(ptr->vendor_name); - free_cstring(ptr->product_name); - free_cstring(ptr->model_name); } delete ptr; }); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp index 2f190207fa..7427b061b3 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.cpp @@ -25,6 +25,7 @@ #include #include "lib/common/container/stable_vector.hpp" +#include "lib/common/static_object.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" @@ -86,10 +87,11 @@ get_active_contexts_impl() return *_v; } -auto& +auto*& get_correlation_id_map() { - static auto _v = common::Synchronized>>{}; + using data_type = std::vector>; + static auto*& _v = common::static_object>::construct(); return _v; } @@ -112,9 +114,10 @@ correlation_id* correlation_tracing_service::construct(uint32_t _init_ref_count) { auto _internal_id = get_unique_internal_id(); - auto& corr_id_map = get_correlation_id_map(); - auto& ret = corr_id_map.wlock([](auto& data) -> auto& { return data.emplace_back(); }); - ret = std::make_unique(_init_ref_count, common::get_tid(), _internal_id); + auto* corr_id_map = get_correlation_id_map(); + if(!corr_id_map) return nullptr; + auto& ret = corr_id_map->wlock([](auto& data) -> auto& { return data.emplace_back(); }); + ret = std::make_unique(_init_ref_count, common::get_tid(), _internal_id); get_latest_correlation_id_impl() = ret.get(); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/code_object.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/code_object.cpp index 2b8e84ff60..15a30724f4 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/code_object.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/code_object.cpp @@ -22,6 +22,7 @@ #include "lib/rocprofiler-sdk/hsa/code_object.hpp" #include "lib/common/scope_destructor.hpp" +#include "lib/common/static_object.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/agent.hpp" @@ -63,11 +64,35 @@ using context_t = context::context; using user_data_t = rocprofiler_user_data_t; using context_array_t = context::context_array_t; using context_user_data_map_t = std::unordered_map; +using name_array_t = std::vector>>; -template -auto -consume_args(Tp&&...) -{} +name_array_t* +get_string_array() +{ + static auto*& _v = common::static_object::construct(); + return _v; +} + +std::string* +get_string_entry(std::string_view name) +{ + auto _hash_v = std::hash{}(name); + static auto _sync = std::shared_mutex{}; + if(!get_string_array()) return nullptr; + + { + auto _unlock = common::scope_destructor{[]() { _sync.unlock_shared(); }}; + _sync.lock_shared(); + for(const auto& itr : *get_string_array()) + if(itr.first == _hash_v) return itr.second.get(); + } + + auto _unlock = common::scope_destructor{[]() { _sync.unlock(); }}; + _sync.lock(); + return get_string_array() + ->emplace_back(std::make_pair(_hash_v, std::make_unique(name))) + .second.get(); +} hsa_loader_table_t& get_loader_table() @@ -96,7 +121,7 @@ struct kernel_symbol bool beg_notified = false; bool end_notified = false; - std::string name = {}; + std::string* name = {}; hsa_executable_t hsa_executable = {}; hsa_agent_t hsa_agent = {}; hsa_executable_symbol_t hsa_symbol = {}; @@ -113,13 +138,13 @@ kernel_symbol::operator=(kernel_symbol&& rhs) noexcept { beg_notified = rhs.beg_notified; end_notified = rhs.end_notified; - name = std::move(rhs.name); + name = rhs.name; hsa_executable = rhs.hsa_executable; hsa_agent = rhs.hsa_agent; hsa_symbol = rhs.hsa_symbol; rocp_data = rhs.rocp_data; user_data = std::move(rhs.user_data); - rocp_data.kernel_name = name.c_str(); + rocp_data.kernel_name = (name) ? name->c_str() : nullptr; } return *this; @@ -148,7 +173,7 @@ struct code_object bool beg_notified = false; bool end_notified = false; - std::string uri = {}; + std::string* uri = {}; hsa_executable_t hsa_executable = {}; hsa_loaded_code_object_t hsa_code_object = {}; code_object_data_t rocp_data = common::init_public_api_struct(code_object_data_t{}); @@ -166,12 +191,12 @@ code_object::operator=(code_object&& rhs) noexcept { beg_notified = rhs.beg_notified; end_notified = rhs.end_notified; - uri = std::move(rhs.uri); + uri = rhs.uri; hsa_executable = rhs.hsa_executable; hsa_code_object = rhs.hsa_code_object; rocp_data = rhs.rocp_data; user_data = std::move(rhs.user_data); - rocp_data.uri = uri.c_str(); + rocp_data.uri = (uri) ? uri->c_str() : nullptr; symbols = std::move(rhs.symbols); } @@ -293,9 +318,9 @@ executable_iterate_agent_symbols_load_callback(hsa_executable_t executabl auto _name = std::string(_name_length + 1, '\0'); ROCP_HSA_CORE_GET_EXE_SYMBOL_INFO(HSA_EXECUTABLE_SYMBOL_INFO_NAME, _name.data()); - symbol_v.name = _name.substr(0, _name.find_first_of('\0')); + symbol_v.name = get_string_entry(_name.substr(0, _name.find_first_of('\0'))); } - data.kernel_name = symbol_v.name.c_str(); + data.kernel_name = (symbol_v.name) ? symbol_v.name->c_str() : nullptr; // these should all be self-explanatory ROCP_HSA_CORE_GET_EXE_SYMBOL_INFO(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, @@ -455,9 +480,9 @@ code_object_load_callback(hsa_executable_t executable, ROCP_HSA_VEN_LOADER_GET_CODE_OBJECT_INFO(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, _uri.data()); - code_obj_v.uri = _uri; + code_obj_v.uri = get_string_entry(_uri); } - data.uri = code_obj_v.uri.data(); + data.uri = (code_obj_v.uri) ? code_obj_v.uri->data() : nullptr; auto _hsa_agent = hsa_agent_t{}; ROCP_HSA_VEN_LOADER_GET_CODE_OBJECT_INFO(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp index 635edb7752..52cf6330d6 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/hsa.cpp @@ -28,6 +28,7 @@ #include "lib/rocprofiler-sdk/hsa/details/ostream.hpp" #include "lib/rocprofiler-sdk/hsa/types.hpp" #include "lib/rocprofiler-sdk/hsa/utils.hpp" +#include "lib/rocprofiler-sdk/registration.hpp" #include #include @@ -177,6 +178,15 @@ hsa_api_impl::functor(Args&&... args) { using info_type = hsa_api_info; + if(registration::get_fini_status() != 0) + { + auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + if constexpr(!std::is_same::value) + return _ret; + else + return HSA_STATUS_SUCCESS; + } + struct callback_context_data { const context::context* ctx = nullptr; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp index 2f371b6a2f..effef6e926 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/internal_threading.cpp @@ -25,6 +25,7 @@ #include #include "lib/common/container/stable_vector.hpp" +#include "lib/common/static_object.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/allocator.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" @@ -32,6 +33,8 @@ #include "lib/rocprofiler-sdk/internal_threading.hpp" #include "lib/rocprofiler-sdk/registration.hpp" +#include + #include #include #include @@ -162,10 +165,10 @@ execute_creation_notifiers(rocprofiler_runtime_library_t libs, // using thread_pool_vec_t = std::vector>; // using task_group_vec_t = std::vector>; -auto& +auto*& get_thread_pools() { - static auto _v = thread_pool_vec_t{}; + static auto* _v = common::static_object::construct(); return _v; } @@ -225,12 +228,13 @@ create_callback_thread() notify_pre_internal_thread_create(ROCPROFILER_LIBRARY); // this will be index after emplace_back - auto idx = get_thread_pools().size(); + auto idx = CHECK_NOTNULL(get_thread_pools())->size(); thread_pool_config_t pool_config = {}; pool_config.pool_size = 1; - auto& thr_pool = get_thread_pools().emplace_back(std::make_shared(pool_config)); + auto& thr_pool = CHECK_NOTNULL(get_thread_pools()) + ->emplace_back(std::make_shared(pool_config)); if(!get_task_groups()) get_task_groups() = new task_group_vec_t{}; @@ -272,6 +276,9 @@ rocprofiler_at_internal_thread_create(rocprofiler_internal_thread_library_cb_t p rocprofiler_status_t rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id) { + if(rocprofiler::registration::get_init_status() > 0) + return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; + rocprofiler::internal_threading::initialize(); auto cb_tid = rocprofiler::internal_threading::create_callback_thread(); @@ -284,10 +291,13 @@ rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id) return ROCPROFILER_STATUS_ERROR; } -rocprofiler_status_t ROCPROFILER_API +rocprofiler_status_t rocprofiler_assign_callback_thread(rocprofiler_buffer_id_t buffer_id, rocprofiler_callback_thread_t cb_thread_id) { + if(rocprofiler::registration::get_init_status() > 0) + return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; + if(!rocprofiler::internal_threading::get_task_groups()) return ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp index 3f53b67c7f..86e7c82545 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/registration.cpp @@ -22,8 +22,8 @@ #include "lib/rocprofiler-sdk/registration.hpp" #include "lib/common/environment.hpp" +#include "lib/common/static_object.hpp" #include "lib/rocprofiler-sdk/agent.hpp" -#include "lib/rocprofiler-sdk/allocator.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" #include "lib/rocprofiler-sdk/hsa/code_object.hpp" #include "lib/rocprofiler-sdk/hsa/hsa.hpp" @@ -86,10 +86,11 @@ invoke_client_finalizers(); // explicitly invoke the finalize function of a specific client void invoke_client_finalizer(rocprofiler_client_id_t); -auto& +auto* get_status() { - static auto _v = std::pair, std::atomic>{0, 0}; + static auto*& _v = + common::static_object, std::atomic>>::construct(0, 0); return _v; } @@ -151,8 +152,7 @@ struct client_library rocprofiler_client_id_t mutable_client_id = {}; }; -using client_library_vec_t = - std::vector>; +using client_library_vec_t = std::vector; client_library_vec_t find_clients() @@ -304,10 +304,10 @@ find_clients() return data; } -client_library_vec_t& +client_library_vec_t* get_clients() { - static auto _v = find_clients(); + static auto*& _v = common::static_object::construct(find_clients()); return _v; } @@ -332,7 +332,9 @@ invoke_client_configures() LOG(ERROR) << __FUNCTION__; - for(auto& itr : get_clients()) + if(!get_clients()) return false; + + for(auto& itr : *get_clients()) { if(!itr.configure_func) { @@ -392,7 +394,9 @@ invoke_client_initializers() LOG(ERROR) << __FUNCTION__; - for(auto& itr : get_clients()) + if(!get_clients()) return false; + + for(auto& itr : *get_clients()) { if(itr.configure_result && itr.configure_result->initialize) { @@ -420,7 +424,11 @@ invoke_client_finalizers() if(_lk.owns_lock()) return false; _lk.lock(); - for(auto& itr : get_clients()) + LOG(ERROR) << __FUNCTION__; + + if(!get_clients()) return false; + + for(auto& itr : *get_clients()) { if(itr.configure_result && itr.configure_result->finalize) { @@ -441,7 +449,11 @@ invoke_client_finalizer(rocprofiler_client_id_t client_id) if(_lk.owns_lock()) return; _lk.lock(); - for(auto& itr : get_clients()) + LOG(ERROR) << __FUNCTION__; + + if(!get_clients()) return; + + for(auto& itr : *get_clients()) { if(itr.internal_client_id.handle == client_id.handle && itr.mutable_client_id.handle == client_id.handle) @@ -499,25 +511,25 @@ get_client_offset() int get_init_status() { - return get_status().first.load(std::memory_order_acquire); + return (get_status()) ? get_status()->first.load(std::memory_order_acquire) : 1; } int get_fini_status() { - return get_status().second.load(std::memory_order_acquire); + return (get_status()) ? get_status()->second.load(std::memory_order_acquire) : 1; } void set_init_status(int v) { - get_status().first.store(v, std::memory_order_release); + if(get_status()) get_status()->first.store(v, std::memory_order_release); } void set_fini_status(int v) { - get_status().second.store(v, std::memory_order_release); + if(get_status()) get_status()->second.store(v, std::memory_order_release); } void @@ -529,7 +541,10 @@ initialize() std::call_once(_once, []() { // initialization is in process set_init_status(-1); - std::atexit(&finalize); + std::atexit([]() { + finalize(); + common::destroy_static_objects(); + }); init_logging(); invoke_client_configures(); invoke_client_initializers(); @@ -630,6 +645,8 @@ rocprofiler_set_api_table(const char* name, // pass to hip init LOG_IF(ERROR, num_tables > 1) << " rocprofiler expected HIP library to pass 1 API table, not " << num_tables; + + return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } else if(std::string_view{name} == "hsa") { @@ -667,11 +684,14 @@ rocprofiler_set_api_table(const char* name, // pass to roctx init LOG_IF(ERROR, num_tables > 1) << " rocprofiler expected ROCTX library to pass 1 API table, not " << num_tables; + + return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } else { LOG(ERROR) << "rocprofiler does not accept API tables from " << name; - LOG_ASSERT(false) << " rocprofiler does not accept API tables from " << name; + + return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; } (void) lib_version;