Improve static singleton memory safety (#316)

* Update GitHub links

* Update samples/api_buffered_tracing/client.cpp

- check if initialized before forcing initialization

* Add lib/common/static_object.*

- template class for creating a static allocation in the binary which has all the properties of a heap allocated singleton but does not trigger leak sanitizers

* Update include/rocprofiler-sdk/internal_threading.h

- document return values

* Update lib/rocprofiler-sdk/internal_threading.cpp

- return codes from rocprofiler_create_callback_thread and rocprofiler_assign_callback_thread
- use common::static_object for thread-pool object

* Update lib/rocprofiler-sdk/agent.cpp

- use common::static_object to store array of strings and their hashes

* Update lib/rocprofiler-sdk/hsa/code_object.cpp

- use common::static_object to store array of strings and their hashes to ensure strings exist until termination

* Update lib/rocprofiler-sdk/registration.cpp

- use common::static_object to store status and client libraries
- update return values for rocprofiler_set_api_table

* Update lib/rocprofiler-sdk/hsa/hsa.cpp

- check registration::get_fini_status() in hsa_api_impl::functor<Idx>(args...)

* Update lib/rocprofiler-sdk/context/context.cpp

- using common::static_object for correlation id map

[ROCm/rocprofiler-sdk commit: 6b374b8e68]
Этот коммит содержится в:
Jonathan R. Madsen
2023-12-19 13:47:21 -06:00
коммит произвёл GitHub
родитель 946407623f
Коммит 1e675fceb5
14 изменённых файлов: 367 добавлений и 63 удалений
+2 -2
Просмотреть файл
@@ -27,8 +27,8 @@ project(
rocprofiler
LANGUAGES C CXX
VERSION ${ROCPROFILER_VERSION}
DESCRIPTION "ROCm GPU performance analysis"
HOMEPAGE_URL "https://github.com/ROCm-Developer-Tools/rocprofiler-v2-internal")
DESCRIPTION "ROCm GPU performance analysis SDK"
HOMEPAGE_URL "https://github.com/ROCm/rocprofiler-v2-internal")
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "core")
+6 -1
Просмотреть файл
@@ -448,7 +448,12 @@ tool_fini(void* tool_data)
void
setup()
{
ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure), "force configuration");
if(int status = 0;
rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0)
{
ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure),
"force configuration");
}
}
void
+1 -1
Просмотреть файл
@@ -8,7 +8,7 @@
## Important Changes
[Roctracer](https://github.com/ROCm-Developer-Tools/roctracer) and [rocprofiler (v1)](https://github.com/ROCm-Developer-Tools/rocprofiler)
[Roctracer](https://github.com/ROCm/roctracer) and [rocprofiler (v1)](https://github.com/ROCm/rocprofiler)
have been combined into a single rocprofiler SDK and re-designed from scratch. The new rocprofiler API has been designed with some
new restrictions to avoid problems that plagued the former implementations. These restrictions enable more efficient implementations
and much better thread-safety. The most important restriction is the window for tools to inform rocprofiler about which services
+1 -1
Просмотреть файл
@@ -1,4 +1,4 @@
# Welcome to the [ROCprofiler](https://github.com/ROCm-Developer-Tools/rocprofiler-v2-internal) Documentation!
# Welcome to the [ROCprofiler](https://github.com/ROCm/rocprofiler-v2-internal) Documentation!
```eval_rst
.. toctree::
+4
Просмотреть файл
@@ -98,6 +98,8 @@ typedef struct
* @param [in] cb_thread_id User-provided pointer to a @ref rocprofiler_callback_thread_t
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_SUCCESS Successful thread creation
* @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Thread creation is no longer available
* post-initialization
* @retval ::ROCPROFILER_STATUS_ERROR Failed to create thread
*/
rocprofiler_status_t ROCPROFILER_API
@@ -114,6 +116,8 @@ rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id)
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_SUCCESS Successful assignment of the delivery thread for the given
* buffer
* @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Thread assignment is no longer available
* post-initialization
* @retval ::ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND Thread identifier did not match any of the
* threads created by rocprofiler
* @retval ::ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND Buffer identifier did not match any of the
+3 -1
Просмотреть файл
@@ -3,7 +3,8 @@
#
rocprofiler_activate_clang_tidy()
set(common_sources config.cpp environment.cpp demangle.cpp utility.cpp xml.cpp)
set(common_sources config.cpp environment.cpp demangle.cpp static_object.cpp utility.cpp
xml.cpp)
set(common_headers
config.hpp
defines.hpp
@@ -11,6 +12,7 @@ set(common_headers
demangle.hpp
mpl.hpp
scope_destructor.hpp
static_object.hpp
synchronized.hpp
utility.hpp
xml.hpp)
+79
Просмотреть файл
@@ -0,0 +1,79 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/common/static_object.hpp"
#include <array>
#include <cstddef>
#include <functional>
#include <mutex>
#include <stack>
namespace rocprofiler
{
namespace common
{
namespace
{
/// Returns a reference to the pointer holding the stack of registered teardown callbacks.
/// The stack is heap-allocated on first use; destroy_static_objects() deletes it and resets
/// the pointer to nullptr, after which registration becomes a no-op.
auto*&
get_static_object_stack()
{
    static auto* _v = new std::stack<static_dtor_func_t>{};
    return _v;
}

/// Returns the single lock guarding the callback stack. Registration and destruction must use
/// the SAME mutex, otherwise they do not exclude each other. The mutex is recursive so that a
/// teardown callback may itself register another static object on the same thread, and it is
/// intentionally never destroyed so it remains usable regardless of static destruction order.
std::recursive_mutex&
get_static_object_mutex()
{
    static auto* _v = new std::recursive_mutex{};
    return *_v;
}
}  // namespace

/// Invokes every registered teardown callback in reverse order of registration, then releases
/// the callback stack. Subsequent calls (and subsequent registrations) are no-ops.
void
destroy_static_objects()
{
    auto   _lk    = std::unique_lock<std::recursive_mutex>{get_static_object_mutex()};
    auto*& _stack = get_static_object_stack();

    // re-check the pointer on every iteration: a callback may re-enter this function
    while(_stack && !_stack->empty())
    {
        // copy and pop BEFORE invoking so that a callback which registers a new entry does not
        // cause the new entry to be popped in place of the one that was just executed
        auto _dtor = _stack->top();
        _stack->pop();
        if(_dtor) _dtor();
    }

    delete _stack;
    _stack = nullptr;
}

/// Registers a teardown callback to be run by destroy_static_objects(). Silently ignored once
/// the callback stack has been destroyed.
void
register_static_dtor(static_dtor_func_t&& _func)
{
    auto   _lk    = std::unique_lock<std::recursive_mutex>{get_static_object_mutex()};
    auto*& _stack = get_static_object_stack();
    if(_stack) _stack->push(_func);
}
} // namespace common
} // namespace rocprofiler
+121
Просмотреть файл
@@ -0,0 +1,121 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include "lib/common/defines.hpp"
#include <glog/logging.h>
#include <array>
#include <cstddef>
#include <functional>
#include <mutex>
namespace rocprofiler
{
namespace common
{
/// Teardown callback type: a plain function pointer taking no arguments and returning nothing.
using static_dtor_func_t = void (*)();
/// Runs all registered teardown callbacks, most recently registered first, and releases the
/// registry. Registrations made after this call are ignored.
void
destroy_static_objects();
/// Adds a teardown callback to the registry consumed by destroy_static_objects().
void
register_static_dtor(static_dtor_func_t&&);
namespace
{
/// Default `ContextT` tag for static_object. It is deliberately placed in an unnamed namespace:
/// each translation unit that includes this header sees its own distinct `anonymous` type, so
/// `static_object<Tp>` refers to a separate singleton in every translation unit.
struct anonymous
{};
}  // namespace

/// @brief Number of bytes of static storage reserved for a singleton of type @p Tp.
template <typename Tp>
constexpr size_t
static_buffer_size()
{
    return sizeof(Tp);
}

/**
 * @brief This struct is used to create static singleton objects which have the properties of a
 * heap-allocated static object without a memory leak.
 *
 * @tparam Tp Data type of singleton
 * @tparam ContextT Used to differentiate singletons in different translation units (if using the
 * default parameter) or to ensure the singleton can be accessed in different translation units
 * (not recommended) as long as this type is not in an anonymous namespace
 *
 * This template works by creating a buffer of at least `sizeof(Tp)` bytes, aligned for `Tp`, in
 * the binary and does a placement new into that buffer. The object created is NOT heap allocated,
 * the address of the object is an address in between the library load address and the load
 * address + size of library.
 *
 * The type is a pure namespace for static members: it can be neither instantiated, copied, nor
 * moved. `get()` returns nullptr until `construct()` has been called.
 */
template <typename Tp, typename ContextT = anonymous>
struct static_object
{
    static_object()                         = delete;
    ~static_object()                        = delete;
    static_object(const static_object&)     = delete;
    static_object(static_object&&) noexcept = delete;
    static_object& operator=(const static_object&) = delete;
    static_object& operator=(static_object&&) noexcept = delete;

    /// Constructs the singleton in the static buffer, forwarding @p args to the constructor, and
    /// returns a reference to the internal pointer.
    template <typename... Args>
    static Tp*& construct(Args&&... args);

    /// Current singleton pointer (nullptr when no object is constructed).
    static Tp* get() { return m_object; }

private:
    static Tp* m_object;
    // The raw byte array only guarantees an alignment of 1; construct() placement-news a Tp into
    // it, so the storage must be explicitly aligned for Tp to avoid undefined behavior.
    alignas(Tp) static std::array<std::byte, static_buffer_size<Tp>()> m_buffer;
};

template <typename Tp, typename ContextT>
Tp* static_object<Tp, ContextT>::m_object = nullptr;

// the alignment specifier must be repeated on the defining declaration
template <typename Tp, typename ContextT>
alignas(Tp) std::array<std::byte, static_buffer_size<Tp>()> static_object<Tp, ContextT>::m_buffer =
    {};
/// Placement-constructs the singleton inside the class-owned static buffer.
///
/// On the first call for a given <Tp, ContextT> pair, a teardown callback is registered which
/// runs the destructor of the object in place and resets the internal pointer to nullptr.
/// Calling this while an object already exists is reported through a FATAL log.
///
/// @param args Forwarded to the brace-initialization of Tp
/// @return Reference to the internal pointer to the constructed object
template <typename Tp, typename ContextT>
template <typename... Args>
Tp*&
static_object<Tp, ContextT>::construct(Args&&... args)
{
    using this_type = static_object<Tp, ContextT>;

    // hook the teardown callback exactly once per instantiation
    static auto _dtor_registered = std::once_flag{};
    std::call_once(_dtor_registered, []() {
        register_static_dtor([]() {
            auto*& _instance = this_type::m_object;
            if(!_instance) return;
            _instance->~Tp();
            _instance = nullptr;
        });
    });

    LOG_IF(FATAL, m_object)
        << "reconstructing static object. Use get() function to retrieve pointer";

    auto* _storage = m_buffer.data();
    m_object       = new(_storage) Tp{std::forward<Args>(args)...};
    return m_object;
}
} // namespace common
} // namespace rocprofiler
+41 -16
Просмотреть файл
@@ -25,6 +25,8 @@
#include <rocprofiler-sdk/rocprofiler.h>
#include "lib/common/filesystem.hpp"
#include "lib/common/scope_destructor.hpp"
#include "lib/common/static_object.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
@@ -37,6 +39,7 @@
#include <fstream>
#include <limits>
#include <regex>
#include <shared_mutex>
#include <sstream>
#include <string>
#include <type_traits>
@@ -51,6 +54,36 @@ namespace agent
{
namespace
{
using name_array_t = std::vector<std::pair<size_t, std::unique_ptr<std::string>>>;
// Accessor for the translation-unit-local table of interned strings. The function-local static
// is a reference to the pointer returned by static_object<>::construct(), so the table is
// constructed on the first call and later calls observe whatever that pointer currently holds.
name_array_t*
get_string_array()
{
    static auto*& _array = common::static_object<name_array_t>::construct();
    return _array;
}
std::string*
get_string_entry(std::string_view name)
{
auto _hash_v = std::hash<std::string_view>{}(name);
static auto _sync = std::shared_mutex{};
if(!get_string_array()) return nullptr;
{
auto _unlock = common::scope_destructor{[]() { _sync.unlock_shared(); }};
_sync.lock_shared();
for(const auto& itr : *get_string_array())
if(itr.first == _hash_v) return itr.second.get();
}
auto _unlock = common::scope_destructor{[]() { _sync.unlock(); }};
_sync.lock();
return get_string_array()
->emplace_back(std::make_pair(_hash_v, std::make_unique<std::string>(name)))
.second.get();
}
struct cpu_info
{
long processor = -1;
@@ -371,7 +404,7 @@ read_topology()
agent_info.node_id = nodecount++;
if(!name_prop.empty())
agent_info.model_name = strdup(name_prop.front().c_str());
agent_info.model_name = get_string_entry(name_prop.front())->c_str();
else
agent_info.model_name = "";
@@ -449,9 +482,10 @@ read_topology()
auto step = (agent_info.gfx_target_version % 100);
agent_info.name =
strdup(fmt::format("gfx{}{}{:x}", major, minor, step).c_str());
agent_info.product_name = strdup(amdgpu_get_marketing_name(device_handle));
agent_info.vendor_name = strdup("AMD");
get_string_entry(fmt::format("gfx{}{}{:x}", major, minor, step))->c_str();
agent_info.product_name =
get_string_entry(amdgpu_get_marketing_name(device_handle))->c_str();
agent_info.vendor_name = get_string_entry("AMD")->c_str();
amdgpu_gpu_info gpu_info = {};
if(amdgpu_query_gpu_info(device_handle, &gpu_info) == 0)
@@ -478,13 +512,13 @@ read_topology()
else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU)
{
agent_info.cu_count = agent_info.cpu_cores_count;
agent_info.vendor_name = strdup("CPU");
agent_info.vendor_name = get_string_entry("CPU")->c_str();
for(const auto& itr : cpu_info_v)
{
if(agent_info.cpu_core_id_base == itr.apicid)
{
agent_info.name = strdup(itr.model_name.c_str());
agent_info.product_name = strdup(agent_info.name);
agent_info.name = get_string_entry(itr.model_name)->c_str();
agent_info.product_name = get_string_entry(agent_info.name)->c_str();
agent_info.family_id = itr.family;
break;
}
@@ -585,18 +619,9 @@ read_topology()
data.emplace_back(new rocprofiler_agent_t{agent_info}, [](rocprofiler_agent_t* ptr) {
if(ptr)
{
auto free_cstring = [](const char*& val) {
if(val && ::strnlen(val, 1) > 0) ::free(const_cast<char*>(val));
val = "";
};
delete[] ptr->mem_banks;
delete[] ptr->caches;
delete[] ptr->io_links;
free_cstring(ptr->name);
free_cstring(ptr->vendor_name);
free_cstring(ptr->product_name);
free_cstring(ptr->model_name);
}
delete ptr;
});
+8 -5
Просмотреть файл
@@ -25,6 +25,7 @@
#include <rocprofiler-sdk/rocprofiler.h>
#include "lib/common/container/stable_vector.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
@@ -86,10 +87,11 @@ get_active_contexts_impl()
return *_v;
}
auto&
auto*&
get_correlation_id_map()
{
static auto _v = common::Synchronized<std::vector<std::unique_ptr<correlation_id>>>{};
using data_type = std::vector<std::unique_ptr<correlation_id>>;
static auto*& _v = common::static_object<common::Synchronized<data_type>>::construct();
return _v;
}
@@ -112,9 +114,10 @@ correlation_id*
correlation_tracing_service::construct(uint32_t _init_ref_count)
{
auto _internal_id = get_unique_internal_id();
auto& corr_id_map = get_correlation_id_map();
auto& ret = corr_id_map.wlock([](auto& data) -> auto& { return data.emplace_back(); });
ret = std::make_unique<correlation_id>(_init_ref_count, common::get_tid(), _internal_id);
auto* corr_id_map = get_correlation_id_map();
if(!corr_id_map) return nullptr;
auto& ret = corr_id_map->wlock([](auto& data) -> auto& { return data.emplace_back(); });
ret = std::make_unique<correlation_id>(_init_ref_count, common::get_tid(), _internal_id);
get_latest_correlation_id_impl() = ret.get();
+39 -14
Просмотреть файл
@@ -22,6 +22,7 @@
#include "lib/rocprofiler-sdk/hsa/code_object.hpp"
#include "lib/common/scope_destructor.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
@@ -63,11 +64,35 @@ using context_t = context::context;
using user_data_t = rocprofiler_user_data_t;
using context_array_t = context::context_array_t;
using context_user_data_map_t = std::unordered_map<const context_t*, user_data_t>;
using name_array_t = std::vector<std::pair<size_t, std::unique_ptr<std::string>>>;
// Accepts any number of arguments of any type and discards them; has no effect.
template <typename... ArgsT>
auto
consume_args(ArgsT&&... /*unused*/)
{
    return;
}
// Accessor for the translation-unit-local table of interned strings. The function-local static
// is a reference to the pointer returned by static_object<>::construct(), so the table is
// constructed on the first call and later calls observe whatever that pointer currently holds.
name_array_t*
get_string_array()
{
    static auto*& _array = common::static_object<name_array_t>::construct();
    return _array;
}
std::string*
get_string_entry(std::string_view name)
{
auto _hash_v = std::hash<std::string_view>{}(name);
static auto _sync = std::shared_mutex{};
if(!get_string_array()) return nullptr;
{
auto _unlock = common::scope_destructor{[]() { _sync.unlock_shared(); }};
_sync.lock_shared();
for(const auto& itr : *get_string_array())
if(itr.first == _hash_v) return itr.second.get();
}
auto _unlock = common::scope_destructor{[]() { _sync.unlock(); }};
_sync.lock();
return get_string_array()
->emplace_back(std::make_pair(_hash_v, std::make_unique<std::string>(name)))
.second.get();
}
hsa_loader_table_t&
get_loader_table()
@@ -96,7 +121,7 @@ struct kernel_symbol
bool beg_notified = false;
bool end_notified = false;
std::string name = {};
std::string* name = {};
hsa_executable_t hsa_executable = {};
hsa_agent_t hsa_agent = {};
hsa_executable_symbol_t hsa_symbol = {};
@@ -113,13 +138,13 @@ kernel_symbol::operator=(kernel_symbol&& rhs) noexcept
{
beg_notified = rhs.beg_notified;
end_notified = rhs.end_notified;
name = std::move(rhs.name);
name = rhs.name;
hsa_executable = rhs.hsa_executable;
hsa_agent = rhs.hsa_agent;
hsa_symbol = rhs.hsa_symbol;
rocp_data = rhs.rocp_data;
user_data = std::move(rhs.user_data);
rocp_data.kernel_name = name.c_str();
rocp_data.kernel_name = (name) ? name->c_str() : nullptr;
}
return *this;
@@ -148,7 +173,7 @@ struct code_object
bool beg_notified = false;
bool end_notified = false;
std::string uri = {};
std::string* uri = {};
hsa_executable_t hsa_executable = {};
hsa_loaded_code_object_t hsa_code_object = {};
code_object_data_t rocp_data = common::init_public_api_struct(code_object_data_t{});
@@ -166,12 +191,12 @@ code_object::operator=(code_object&& rhs) noexcept
{
beg_notified = rhs.beg_notified;
end_notified = rhs.end_notified;
uri = std::move(rhs.uri);
uri = rhs.uri;
hsa_executable = rhs.hsa_executable;
hsa_code_object = rhs.hsa_code_object;
rocp_data = rhs.rocp_data;
user_data = std::move(rhs.user_data);
rocp_data.uri = uri.c_str();
rocp_data.uri = (uri) ? uri->c_str() : nullptr;
symbols = std::move(rhs.symbols);
}
@@ -293,9 +318,9 @@ executable_iterate_agent_symbols_load_callback(hsa_executable_t executabl
auto _name = std::string(_name_length + 1, '\0');
ROCP_HSA_CORE_GET_EXE_SYMBOL_INFO(HSA_EXECUTABLE_SYMBOL_INFO_NAME, _name.data());
symbol_v.name = _name.substr(0, _name.find_first_of('\0'));
symbol_v.name = get_string_entry(_name.substr(0, _name.find_first_of('\0')));
}
data.kernel_name = symbol_v.name.c_str();
data.kernel_name = (symbol_v.name) ? symbol_v.name->c_str() : nullptr;
// these should all be self-explanatory
ROCP_HSA_CORE_GET_EXE_SYMBOL_INFO(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
@@ -455,9 +480,9 @@ code_object_load_callback(hsa_executable_t executable,
ROCP_HSA_VEN_LOADER_GET_CODE_OBJECT_INFO(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI,
_uri.data());
code_obj_v.uri = _uri;
code_obj_v.uri = get_string_entry(_uri);
}
data.uri = code_obj_v.uri.data();
data.uri = (code_obj_v.uri) ? code_obj_v.uri->data() : nullptr;
auto _hsa_agent = hsa_agent_t{};
ROCP_HSA_VEN_LOADER_GET_CODE_OBJECT_INFO(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT,
+10
Просмотреть файл
@@ -28,6 +28,7 @@
#include "lib/rocprofiler-sdk/hsa/details/ostream.hpp"
#include "lib/rocprofiler-sdk/hsa/types.hpp"
#include "lib/rocprofiler-sdk/hsa/utils.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include <rocprofiler-sdk/buffer.h>
#include <rocprofiler-sdk/callback_tracing.h>
@@ -177,6 +178,15 @@ hsa_api_impl<Idx>::functor(Args&&... args)
{
using info_type = hsa_api_info<Idx>;
if(registration::get_fini_status() != 0)
{
auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
if constexpr(!std::is_same<decltype(_ret), null_type>::value)
return _ret;
else
return HSA_STATUS_SUCCESS;
}
struct callback_context_data
{
const context::context* ctx = nullptr;
+15 -5
Просмотреть файл
@@ -25,6 +25,7 @@
#include <rocprofiler-sdk/rocprofiler.h>
#include "lib/common/container/stable_vector.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/allocator.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
@@ -32,6 +33,8 @@
#include "lib/rocprofiler-sdk/internal_threading.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include <glog/logging.h>
#include <cstdint>
#include <mutex>
#include <stdexcept>
@@ -162,10 +165,10 @@ execute_creation_notifiers(rocprofiler_runtime_library_t libs,
// using thread_pool_vec_t = std::vector<std::unique_ptr<thread_pool_t>>;
// using task_group_vec_t = std::vector<std::unique_ptr<task_group_t>>;
auto&
auto*&
get_thread_pools()
{
static auto _v = thread_pool_vec_t{};
static auto* _v = common::static_object<thread_pool_vec_t>::construct();
return _v;
}
@@ -225,12 +228,13 @@ create_callback_thread()
notify_pre_internal_thread_create(ROCPROFILER_LIBRARY);
// this will be index after emplace_back
auto idx = get_thread_pools().size();
auto idx = CHECK_NOTNULL(get_thread_pools())->size();
thread_pool_config_t pool_config = {};
pool_config.pool_size = 1;
auto& thr_pool = get_thread_pools().emplace_back(std::make_shared<thread_pool_t>(pool_config));
auto& thr_pool = CHECK_NOTNULL(get_thread_pools())
->emplace_back(std::make_shared<thread_pool_t>(pool_config));
if(!get_task_groups()) get_task_groups() = new task_group_vec_t{};
@@ -272,6 +276,9 @@ rocprofiler_at_internal_thread_create(rocprofiler_internal_thread_library_cb_t p
rocprofiler_status_t
rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id)
{
if(rocprofiler::registration::get_init_status() > 0)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
rocprofiler::internal_threading::initialize();
auto cb_tid = rocprofiler::internal_threading::create_callback_thread();
@@ -284,10 +291,13 @@ rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id)
return ROCPROFILER_STATUS_ERROR;
}
rocprofiler_status_t ROCPROFILER_API
rocprofiler_status_t
rocprofiler_assign_callback_thread(rocprofiler_buffer_id_t buffer_id,
rocprofiler_callback_thread_t cb_thread_id)
{
if(rocprofiler::registration::get_init_status() > 0)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
if(!rocprofiler::internal_threading::get_task_groups())
return ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND;
+37 -17
Просмотреть файл
@@ -22,8 +22,8 @@
#include "lib/rocprofiler-sdk/registration.hpp"
#include "lib/common/environment.hpp"
#include "lib/common/static_object.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/allocator.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/hsa/code_object.hpp"
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
@@ -86,10 +86,11 @@ invoke_client_finalizers();
// explicitly invoke the finalize function of a specific client
void invoke_client_finalizer(rocprofiler_client_id_t);
auto&
auto*
get_status()
{
static auto _v = std::pair<std::atomic<int>, std::atomic<int>>{0, 0};
static auto*& _v =
common::static_object<std::pair<std::atomic<int>, std::atomic<int>>>::construct(0, 0);
return _v;
}
@@ -151,8 +152,7 @@ struct client_library
rocprofiler_client_id_t mutable_client_id = {};
};
using client_library_vec_t =
std::vector<client_library, allocator::static_data_allocator<client_library>>;
using client_library_vec_t = std::vector<client_library>;
client_library_vec_t
find_clients()
@@ -304,10 +304,10 @@ find_clients()
return data;
}
client_library_vec_t&
client_library_vec_t*
get_clients()
{
static auto _v = find_clients();
static auto*& _v = common::static_object<client_library_vec_t>::construct(find_clients());
return _v;
}
@@ -332,7 +332,9 @@ invoke_client_configures()
LOG(ERROR) << __FUNCTION__;
for(auto& itr : get_clients())
if(!get_clients()) return false;
for(auto& itr : *get_clients())
{
if(!itr.configure_func)
{
@@ -392,7 +394,9 @@ invoke_client_initializers()
LOG(ERROR) << __FUNCTION__;
for(auto& itr : get_clients())
if(!get_clients()) return false;
for(auto& itr : *get_clients())
{
if(itr.configure_result && itr.configure_result->initialize)
{
@@ -420,7 +424,11 @@ invoke_client_finalizers()
if(_lk.owns_lock()) return false;
_lk.lock();
for(auto& itr : get_clients())
LOG(ERROR) << __FUNCTION__;
if(!get_clients()) return false;
for(auto& itr : *get_clients())
{
if(itr.configure_result && itr.configure_result->finalize)
{
@@ -441,7 +449,11 @@ invoke_client_finalizer(rocprofiler_client_id_t client_id)
if(_lk.owns_lock()) return;
_lk.lock();
for(auto& itr : get_clients())
LOG(ERROR) << __FUNCTION__;
if(!get_clients()) return;
for(auto& itr : *get_clients())
{
if(itr.internal_client_id.handle == client_id.handle &&
itr.mutable_client_id.handle == client_id.handle)
@@ -499,25 +511,25 @@ get_client_offset()
int
get_init_status()
{
return get_status().first.load(std::memory_order_acquire);
return (get_status()) ? get_status()->first.load(std::memory_order_acquire) : 1;
}
int
get_fini_status()
{
return get_status().second.load(std::memory_order_acquire);
return (get_status()) ? get_status()->second.load(std::memory_order_acquire) : 1;
}
void
set_init_status(int v)
{
get_status().first.store(v, std::memory_order_release);
if(get_status()) get_status()->first.store(v, std::memory_order_release);
}
void
set_fini_status(int v)
{
get_status().second.store(v, std::memory_order_release);
if(get_status()) get_status()->second.store(v, std::memory_order_release);
}
void
@@ -529,7 +541,10 @@ initialize()
std::call_once(_once, []() {
// initialization is in process
set_init_status(-1);
std::atexit(&finalize);
std::atexit([]() {
finalize();
common::destroy_static_objects();
});
init_logging();
invoke_client_configures();
invoke_client_initializers();
@@ -630,6 +645,8 @@ rocprofiler_set_api_table(const char* name,
// pass to hip init
LOG_IF(ERROR, num_tables > 1)
<< " rocprofiler expected HIP library to pass 1 API table, not " << num_tables;
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
else if(std::string_view{name} == "hsa")
{
@@ -667,11 +684,14 @@ rocprofiler_set_api_table(const char* name,
// pass to roctx init
LOG_IF(ERROR, num_tables > 1)
<< " rocprofiler expected ROCTX library to pass 1 API table, not " << num_tables;
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
else
{
LOG(ERROR) << "rocprofiler does not accept API tables from " << name;
LOG_ASSERT(false) << " rocprofiler does not accept API tables from " << name;
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
}
(void) lib_version;