Improve static singleton memory safety (#316)
* Update GitHub links
* Update samples/api_buffered_tracing/client.cpp
- check if initialized before forcing initialization
* Add lib/common/static_object.*
- template class for creating a static allocation in the binary which has all the properties of a heap allocated singleton but does not trigger leak sanitizers
* Update include/rocprofiler-sdk/internal_threading.h
- document return values
* Update lib/rocprofiler-sdk/internal_threading.cpp
- return codes from rocprofiler_create_callback_thread and rocprofiler_assign_callback_thread
- use common::static_object for thread-pool object
* Update lib/rocprofiler-sdk/agent.cpp
- use common::static_object to store array of strings and their hashes
* Update lib/rocprofiler-sdk/hsa/code_object.cpp
- use common::static_object to store array of strings and their hashes to ensure strings exist until termination
* Update lib/rocprofiler-sdk/registration.cpp
- use common::static_object to store status and client libraries
- update return values for rocprofiler_set_api_table
* Update lib/rocprofiler-sdk/hsa/hsa.cpp
- check registration::get_fini_status() in hsa_api_impl::functor<Idx>(args...)
* Update lib/rocprofiler-sdk/context/context.cpp
- using common::static_object for correlation id map
[ROCm/rocprofiler-sdk commit: 6b374b8e68]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
946407623f
Коммит
1e675fceb5
@@ -27,8 +27,8 @@ project(
|
||||
rocprofiler
|
||||
LANGUAGES C CXX
|
||||
VERSION ${ROCPROFILER_VERSION}
|
||||
DESCRIPTION "ROCm GPU performance analysis"
|
||||
HOMEPAGE_URL "https://github.com/ROCm-Developer-Tools/rocprofiler-v2-internal")
|
||||
DESCRIPTION "ROCm GPU performance analysis SDK"
|
||||
HOMEPAGE_URL "https://github.com/ROCm/rocprofiler-v2-internal")
|
||||
|
||||
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "core")
|
||||
|
||||
|
||||
@@ -448,7 +448,12 @@ tool_fini(void* tool_data)
|
||||
void
|
||||
setup()
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure), "force configuration");
|
||||
if(int status = 0;
|
||||
rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure),
|
||||
"force configuration");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
## Important Changes
|
||||
|
||||
[Roctracer](https://github.com/ROCm-Developer-Tools/roctracer) and [rocprofiler (v1)](https://github.com/ROCm-Developer-Tools/rocprofiler)
|
||||
[Roctracer](https://github.com/ROCm/roctracer) and [rocprofiler (v1)](https://github.com/ROCm/rocprofiler)
|
||||
have been combined into a single rocprofiler SDK and re-designed from scratch. The new rocprofiler API has been designed with some
|
||||
new restrictions to avoid problems that plagued the former implementations. These restrictions enable more efficient implementations
|
||||
and much better thread-safety. The most important restriction is the window for tools to inform rocprofiler about which services
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Welcome to the [ROCprofiler](https://github.com/ROCm-Developer-Tools/rocprofiler-v2-internal) Documentation!
|
||||
# Welcome to the [ROCprofiler](https://github.com/ROCm/rocprofiler-v2-internal) Documentation!
|
||||
|
||||
```eval_rst
|
||||
.. toctree::
|
||||
|
||||
@@ -98,6 +98,8 @@ typedef struct
|
||||
* @param [in] cb_thread_id User-provided pointer to a @ref rocprofiler_callback_thread_t
|
||||
* @return ::rocprofiler_status_t
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS Successful thread creation
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Thread creation is no longer available
|
||||
* post-initialization
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR Failed to create thread
|
||||
*/
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
@@ -114,6 +116,8 @@ rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id)
|
||||
* @return ::rocprofiler_status_t
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS Successful assignment of the delivery thread for the given
|
||||
* buffer
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Thread assignment is no longer available
|
||||
* post-initialization
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND Thread identifier did not match any of the
|
||||
* threads created by rocprofiler
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND Buffer identifier did not match any of the
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
#
|
||||
rocprofiler_activate_clang_tidy()
|
||||
|
||||
set(common_sources config.cpp environment.cpp demangle.cpp utility.cpp xml.cpp)
|
||||
set(common_sources config.cpp environment.cpp demangle.cpp static_object.cpp utility.cpp
|
||||
xml.cpp)
|
||||
set(common_headers
|
||||
config.hpp
|
||||
defines.hpp
|
||||
@@ -11,6 +12,7 @@ set(common_headers
|
||||
demangle.hpp
|
||||
mpl.hpp
|
||||
scope_destructor.hpp
|
||||
static_object.hpp
|
||||
synchronized.hpp
|
||||
utility.hpp
|
||||
xml.hpp)
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/common/static_object.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <stack>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
namespace
|
||||
{
|
||||
auto*&
|
||||
get_static_object_stack()
|
||||
{
|
||||
static auto* _v = new std::stack<static_dtor_func_t>{};
|
||||
return _v;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
destroy_static_objects()
|
||||
{
|
||||
static auto _sync = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_sync};
|
||||
|
||||
auto*& _stack = get_static_object_stack();
|
||||
if(_stack)
|
||||
{
|
||||
while(!_stack->empty())
|
||||
{
|
||||
auto& itr = _stack->top();
|
||||
if(itr) itr();
|
||||
_stack->pop();
|
||||
}
|
||||
|
||||
delete _stack;
|
||||
_stack = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
register_static_dtor(static_dtor_func_t&& _func)
|
||||
{
|
||||
static auto _sync = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_sync};
|
||||
|
||||
auto*& _stack = get_static_object_stack();
|
||||
if(_stack)
|
||||
{
|
||||
_stack->push(_func);
|
||||
}
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,121 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/defines.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace common
|
||||
{
|
||||
using static_dtor_func_t = void (*)();
|
||||
|
||||
void
|
||||
destroy_static_objects();
|
||||
|
||||
void
|
||||
register_static_dtor(static_dtor_func_t&&);
|
||||
|
||||
namespace
{
/// Default `ContextT` tag of static_object. It lives in an unnamed namespace on purpose so that
/// every translation unit including this header gets its own distinct tag type, and therefore
/// its own distinct static_object instantiation when the default parameter is used.
struct anonymous
{};
}  // namespace

/// Number of bytes of static storage reserved for one object of type `Tp`.
template <typename Tp>
constexpr size_t
static_buffer_size()
{
    return sizeof(Tp);
}

/**
 * @brief This struct is used to create static singleton objects which have the properties of a
 * heap-allocated static object without a memory leak.
 *
 * @tparam Tp Data type of singleton
 * @tparam ContextT Use to differentiate singletons in different translation units (if using
 * default parameter) or ensure the singleton can be accessed in different translation units (not
 * recommended) as long as this type is not in an anonymous namespace
 *
 * This template works by creating a buffer of `sizeof(Tp)` bytes, aligned for `Tp`, in the binary
 * and doing a placement new into that buffer. The object created is NOT heap allocated, the
 * address of the object is an address in between the library load address and the load address +
 * size of library.
 *
 * The struct itself can never be instantiated, copied, or moved: it only provides the static
 * functions `construct()` and `get()`.
 */
template <typename Tp, typename ContextT = anonymous>
struct static_object
{
    static_object()                         = delete;
    ~static_object()                        = delete;
    static_object(const static_object&)     = delete;
    static_object(static_object&&) noexcept = delete;
    static_object& operator=(const static_object&) = delete;
    static_object& operator=(static_object&&) noexcept = delete;

    /// Construct the singleton in the static buffer from `args` and return a reference to the
    /// pointer tracking it.
    template <typename... Args>
    static Tp*& construct(Args&&... args);

    /// Current pointer to the singleton; nullptr when no object is currently constructed.
    static Tp* get() { return m_object; }

private:
    // pointer to the object living in m_buffer (nullptr when there is none)
    static inline Tp* m_object = nullptr;

    // raw storage for the placement new. A std::byte array only has an alignment of 1, so the
    // alignment required by Tp has to be requested explicitly.
    alignas(Tp) static inline std::array<std::byte, static_buffer_size<Tp>()> m_buffer = {};
};
|
||||
|
||||
template <typename Tp, typename ContextT>
|
||||
template <typename... Args>
|
||||
Tp*&
|
||||
static_object<Tp, ContextT>::construct(Args&&... args)
|
||||
{
|
||||
static auto _once = std::once_flag{};
|
||||
std::call_once(_once, []() {
|
||||
register_static_dtor([]() {
|
||||
if(static_object<Tp, ContextT>::m_object)
|
||||
{
|
||||
static_object<Tp, ContextT>::m_object->~Tp();
|
||||
static_object<Tp, ContextT>::m_object = nullptr;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
LOG_IF(FATAL, m_object)
|
||||
<< "reconstructing static object. Use get() function to retrieve pointer";
|
||||
|
||||
m_object = new(m_buffer.data()) Tp{std::forward<Args>(args)...};
|
||||
return m_object;
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
@@ -25,6 +25,8 @@
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
|
||||
|
||||
@@ -37,6 +39,7 @@
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <regex>
|
||||
#include <shared_mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
@@ -51,6 +54,36 @@ namespace agent
|
||||
{
|
||||
namespace
|
||||
{
|
||||
using name_array_t = std::vector<std::pair<size_t, std::unique_ptr<std::string>>>;
|
||||
|
||||
/// Returns the array of (hash, string) entries constructed via common::static_object. The
/// function-local static is a reference to the pointer handed back by construct(), so the value
/// returned here follows whatever that pointer currently holds.
name_array_t*
get_string_array()
{
    static name_array_t*& _instance = common::static_object<name_array_t>::construct();
    return _instance;
}
|
||||
|
||||
std::string*
|
||||
get_string_entry(std::string_view name)
|
||||
{
|
||||
auto _hash_v = std::hash<std::string_view>{}(name);
|
||||
static auto _sync = std::shared_mutex{};
|
||||
if(!get_string_array()) return nullptr;
|
||||
|
||||
{
|
||||
auto _unlock = common::scope_destructor{[]() { _sync.unlock_shared(); }};
|
||||
_sync.lock_shared();
|
||||
for(const auto& itr : *get_string_array())
|
||||
if(itr.first == _hash_v) return itr.second.get();
|
||||
}
|
||||
|
||||
auto _unlock = common::scope_destructor{[]() { _sync.unlock(); }};
|
||||
_sync.lock();
|
||||
return get_string_array()
|
||||
->emplace_back(std::make_pair(_hash_v, std::make_unique<std::string>(name)))
|
||||
.second.get();
|
||||
}
|
||||
|
||||
struct cpu_info
|
||||
{
|
||||
long processor = -1;
|
||||
@@ -371,7 +404,7 @@ read_topology()
|
||||
agent_info.node_id = nodecount++;
|
||||
|
||||
if(!name_prop.empty())
|
||||
agent_info.model_name = strdup(name_prop.front().c_str());
|
||||
agent_info.model_name = get_string_entry(name_prop.front())->c_str();
|
||||
else
|
||||
agent_info.model_name = "";
|
||||
|
||||
@@ -449,9 +482,10 @@ read_topology()
|
||||
auto step = (agent_info.gfx_target_version % 100);
|
||||
|
||||
agent_info.name =
|
||||
strdup(fmt::format("gfx{}{}{:x}", major, minor, step).c_str());
|
||||
agent_info.product_name = strdup(amdgpu_get_marketing_name(device_handle));
|
||||
agent_info.vendor_name = strdup("AMD");
|
||||
get_string_entry(fmt::format("gfx{}{}{:x}", major, minor, step))->c_str();
|
||||
agent_info.product_name =
|
||||
get_string_entry(amdgpu_get_marketing_name(device_handle))->c_str();
|
||||
agent_info.vendor_name = get_string_entry("AMD")->c_str();
|
||||
|
||||
amdgpu_gpu_info gpu_info = {};
|
||||
if(amdgpu_query_gpu_info(device_handle, &gpu_info) == 0)
|
||||
@@ -478,13 +512,13 @@ read_topology()
|
||||
else if(agent_info.type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
{
|
||||
agent_info.cu_count = agent_info.cpu_cores_count;
|
||||
agent_info.vendor_name = strdup("CPU");
|
||||
agent_info.vendor_name = get_string_entry("CPU")->c_str();
|
||||
for(const auto& itr : cpu_info_v)
|
||||
{
|
||||
if(agent_info.cpu_core_id_base == itr.apicid)
|
||||
{
|
||||
agent_info.name = strdup(itr.model_name.c_str());
|
||||
agent_info.product_name = strdup(agent_info.name);
|
||||
agent_info.name = get_string_entry(itr.model_name)->c_str();
|
||||
agent_info.product_name = get_string_entry(agent_info.name)->c_str();
|
||||
agent_info.family_id = itr.family;
|
||||
break;
|
||||
}
|
||||
@@ -585,18 +619,9 @@ read_topology()
|
||||
data.emplace_back(new rocprofiler_agent_t{agent_info}, [](rocprofiler_agent_t* ptr) {
|
||||
if(ptr)
|
||||
{
|
||||
auto free_cstring = [](const char*& val) {
|
||||
if(val && ::strnlen(val, 1) > 0) ::free(const_cast<char*>(val));
|
||||
val = "";
|
||||
};
|
||||
|
||||
delete[] ptr->mem_banks;
|
||||
delete[] ptr->caches;
|
||||
delete[] ptr->io_links;
|
||||
free_cstring(ptr->name);
|
||||
free_cstring(ptr->vendor_name);
|
||||
free_cstring(ptr->product_name);
|
||||
free_cstring(ptr->model_name);
|
||||
}
|
||||
delete ptr;
|
||||
});
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include "lib/common/container/stable_vector.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
@@ -86,10 +87,11 @@ get_active_contexts_impl()
|
||||
return *_v;
|
||||
}
|
||||
|
||||
auto&
|
||||
auto*&
|
||||
get_correlation_id_map()
|
||||
{
|
||||
static auto _v = common::Synchronized<std::vector<std::unique_ptr<correlation_id>>>{};
|
||||
using data_type = std::vector<std::unique_ptr<correlation_id>>;
|
||||
static auto*& _v = common::static_object<common::Synchronized<data_type>>::construct();
|
||||
return _v;
|
||||
}
|
||||
|
||||
@@ -112,9 +114,10 @@ correlation_id*
|
||||
correlation_tracing_service::construct(uint32_t _init_ref_count)
|
||||
{
|
||||
auto _internal_id = get_unique_internal_id();
|
||||
auto& corr_id_map = get_correlation_id_map();
|
||||
auto& ret = corr_id_map.wlock([](auto& data) -> auto& { return data.emplace_back(); });
|
||||
ret = std::make_unique<correlation_id>(_init_ref_count, common::get_tid(), _internal_id);
|
||||
auto* corr_id_map = get_correlation_id_map();
|
||||
if(!corr_id_map) return nullptr;
|
||||
auto& ret = corr_id_map->wlock([](auto& data) -> auto& { return data.emplace_back(); });
|
||||
ret = std::make_unique<correlation_id>(_init_ref_count, common::get_tid(), _internal_id);
|
||||
|
||||
get_latest_correlation_id_impl() = ret.get();
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "lib/rocprofiler-sdk/hsa/code_object.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
@@ -63,11 +64,35 @@ using context_t = context::context;
|
||||
using user_data_t = rocprofiler_user_data_t;
|
||||
using context_array_t = context::context_array_t;
|
||||
using context_user_data_map_t = std::unordered_map<const context_t*, user_data_t>;
|
||||
using name_array_t = std::vector<std::pair<size_t, std::unique_ptr<std::string>>>;
|
||||
|
||||
/// Accepts any number of arguments of any type and does nothing with them.
template <typename... Args>
auto
consume_args(Args&&...)
{}
|
||||
/// Returns the array of (hash, string) entries constructed via common::static_object. The
/// function-local static is a reference to the pointer handed back by construct(), so the value
/// returned here follows whatever that pointer currently holds.
name_array_t*
get_string_array()
{
    static name_array_t*& _instance = common::static_object<name_array_t>::construct();
    return _instance;
}
|
||||
|
||||
std::string*
|
||||
get_string_entry(std::string_view name)
|
||||
{
|
||||
auto _hash_v = std::hash<std::string_view>{}(name);
|
||||
static auto _sync = std::shared_mutex{};
|
||||
if(!get_string_array()) return nullptr;
|
||||
|
||||
{
|
||||
auto _unlock = common::scope_destructor{[]() { _sync.unlock_shared(); }};
|
||||
_sync.lock_shared();
|
||||
for(const auto& itr : *get_string_array())
|
||||
if(itr.first == _hash_v) return itr.second.get();
|
||||
}
|
||||
|
||||
auto _unlock = common::scope_destructor{[]() { _sync.unlock(); }};
|
||||
_sync.lock();
|
||||
return get_string_array()
|
||||
->emplace_back(std::make_pair(_hash_v, std::make_unique<std::string>(name)))
|
||||
.second.get();
|
||||
}
|
||||
|
||||
hsa_loader_table_t&
|
||||
get_loader_table()
|
||||
@@ -96,7 +121,7 @@ struct kernel_symbol
|
||||
|
||||
bool beg_notified = false;
|
||||
bool end_notified = false;
|
||||
std::string name = {};
|
||||
std::string* name = {};
|
||||
hsa_executable_t hsa_executable = {};
|
||||
hsa_agent_t hsa_agent = {};
|
||||
hsa_executable_symbol_t hsa_symbol = {};
|
||||
@@ -113,13 +138,13 @@ kernel_symbol::operator=(kernel_symbol&& rhs) noexcept
|
||||
{
|
||||
beg_notified = rhs.beg_notified;
|
||||
end_notified = rhs.end_notified;
|
||||
name = std::move(rhs.name);
|
||||
name = rhs.name;
|
||||
hsa_executable = rhs.hsa_executable;
|
||||
hsa_agent = rhs.hsa_agent;
|
||||
hsa_symbol = rhs.hsa_symbol;
|
||||
rocp_data = rhs.rocp_data;
|
||||
user_data = std::move(rhs.user_data);
|
||||
rocp_data.kernel_name = name.c_str();
|
||||
rocp_data.kernel_name = (name) ? name->c_str() : nullptr;
|
||||
}
|
||||
|
||||
return *this;
|
||||
@@ -148,7 +173,7 @@ struct code_object
|
||||
|
||||
bool beg_notified = false;
|
||||
bool end_notified = false;
|
||||
std::string uri = {};
|
||||
std::string* uri = {};
|
||||
hsa_executable_t hsa_executable = {};
|
||||
hsa_loaded_code_object_t hsa_code_object = {};
|
||||
code_object_data_t rocp_data = common::init_public_api_struct(code_object_data_t{});
|
||||
@@ -166,12 +191,12 @@ code_object::operator=(code_object&& rhs) noexcept
|
||||
{
|
||||
beg_notified = rhs.beg_notified;
|
||||
end_notified = rhs.end_notified;
|
||||
uri = std::move(rhs.uri);
|
||||
uri = rhs.uri;
|
||||
hsa_executable = rhs.hsa_executable;
|
||||
hsa_code_object = rhs.hsa_code_object;
|
||||
rocp_data = rhs.rocp_data;
|
||||
user_data = std::move(rhs.user_data);
|
||||
rocp_data.uri = uri.c_str();
|
||||
rocp_data.uri = (uri) ? uri->c_str() : nullptr;
|
||||
symbols = std::move(rhs.symbols);
|
||||
}
|
||||
|
||||
@@ -293,9 +318,9 @@ executable_iterate_agent_symbols_load_callback(hsa_executable_t executabl
|
||||
auto _name = std::string(_name_length + 1, '\0');
|
||||
ROCP_HSA_CORE_GET_EXE_SYMBOL_INFO(HSA_EXECUTABLE_SYMBOL_INFO_NAME, _name.data());
|
||||
|
||||
symbol_v.name = _name.substr(0, _name.find_first_of('\0'));
|
||||
symbol_v.name = get_string_entry(_name.substr(0, _name.find_first_of('\0')));
|
||||
}
|
||||
data.kernel_name = symbol_v.name.c_str();
|
||||
data.kernel_name = (symbol_v.name) ? symbol_v.name->c_str() : nullptr;
|
||||
|
||||
// these should all be self-explanatory
|
||||
ROCP_HSA_CORE_GET_EXE_SYMBOL_INFO(HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
@@ -455,9 +480,9 @@ code_object_load_callback(hsa_executable_t executable,
|
||||
ROCP_HSA_VEN_LOADER_GET_CODE_OBJECT_INFO(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI,
|
||||
_uri.data());
|
||||
|
||||
code_obj_v.uri = _uri;
|
||||
code_obj_v.uri = get_string_entry(_uri);
|
||||
}
|
||||
data.uri = code_obj_v.uri.data();
|
||||
data.uri = (code_obj_v.uri) ? code_obj_v.uri->data() : nullptr;
|
||||
|
||||
auto _hsa_agent = hsa_agent_t{};
|
||||
ROCP_HSA_VEN_LOADER_GET_CODE_OBJECT_INFO(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_AGENT,
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "lib/rocprofiler-sdk/hsa/details/ostream.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/types.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/utils.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/buffer.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
@@ -177,6 +178,15 @@ hsa_api_impl<Idx>::functor(Args&&... args)
|
||||
{
|
||||
using info_type = hsa_api_info<Idx>;
|
||||
|
||||
if(registration::get_fini_status() != 0)
|
||||
{
|
||||
auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
|
||||
if constexpr(!std::is_same<decltype(_ret), null_type>::value)
|
||||
return _ret;
|
||||
else
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
struct callback_context_data
|
||||
{
|
||||
const context::context* ctx = nullptr;
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include "lib/common/container/stable_vector.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk/allocator.hpp"
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
@@ -32,6 +33,8 @@
|
||||
#include "lib/rocprofiler-sdk/internal_threading.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
@@ -162,10 +165,10 @@ execute_creation_notifiers(rocprofiler_runtime_library_t libs,
|
||||
// using thread_pool_vec_t = std::vector<std::unique_ptr<thread_pool_t>>;
|
||||
// using task_group_vec_t = std::vector<std::unique_ptr<task_group_t>>;
|
||||
|
||||
auto&
|
||||
auto*&
|
||||
get_thread_pools()
|
||||
{
|
||||
static auto _v = thread_pool_vec_t{};
|
||||
static auto* _v = common::static_object<thread_pool_vec_t>::construct();
|
||||
return _v;
|
||||
}
|
||||
|
||||
@@ -225,12 +228,13 @@ create_callback_thread()
|
||||
notify_pre_internal_thread_create(ROCPROFILER_LIBRARY);
|
||||
|
||||
// this will be index after emplace_back
|
||||
auto idx = get_thread_pools().size();
|
||||
auto idx = CHECK_NOTNULL(get_thread_pools())->size();
|
||||
|
||||
thread_pool_config_t pool_config = {};
|
||||
pool_config.pool_size = 1;
|
||||
|
||||
auto& thr_pool = get_thread_pools().emplace_back(std::make_shared<thread_pool_t>(pool_config));
|
||||
auto& thr_pool = CHECK_NOTNULL(get_thread_pools())
|
||||
->emplace_back(std::make_shared<thread_pool_t>(pool_config));
|
||||
|
||||
if(!get_task_groups()) get_task_groups() = new task_group_vec_t{};
|
||||
|
||||
@@ -272,6 +276,9 @@ rocprofiler_at_internal_thread_create(rocprofiler_internal_thread_library_cb_t p
|
||||
rocprofiler_status_t
|
||||
rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id)
|
||||
{
|
||||
if(rocprofiler::registration::get_init_status() > 0)
|
||||
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
|
||||
|
||||
rocprofiler::internal_threading::initialize();
|
||||
|
||||
auto cb_tid = rocprofiler::internal_threading::create_callback_thread();
|
||||
@@ -284,10 +291,13 @@ rocprofiler_create_callback_thread(rocprofiler_callback_thread_t* cb_thread_id)
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_status_t
|
||||
rocprofiler_assign_callback_thread(rocprofiler_buffer_id_t buffer_id,
|
||||
rocprofiler_callback_thread_t cb_thread_id)
|
||||
{
|
||||
if(rocprofiler::registration::get_init_status() > 0)
|
||||
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
|
||||
|
||||
if(!rocprofiler::internal_threading::get_task_groups())
|
||||
return ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND;
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@
|
||||
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/allocator.hpp"
|
||||
#include "lib/rocprofiler-sdk/context/context.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/hsa.hpp"
|
||||
@@ -86,10 +86,11 @@ invoke_client_finalizers();
|
||||
// explicitly invoke the finalize function of a specific client
|
||||
void invoke_client_finalizer(rocprofiler_client_id_t);
|
||||
|
||||
auto&
|
||||
auto*
|
||||
get_status()
|
||||
{
|
||||
static auto _v = std::pair<std::atomic<int>, std::atomic<int>>{0, 0};
|
||||
static auto*& _v =
|
||||
common::static_object<std::pair<std::atomic<int>, std::atomic<int>>>::construct(0, 0);
|
||||
return _v;
|
||||
}
|
||||
|
||||
@@ -151,8 +152,7 @@ struct client_library
|
||||
rocprofiler_client_id_t mutable_client_id = {};
|
||||
};
|
||||
|
||||
using client_library_vec_t =
|
||||
std::vector<client_library, allocator::static_data_allocator<client_library>>;
|
||||
using client_library_vec_t = std::vector<client_library>;
|
||||
|
||||
client_library_vec_t
|
||||
find_clients()
|
||||
@@ -304,10 +304,10 @@ find_clients()
|
||||
return data;
|
||||
}
|
||||
|
||||
client_library_vec_t&
|
||||
client_library_vec_t*
|
||||
get_clients()
|
||||
{
|
||||
static auto _v = find_clients();
|
||||
static auto*& _v = common::static_object<client_library_vec_t>::construct(find_clients());
|
||||
return _v;
|
||||
}
|
||||
|
||||
@@ -332,7 +332,9 @@ invoke_client_configures()
|
||||
|
||||
LOG(ERROR) << __FUNCTION__;
|
||||
|
||||
for(auto& itr : get_clients())
|
||||
if(!get_clients()) return false;
|
||||
|
||||
for(auto& itr : *get_clients())
|
||||
{
|
||||
if(!itr.configure_func)
|
||||
{
|
||||
@@ -392,7 +394,9 @@ invoke_client_initializers()
|
||||
|
||||
LOG(ERROR) << __FUNCTION__;
|
||||
|
||||
for(auto& itr : get_clients())
|
||||
if(!get_clients()) return false;
|
||||
|
||||
for(auto& itr : *get_clients())
|
||||
{
|
||||
if(itr.configure_result && itr.configure_result->initialize)
|
||||
{
|
||||
@@ -420,7 +424,11 @@ invoke_client_finalizers()
|
||||
if(_lk.owns_lock()) return false;
|
||||
_lk.lock();
|
||||
|
||||
for(auto& itr : get_clients())
|
||||
LOG(ERROR) << __FUNCTION__;
|
||||
|
||||
if(!get_clients()) return false;
|
||||
|
||||
for(auto& itr : *get_clients())
|
||||
{
|
||||
if(itr.configure_result && itr.configure_result->finalize)
|
||||
{
|
||||
@@ -441,7 +449,11 @@ invoke_client_finalizer(rocprofiler_client_id_t client_id)
|
||||
if(_lk.owns_lock()) return;
|
||||
_lk.lock();
|
||||
|
||||
for(auto& itr : get_clients())
|
||||
LOG(ERROR) << __FUNCTION__;
|
||||
|
||||
if(!get_clients()) return;
|
||||
|
||||
for(auto& itr : *get_clients())
|
||||
{
|
||||
if(itr.internal_client_id.handle == client_id.handle &&
|
||||
itr.mutable_client_id.handle == client_id.handle)
|
||||
@@ -499,25 +511,25 @@ get_client_offset()
|
||||
int
|
||||
get_init_status()
|
||||
{
|
||||
return get_status().first.load(std::memory_order_acquire);
|
||||
return (get_status()) ? get_status()->first.load(std::memory_order_acquire) : 1;
|
||||
}
|
||||
|
||||
int
|
||||
get_fini_status()
|
||||
{
|
||||
return get_status().second.load(std::memory_order_acquire);
|
||||
return (get_status()) ? get_status()->second.load(std::memory_order_acquire) : 1;
|
||||
}
|
||||
|
||||
void
|
||||
set_init_status(int v)
|
||||
{
|
||||
get_status().first.store(v, std::memory_order_release);
|
||||
if(get_status()) get_status()->first.store(v, std::memory_order_release);
|
||||
}
|
||||
|
||||
void
|
||||
set_fini_status(int v)
|
||||
{
|
||||
get_status().second.store(v, std::memory_order_release);
|
||||
if(get_status()) get_status()->second.store(v, std::memory_order_release);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -529,7 +541,10 @@ initialize()
|
||||
std::call_once(_once, []() {
|
||||
// initialization is in process
|
||||
set_init_status(-1);
|
||||
std::atexit(&finalize);
|
||||
std::atexit([]() {
|
||||
finalize();
|
||||
common::destroy_static_objects();
|
||||
});
|
||||
init_logging();
|
||||
invoke_client_configures();
|
||||
invoke_client_initializers();
|
||||
@@ -630,6 +645,8 @@ rocprofiler_set_api_table(const char* name,
|
||||
// pass to hip init
|
||||
LOG_IF(ERROR, num_tables > 1)
|
||||
<< " rocprofiler expected HIP library to pass 1 API table, not " << num_tables;
|
||||
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
else if(std::string_view{name} == "hsa")
|
||||
{
|
||||
@@ -667,11 +684,14 @@ rocprofiler_set_api_table(const char* name,
|
||||
// pass to roctx init
|
||||
LOG_IF(ERROR, num_tables > 1)
|
||||
<< " rocprofiler expected ROCTX library to pass 1 API table, not " << num_tables;
|
||||
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(ERROR) << "rocprofiler does not accept API tables from " << name;
|
||||
LOG_ASSERT(false) << " rocprofiler does not accept API tables from " << name;
|
||||
|
||||
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
(void) lib_version;
|
||||
|
||||
Ссылка в новой задаче
Block a user