Add caching of category region for rocpd (#1420)
* Add caching of category region; fix VAAPI traces; remove region_with_name * Apply suggestions from code review
Bu işleme şunda yer alıyor:
işlemeyi yapan:
GitHub
ebeveyn
4a1a4aa472
işleme
48fdcebf62
@@ -9,6 +9,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
|
||||
### Added
|
||||
|
||||
- Added a `ROCPROFSYS_PERFETTO_FLUSH_PERIOD_MS` configuration setting to set the flush period for Perfetto traces. The default value is 10000 ms (10 seconds).
|
||||
- Added tracing of VAAPI, MPI, and host events to the `rocpd` output database.
|
||||
|
||||
## ROCm Systems Profiler 1.2.0 for ROCm 7.1.0
|
||||
|
||||
|
||||
@@ -96,8 +96,6 @@ buffer_storage::buffer_storage(pid_t _pid)
|
||||
}
|
||||
};
|
||||
|
||||
ROCPROFSYS_DEBUG("Starting buffered storage flushing thread for pid %d",
|
||||
static_cast<int>(_pid));
|
||||
m_created_process = _pid;
|
||||
std::mutex _shutdown_condition_mutex;
|
||||
while(m_running)
|
||||
|
||||
+6
-6
@@ -279,6 +279,11 @@ rocpd_post_processing::get_region_callback() const
|
||||
return tokens;
|
||||
};
|
||||
|
||||
if(arg_str.empty())
|
||||
{
|
||||
return args;
|
||||
}
|
||||
|
||||
auto tokens = split(arg_str, delimiter);
|
||||
|
||||
// Ensure the number of tokens is a multiple of 4
|
||||
@@ -307,12 +312,7 @@ rocpd_post_processing::get_region_callback() const
|
||||
auto thread_primary_key =
|
||||
data_processor.map_thread_id_to_primary_key(_rs.thread_id);
|
||||
|
||||
auto callback_tracing_info = m_metadata.get_callback_tracing_info();
|
||||
auto _name = std::string{ callback_tracing_info.at(
|
||||
static_cast<rocprofiler_callback_tracing_kind_t>(_rs.kind),
|
||||
static_cast<rocprofiler_tracing_operation_t>(_rs.operation)) };
|
||||
auto name_primary_key = data_processor.insert_string(_name.c_str());
|
||||
|
||||
auto name_primary_key = data_processor.insert_string(_rs.name.c_str());
|
||||
auto category_primary_key = data_processor.insert_string(_rs.category.c_str());
|
||||
|
||||
size_t stack_id = _rs.correlation_id_internal;
|
||||
|
||||
@@ -131,13 +131,12 @@ struct memory_allocate_sample : storage_parsed_type_base
|
||||
struct region_sample : storage_parsed_type_base
|
||||
{
|
||||
region_sample() = default;
|
||||
region_sample(uint64_t _thread_id, int32_t _kind, int32_t _operation,
|
||||
region_sample(uint64_t _thread_id, std::string _name,
|
||||
uint64_t _correlation_id_internal, uint64_t _correlation_id_ancestor,
|
||||
uint64_t _start_timestamp, uint64_t _end_timestamp,
|
||||
std::string _call_stack, std::string _args_str, std::string _category)
|
||||
: thread_id(_thread_id)
|
||||
, kind(_kind)
|
||||
, operation(_operation)
|
||||
, name(std::move(_name))
|
||||
, correlation_id_internal(_correlation_id_internal)
|
||||
, correlation_id_ancestor(_correlation_id_ancestor)
|
||||
, start_timestamp(_start_timestamp)
|
||||
@@ -147,10 +146,8 @@ struct region_sample : storage_parsed_type_base
|
||||
, category(std::move(_category))
|
||||
{}
|
||||
|
||||
// Identification fields
|
||||
uint64_t thread_id;
|
||||
int32_t kind;
|
||||
int32_t operation;
|
||||
uint64_t thread_id;
|
||||
std::string name;
|
||||
|
||||
// Correlation fields
|
||||
uint64_t correlation_id_internal;
|
||||
|
||||
@@ -164,8 +164,7 @@ storage_parser::consume_storage()
|
||||
case entry_type::region:
|
||||
{
|
||||
region_sample _region_sample;
|
||||
parse_data(sample.data(), _region_sample.thread_id, _region_sample.kind,
|
||||
_region_sample.operation,
|
||||
parse_data(sample.data(), _region_sample.thread_id, _region_sample.name,
|
||||
_region_sample.correlation_id_internal,
|
||||
_region_sample.correlation_id_ancestor,
|
||||
_region_sample.start_timestamp, _region_sample.end_timestamp,
|
||||
|
||||
@@ -26,11 +26,15 @@
|
||||
#include "core/defines.hpp"
|
||||
#include "core/state.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "library/causal/data.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
#include "library/tracing.hpp"
|
||||
#include "library/tracing/annotation.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <thread>
|
||||
#include <timemory/components/gotcha/backends.hpp>
|
||||
#include <timemory/hash/types.hpp>
|
||||
#include <timemory/mpl/concepts.hpp>
|
||||
@@ -38,6 +42,83 @@
|
||||
#include <timemory/utility/types.hpp>
|
||||
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
void
|
||||
cache_region(uint64_t thread_id, const std::string& name, uint64_t start_ts,
|
||||
uint64_t end_ts, const std::string& category)
|
||||
{
|
||||
constexpr size_t NO_CORRELATION_ID = 0;
|
||||
constexpr const char* CALLSTACK = "";
|
||||
constexpr const char* ARGUMENTS = "";
|
||||
rocprofsys::trace_cache::get_buffer_storage().store(
|
||||
rocprofsys::trace_cache::entry_type::region, thread_id, name.c_str(),
|
||||
NO_CORRELATION_ID, NO_CORRELATION_ID, start_ts, end_ts, CALLSTACK, ARGUMENTS,
|
||||
category.c_str());
|
||||
}
|
||||
|
||||
/// Key made of a region name and a category name. The strict weak ordering
/// below lets it be used as the key type of an ordered container.
struct entry_key
{
    std::string name;
    std::string category;

    /// Orders by name first; keys with equal names are ordered by category.
    friend bool operator<(const entry_key& lhs, const entry_key& rhs)
    {
        const int name_order = lhs.name.compare(rhs.name);
        return (name_order != 0) ? (name_order < 0) : (lhs.category < rhs.category);
    }
};
|
||||
|
||||
// Integer type used for the region start/end timestamps handled below.
using timestamp_t = uint64_t;
|
||||
|
||||
// Per-thread table of started regions: maps (name, category) to the start
// timestamp stored by cache_start() and looked up by cache_stop().
// NOTE(review): despite the name, the mapped value is a timestamp, not args.
thread_local std::map<entry_key, timestamp_t> map_name_to_args;
|
||||
|
||||
template <typename CategoryT, typename... Args>
|
||||
void
|
||||
cache_start(const char* name)
|
||||
{
|
||||
const auto start_ts =
|
||||
static_cast<timestamp_t>(rocprofsys::comp::wall_clock::record());
|
||||
map_name_to_args[{ name, rocprofsys::trait::name<CategoryT>::value }] = start_ts;
|
||||
}
|
||||
|
||||
template <typename CategoryT>
|
||||
void
|
||||
cache_stop(const char* name)
|
||||
{
|
||||
entry_key key{ name, rocprofsys::trait::name<CategoryT>::value };
|
||||
auto x = map_name_to_args.find(key);
|
||||
if(x != map_name_to_args.end())
|
||||
{
|
||||
map_name_to_args.erase(key);
|
||||
auto timestamp = x->second;
|
||||
|
||||
const auto end_ts =
|
||||
static_cast<timestamp_t>(rocprofsys::comp::wall_clock::record());
|
||||
uint64_t thread_id = 0;
|
||||
|
||||
const auto& extended_info =
|
||||
rocprofsys::thread_info::get(std::this_thread::get_id());
|
||||
if(extended_info.has_value() && extended_info->index_data.has_value())
|
||||
{
|
||||
constexpr size_t UNKNOWN_TIME = 0;
|
||||
thread_id = extended_info->index_data->system_value;
|
||||
rocprofsys::trace_cache::get_metadata_registry().add_thread_info(
|
||||
{ getppid(), getpid(), thread_id, UNKNOWN_TIME, UNKNOWN_TIME, "{}" });
|
||||
}
|
||||
|
||||
cache_region(thread_id, name, timestamp, end_ts,
|
||||
rocprofsys::trait::name<CategoryT>::value);
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
namespace tim
|
||||
{
|
||||
@@ -192,6 +273,8 @@ category_region<CategoryT>::start(std::string_view name, Args&&... args)
|
||||
tracing::push_perfetto(CategoryT{}, name.data(), std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
||||
cache_start<CategoryT>(name.data());
|
||||
}
|
||||
|
||||
template <typename CategoryT>
|
||||
@@ -257,6 +340,8 @@ category_region<CategoryT>::stop(std::string_view name, Args&&... args)
|
||||
if(get_use_causal()) causal::pop_progress_point(name);
|
||||
}
|
||||
}
|
||||
|
||||
cache_stop<CategoryT>(name.data());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
+5
-2
@@ -551,11 +551,14 @@ cache_region(const rocprofiler_callback_tracing_record_t* record,
|
||||
const std::string& args_str, const std::string& category)
|
||||
|
||||
{
|
||||
auto callback_tracing_info =
|
||||
trace_cache::get_metadata_registry().get_callback_tracing_info();
|
||||
auto _name = std::string{ callback_tracing_info.at(record->kind, record->operation) };
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::region,
|
||||
record->thread_id,
|
||||
static_cast<int32_t>(record->kind),
|
||||
static_cast<int32_t>(record->operation),
|
||||
_name.c_str(),
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
start_timestamp,
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle