Add caching of category region for rocpd (#1420)

* Add caching of category region

Fix vaapi traces

Remove region_with_name

* Applied suggestions from code review
Bu işleme şunda yer alıyor:
Milan Radosavljevic
2025-10-20 22:05:14 +02:00
işlemeyi yapan: GitHub
ebeveyn 4a1a4aa472
işleme 48fdcebf62
7 değiştirilmiş dosya ile 102 ekleme ve 19 silme
+1
Dosyayı Görüntüle
@@ -9,6 +9,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
### Added
- Added a `ROCPROFSYS_PERFETTO_FLUSH_PERIOD_MS` configuration setting to set the flush period for Perfetto traces. The default value is 10000 ms (10 seconds).
- Added tracing of VAAPI, MPI, and host events inside the `rocpd` output database.
## ROCm Systems Profiler 1.2.0 for ROCm 7.1.0
-2
Dosyayı Görüntüle
@@ -96,8 +96,6 @@ buffer_storage::buffer_storage(pid_t _pid)
}
};
ROCPROFSYS_DEBUG("Starting buffered storage flushing thread for pid %d",
static_cast<int>(_pid));
m_created_process = _pid;
std::mutex _shutdown_condition_mutex;
while(m_running)
+6 -6
Dosyayı Görüntüle
@@ -279,6 +279,11 @@ rocpd_post_processing::get_region_callback() const
return tokens;
};
if(arg_str.empty())
{
return args;
}
auto tokens = split(arg_str, delimiter);
// Ensure the number of tokens is a multiple of 4
@@ -307,12 +312,7 @@ rocpd_post_processing::get_region_callback() const
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_rs.thread_id);
auto callback_tracing_info = m_metadata.get_callback_tracing_info();
auto _name = std::string{ callback_tracing_info.at(
static_cast<rocprofiler_callback_tracing_kind_t>(_rs.kind),
static_cast<rocprofiler_tracing_operation_t>(_rs.operation)) };
auto name_primary_key = data_processor.insert_string(_name.c_str());
auto name_primary_key = data_processor.insert_string(_rs.name.c_str());
auto category_primary_key = data_processor.insert_string(_rs.category.c_str());
size_t stack_id = _rs.correlation_id_internal;
+4 -7
Dosyayı Görüntüle
@@ -131,13 +131,12 @@ struct memory_allocate_sample : storage_parsed_type_base
struct region_sample : storage_parsed_type_base
{
region_sample() = default;
region_sample(uint64_t _thread_id, int32_t _kind, int32_t _operation,
region_sample(uint64_t _thread_id, std::string _name,
uint64_t _correlation_id_internal, uint64_t _correlation_id_ancestor,
uint64_t _start_timestamp, uint64_t _end_timestamp,
std::string _call_stack, std::string _args_str, std::string _category)
: thread_id(_thread_id)
, kind(_kind)
, operation(_operation)
, name(std::move(_name))
, correlation_id_internal(_correlation_id_internal)
, correlation_id_ancestor(_correlation_id_ancestor)
, start_timestamp(_start_timestamp)
@@ -147,10 +146,8 @@ struct region_sample : storage_parsed_type_base
, category(std::move(_category))
{}
// Identification fields
uint64_t thread_id;
int32_t kind;
int32_t operation;
uint64_t thread_id;
std::string name;
// Correlation fields
uint64_t correlation_id_internal;
+1 -2
Dosyayı Görüntüle
@@ -164,8 +164,7 @@ storage_parser::consume_storage()
case entry_type::region:
{
region_sample _region_sample;
parse_data(sample.data(), _region_sample.thread_id, _region_sample.kind,
_region_sample.operation,
parse_data(sample.data(), _region_sample.thread_id, _region_sample.name,
_region_sample.correlation_id_internal,
_region_sample.correlation_id_ancestor,
_region_sample.start_timestamp, _region_sample.end_timestamp,
@@ -26,11 +26,15 @@
#include "core/defines.hpp"
#include "core/state.hpp"
#include "core/timemory.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "library/causal/data.hpp"
#include "library/runtime.hpp"
#include "library/thread_info.hpp"
#include "library/tracing.hpp"
#include "library/tracing/annotation.hpp"
#include <map>
#include <thread>
#include <timemory/components/gotcha/backends.hpp>
#include <timemory/hash/types.hpp>
#include <timemory/mpl/concepts.hpp>
@@ -38,6 +42,83 @@
#include <timemory/utility/types.hpp>
#include <string_view>
#include <utility>
namespace
{
void
cache_region(uint64_t thread_id, const std::string& name, uint64_t start_ts,
uint64_t end_ts, const std::string& category)
{
constexpr size_t NO_CORRELATION_ID = 0;
constexpr const char* CALLSTACK = "";
constexpr const char* ARGUMENTS = "";
rocprofsys::trace_cache::get_buffer_storage().store(
rocprofsys::trace_cache::entry_type::region, thread_id, name.c_str(),
NO_CORRELATION_ID, NO_CORRELATION_ID, start_ts, end_ts, CALLSTACK, ARGUMENTS,
category.c_str());
}
/// Key identifying a region by its name and category.
///
/// Ordered lexicographically: first by `name`, then by `category`, so that it
/// can be used as the key type of an ordered container.
struct entry_key
{
    std::string name;
    std::string category;

    /// Strict weak ordering: compares `name` first and falls back to
    /// `category` only when both names are equal.
    friend bool operator<(const entry_key& lhs, const entry_key& rhs)
    {
        const int name_order = lhs.name.compare(rhs.name);
        return (name_order != 0) ? (name_order < 0) : (lhs.category < rhs.category);
    }
};
// Integer type used for the start/end timestamps handled in this file.
using timestamp_t = uint64_t;
// Per-thread map from (region name, category) to the start timestamp recorded
// by cache_start(); cache_stop() looks the entry up and removes it.
// NOTE: despite the identifier, the mapped value is a timestamp, not arguments.
thread_local std::map<entry_key, timestamp_t> map_name_to_args;
template <typename CategoryT, typename... Args>
void
cache_start(const char* name)
{
const auto start_ts =
static_cast<timestamp_t>(rocprofsys::comp::wall_clock::record());
map_name_to_args[{ name, rocprofsys::trait::name<CategoryT>::value }] = start_ts;
}
template <typename CategoryT>
void
cache_stop(const char* name)
{
entry_key key{ name, rocprofsys::trait::name<CategoryT>::value };
auto x = map_name_to_args.find(key);
if(x != map_name_to_args.end())
{
map_name_to_args.erase(key);
auto timestamp = x->second;
const auto end_ts =
static_cast<timestamp_t>(rocprofsys::comp::wall_clock::record());
uint64_t thread_id = 0;
const auto& extended_info =
rocprofsys::thread_info::get(std::this_thread::get_id());
if(extended_info.has_value() && extended_info->index_data.has_value())
{
constexpr size_t UNKNOWN_TIME = 0;
thread_id = extended_info->index_data->system_value;
rocprofsys::trace_cache::get_metadata_registry().add_thread_info(
{ getppid(), getpid(), thread_id, UNKNOWN_TIME, UNKNOWN_TIME, "{}" });
}
cache_region(thread_id, name, timestamp, end_ts,
rocprofsys::trait::name<CategoryT>::value);
}
}
} // namespace
namespace tim
{
@@ -192,6 +273,8 @@ category_region<CategoryT>::start(std::string_view name, Args&&... args)
tracing::push_perfetto(CategoryT{}, name.data(), std::forward<Args>(args)...);
}
}
cache_start<CategoryT>(name.data());
}
template <typename CategoryT>
@@ -257,6 +340,8 @@ category_region<CategoryT>::stop(std::string_view name, Args&&... args)
if(get_use_causal()) causal::pop_progress_point(name);
}
}
cache_stop<CategoryT>(name.data());
}
else
{
+5 -2
Dosyayı Görüntüle
@@ -551,11 +551,14 @@ cache_region(const rocprofiler_callback_tracing_record_t* record,
const std::string& args_str, const std::string& category)
{
auto callback_tracing_info =
trace_cache::get_metadata_registry().get_callback_tracing_info();
auto _name = std::string{ callback_tracing_info.at(record->kind, record->operation) };
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::region,
record->thread_id,
static_cast<int32_t>(record->kind),
static_cast<int32_t>(record->operation),
_name.c_str(),
record->correlation_id.internal,
get_parent_stack_id(record->correlation_id),
start_timestamp,