// MIT License // // Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
// ---------------------------------------------------------------------------
// backtrace_metrics: includes, type aliases, per-thread accessors, label/description
//
// NOTE(review): in this copy of the file the text between angle brackets is
// missing (bare `#include` directives, `thread_data, category::sampling>`,
// `unique_ptr_t&`, ...). The include targets and template arguments must be
// restored from version control before this translation unit can compile;
// the notes below describe only what is still visible.
//
// Aliases
//   hw_counters                       - the `hw_counters` member type of backtrace_metrics.
//   signal_type_instances,
//   backtrace_metrics_init_instances,
//   sampler_running_instances,
//   papi_vector_instances,
//   papi_label_instances              - `thread_data` instantiations; the two whose
//                                       argument list is partly visible use the
//                                       `category::sampling` tag. Value types are not
//                                       visible here.
//
// File-local helpers (anonymous namespace)
//   perfetto_rusage                   - empty struct; presumably a tag type for the
//                                       rusage counter tracks (its uses are not
//                                       visible in this copy - confirm).
//   get_papi_labels(_tid)             - returns a reference to the smart pointer
//                                       obtained from
//                                       papi_label_instances::instance(construct_on_thread{_tid}).
//   get_papi_vector(_tid)             - same pattern, for papi_vector_instances.
//   get_backtrace_metrics_init(_tid)  - same pattern, for backtrace_metrics_init_instances.
//   get_sampler_running(_tid)         - same pattern, for sampler_running_instances, with
//                                       an extra `false` argument (presumably the
//                                       initial value of the flag - confirm against
//                                       thread_data::instance).
//
// Public static members
//   backtrace_metrics::label()        - returns "backtrace_metrics".
//   backtrace_metrics::description()  - returns "Records sampling data".
// ---------------------------------------------------------------------------
#include "library/components/backtrace_metrics.hpp" #include "core/common.hpp" #include "core/components/fwd.hpp" #include "core/config.hpp" #include "core/perfetto.hpp" #include "core/trace_cache/cache_manager.hpp" #include "core/trace_cache/cacheable.hpp" #include "core/trace_cache/metadata_registry.hpp" #include "library/components/ensure_storage.hpp" #include "library/ptl.hpp" #include "library/runtime.hpp" #include "library/thread_info.hpp" #include "library/tracing.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "logger/debug.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include namespace tracing { using namespace ::rocprofsys::tracing; } namespace rocprofsys { namespace component { using hw_counters = typename backtrace_metrics::hw_counters; using signal_type_instances = thread_data, category::sampling>; using backtrace_metrics_init_instances = thread_data; using sampler_running_instances = thread_data; using papi_vector_instances = thread_data; using papi_label_instances = thread_data, category::sampling>; namespace { struct perfetto_rusage {}; unique_ptr_t>& get_papi_labels(int64_t _tid) { return papi_label_instances::instance(construct_on_thread{ _tid }); } unique_ptr_t& get_papi_vector(int64_t _tid) { return papi_vector_instances::instance(construct_on_thread{ _tid }); } unique_ptr_t& get_backtrace_metrics_init(int64_t _tid) { return backtrace_metrics_init_instances::instance(construct_on_thread{ _tid }); } unique_ptr_t& get_sampler_running(int64_t _tid) { return sampler_running_instances::instance(construct_on_thread{ _tid }, false); } } // namespace std::string backtrace_metrics::label() { return "backtrace_metrics"; } std::string backtrace_metrics::description() { return "Records sampling data"; } 
// ---------------------------------------------------------------------------
// backtrace_metrics: sampling, perfetto output and trace-cache output
//
// NOTE(review): this copy of the file has lost the text between angle brackets
// (`template void`, `trait::name::value`, `perfetto_counter_track::emplace`,
// `tim::type_list`, ...), so template parameters and arguments referred to
// below are not visible and are described only as far as the remaining code
// shows. Restore them from version control before compiling.
//
// Definitions, in order of appearance:
//
// backtrace_metrics::get_hw_counter_labels(_tid)
//   Returns a copy of the label list held by get_papi_labels(_tid), or an
//   empty vector when that pointer is null.
//
// backtrace_metrics::start() / stop()
//   Empty bodies.
//
// get_enabled(type_list)                                   [file-local]
//   Builds a std::bitset with one bit per type in the pack; bit i is set from
//   trait::runtime_enabled::get() for the i-th type (fold expression over the
//   pack with a running index).
//
// metadata_init_categories()                               [file-local]
//   On the first call, adds five `trait::name` strings to the trace-cache
//   metadata registry; later calls return immediately.
//   NOTE(review): the guard is a plain function-local `static bool`, not an
//   atomic or a once-flag - confirm callers cannot race here.
//
// apply_for_all_thread_names(_tid, _apply)                 [file-local]
//   Invokes _apply with every track name belonging to the template category.
//   In the `if constexpr(std::is_same_v...)` branch it iterates the labels
//   from get_papi_labels(_tid) and passes "Thread <desc> [<tid>] (S)", where
//   <desc> is tim::papi::get_event_info(label).short_descr, falling back to
//   the label itself when that is empty; in CI mode a still-empty description
//   throws std::runtime_error. Otherwise it passes a single name,
//   "<trait::name value> [<tid>]".
//   NOTE(review): get_papi_labels(_tid) is dereferenced without a null check
//   here (get_hw_counter_labels does check) - confirm it cannot be null.
//
// metadata_init_tracks(_tid)                               [file-local]
//   Looks up thread_info::get(_tid, SequentTID), registers a thread-info
//   record (getppid(), getpid(), the system thread id, start and stop
//   timestamps, "{}") and adds one track per name produced by
//   apply_for_all_thread_names.
//   NOTE(review): the thread-info result is dereferenced unconditionally,
//   whereas fini_perfetto treats a null result of the same call as possible.
//
// metadata_initialize_backtrace_metrics_pmc(dev_id, _units, _tid)  [file-local]
//   Adds one pmc_info record per track name: agent_type::CPU, target arch
//   "CPU", event code 0, instance id 0, empty long description / component /
//   block / expression strings, the given units, trace_cache::ABSOLUTE.
//
// cache_backtrace_metrics_events(device_id, timestamp_ns, value, _tid)  [file-local]
//   Stores one trace_cache::pmc_event_with_sample per track name in the
//   trace-cache buffer storage. In the `if constexpr(std::is_same_v...)`
//   branch `value` is treated as an indexable container and the i-th track
//   name receives element i, skipping names whose index is past its size();
//   otherwise every track name receives `value` itself.
//   NOTE(review): `_tid_name` is formatted but not used in the visible code.
//
// backtrace_metrics::sample(int)
//   If not every bit of get_enabled(type_list{}) is set, clears m_valid and
//   returns. Otherwise m_valid = get_enabled(categories_t{}); returns early
//   when no bit is set. Then reads a tim::rusage_cache for RUSAGE_THREAD:
//     m_cpu      = tim::get_clock_thread_now()
//     m_mem_peak = peak RSS
//     m_ctx_swch = priority + voluntary context switches
//     m_page_flt = major + minor page faults
//   Under the `if constexpr(tim::trait::is_available...)` guard, and when
//   both tested bits of m_valid are set, m_hw_counter is filled from
//   get_papi_vector(tid)->record() (pointer checked only by assert).
//
// backtrace_metrics::configure(_setup, _tid)
//   Always invokes ensure_storage{}(). With _setup == true and the per-thread
//   running flag unset or false: calls get_debug_sampling() up front (per the
//   in-code note, so the later query in the sampler does not allocate),
//   asserts it runs on thread _tid and, under the availability guard,
//   initialises the perfetto counter track, starts the papi vector if present
//   and copies its config labels into the per-thread label list. With
//   _setup == false and the flag true: clears the flag and, only when called
//   on thread _tid itself, stops the papi vector.
//   NOTE(review): the setup branch never sets the running flag in the visible
//   code - presumably it is set elsewhere; confirm.
//
// backtrace_metrics::init_perfetto(_tid, _valid)
//   When no counter track exists yet for _tid, emplaces one track per category
//   enabled in _valid, in this order: "Thread CPU time [tid] (S)" (unit "sec"),
//   "Thread Peak Memory Usage [tid] (S)" (unit "MB"),
//   "Thread Context Switches [tid] (S)", "Thread Page Faults [tid] (S)".
//   Then, under a second exists()/validity check, emplaces one
//   "Thread <desc> [tid] (S)" track per hardware-counter label, using the same
//   description fallback and CI-mode exception as apply_for_all_thread_names.
//
// backtrace_metrics::fini_perfetto(_tid, _valid)
//   Emits a zero-valued TRACE_COUNTER at the thread's stop timestamp on every
//   track enabled in _valid. Missing thread info throws std::runtime_error in
//   CI mode and is otherwise a silent return.
//
// backtrace_metrics::init_cache(_tid, _valid)
//   Calls metadata_init_categories(), then for each category enabled in _valid
//   calls metadata_init_tracks and metadata_initialize_backtrace_metrics_pmc
//   with device id 0 and units "sec", "MB", "", "", "" respectively.
//
// backtrace_metrics::operator-=(_rhs)
//   Subtracts _rhs's m_cpu, m_mem_peak, m_ctx_swch, m_page_flt and, element by
//   element, m_hw_counter from *this, each only when the matching category is
//   enabled on *this. Returns *this.
//
// backtrace_metrics::post_process_perfetto(_tid, _ts)
//   Emits TRACE_COUNTER samples at _ts: m_cpu / units::sec,
//   m_mem_peak / units::megabyte, m_ctx_swch, m_page_flt, then each
//   m_hw_counter element. The rusage tracks are addressed by a running index
//   that advances only for enabled categories.
//   NOTE(review): that index matches init_perfetto's emplace order only if the
//   categories enabled on *this equal the `_valid` set used at init - confirm.
//
// backtrace_metrics::cache_backtrace_data(_tid, _ts)
//   Compiled to a no-op unless ROCPROFSYS_USE_ROCM > 0. Otherwise forwards the
//   same five values (same unit conversions as post_process_perfetto) to
//   cache_backtrace_metrics_events with device id 0.
//
// Trailing macros
//   Three ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT(data_tracker..., true, double)
//   instantiations and TIMEMORY_INITIALIZE_STORAGE for backtrace_metrics.
// ---------------------------------------------------------------------------
std::vector backtrace_metrics::get_hw_counter_labels(int64_t _tid) { auto& _v = get_papi_labels(_tid); return (_v) ? *_v : std::vector{}; } void backtrace_metrics::start() {} void backtrace_metrics::stop() {} namespace { template auto get_enabled(tim::type_list) { constexpr size_t N = sizeof...(Tp); auto _v = std::bitset{}; size_t _n = 0; (_v.set(_n++, trait::runtime_enabled::get()), ...); return _v; } void metadata_init_categories() { static bool _is_initialized = false; if(_is_initialized) return; trace_cache::get_metadata_registry().add_string( trait::name::value); trace_cache::get_metadata_registry().add_string( trait::name::value); trace_cache::get_metadata_registry().add_string( trait::name::value); trace_cache::get_metadata_registry().add_string( trait::name::value); trace_cache::get_metadata_registry().add_string( trait::name::value); _is_initialized = true; } template void apply_for_all_thread_names(int64_t _tid, std::function _apply) { if constexpr(std::is_same_v) { auto _hw_cnt_labels = *get_papi_labels(_tid); for(auto& itr : _hw_cnt_labels) { std::string _desc = tim::papi::get_event_info(itr).short_descr; if(_desc.empty()) _desc = itr; if(get_is_continuous_integration() && _desc.empty()) { throw std::runtime_error( fmt::format("Empty description for {}", itr.c_str())); } std::stringstream track_name_ss; track_name_ss << "Thread " << _desc << " [" << _tid << "] (S)"; _apply(track_name_ss.str()); } } else { std::stringstream track_name_ss; track_name_ss << trait::name::value << " [" << _tid << "]"; _apply(track_name_ss.str()); } } template void metadata_init_tracks(int64_t _tid) { const auto& t_info = thread_info::get(_tid, SequentTID); auto thread_id = static_cast(t_info->index_data->system_value); trace_cache::get_metadata_registry().add_thread_info( { getppid(), getpid(), thread_id, static_cast(t_info->get_start()), static_cast(t_info->get_stop()), "{}" }); apply_for_all_thread_names(_tid, [&](const std::string& _track_name) { 
trace_cache::get_metadata_registry().add_track({ _track_name, thread_id, "{}" }); }); } template void metadata_initialize_backtrace_metrics_pmc(size_t dev_id, const char* _units, int64_t _tid) { constexpr size_t EVENT_CODE = 0; constexpr size_t INSTANCE_ID = 0; const char* LONG_DESCRIPTION = ""; const char* COMPONENT = ""; const char* BLOCK = ""; const char* EXPRESSION = ""; const char* TARGET_ARCH = "CPU"; apply_for_all_thread_names(_tid, [&](const std::string& _track_name) { trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, _track_name, trait::name::value, trait::name::description, LONG_DESCRIPTION, COMPONENT, _units, trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0, "{}" }); }); } template void cache_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp_ns, Value value, int64_t _tid) { auto _tid_name = fmt::format("[{}]", _tid); size_t stack_id = 0; size_t parent_stack_id = 0; size_t correlation_id = 0; const auto* event_metadata = ""; const auto* call_stack = ""; const auto* line_info = ""; auto insert_event_and_sample = [&](const char* _track_name, double _value) { trace_cache::get_buffer_storage().store(trace_cache::pmc_event_with_sample{ static_cast(category_enum_id::value), _track_name, timestamp_ns, event_metadata, stack_id, parent_stack_id, correlation_id, call_stack, line_info, device_id, static_cast(agent_type::CPU), _track_name, _value }); }; if constexpr(std::is_same_v) { const auto& hw_counters = static_cast(value); size_t idx = 0; apply_for_all_thread_names(_tid, [&](const std::string& _track_name) { if(idx < hw_counters.size()) { insert_event_and_sample(_track_name.c_str(), hw_counters.at(idx)); } ++idx; }); } else { apply_for_all_thread_names(_tid, [&](const std::string& _track_name) { insert_event_and_sample(_track_name.c_str(), value); }); } } } // namespace void backtrace_metrics::sample(int) { if(!get_enabled(type_list{}).all()) { m_valid.reset(); return; } m_valid 
= get_enabled(categories_t{}); // return if everything is disabled if(!m_valid.any()) return; auto _cache = tim::rusage_cache{ RUSAGE_THREAD }; m_cpu = tim::get_clock_thread_now(); m_mem_peak = _cache.get_peak_rss(); m_ctx_swch = _cache.get_num_priority_context_switch() + _cache.get_num_voluntary_context_switch(); m_page_flt = _cache.get_num_major_page_faults() + _cache.get_num_minor_page_faults(); if constexpr(tim::trait::is_available::value) { constexpr auto hw_counters_idx = tim::index_of::value; constexpr auto hw_category_idx = tim::index_of::value; auto _tid = threading::get_id(); if(m_valid.test(hw_category_idx) && m_valid.test(hw_counters_idx)) { assert(get_papi_vector(_tid).get() != nullptr); m_hw_counter = get_papi_vector(_tid)->record(); } } } void backtrace_metrics::configure(bool _setup, int64_t _tid) { auto& _running = get_sampler_running(_tid); bool _is_running = (!_running) ? false : *_running; ensure_storage{}(); if(_setup && !_is_running) { (void) get_debug_sampling(); // make sure query in sampler does not allocate assert(_tid == threading::get_id()); if constexpr(tim::trait::is_available::value) { perfetto_counter_track::init(); LOG_DEBUG("HW COUNTER: starting..."); if(get_papi_vector(_tid)) { get_papi_vector(_tid)->start(); *get_papi_labels(_tid) = get_papi_vector(_tid)->get_config()->labels; } } } else if(!_setup && _is_running) { LOG_DEBUG("Destroying sampler for thread {}...", _tid); *_running = false; if constexpr(tim::trait::is_available::value) { if(_tid == threading::get_id()) { if(get_papi_vector(_tid)) get_papi_vector(_tid)->stop(); LOG_DEBUG("HW COUNTER: stopped..."); } } LOG_DEBUG("Sampler destroyed for thread {}...", _tid); } } void backtrace_metrics::init_perfetto(int64_t _tid, valid_array_t _valid) { auto _hw_cnt_labels = *get_papi_labels(_tid); auto _tid_name = fmt::format("[{}]", _tid); if(!perfetto_counter_track::exists(_tid)) { if(get_valid(category::thread_cpu_time{}, _valid)) perfetto_counter_track::emplace( _tid, 
fmt::format("Thread CPU time {} (S)", _tid_name), "sec"); if(get_valid(category::thread_peak_memory{}, _valid)) perfetto_counter_track::emplace( _tid, fmt::format("Thread Peak Memory Usage {} (S)", _tid_name), "MB"); if(get_valid(category::thread_context_switch{}, _valid)) perfetto_counter_track::emplace( _tid, fmt::format("Thread Context Switches {} (S)", _tid_name)); if(get_valid(category::thread_page_fault{}, _valid)) perfetto_counter_track::emplace( _tid, fmt::format("Thread Page Faults {} (S)", _tid_name)); } if(!perfetto_counter_track::exists(_tid) && get_valid(type_list{}, _valid) && get_valid(category::thread_hardware_counter{}, _valid)) { for(auto& itr : _hw_cnt_labels) { std::string _desc = tim::papi::get_event_info(itr).short_descr; if(_desc.empty()) _desc = itr; if(get_is_continuous_integration() && _desc.empty()) { throw std::runtime_error( fmt::format("Empty description for {}", itr.c_str())); } perfetto_counter_track::emplace( _tid, fmt::format("Thread {} {} (S)", _desc, _tid_name)); } } } void backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid) { auto _hw_cnt_labels = *get_papi_labels(_tid); const auto& _thread_info = thread_info::get(_tid, SequentTID); if(get_is_continuous_integration() && !_thread_info) { throw std::runtime_error( fmt::format("Error! 
missing thread info for tid={}", _tid)); } if(!_thread_info) return; uint64_t _ts = _thread_info->get_stop(); uint64_t _rusage_idx = 0; if(get_valid(category::thread_cpu_time{}, _valid)) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, _rusage_idx++), _ts, 0); } if(get_valid(category::thread_peak_memory{}, _valid)) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, _rusage_idx++), _ts, 0); } if(get_valid(category::thread_context_switch{}, _valid)) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, _rusage_idx++), _ts, 0); } if(get_valid(category::thread_page_fault{}, _valid)) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, _rusage_idx++), _ts, 0); } if(get_valid(type_list{}, _valid) && get_valid(category::thread_hardware_counter{}, _valid)) { for(size_t i = 0; i < perfetto_counter_track::size(_tid); ++i) { if(i < _hw_cnt_labels.size()) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, i), _ts, 0.0); } } } } void backtrace_metrics::init_cache(int64_t _tid, valid_array_t _valid) { metadata_init_categories(); if(get_valid(category::thread_cpu_time{}, _valid)) { metadata_init_tracks(_tid); metadata_initialize_backtrace_metrics_pmc(0, "sec", _tid); } if(get_valid(category::thread_peak_memory{}, _valid)) { metadata_init_tracks(_tid); metadata_initialize_backtrace_metrics_pmc(0, "MB", _tid); } if(get_valid(category::thread_context_switch{}, _valid)) { metadata_init_tracks(_tid); metadata_initialize_backtrace_metrics_pmc(0, "", _tid); } if(get_valid(category::thread_page_fault{}, _valid)) { metadata_init_tracks(_tid); metadata_initialize_backtrace_metrics_pmc(0, "", _tid); } if(get_valid(type_list{}, _valid) && get_valid(category::thread_hardware_counter{}, _valid)) { metadata_init_tracks(_tid); metadata_initialize_backtrace_metrics_pmc( 0, "", _tid); } } backtrace_metrics& backtrace_metrics::operator-=(const backtrace_metrics& _rhs) { auto& _lhs = *this; 
if(_lhs(category::thread_cpu_time{})) { _lhs.m_cpu -= _rhs.m_cpu; } if(_lhs(category::thread_peak_memory{})) { _lhs.m_mem_peak -= _rhs.m_mem_peak; } if(_lhs(category::thread_context_switch{})) { _lhs.m_ctx_swch -= _rhs.m_ctx_swch; } if(_lhs(category::thread_page_fault{})) { _lhs.m_page_flt -= _rhs.m_page_flt; } if(_lhs(type_list{}) && _lhs(category::thread_hardware_counter{})) { for(size_t i = 0; i < _lhs.m_hw_counter.size(); ++i) _lhs.m_hw_counter.at(i) -= _rhs.m_hw_counter.at(i); } return _lhs; } void backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const { uint64_t _rusage_idx = 0; if((*this)(category::thread_cpu_time{})) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, _rusage_idx++), _ts, m_cpu / units::sec); } if((*this)(category::thread_peak_memory{})) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, _rusage_idx++), _ts, m_mem_peak / units::megabyte); } if((*this)(category::thread_context_switch{})) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, _rusage_idx++), _ts, m_ctx_swch); } if((*this)(category::thread_page_fault{})) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, _rusage_idx++), _ts, m_page_flt); } if((*this)(type_list{}) && (*this)(category::thread_hardware_counter{})) { for(size_t i = 0; i < perfetto_counter_track::size(_tid); ++i) { if(i < m_hw_counter.size()) { TRACE_COUNTER(trait::name::value, perfetto_counter_track::at(_tid, i), _ts, m_hw_counter.at(i)); } } } } void backtrace_metrics::cache_backtrace_data([[maybe_unused]] int64_t _tid, [[maybe_unused]] uint64_t _ts) const { #if ROCPROFSYS_USE_ROCM > 0 auto is_category_enabled = [&](const auto& _category) { return (*this)(_category); }; if(is_category_enabled(category::thread_cpu_time{})) { cache_backtrace_metrics_events( 0, _ts, m_cpu / units::sec, _tid); } if(is_category_enabled(category::thread_peak_memory{})) { cache_backtrace_metrics_events( 0, _ts, m_mem_peak / units::megabyte, _tid); } 
if(is_category_enabled(category::thread_context_switch{})) { cache_backtrace_metrics_events( 0, _ts, m_ctx_swch, _tid); } if(is_category_enabled(category::thread_page_fault{})) { cache_backtrace_metrics_events( 0, _ts, m_page_flt, _tid); } if(is_category_enabled(type_list{}) && is_category_enabled(category::thread_hardware_counter{})) { cache_backtrace_metrics_events(0, _ts, m_hw_counter, _tid); } #endif } } // namespace component } // namespace rocprofsys ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( TIMEMORY_ESC(data_tracker), true, double) ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( TIMEMORY_ESC(data_tracker), true, double) ROCPROFSYS_INSTANTIATE_EXTERN_COMPONENT( TIMEMORY_ESC(data_tracker), true, double) TIMEMORY_INITIALIZE_STORAGE(rocprofsys::component::backtrace_metrics)