// MIT License // // Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
// -----------------------------------------------------------------------------
// Overview of this translation unit
//
// Implements rocprofsys::cpu_freq::{setup, config, sample, shutdown,
// post_process} together with file-local helpers in anonymous namespaces.
//
// File-local state
//   data  - std::deque of cpu_data_tuple_t. sample() appends one tuple per call
//           and post_process() iterates over it. Tuple slot 0 is read as the
//           timestamp, slots 1-7 as the rusage/memory values and slot 8 as the
//           frequency sample (see the std::get<N> calls in post_process()).
//
// Helpers (first anonymous namespace)
//   init_perfetto_counter_tracks(type_list)
//       Calls perfetto_counter_track::init() once per type in the list via a
//       fold expression.
//   do_for_enabled_cpus(func)
//       Invokes `func(cpu)` for every element returned by
//       component::cpu_freq::get_enabled_cpus().
//   metadata_initialize_cpu_freq_category()
//       Adds one string (a trait::name value) to the metadata registry.
//   metadata_initialize_cpu_freq_tracks()
//       Adds one metadata track per enabled CPU; the track name comes from
//       trace_cache::info::annotate_with_device_id(cpu_id).
//   metadata_initialize_cpu_usage_tracks()
//       Adds seven metadata tracks, each named by a trait::name value, with no
//       optional value (std::nullopt) and "{}" as the last field.
//   metadata_initialize_cpu_freq_pmc(dev_id)
//       Registers PMC metadata for agent_type::CPU / dev_id: one "Frequency"
//       entry per enabled CPU, followed by "Memory Usage", "Virtual Memory
//       Usage", "Peak Memory", "Context Switches", "Page Faults", "User Time"
//       and "Kernel Time". EVENT_CODE, INSTANCE_ID, LONG_DESCRIPTION, COMPONENT,
//       BLOCK and EXPRESSION are placeholders (see the TODO in the body).
//       NOTE(review): the local `ni` is assigned from node_info::get_instance()
//       but is not referenced again in this function.
//   serialize_freqs(freq)
//       Builds a byte vector holding, for each enabled CPU in iteration order,
//       the CPU value copied as sizeof(size_t) bytes followed by freq.at(idx)
//       copied as sizeof(float) bytes (both via std::memcpy).
//       NOTE(review): resize(result_size) is immediately followed by
//       assign(result_size, 0); the resize looks redundant.
//       NOTE(review): `value` is declared `auto`; the memcpy assumes it is a
//       float - TODO confirm the return type of component::cpu_freq::at().
//       NOTE(review): this function calls freq.at(idx) with the enabled-CPU
//       value, whereas post_process() calls .at(_offset) with the position
//       inside the enabled-CPU container - confirm which index at() expects.
//
// Public entry points
//   setup()
//       When get_use_perfetto() is true, initializes the perfetto counter
//       tracks; always registers the category string and the usage tracks in
//       the metadata registry.
//   config()
//       Calls component::cpu_freq::configure(), registers the per-CPU frequency
//       tracks, then calls metadata_initialize_cpu_freq_pmc(agent->device_id)
//       for every agent returned by get_agents_by_type(agent_type::CPU).
//   sample()
//       Returns immediately when get_state() >= State::Finalized. Otherwise
//       takes a timestamp, a tim::rusage_cache for RUSAGE_SELF and a frequency
//       sample, stores a trace_cache::cpu_freq_sample in the buffer storage and
//       appends the same fields to `data`. User/kernel mode times are
//       multiplied by 1000 before being recorded.
//       NOTE(review): tim::get_page_rss() and tim::get_virt_mem() are evaluated
//       separately for the stored sample and for the `data` entry, so the two
//       records may not hold identical values.
//   shutdown()
//       Intentionally empty.
//   post_process()
//       Logs the number of entries in `data`. _process_cpu_rusage() always
//       runs, but every perfetto call inside it is guarded by
//       get_use_perfetto(). _process_frequencies() runs once per enabled CPU
//       and only when get_use_perfetto() is true. Both lambdas look up
//       thread_info::get(0, InternalTID); if it is missing they throw
//       std::runtime_error when get_is_continuous_integration() is true and
//       otherwise return without writing anything. Samples whose timestamp
//       fails is_valid_time() are skipped, and a final zero-valued counter is
//       written at _thread_info->get_stop(). The function ends by calling
//       clear() on the container returned by get_enabled_cpus().
//
// Helpers (second anonymous namespace)
//   config_perfetto_counter_tracks(type_list, _labels, _units)
//       static_asserts that the number of types equals N; for each type whose
//       track does not exist at index 0, emplaces a track named
//       "CPU [<label>] (S)" with the matching unit string.
//   struct index
//       Thin wrapper around a size_t, used to select the indexed overload of
//       write_perfetto_counter_track().
//   write_perfetto_counter_track(args...) / (index&&, args...)
//       Emit TRACE_COUNTER on track::at(0, 0) or track::at(_idx.value, 0).
// -----------------------------------------------------------------------------
#include "library/cpu_freq.hpp" #include "core/agent.hpp" #include "core/agent_manager.hpp" #include "core/common.hpp" #include "core/config.hpp" #include "core/node_info.hpp" #include "core/perfetto.hpp" #include "core/timemory.hpp" #include "core/trace_cache/cache_manager.hpp" #include "core/trace_cache/metadata_registry.hpp" #include "core/trace_cache/sample_type.hpp" #include "library/components/cpu_freq.hpp" #include "library/thread_info.hpp" #include #include #include #include #include #include "logger/debug.hpp" #include #include #include #include #include #include #include #include namespace rocprofsys { namespace cpu_freq { template using type_list = tim::type_list; namespace { using cpu_data_tuple_t = std::tuple; std::deque data = {}; template void init_perfetto_counter_tracks(type_list) { (perfetto_counter_track::init(), ...); } template void do_for_enabled_cpus(Func&& func) { const auto& enabled_cpus = component::cpu_freq::get_enabled_cpus(); for(const auto& cpu : enabled_cpus) { func(cpu); } } void metadata_initialize_cpu_freq_category() { trace_cache::get_metadata_registry().add_string( trait::name::value); } void metadata_initialize_cpu_freq_tracks() { do_for_enabled_cpus([&](size_t cpu_id) { trace_cache::get_metadata_registry().add_track( { trace_cache::info::annotate_with_device_id(cpu_id) .c_str(), std::nullopt, "{}" }); }); } void metadata_initialize_cpu_usage_tracks() { trace_cache::get_metadata_registry().add_track( { trait::name::value, std::nullopt, "{}" }); trace_cache::get_metadata_registry().add_track( { trait::name::value, std::nullopt, "{}" }); trace_cache::get_metadata_registry().add_track( { trait::name::value, std::nullopt, "{}" }); trace_cache::get_metadata_registry().add_track( { trait::name::value, std::nullopt, "{}" }); trace_cache::get_metadata_registry().add_track( { trait::name::value, std::nullopt, "{}" }); trace_cache::get_metadata_registry().add_track( { trait::name::value, std::nullopt, "{}" }); 
trace_cache::get_metadata_registry().add_track( { trait::name::value, std::nullopt, "{}" }); } void metadata_initialize_cpu_freq_pmc(size_t dev_id) { // TODO: Find the proper values for the following definitions size_t EVENT_CODE = 0; size_t INSTANCE_ID = 0; const char* LONG_DESCRIPTION = ""; const char* COMPONENT = ""; const char* BLOCK = ""; const char* EXPRESSION = ""; const char* MEMORY = "MB"; const char* TIME = "sec"; auto ni = node_info::get_instance(); const auto* TARGET_ARCH = "CPU"; do_for_enabled_cpus([&](size_t cpu_id) { trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, trace_cache::info::annotate_with_device_id(cpu_id) .c_str(), "Frequency", trait::name::description, LONG_DESCRIPTION, COMPONENT, component::cpu_freq::display_unit().c_str(), rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); }); trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, trait::name::value, "Memory Usage", trait::name::description, LONG_DESCRIPTION, COMPONENT, MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, trait::name::value, "Virtual Memory Usage", trait::name::description, LONG_DESCRIPTION, COMPONENT, MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, trait::name::value, "Peak Memory", trait::name::description, LONG_DESCRIPTION, COMPONENT, MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, trait::name::value, "Context Switches", trait::name::description, LONG_DESCRIPTION, COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, 
EXPRESSION, 0, 0 }); trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, trait::name::value, "Page Faults", trait::name::description, LONG_DESCRIPTION, COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, trait::name::value, "User Time", trait::name::description, LONG_DESCRIPTION, COMPONENT, TIME, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); trace_cache::get_metadata_registry().add_pmc_info( { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, trait::name::value, "Kernel Time", trait::name::description, LONG_DESCRIPTION, COMPONENT, TIME, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); } std::vector serialize_freqs(const component::cpu_freq& freq) { constexpr size_t idx_elements = sizeof(size_t) / sizeof(uint8_t); constexpr size_t value_elements = sizeof(float) / sizeof(uint8_t); std::vector result; const auto enabled_cpus_size = component::cpu_freq::get_enabled_cpus().size(); const auto result_size = enabled_cpus_size * (idx_elements + value_elements); result.resize(result_size); result.assign(result_size, 0); size_t offset = 0; do_for_enabled_cpus([&](const auto& idx) { auto value = freq.at(idx); std::memcpy(result.data() + offset, &idx, sizeof(size_t)); offset += sizeof(size_t); std::memcpy(result.data() + offset, &value, sizeof(float)); offset += sizeof(float); }); return result; } } // namespace } // namespace cpu_freq } // namespace rocprofsys namespace rocprofsys { namespace cpu_freq { void setup() { if(get_use_perfetto()) { init_perfetto_counter_tracks( type_list{}); } metadata_initialize_cpu_freq_category(); metadata_initialize_cpu_usage_tracks(); } void config() { component::cpu_freq::configure(); metadata_initialize_cpu_freq_tracks(); // `get_enabled_cpus()` returns the number of cores enabled for monitoring 
but // the actual device_id is 0, since there is a single device available. And // the agents seem to be assigned on a per-device basis, not per core. // TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU // implementation. auto cpu_agents = get_agent_manager_instance().get_agents_by_type(agent_type::CPU); for(auto& agent : cpu_agents) { metadata_initialize_cpu_freq_pmc(agent->device_id); } } void sample() { if(get_state() >= State::Finalized) return; auto _timestamp = tim::get_clock_real_now(); auto _rcache = tim::rusage_cache{ RUSAGE_SELF }; auto _freqs = component::cpu_freq{}.sample(); // user and kernel mode times are in microseconds trace_cache::get_buffer_storage().store(trace_cache::cpu_freq_sample{ _timestamp, tim::get_page_rss(), tim::get_virt_mem(), _rcache.get_peak_rss(), _rcache.get_num_priority_context_switch() + _rcache.get_num_voluntary_context_switch(), _rcache.get_num_major_page_faults() + _rcache.get_num_minor_page_faults(), _rcache.get_user_mode_time() * 1000, _rcache.get_kernel_mode_time() * 1000, serialize_freqs(_freqs) }); data.emplace_back( _timestamp, tim::get_page_rss(), tim::get_virt_mem(), _rcache.get_peak_rss(), _rcache.get_num_priority_context_switch() + _rcache.get_num_voluntary_context_switch(), _rcache.get_num_major_page_faults() + _rcache.get_num_minor_page_faults(), _rcache.get_user_mode_time() * 1000, _rcache.get_kernel_mode_time() * 1000, std::move(_freqs)); } void shutdown() {} namespace { template void config_perfetto_counter_tracks(type_list, std::array _labels, std::array _units) { static_assert(sizeof...(Types) == N, "Error! 
Number of types != number of labels/units"); auto _config = [&](auto _t) { using type = std::decay_t; using track = perfetto_counter_track; constexpr auto _idx = tim::index_of>::value; if(!track::exists(0)) { auto addendum = [&](const char* _v) { return fmt::format("CPU [{}] (S)", _v); }; track::emplace(0, addendum(_labels.at(_idx)), _units.at(_idx)); } }; (_config(Types{}), ...); } struct index { size_t value = 0; }; template void write_perfetto_counter_track(Args... _args) { using track = perfetto_counter_track; TRACE_COUNTER(trait::name::value, track::at(0, 0), _args...); } template void write_perfetto_counter_track(index&& _idx, Args... _args) { using track = perfetto_counter_track; TRACE_COUNTER(trait::name::value, track::at(_idx.value, 0), _args...); } } // namespace void post_process() { LOG_DEBUG("Post-processing {} cpu frequency and memory usage entries...", data.size()); auto& enabled_cpus = component::cpu_freq::get_enabled_cpus(); auto _process_frequencies = [](size_t _idx, size_t _offset) { using freq_track = perfetto_counter_track; const auto& _thread_info = thread_info::get(0, InternalTID); if(get_is_continuous_integration() && !_thread_info) { throw std::runtime_error("Missing thread info for thread 0"); } if(!_thread_info) return; if(!freq_track::exists(_idx)) { auto addendum = [&](const char* _v) { return fmt::format("CPU {} [{}] (S)", _v, _idx); }; freq_track::emplace(_idx, addendum("Frequency"), "MHz"); } for(auto& itr : data) { uint64_t _ts = std::get<0>(itr); double _freq = static_cast(std::get<8>(itr).at(_offset)); if(!_thread_info->is_valid_time(_ts)) continue; write_perfetto_counter_track(index{ _idx }, _ts, _freq); } auto _end_ts = _thread_info->get_stop(); write_perfetto_counter_track(index{ _idx }, _end_ts, 0); }; auto _process_cpu_rusage = []() { if(get_use_perfetto()) { config_perfetto_counter_tracks( type_list{}, { "Memory Usage", "Virtual Memory Usage", "Peak Memory", "Context Switches", "Page Faults", "User Time", "Kernel Time" }, { 
"MB", "MB", "MB", "", "", "sec", "sec" }); } const auto& _thread_info = thread_info::get(0, InternalTID); if(get_is_continuous_integration() && !_thread_info) { throw std::runtime_error("Missing thread info for thread 0"); } if(!_thread_info) return; for(auto& itr : data) { uint64_t _ts = std::get<0>(itr); if(!_thread_info->is_valid_time(_ts)) continue; double _page = std::get<1>(itr) / units::megabyte; double _virt = std::get<2>(itr) / units::megabyte; double _peak = std::get<3>(itr) / units::megabyte; uint64_t _cntx = std::get<4>(itr); uint64_t _flts = std::get<5>(itr); double _user = std::get<6>(itr) / units::sec; double _kern = std::get<7>(itr) / units::sec; if(get_use_perfetto()) { write_perfetto_counter_track(_ts, _page); write_perfetto_counter_track(_ts, _virt); write_perfetto_counter_track(_ts, _peak); write_perfetto_counter_track(_ts, _cntx); write_perfetto_counter_track(_ts, _flts); write_perfetto_counter_track(_ts, _user); write_perfetto_counter_track(_ts, _kern); } } if(get_use_perfetto()) { auto _end_ts = _thread_info->get_stop(); write_perfetto_counter_track(_end_ts, 0.0); write_perfetto_counter_track(_end_ts, 0.0); write_perfetto_counter_track(_end_ts, 0.0); write_perfetto_counter_track(_end_ts, 0); write_perfetto_counter_track(_end_ts, 0); write_perfetto_counter_track(_end_ts, 0.0); write_perfetto_counter_track(_end_ts, 0.0); } }; _process_cpu_rusage(); if(get_use_perfetto()) { for(auto itr = enabled_cpus.begin(); itr != enabled_cpus.end(); ++itr) { auto _idx = *itr; auto _offset = std::distance(enabled_cpus.begin(), itr); _process_frequencies(_idx, _offset); } } enabled_cpus.clear(); } } // namespace cpu_freq } // namespace rocprofsys