// MIT License // // Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All Rights Reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
// -----------------------------------------------------------------------------------
// rocprofsys::cpu_freq -- periodic sampling of CPU frequencies and process resource
// usage, and conversion of the collected samples into perfetto counter tracks.
//
// NOTE(review): this copy of the file appears to have lost its line breaks and the
// text that was enclosed in angle brackets (system include targets, template
// parameter lists, template arguments -- e.g. "template using type_list",
// "std::tuple;", "type_list{}", "static_cast(...)"). Restore those from version
// control before compiling; the notes below describe only what is still visible.
//
// File-local state (anonymous namespace)
//   cpu_data_tuple_t : tuple type of one sample (element types not visible here).
//   data             : std::deque that sample() appends to and post_process() reads.
//   init_perfetto_counter_tracks(type_list)
//                    : fold expression calling perfetto_counter_track::init() once
//                      per type in the list.
//
// Public functions
//   setup()    : calls init_perfetto_counter_tracks with a type_list.
//   config()   : forwards to component::cpu_freq::configure().
//   sample()   : takes a timestamp from tim::get_clock_real_now(), builds a
//                tim::rusage_cache for RUSAGE_SELF, samples component::cpu_freq, and
//                appends one tuple to `data` holding, in order:
//                  0 timestamp, 1 tim::get_page_rss(), 2 tim::get_virt_mem(),
//                  3 peak RSS, 4 priority + voluntary context switches,
//                  5 major + minor page faults, 6 user-mode time * 1000,
//                  7 kernel-mode time * 1000, 8 the frequency sample (moved in).
//                The existing inline comment states the rusage times are in
//                microseconds; the * 1000 presumably converts them to nanoseconds
//                (TODO confirm against tim::rusage_cache).
//   shutdown() : intentionally empty.
//
// File-local helpers (second anonymous namespace)
//   config_perfetto_counter_tracks(type_list, _labels, _units)
//              : static_asserts that the number of types equals N, then for each
//                type creates the track at index 0 -- only if track::exists(0) is
//                false -- named "CPU <label> (S)" (joined with spaces) with the
//                matching unit string.
//   struct index
//              : tag wrapper around a size_t; selects the overload below that writes
//                to a specific track index.
//   write_perfetto_counter_track(Args...)
//              : emits TRACE_COUNTER on track::at(0, 0).
//   write_perfetto_counter_track(index&&, Args...)
//              : emits TRACE_COUNTER on track::at(_idx.value, 0).
//
// post_process()
//   1. Logs the number of collected entries at verbosity 1.
//   2. _process_cpu_rusage: configures seven tracks (labels "Memory Usage",
//      "Virtual Memory Usage", "Peak Memory", "Context Switches", "Page Faults",
//      "User Time", "Kernel Time"; units "MB" x3, "" x2, "sec" x2), looks up
//      thread_info::get(0, InternalTID) and returns early if it is missing (after
//      ROCPROFSYS_CI_THROW). Samples whose timestamp fails is_valid_time() are
//      skipped. Memory values are divided by units::megabyte and times by
//      units::sec before being written. A final zero-valued point is written to
//      every track at _thread_info->get_stop().
//   3. For each entry of component::cpu_freq::get_enabled_cpus(),
//      _process_frequencies(_idx, _offset) is called where _idx is the stored value
//      and _offset is its position in the container; _offset indexes tuple element
//      8 via .at(). A track "CPU Frequency [<_idx>] (S)" with unit "MHz" is created
//      if absent, valid samples are written, and a final zero is written at
//      get_stop().
//   4. Clears the enabled-CPU container. `data` itself is not cleared here.
// -----------------------------------------------------------------------------------
#include "library/cpu_freq.hpp" #include "core/common.hpp" #include "core/components/fwd.hpp" #include "core/config.hpp" #include "core/debug.hpp" #include "core/defines.hpp" #include "core/perfetto.hpp" #include "core/timemory.hpp" #include "library/components/cpu_freq.hpp" #include "library/thread_data.hpp" #include "library/thread_info.hpp" #include #include #include #include #include #include #include #include #include #include #include #include namespace rocprofsys { namespace cpu_freq { template using type_list = tim::type_list; namespace { using cpu_data_tuple_t = std::tuple; std::deque data = {}; template void init_perfetto_counter_tracks(type_list) { (perfetto_counter_track::init(), ...); } } // namespace } // namespace cpu_freq } // namespace rocprofsys namespace rocprofsys { namespace cpu_freq { void setup() { init_perfetto_counter_tracks( type_list{}); } void config() { component::cpu_freq::configure(); } void sample() { auto _ts = tim::get_clock_real_now(); auto _rcache = tim::rusage_cache{ RUSAGE_SELF }; auto _freqs = component::cpu_freq{}.sample(); // user and kernel mode times are in microseconds data.emplace_back( _ts, tim::get_page_rss(), tim::get_virt_mem(), _rcache.get_peak_rss(), _rcache.get_num_priority_context_switch() + _rcache.get_num_voluntary_context_switch(), _rcache.get_num_major_page_faults() + _rcache.get_num_minor_page_faults(), _rcache.get_user_mode_time() * 1000, _rcache.get_kernel_mode_time() * 1000, std::move(_freqs)); } void shutdown() {} namespace { template void config_perfetto_counter_tracks(type_list, std::array _labels, std::array _units) { static_assert(sizeof...(Types) == N, "Error! 
Number of types != number of labels/units"); auto _config = [&](auto _t) { using type = std::decay_t; using track = perfetto_counter_track; constexpr auto _idx = tim::index_of>::value; if(!track::exists(0)) { auto addendum = [&](const char* _v) { return JOIN(" ", "CPU", _v, "(S)"); }; track::emplace(0, addendum(_labels.at(_idx)), _units.at(_idx)); } }; (_config(Types{}), ...); } struct index { size_t value = 0; }; template void write_perfetto_counter_track(Args... _args) { using track = perfetto_counter_track; TRACE_COUNTER(trait::name::value, track::at(0, 0), _args...); } template void write_perfetto_counter_track(index&& _idx, Args... _args) { using track = perfetto_counter_track; TRACE_COUNTER(trait::name::value, track::at(_idx.value, 0), _args...); } } // namespace void post_process() { ROCPROFSYS_VERBOSE(1, "Post-processing %zu cpu frequency and memory usage entries...\n", data.size()); auto _process_frequencies = [](size_t _idx, size_t _offset) { using freq_track = perfetto_counter_track; const auto& _thread_info = thread_info::get(0, InternalTID); ROCPROFSYS_CI_THROW(!_thread_info, "Missing thread info for thread 0"); if(!_thread_info) return; if(!freq_track::exists(_idx)) { auto addendum = [&](const char* _v) { return JOIN(" ", "CPU", _v, JOIN("", '[', _idx, ']'), "(S)"); }; freq_track::emplace(_idx, addendum("Frequency"), "MHz"); } for(auto& itr : data) { uint64_t _ts = std::get<0>(itr); double _freq = static_cast(std::get<8>(itr).at(_offset)); if(!_thread_info->is_valid_time(_ts)) continue; write_perfetto_counter_track(index{ _idx }, _ts, _freq); } auto _end_ts = _thread_info->get_stop(); write_perfetto_counter_track(index{ _idx }, _end_ts, 0); }; auto _process_cpu_rusage = []() { config_perfetto_counter_tracks( type_list{}, { "Memory Usage", "Virtual Memory Usage", "Peak Memory", "Context Switches", "Page Faults", "User Time", "Kernel Time" }, { "MB", "MB", "MB", "", "", "sec", "sec" }); const auto& _thread_info = thread_info::get(0, InternalTID); 
ROCPROFSYS_CI_THROW(!_thread_info, "Missing thread info for thread 0"); if(!_thread_info) return; for(auto& itr : data) { uint64_t _ts = std::get<0>(itr); if(!_thread_info->is_valid_time(_ts)) continue; double _page = std::get<1>(itr); double _virt = std::get<2>(itr); double _peak = std::get<3>(itr); uint64_t _cntx = std::get<4>(itr); uint64_t _flts = std::get<5>(itr); double _user = std::get<6>(itr); double _kern = std::get<7>(itr); write_perfetto_counter_track(_ts, _page / units::megabyte); write_perfetto_counter_track(_ts, _virt / units::megabyte); write_perfetto_counter_track(_ts, _peak / units::megabyte); write_perfetto_counter_track(_ts, _cntx); write_perfetto_counter_track(_ts, _flts); write_perfetto_counter_track( _ts, _user / units::sec); write_perfetto_counter_track( _ts, _kern / units::sec); } auto _end_ts = _thread_info->get_stop(); write_perfetto_counter_track(_end_ts, 0.0); write_perfetto_counter_track(_end_ts, 0.0); write_perfetto_counter_track(_end_ts, 0.0); write_perfetto_counter_track(_end_ts, 0); write_perfetto_counter_track(_end_ts, 0); write_perfetto_counter_track(_end_ts, 0.0); write_perfetto_counter_track(_end_ts, 0.0); }; _process_cpu_rusage(); auto& enabled_cpu_freqs = component::cpu_freq::get_enabled_cpus(); for(auto itr = enabled_cpu_freqs.begin(); itr != enabled_cpu_freqs.end(); ++itr) { auto _idx = *itr; auto _offset = std::distance(enabled_cpu_freqs.begin(), itr); _process_frequencies(_idx, _offset); } enabled_cpu_freqs.clear(); } } // namespace cpu_freq } // namespace rocprofsys