// MIT License // // Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. #include "library/causal/experiment.hpp" #include "binary/analysis.hpp" #include "binary/dwarf_entry.hpp" #include "binary/symbol.hpp" #include "common/defines.h" #include "core/config.hpp" #include "core/demangler.hpp" #include "core/state.hpp" #include "library/causal/components/backtrace.hpp" #include "library/causal/components/progress_point.hpp" #include "library/causal/data.hpp" #include "library/causal/delay.hpp" #include "library/causal/sample_data.hpp" #include "library/thread_data.hpp" #include "library/thread_info.hpp" #include "library/tracing.hpp" #include #include #include #include #include #include #include #include #include #include "logger/debug.hpp" #include #include #include #include #include #include namespace rocprofsys { namespace causal { namespace { using backtrace_causal = rocprofsys::causal::component::backtrace; namespace cereal = ::tim::cereal; auto current_experiment_value = experiment{}; auto current_selected_count = std::atomic{ 0 }; auto current_experiment = std::atomic{ nullptr }; auto experiment_history = std::vector{}; int64_t global_scaling = 1; int64_t global_scaling_increments = 0; bool use_exp_speedup_scaling = get_env("ROCPROFSYS_CAUSAL_SCALE_EXPERIMENT_TIME_BY_SPEEDUP", false); } // namespace experiment::sample::sample(const base_type& _b, uint64_t _c) : base_type{ _b } , count{ _c } { if(lineinfo) { for(const auto& itr : lineinfo.lines) { if(itr.inlined) inlines.emplace_back( binary::inlined_symbol{ itr.line, itr.location, itr.name }); } } } bool experiment::sample::operator==(const sample& _v) const { return base_type::operator==(_v); } bool experiment::sample::operator<(const sample& _v) const { return base_type::operator<(_v); } const auto& experiment::sample::operator+=(const sample& _v) const { if(*this == _v && this != &_v) count += _v.count; return *this; } template void experiment::sample::serialize(ArchiveT& ar, const unsigned) { namespace cereal = ::tim::cereal; using cereal::make_nvp; ar(cereal::make_nvp("count", count)); if constexpr(concepts::is_output_archive::value) { ar(cereal::make_nvp("location", get_identifier())); } ar.setNextName("info"); ar.startNode(); ar(make_nvp("address", address), make_nvp("line", lineno), make_nvp("file", location), make_nvp("func", name)); if constexpr(concepts::is_output_archive::value) { ar(cereal::make_nvp("dfunc", rocprofsys::utility::demangle(name)), cereal::make_nvp("dwarf_info", std::vector{})); } ar(cereal::make_nvp("inlines", inlines)); ar.finishNode(); ar(cereal::make_nvp("dlinfo", info)); } std::string experiment::sample::get_identifier() const { return (lineno > 0 && !location.empty()) ? fmt::format("{}:{}", location, lineno) : rocprofsys::utility::demangle(name); } template void experiment::record::serialize(ArchiveT& ar, const unsigned) { namespace cereal = ::tim::cereal; ar(cereal::make_nvp("startup_time", startup), cereal::make_nvp("experiments", experiments), cereal::make_nvp("runtime", runtime)); auto _samples = std::vector{}; if constexpr(concepts::is_input_archive::value) { ar(cereal::make_nvp("samples", _samples)); for(auto& itr : _samples) samples.emplace_back(std::move(itr)); } else { ar(cereal::make_nvp("samples", samples)); } } template void experiment::serialize(ArchiveT& ar, const unsigned) { namespace cereal = ::tim::cereal; ar(cereal::make_nvp("index", index), cereal::make_nvp("virtual_speedup", virtual_speedup), cereal::make_nvp("sampling_period", sampling_period), cereal::make_nvp("start_time", start_time), cereal::make_nvp("end_time", end_time), cereal::make_nvp("experiment_time", experiment_time), cereal::make_nvp("batch_size", batch_size), cereal::make_nvp("duration", duration), cereal::make_nvp("scaling_factor", scaling_factor), cereal::make_nvp("selected", selected), cereal::make_nvp("sample_delay", sample_delay), cereal::make_nvp("delay_scaling", delay_scaling), cereal::make_nvp("total_delay", total_delay), cereal::make_nvp("global_delay", global_delay), cereal::make_nvp("selection", selection)); if constexpr(concepts::is_input_archive::value) { auto _ppts = std::vector{}; init_progress.clear(); fini_progress.clear(); ar(cereal::make_nvp("progress_points", _ppts)); for(auto itr : _ppts) fini_progress.emplace(itr.get_hash(), itr); } else { auto _ppts = std::vector{}; { auto ppts = fini_progress; for(auto& pitr : ppts) pitr.second.set_hash(pitr.first); for(auto pitr : init_progress) ppts[pitr.first] -= pitr.second; _ppts.reserve(ppts.size()); for(auto& pitr : ppts) _ppts.emplace_back(pitr.second); } ar(cereal::make_nvp("progress_points", _ppts)); } } std::string experiment::label() { return "causal_experiment"; } std::string experiment::description() { return "Records an experiment for causal profiling"; } const std::atomic& experiment::get_current_experiment() { return current_experiment; } bool experiment::start() { if(running && tracing::now() < start_time + experiment_time) return false; selection = sample_selection(); if(!selection) return false; // sampling period in nanoseconds sampling_period = backtrace_causal::get_period(units::nsec); // experiment time is scaled up for longer speedups index = experiment_history.size() + 1; virtual_speedup = sample_virtual_speedup(); delay_scaling = virtual_speedup / 100.0; if(use_exp_speedup_scaling) scaling_factor *= (1.0 + delay_scaling); experiment_time = global_scaling * scaling_factor * sampling_period * batch_size; sample_delay = sampling_period * delay_scaling; total_delay = delay::sync(); init_progress = component::progress_point::get_progress_points(); start_time = tracing::now(); LOG_INFO("Starting causal experiment #{}: {}", index, as_string()); if(get_state() < State::Finalized) { current_experiment_value = *this; current_selected_count.store(0); current_experiment.store(this); return true; } return false; } bool experiment::wait() const { auto _now = tracing::now(); auto _wait = experiment_time - (_now - start_time); auto _end = _now + _wait; auto _incr = std::min(_wait / 100, 1000000); while(tracing::now() < _end && get_state() < State::Finalized) { std::this_thread::yield(); std::this_thread::sleep_for(std::chrono::nanoseconds{ _incr }); } return (tracing::now() >= _end); } bool experiment::stop() { auto _now = tracing::now(); if(_now < start_time + experiment_time) return false; current_experiment.store(nullptr); selected = current_selected_count.load(); running = false; end_time = _now; experiment_time = (end_time - start_time); global_delay = delay::compute_total_delay(0); total_delay = (global_delay - total_delay); duration = (experiment_time > total_delay) ? (experiment_time - total_delay) : 0; fini_progress = component::progress_point::get_progress_points(); // sync data delay::sync(); auto _prog_stats = tim::statistics{}; auto _prog_vals = std::vector{}; _prog_vals.reserve(fini_progress.size()); for(auto fitr : fini_progress) { auto _pt = fitr.second - init_progress[fitr.first]; int64_t _num = std::max({ _pt.get_laps(), _pt.get_arrival(), _pt.get_departure() }); if(_num > 0) _prog_vals.emplace_back(_num); } std::sort(_prog_vals.begin(), _prog_vals.end()); for(auto itr : _prog_vals) _prog_stats += itr; auto _nvals = _prog_vals.size(); auto _medi = (_nvals > 2) ? _prog_vals.at(_nvals / 2) : _prog_vals.front(); auto _mean = (_nvals > 0) ? _prog_stats.get_mean() : 0; auto _high = (_nvals > 0) ? _prog_stats.get_max() : 0; auto _lowv = (_nvals > 0) ? _prog_stats.get_min() : 0; if(_lowv <= 3 && (_mean < 5 || _medi < 5)) { LOG_DEBUG("[progress points] increasing experiment time :: low: {}, high: {}, " "mean: {}, median: {}", _lowv, _high, _mean, _medi); global_scaling *= 2; ++global_scaling_increments; // keep track of how many successive increments have // been performed } else if(_mean > 10 && _lowv >= 8 && global_scaling > 1) { LOG_DEBUG("[progress points] decreasing experiment time :: low: {}, high: {}, " "mean: {}, median: {}", _lowv, _high, _mean, _medi); global_scaling /= 2; global_scaling_increments = 0; } if(ROCPROFSYS_UNLIKELY(global_scaling_increments >= 5)) { LOG_WARNING( "causal experimentation hasn't seen at least 5 progress points " "in the last {} experiments. Progress points are necessary for measuring " "the effect of the virtual speed-up. Please visit " "https://rocm.docs.amd.com/projects/rocprofiler-systems/en/latest/ for " "documentation on progress points and how to add them", global_scaling_increments); } if(_high > 0) experiment_history.emplace_back(*this); std::this_thread::sleep_for( std::chrono::nanoseconds{ 5 * sampling_period * batch_size }); return true; } std::string experiment::as_string() const { std::stringstream _ss{}; auto _dur = static_cast(experiment_time) / static_cast(units::sec); _ss << std::boolalpha << "speed-up: " << std::setw(3) << virtual_speedup << "%, period: " << std::setw(4) << std::fixed << std::setprecision(2) << (sampling_period / static_cast(units::msec)) << " msec"; if(!config::get_causal_end_to_end()) _ss << ", duration: " << std::setw(5) << std::fixed << std::setprecision(3) << _dur << " sec"; _ss << " :: experiment: " << fmt::format("0x{:X}", selection.address) << " "; if(selection.symbol_address > 0 && selection.address != selection.symbol_address) _ss << "(symbol@" << fmt::format("0x{:X}", selection.symbol_address) << ") "; if(!selection.symbol.file.empty() && selection.symbol.line > 0) _ss << "[" << filepath::basename(selection.symbol.file) << ":" << selection.symbol.line << "]"; auto _patch = [](std::string _v) { auto _pos = std::string::npos; using strpair_t = std::pair; for(const auto& itr : { strpair_t{ "::basic_string, std::allocator > ", "::string" }, strpair_t{ "::__cxx11::", "::" } }) { while((_pos = _v.find(itr.first)) != std::string::npos) _v = _v.replace(_pos, itr.first.length(), itr.second); } return _v; }; auto _func = _patch(rocprofsys::utility::demangle(selection.symbol.func)); _ss << "['" << _func << "']"; return _ss.str(); } // in nanoseconds uint64_t experiment::get_delay() { if(!current_experiment.load()) return 0; return current_experiment_value.sample_delay; } double experiment::get_delay_scaling() { if(!current_experiment.load()) return 0; return current_experiment_value.delay_scaling; } uint32_t experiment::get_index() { if(!is_active()) return 0; return current_experiment_value.index; } bool experiment::is_active() { return (current_experiment.load(std::memory_order_relaxed) != nullptr); } bool experiment::is_selected(uint64_t _addr) { return (is_active() && current_experiment_value.selection.contains(_addr)); } bool experiment::is_selected(unwind_addr_t _stack) { if(is_active()) { for(auto itr : _stack) if(itr > 0 && current_experiment_value.selection.contains(itr)) return true; } return false; } bool experiment::is_selected(container::c_array _stack) { if(is_active()) { for(auto itr : _stack) if(itr > 0 && current_experiment_value.selection.contains(itr)) return true; } return false; } void experiment::add_selected() { if(current_experiment.load() == nullptr) return; ++current_selected_count; } std::vector experiment::get_experiments() { return experiment_history; } void experiment::save_experiments() { auto _cfg = settings::compose_filename_config{}; _cfg.subdirectory = "causal"; _cfg.use_suffix = config::get_use_pid(); save_experiments(config::get_causal_output_filename(), _cfg); } void // NOLINTNEXTLINE experiment::save_experiments(std::string _fname_base, const filename_config_t& _cfg) { const auto& _info0 = thread_info::get(0, InternalTID); auto current_record = record{}; current_record.startup = _info0->lifetime.first; // update experiments { for(auto& itr : experiment_history) { if(itr.duration == 0 || itr.experiment_time == 0) continue; current_record.experiments.emplace_back(std::move(itr)); } experiment_history.clear(); } // update runtime value { uint64_t _beg_runtime = std::numeric_limits::max(); uint64_t _end_runtime = std::numeric_limits::min(); for(auto& itr : current_record.experiments) { if(itr.duration == 0) continue; if(itr.experiment_time == 0) continue; _beg_runtime = std::min(_beg_runtime, itr.start_time); _end_runtime = std::max(_end_runtime, itr.end_time); } current_record.runtime = (_end_runtime - _beg_runtime); } // update sample data { auto _add_sample = [¤t_record](sample&& _v) { current_record.samples.emplace_back(std::move(_v)); }; auto _total_samples = std::map{}; for(const auto& itr : get_samples()) { for(const auto& sitr : itr.second) { _total_samples[sitr.address] += sitr.count; } } LOG_DEBUG("Processing line info for {} sampled addresses...", _total_samples.size()); for(const auto& itr : _total_samples) { auto _entry = binary::lookup_ipaddr_entry(itr.first); if(_entry) _add_sample(sample{ *_entry, itr.second }); } auto _binfo_cfg = settings::compose_filename_config{}; _binfo_cfg.subdirectory = "causal/binary-info"; _binfo_cfg.use_suffix = config::get_use_pid(); save_line_info(_binfo_cfg, config::get_verbose()); } bool _causal_output_reset = config::get_setting_value("ROCPROFSYS_CAUSAL_FILE_RESET").value_or(false); { auto _saved_experiments = (_causal_output_reset) ? std::vector{} : load_experiments(_fname_base, _cfg, false); _saved_experiments.emplace_back(current_record); std::stringstream oss{}; { auto ar = tim::policy::output_archive::get(oss); ar->setNextName("rocprofsys"); ar->startNode(); ar->setNextName("causal"); ar->startNode(); (*ar)(cereal::make_nvp("records", _saved_experiments)); ar->finishNode(); ar->finishNode(); } auto _fname = tim::settings::compose_output_filename(_fname_base, "json", _cfg); auto ofs = std::ofstream{}; if(tim::filepath::open(ofs, _fname)) { if(get_verbose() >= 0) operation::file_output_message{}( _fname, std::string{ "causal_experiments" }); ofs << oss.str() << "\n"; } else { throw std::runtime_error( fmt::format("Error opening causal experiments output file: %s", _fname)); } } auto _fname = tim::settings::compose_output_filename(_fname_base, "coz", _cfg); // read in existing data auto _existing = std::stringstream{}; if(!_causal_output_reset) { std::ifstream ifs{ _fname }; if(ifs) { while(ifs && ifs.good()) { std::string _line; std::getline(ifs, _line); _existing << _line << "\n"; } } } std::ofstream ofs{}; ofs.setf(std::ios::fixed); if(tim::filepath::open(ofs, _fname)) { if(get_verbose() >= 0) operation::file_output_message{}( _fname, std::string{ "causal_experiments" }); ofs << _existing.str(); ofs << "startup\ttime=" << current_record.startup << "\n"; for(auto& itr : current_record.experiments) { auto& _selection = itr.selection; auto& _line_info = _selection.symbol; std::string _name = (_selection.symbol_address > 0) ? _line_info.func : fmt::format("{}:{}", _line_info.file, _line_info.line); if(_name.empty()) { throw std::runtime_error(fmt::format("Error! causal experiment selection " "has no name: address={}, file={}, " "line={}, func={}", _line_info.address.as_hex(), _line_info.file, _line_info.line, _line_info.func)); } ofs << "experiment\tselected=" << rocprofsys::utility::demangle(_name) << "\tspeedup=" << std::setprecision(2) << static_cast(itr.virtual_speedup / 100.0) << "\tduration=" << itr.duration << "\tselected-samples=" << itr.selected << "\n"; auto ppts = itr.fini_progress; for(auto pitr : itr.init_progress) ppts[pitr.first] -= pitr.second; for(auto pitr : ppts) { // if(pitr.second.get_laps() == 0) continue; if(get_causal_end_to_end() && pitr.second.get_laps() > 1) continue; if(pitr.second.is_throughput_point() && pitr.second.get_delta() != 0) { ofs << "throughput-point\tname=" << rocprofsys::utility::demangle( tim::get_hash_identifier(pitr.first)) << "\tdelta=" << pitr.second.get_delta() << "\n"; if(get_causal_end_to_end()) break; } if(pitr.second.is_latency_point()) { if(get_causal_end_to_end()) continue; auto _delta = std::max(pitr.second.get_latency_delta(), 1); ofs << "latency-point\tname=" << rocprofsys::utility::demangle( tim::get_hash_identifier(pitr.first)) << "\tarrivals=" << pitr.second.get_arrival() << "\tdepartures=" << pitr.second.get_departure() << "\tdifference=" << _delta << "\n"; } } } ofs << "runtime\ttime=" << current_record.runtime << "\n"; for(const auto& itr : current_record.samples) { ofs << "samples\tlocation=" << itr.get_identifier() << "\tcount=" << itr.count; if(config::get_debug()) ofs << "\taddress=" << fmt::format("0x{:X}", itr.address); ofs << "\n"; } } else { throw std::runtime_error( fmt::format("Error opening causal experiments output file: {}", _fname)); } } std::vector experiment::load_experiments(bool _throw_on_error) { auto _cfg = settings::compose_filename_config{}; _cfg.subdirectory = "causal"; _cfg.use_suffix = config::get_use_pid(); return load_experiments(config::get_causal_output_filename(), _cfg, _throw_on_error); } std::vector experiment::load_experiments(std::string _fname, const filename_config_t& _cfg, bool _throw_on_error) { _fname = tim::settings::compose_input_filename(_fname, "json", _cfg); auto ifs = std::ifstream{}; auto _data = std::vector{}; if(tim::filepath::open(ifs, _fname)) { auto ar = tim::policy::input_archive::get(ifs); ar->setNextName("rocprofsys"); ar->startNode(); ar->setNextName("causal"); ar->startNode(); (*ar)(cereal::make_nvp("records", _data)); ar->finishNode(); ar->finishNode(); } else { if(_throw_on_error) { throw std::runtime_error( fmt::format("Error opening causal experiments input file: %s", _fname)); } } return _data; } } // namespace causal } // namespace rocprofsys