// MIT License // // Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. #include "library/causal/data.hpp" #include "binary/address_multirange.hpp" #include "binary/analysis.hpp" #include "binary/binary_info.hpp" #include "binary/link_map.hpp" #include "binary/scope_filter.hpp" #include "core/binary/fwd.hpp" #include "core/config.hpp" #include "core/containers/c_array.hpp" #include "core/demangler.hpp" #include "core/state.hpp" #include "core/utility.hpp" #include "library/causal/delay.hpp" #include "library/causal/experiment.hpp" #include "library/causal/fwd.hpp" #include "library/causal/sample_data.hpp" #include "library/causal/sampling.hpp" #include "library/causal/selected_entry.hpp" #include "library/ptl.hpp" #include "library/runtime.hpp" #include "library/thread_data.hpp" #include "library/thread_info.hpp" #include #include #include #include #include #include #include #include #include #include "logger/debug.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include namespace rocprofsys { namespace causal { namespace { using random_engine_t = std::mt19937_64; using progress_bundles_t = component_bundle_cache; auto speedup_seeds = std::vector{}; auto speedup_divisions = get_env("ROCPROFSYS_CAUSAL_SPEEDUP_DIVISIONS", 5); auto speedup_dist = []() { size_t _n = std::max(1, 100 / speedup_divisions); std::vector _v(_n, uint16_t{ 0 }); std::generate(_v.begin(), _v.end(), [_value = 0]() mutable { return (_value += speedup_divisions); }); // approximately 25% of bins should be zero speedup size_t _nzero = std::ceil(_v.size() / 4.0); _v.resize(_v.size() + _nzero, 0); std::sort(_v.begin(), _v.end()); if(get_is_continuous_integration() && _v.back() > 100) { throw std::runtime_error( fmt::format("Error! last value is too large: {}", _v.back())); } return _v; }(); auto perform_experiment_impl_completed = std::unique_ptr>{}; auto num_progress_points = std::atomic{ 0 }; auto& get_progress_bundles(int64_t _tid = utility::get_thread_index()) { return progress_bundles_t::instance(construct_on_thread{ _tid }); } template auto& get_engine() { static auto _seed = []() -> hash_value_t { auto _seed_v = config::get_setting_value("ROCPROFSYS_CAUSAL_RANDOM_SEED") .value_or(0); if(_seed_v == 0) _seed_v = std::random_device{}(); return _seed_v; }(); static thread_local auto _v = random_engine_t{ tim::get_hash_id(_seed, utility::get_thread_index()) }; return _v; } binary::address_multirange& get_eligible_address_ranges() { static auto _v = binary::address_multirange{}; return _v; } using sf = binary::scope_filter; auto get_filters(const std::set& _scopes = { sf::BINARY_FILTER, sf::SOURCE_FILTER, sf::FUNCTION_FILTER }) { auto _filters = std::vector{}; // exclude internal libraries used by rocprof-sys if(_scopes.count(sf::BINARY_FILTER) > 0) _filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::BINARY_FILTER, "lib(rocprof-sys[-\\.]|dyninst|" "tbbmalloc|gotcha\\.|unwind\\.so\\.99)" }); // in function mode, it generally doesn't help to experiment on main function since // telling the user to "make the main function" faster is literally useless since it // contains everything that could be made faster if(config::get_causal_mode() == CausalMode::Function && _scopes.count(sf::FUNCTION_FILTER) > 0) _filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, "( main\\(|^main$|^main\\.cold$)" }); bool _use_default_excludes = config::get_setting_value("ROCPROFSYS_CAUSAL_FUNCTION_EXCLUDE_DEFAULTS") .value_or(true); if(_use_default_excludes && _scopes.count(sf::FUNCTION_FILTER) > 0) { // symbols starting with leading underscore are generally system functions _filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, "^_" }); if(config::get_causal_mode() == CausalMode::Function) { // exclude STL implementation functions _filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, "::_M" }); } } // in function mode, it generally doesn't help to claim // "make main function" faster since it contains everything // that could be made faster if(config::get_causal_mode() == CausalMode::Function && _scopes.count(sf::FUNCTION_FILTER) > 0) { _filters.emplace_back(sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, "(^main$|^main.cold$|int main\\()" }); } using utility::get_regex_or; auto _source_end_converter = [](const std::string& _v) { return _v + "$"; }; // include handling { auto _binary_include = get_regex_or(config::get_causal_binary_scope(), ""); auto _source_include = get_regex_or(config::get_causal_source_scope(), _source_end_converter, ""); auto _function_include = get_regex_or(config::get_causal_function_scope(), ""); auto _current_include = std::make_tuple(_binary_include, _source_include, _function_include); static auto _former_include = decltype(_current_include){}; if(_former_include != _current_include) { if(!_binary_include.empty()) LOG_DEBUG("[causal] binary scope : {}", _binary_include); if(!_source_include.empty()) LOG_DEBUG("[causal] source scope : {}", _source_include); if(!_function_include.empty()) LOG_DEBUG("[causal] function scope : {}", _function_include); _former_include = _current_include; } if(!_binary_include.empty() && _scopes.count(sf::BINARY_FILTER) > 0) _filters.emplace_back( sf{ sf::FILTER_INCLUDE, sf::BINARY_FILTER, _binary_include }); if(!_source_include.empty() && _scopes.count(sf::SOURCE_FILTER) > 0) _filters.emplace_back( sf{ sf::FILTER_INCLUDE, sf::SOURCE_FILTER, _source_include }); if(!_function_include.empty() && _scopes.count(sf::FUNCTION_FILTER) > 0) _filters.emplace_back( sf{ sf::FILTER_INCLUDE, sf::FUNCTION_FILTER, _function_include }); } // exclude handling { auto _binary_exclude = get_regex_or(config::get_causal_binary_exclude(), ""); auto _source_exclude = get_regex_or(config::get_causal_source_exclude(), _source_end_converter, ""); auto _function_exclude = get_regex_or(config::get_causal_function_exclude(), ""); auto _current_exclude = std::make_tuple(_binary_exclude, _source_exclude, _function_exclude); static auto _former_exclude = decltype(_current_exclude){}; if(_former_exclude != _current_exclude) { if(!_binary_exclude.empty()) LOG_DEBUG("[causal] binary exclude : {}", _binary_exclude); if(!_source_exclude.empty()) LOG_DEBUG("[causal] source exclude : {}", _source_exclude); if(!_function_exclude.empty()) LOG_DEBUG("[causal] function exclude : {}", _function_exclude); _former_exclude = _current_exclude; } if(!_binary_exclude.empty() && _scopes.count(sf::BINARY_FILTER) > 0) _filters.emplace_back( sf{ sf::FILTER_EXCLUDE, sf::BINARY_FILTER, _binary_exclude }); if(!_source_exclude.empty() && _scopes.count(sf::SOURCE_FILTER) > 0) _filters.emplace_back( sf{ sf::FILTER_EXCLUDE, sf::SOURCE_FILTER, _source_exclude }); if(!_function_exclude.empty() && _scopes.count(sf::FUNCTION_FILTER) > 0) _filters.emplace_back( sf{ sf::FILTER_EXCLUDE, sf::FUNCTION_FILTER, _function_exclude }); } return _filters; } using binary_info_t = std::vector; std::pair& get_cached_binary_info() { static auto _v = []() { // get the linked binaries for the exe (excluding ones from librocprof-sys) auto _link_map = binary::get_link_map(); auto _files = std::vector{}; _files.reserve(_link_map.size()); for(const auto& itr : _link_map) _files.emplace_back(itr.real()); auto _discarded = std::vector{}; auto _requested = binary::get_binary_info(_files, get_filters()); return std::make_pair(_requested, _discarded); }(); return _v; } bool satisfies_filter(const binary::scope_filter::filter_scope& _scope, const std::string& _value) { static auto _filters = get_filters(); return binary::scope_filter::satisfies_filter(_filters, _scope, _value); } auto compute_eligible_lines_impl() { const auto& _binary_info = get_cached_binary_info().first; auto& _scoped_info = get_cached_binary_info().second; auto _filters = get_filters(); for(const auto& litr : _binary_info) { auto& _scoped = _scoped_info.emplace_back(); _scoped.bfd = litr.bfd; _scoped.mappings = litr.mappings; _scoped.sections = litr.sections; for(const auto& ditr : litr.symbols) { auto _sym = ditr.clone(); _sym.inlines = ditr.get_inline_symbols>(_filters); _sym.dwarf_info = ditr.get_debug_line_info>(_filters); if(ditr(_filters) || (_sym.inlines.size() + _sym.dwarf_info.size()) > 0) { _scoped.ranges.emplace_back(_sym.ipaddr()); _scoped.symbols.emplace_back(_sym); } } for(const auto& ditr : litr.debug_info) { if(sf::satisfies_filter(_filters, sf::SOURCE_FILTER, ditr.file) || sf::satisfies_filter(_filters, sf::SOURCE_FILTER, fmt::format("{}:{}", ditr.file, ditr.line))) { _scoped.debug_info.emplace_back(ditr); } } _scoped.sort(); } auto& _eligible_ar = get_eligible_address_ranges(); for(const auto& litr : _scoped_info) { for(const auto& ditr : litr.symbols) { _eligible_ar += ditr.ipaddr(); } for(auto ditr : litr.ranges) { _eligible_ar += ditr; } } LOG_DEBUG("[causal] eligible address ranges: {}, coarse address range: {} [{}]", _eligible_ar.size(), _eligible_ar.range_size(), _eligible_ar.get_coarse_range().as_string()); if(_eligible_ar.empty()) { auto _cfg = settings::compose_filename_config{}; _cfg.subdirectory = "causal/binary-info"; _cfg.use_suffix = config::get_use_pid(); save_line_info(_cfg, config::get_verbose()); } if(_eligible_ar.empty()) { throw std::runtime_error( "Error! binary analysis (after filters) resulted in " "zero eligible instruction pointer addresses for causal experimentation"); } } void save_maps_info_impl(std::ostream& _ofs) { auto _maps_file = fmt::format("/proc/{}/maps", process::get_id()); auto _ifs = std::ifstream{ _maps_file }; auto _maps = std::stringstream{}; if(_ifs) { _maps << _maps_file << "\n"; while(_ifs) { std::string _line{}; getline(_ifs, _line); if(!_line.empty()) _maps << " " << _line << "\n"; } } _ofs << _maps.str(); } void save_line_info_impl(std::ostream& _ofs, const std::vector& _binary_data, const std::array& _info = { true, true, true }) { auto _write_impl = [&_ofs, &_info](const binary::binary_info& _data) { for(const auto& itr : _data.mappings) { _ofs << itr.pathname << " [" << fmt::format("0x{:X}", itr.load_address) << " - " << fmt::format("0x{:X}", itr.last_address) << "]\n"; } auto _emitted_dwarf_addresses = std::set{}; for(const auto& itr : _data.symbols) { auto _addr = itr.address; auto _addr_off = itr.address + itr.load_address; _ofs << " " << _addr_off.as_hex() << " [" << _addr.as_hex() << "] :: " << itr.file; if(itr.line > 0) _ofs << ":" << itr.line; if(!itr.func.empty()) _ofs << " [" << rocprofsys::utility::demangle(itr.func) << "]"; _ofs << "\n"; if(std::get<0>(_info)) { for(const auto& ditr : itr.inlines) { _ofs << " " << ditr.file << ":" << ditr.line; if(!ditr.func.empty()) _ofs << " [" << rocprofsys::utility::demangle(ditr.func) << "]"; _ofs << "\n"; } } if(std::get<1>(_info)) { for(const auto& ditr : itr.dwarf_info) { _ofs << " " << ditr.address.as_hex() << " :: " << ditr.file << ":" << ditr.line; _ofs << "\n"; _emitted_dwarf_addresses.emplace(ditr.address.low); } } } if(std::get<2>(_info)) { for(const auto& itr : _data.debug_info) { if(_emitted_dwarf_addresses.count(itr.address.low) > 0) continue; _ofs << " " << itr.address.as_hex() << " :: " << itr.file << ":" << itr.line; _ofs << "\n"; } } _ofs << "\n" << std::flush; }; for(const auto& itr : _binary_data) _write_impl(itr); } void compute_eligible_lines() { static auto _once = std::once_flag{}; std::call_once(_once, []() { compute_eligible_lines_impl(); auto _cfg = settings::compose_filename_config{}; _cfg.subdirectory = "causal/binary-info"; _cfg.use_suffix = config::get_use_pid(); save_line_info(_cfg, config::get_verbose()); }); } auto eligible_pc_history = std::map{}; auto eligible_pc_idx = std::atomic{ 0 }; auto eligible_pc_candidates = std::atomic{ 0 }; void perform_experiment_impl(std::shared_ptr> _started) // NOLINT { using clock_type = std::chrono::high_resolution_clock; using duration_nsec_t = std::chrono::duration; using duration_sec_t = std::chrono::duration>; const auto& _thr_info = thread_info::init(true); set_thread_state(ThreadState::Disabled); if(!_thr_info->is_offset) { throw std::runtime_error("Error! causal profiling thread should be offset"); } if(!perform_experiment_impl_completed) perform_experiment_impl_completed = std::make_unique>(); perform_experiment_impl_completed->set_value_at_thread_exit(); compute_eligible_lines(); // notify that thread has started if(_started) _started->set_value(); if(!config::get_causal_end_to_end()) { // wait for at least one progress point to start while(num_progress_points.load(std::memory_order_relaxed) == 0) { std::this_thread::yield(); std::this_thread::sleep_for(std::chrono::milliseconds{ 1 }); } } // allow ~10 samples to be collected std::this_thread::yield(); std::this_thread::sleep_for(std::chrono::milliseconds{ 10 }); double _delay_sec = config::get_setting_value("ROCPROFSYS_CAUSAL_DELAY").value_or(0.0); double _duration_sec = config::get_setting_value("ROCPROFSYS_CAUSAL_DURATION").value_or(0.0); auto _duration_nsec = duration_nsec_t{ _duration_sec * units::sec }; if(_delay_sec > 0.0) { LOG_DEBUG("[causal] delaying experimentation for {} seconds...", _delay_sec); uint64_t _delay_nsec = _delay_sec * units::sec; std::this_thread::yield(); std::this_thread::sleep_for(std::chrono::nanoseconds{ _delay_nsec }); } auto _impl_count = 0; auto _start_time = clock_type::now(); auto _exceeded_duration = [&]() { if(_duration_sec > 1.0e-3) { auto _elapsed = clock_type::now() - _start_time; if(_elapsed >= _duration_nsec) { LOG_DEBUG("[causal] stopping experimentation after {} seconds " "(elapsed: {} seconds)...", _duration_sec, std::chrono::duration_cast(_elapsed).count()); causal::sampling::post_process(); return true; } } return false; }; while(get_state() < State::Finalized) { auto _impl_no = _impl_count++; auto _experim = experiment{}; // loop until started or finalized while(!_experim.start()) { if(get_state() == State::Finalized) { if(_impl_no > 0) return; LOG_DEBUG( "[causal] experiment failed to start. Number of PC candidates: {}", eligible_pc_candidates.load()); auto _memory = std::stringstream{}; auto _binary = std::stringstream{}; auto _scoped = std::stringstream{}; auto _sample = std::stringstream{}; auto _eligible = std::stringstream{}; save_maps_info_impl(_memory); save_line_info_impl(_binary, get_cached_binary_info().first, { true, true, false }); save_line_info_impl(_scoped, get_cached_binary_info().second, { true, true, false }); auto _samples_map = std::map{}; for(const auto& itr : get_samples()) { for(const auto& iitr : itr.second) { _samples_map[iitr.address] += iitr.count; } } auto _eligible_pc_hist = std::vector>{}; for(const auto& itr : eligible_pc_history) { _eligible_pc_hist.emplace_back(std::make_pair(itr.first, itr.second)); } std::sort( _eligible_pc_hist.begin(), _eligible_pc_hist.end(), [](auto&& _lhs, auto&& _rhs) { return _lhs.second > _rhs.second; }); for(const auto& itr : _eligible_pc_hist) { _eligible << " " << std::setw(8) << itr.second << " :: " << fmt::format("0x{:X}", itr.first) << "\n"; } auto _samples = std::vector>{}; for(const auto& itr : _samples_map) _samples.emplace_back(std::make_pair(itr.first, itr.second)); // sort by most samples std::sort(_samples.begin(), _samples.end(), [](const auto& _lhs, const auto& _rhs) { return _lhs.second > _rhs.second; }); for(const auto& itr : _samples) { if(itr.second > 0) { auto _is_eligible = is_eligible_address(itr.first) && !get_line_info(itr.first, false).empty(); auto _linfo = binary::lookup_ipaddr_entry(itr.first); if(_linfo) { _sample << " " << std::setw(8) << itr.second << " :: " << std::setw(5) << std::boolalpha << _is_eligible << " :: " << fmt::format("0x{:X}", itr.first) << " " << _linfo->location << ":" << _linfo->lineno << " [" << rocprofsys::utility::demangle(_linfo->name) << "]\n"; for(const auto& iitr : _linfo->lineinfo.lines) { _sample << " " << std::setw(8) << itr.second << " :: " << std::setw(5) << std::boolalpha << _is_eligible << " :: " << fmt::format("0x{:X}", itr.first) << " " << iitr.location << ":" << iitr.line << " [" << rocprofsys::utility::demangle(iitr.name) << "]\n"; } } } } LOG_CRITICAL("Causal experiment never started"); LOG_WARNING("pc samples: {} eligible pcs: {} scoped pcs: {}", _sample.str(), _eligible.str(), _scoped.str()); if(get_verbose() >= 1) { LOG_WARNING("binary pcs: {} maps: {}", _binary.str(), _memory.str()); } // if launched via rocprof-sys-causal, allow end-to-end runs that do not // start experiments auto _omni_causal_launcher = get_env("ROCPROFSYS_LAUNCHER", "", false) == "rocprof-sys-causal"; if(!(get_causal_end_to_end() && _omni_causal_launcher)) { if(_impl_no == 0) { throw std::runtime_error( "Error! Causal experiment never started"); } } return; } else { LOG_DEBUG( "[causal] experiment failed to start. Number of PC candidates: {}", eligible_pc_candidates.load()); } } LOG_TRACE("[causal] experiment started. Number of PC candidates: {}", eligible_pc_candidates.load()); reset_sample_selection(); // wait for the experiment to complete if(config::get_causal_end_to_end()) { mark_progress_point(config::get_exe_name(), true); while(get_state() < State::Finalized) { std::this_thread::yield(); std::this_thread::sleep_for(std::chrono::milliseconds{ 100 }); if(_exceeded_duration()) return; } } else { _experim.wait(); } while(!_experim.stop()) { if(get_state() == State::Finalized) return; } if(_exceeded_duration()) return; } } // latest_eligible_pcs is an array of unwind_depth size -> samples will // use lowest indexes for most recent functions address in the call-stack auto latest_eligible_pc = []() { using atomic_uintptr_t = std::atomic; constexpr size_t array_size = unwind_depth; auto _arr = std::array, array_size>{}; for(auto& itr : _arr) itr = std::make_unique>(0); return _arr; }(); } // namespace //--------------------------------------------------------------------------------------// bool is_eligible_address(uintptr_t _v) { return get_eligible_address_ranges().contains(_v); } void save_line_info(const settings::compose_filename_config& _cfg, int _verbose) { auto _write = [_verbose](const std::string& ofname, const auto& _data, const std::array& _info) { auto _ofs = std::ofstream{}; if(tim::filepath::open(_ofs, ofname)) { if(_verbose >= 0) operation::file_output_message{}( ofname, std::string{ "causal_symbol_info" }); save_line_info_impl(_ofs, _data, _info); save_maps_info_impl(_ofs); } else { throw std::runtime_error(fmt::format("Error opening {}", ofname)); } }; _write( tim::settings::compose_output_filename( fmt::format("{}-binary", config::get_causal_output_filename()), "txt", _cfg), get_cached_binary_info().first, { true, true, true }); _write( tim::settings::compose_output_filename( fmt::format("{}-scoped", config::get_causal_output_filename()), "txt", _cfg), get_cached_binary_info().second, { true, true, false }); } size_t set_current_selection(unwind_addr_t _stack) { for(auto itr : _stack) { if(itr == 0) continue; ++eligible_pc_candidates; if(is_eligible_address(itr)) { auto _idx = eligible_pc_idx++ % latest_eligible_pc.size(); latest_eligible_pc.at(_idx)->store(itr); } } return eligible_pc_idx.load(std::memory_order_relaxed); } size_t set_current_selection(container::c_array _stack) { for(auto itr : _stack) { if(itr == 0) continue; ++eligible_pc_candidates; if(is_eligible_address(itr)) { auto _idx = eligible_pc_idx++ % latest_eligible_pc.size(); latest_eligible_pc.at(_idx)->store(itr); } } return eligible_pc_idx.load(std::memory_order_relaxed); } void reset_sample_selection() { eligible_pc_idx.store(0); eligible_pc_candidates.store(0); for(auto& itr : latest_eligible_pc) { if(itr) itr->store(0); } } selected_entry sample_selection(size_t _nitr, size_t _wait_ns) { ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal); auto _select_address = [&](auto& _address_vec) { // this isn't necessary bc of check before calling this lambda but // kept because of size() - 1 in distribution range if(ROCPROFSYS_UNLIKELY(_address_vec.empty())) { LOG_WARNING("No addresses for sample selection..."); return selected_entry{}; } while(!_address_vec.empty()) { // randomly select an address auto _dist = std::uniform_int_distribution{ 0, _address_vec.size() - 1 }; auto _idx = _dist(get_engine()); uintptr_t _addr = _address_vec.at(_idx); uintptr_t _sym_addr = 0; uintptr_t _lookup_addr = _addr; auto _dl_info = unwind::dlinfo::construct(_addr); _address_vec.erase(_address_vec.begin() + _idx); eligible_pc_history[_addr] += 1; if(get_causal_mode() == CausalMode::Function) _sym_addr = (_dl_info.symbol) ? _dl_info.symbol.address() : _addr; // lookup the PC line info at either the address or the symbol address auto linfo = get_line_info(_lookup_addr, false); // unlikely this will be empty but just in case if(linfo.empty()) continue; // debugging for continuous integration if(ROCPROFSYS_UNLIKELY(config::get_is_continuous_integration() || config::get_debug())) { auto _location = (_dl_info.location) ? filepath::realpath(std::string{ _dl_info.location.name }, nullptr, false) : std::string{}; for(const auto& itr : linfo) { if(ROCPROFSYS_UNLIKELY(config::get_debug())) { LOG_WARNING("[{}][{}][{}] [{}] {} [{}:{}][{}][{}]", fmt::format("0x{:X}", _lookup_addr), fmt::format("0x{:X}", _addr), fmt::format("0x{:X}", _sym_addr), (_location.empty()) ? "" : _location, rocprofsys::utility::demangle(itr.func), itr.file, itr.line, itr.address.as_string(), itr.address.size()); } } } auto& _linfo_v = (config::get_causal_mode() == CausalMode::Function) ? linfo.front() : linfo.back(); return selected_entry{ _addr, _sym_addr, _linfo_v }; } return selected_entry{}; }; while(eligible_pc_idx.load(std::memory_order_relaxed) == 0) { std::this_thread::yield(); std::this_thread::sleep_for(std::chrono::nanoseconds{ _wait_ns }); } for(size_t _n = 0; _n < _nitr; ++_n) { auto _addresses = std::deque{}; for(auto& aitr : latest_eligible_pc) { if(ROCPROFSYS_UNLIKELY(!aitr)) { LOG_WARNING("Invalid atomic pc..."); continue; } uintptr_t _addr = aitr->load(); if(_addr > 0) _addresses.emplace_back(_addr); } if(!_addresses.empty()) { auto _selection = _select_address(_addresses); if(_selection) return _selection; } } return selected_entry{}; } std::deque get_line_info(uintptr_t _addr, bool _include_discarded) { static auto _glob_filters = get_filters({ sf::BINARY_FILTER }); static auto _scope_filters = get_filters(); auto _data = std::deque{}; auto _get_line_info = [&](const auto& _info, const auto& _filters) { // search for exact matches first for(const binary::binary_info& litr : _info) { auto _local_data = std::deque{}; // make sure the address is in the coarse grained mapped regions // before performing an exhaustive search bool _is_mapped = std::find_if(litr.mappings.begin(), litr.mappings.end(), [_addr](const auto& mitr) { return binary::address_range{ mitr.load_address, mitr.last_address } .contains(_addr); }) != litr.mappings.end(); if(!_is_mapped) return; for(const auto& ditr : litr.symbols) { // skip if load address is greater than address if(_addr < ditr.load_address) continue; // compute the symbols ip address range auto _ipaddr = ditr.ipaddr(); // if the lower bound of the ip address range is greater than the address, // all following symbols are not worth searching since they are at higher // addresses than this symbol (sorted by address) // if(_ipaddr.low > _addr) break; if(!_ipaddr.contains(_addr)) continue; if(_include_discarded || config::get_causal_mode() == CausalMode::Function) { // check if the primary symbol satisfy the constraints if(ditr(_filters)) _local_data.emplace_back(ditr); // the primary symbol may not satisfy the constraints but the inlined // functions may utility::combine(_local_data, ditr.get_inline_symbols(_filters)); } if(_include_discarded || config::get_causal_mode() == CausalMode::Line) { auto _debug_data = std::deque{}; for(const auto& itr : ditr.get_debug_line_info(_filters)) { if(!_ipaddr.contains(itr.ipaddr())) throw std::runtime_error( fmt::format("Error! debug line info ipaddr ({}) is not " "contained in symbol ipaddr ({})", itr.ipaddr().as_hex(), _ipaddr.as_hex())); if(itr.ipaddr().contains(_addr)) _debug_data.emplace_back(itr); } utility::combine(_local_data, _debug_data); } } if(!_local_data.empty()) { // combine and only allow first match utility::combine(_data, _local_data); if(!_include_discarded) break; } } }; if(_include_discarded) _get_line_info(get_cached_binary_info().first, _glob_filters); else _get_line_info(get_cached_binary_info().second, _scope_filters); return _data; } void push_progress_point(std::string_view _name) { if(config::get_causal_end_to_end()) return; ++num_progress_points; auto _hash = tim::add_hash_id(_name); auto& _data = get_progress_bundles(); if(ROCPROFSYS_LIKELY(_data != nullptr)) { auto* _bundle = _data->construct(_hash); _bundle->push(); _bundle->start(); } } void pop_progress_point(std::string_view _name) { if(config::get_causal_end_to_end()) return; auto& _data = get_progress_bundles(); if(ROCPROFSYS_UNLIKELY(!_data || _data->empty())) return; if(_name.empty()) { auto* itr = _data->back(); itr->stop(); itr->pop(); _data->pop_back(); return; } else { auto _hash = tim::add_hash_id(_name); for(auto itr = _data->rbegin(); itr != _data->rend(); ++itr) { if((*itr)->get_hash() == _hash) { (*itr)->stop(); (*itr)->pop(); _data->destroy(itr); return; } } } } void mark_progress_point(std::string_view _name, bool _force) { if(config::get_causal_end_to_end() && !_force) return; ++num_progress_points; auto _hash = tim::add_hash_id(_name); auto& _data = get_progress_bundles(); if(ROCPROFSYS_LIKELY(_data != nullptr)) { auto* _bundle = _data->construct(_hash); _bundle->push(); _bundle->mark(); _bundle->pop(); _data->pop_back(); } } uint16_t sample_virtual_speedup() { if(speedup_dist.empty()) return 0; else if(speedup_dist.size() == 1) return speedup_dist.front(); else { struct virtual_speedup {}; auto _dist = std::uniform_int_distribution{ size_t{ 0 }, speedup_dist.size() - 1 }; return speedup_dist.at(_dist(get_engine())); } } void start_experimenting() { ROCPROFSYS_SCOPED_THREAD_STATE(ThreadState::Internal); auto _user_speedup_dist = config::get_causal_fixed_speedup(); if(!_user_speedup_dist.empty()) { speedup_dist.clear(); for(auto itr : _user_speedup_dist) { if(itr > 100) { LOG_DEBUG("Virtual speedups must be in range [0, 100]. " "Invalid virtual speedup: {}", itr); } speedup_dist.emplace_back(static_cast(itr)); } } delay::setup(); compute_eligible_lines(); if(get_state() < State::Finalized) { ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); auto _promise = std::make_shared>(); std::thread{ perform_experiment_impl, _promise }.detach(); _promise->get_future().wait_for(std::chrono::seconds{ 2 }); } } void finish_experimenting() { if(perform_experiment_impl_completed) { perform_experiment_impl_completed->get_future().wait_for( std::chrono::seconds{ 5 }); perform_experiment_impl_completed.reset(); } sampling::post_process(); experiment::save_experiments(); } } // namespace causal } // namespace rocprofsys