diff --git a/docker/build-docker-release.sh b/docker/build-docker-release.sh index 317203c559..147e84b0c1 100755 --- a/docker/build-docker-release.sh +++ b/docker/build-docker-release.sh @@ -33,8 +33,8 @@ usage() print_default_option versions "[VERSION] [VERSION...]" "Ubuntu or OpenSUSE release" "${VERSIONS}" print_default_option rocm-versions "[VERSION] [VERSION...]" "ROCm versions" "${ROCM_VERSIONS}" print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}" - print_default_option user "[USERNAME]" "DockerHub username" "${USER}" - #print_default_option lto "[on|off]" "Enable LTO" "${LTO}" + print_default_option "user -u" "[USERNAME]" "DockerHub username" "${USER}" + print_default_option "retry -r" "[N]" "Number of attempts to build (to account for network errors)" "${RETRY}" echo "" echo "Usage: ${BASH_SOURCE[0]} -- " @@ -52,8 +52,26 @@ send-error() verbose-run() { - echo -e "\n### Executing \"${@}\"... ###\n" - exec "${@}" + echo -e "\n### Executing \"${@}\" a maximum of ${RETRY} times... ###\n" + for i in $(seq 1 1 ${RETRY}) + do + set +e + eval "${@}" + local RETC=$? + set -e + if [ "${RETC}" -eq 0 ]; then + break + else + echo -en "\n### Command failed with error code ${RETC}... " + if [ "${i}" -ne "${RETRY}" ]; then + echo -e "Retrying... ###\n" + sleep 3 + else + echo -e "Exiting... 
###\n" + exit ${RETC} + fi + fi + done } build-release() @@ -68,7 +86,7 @@ build-release() shift local DOCKER_ARGS="" tty -s && DOCKER_ARGS="-it" || DOCKER_ARGS="" - verbose-run docker run ${DOCKER_ARGS} --rm -v ${PWD}:/home/omnitrace --stop-signal "SIGINT" --env DISTRO=${OS} --env ROCM_VERSION=${ROCM_VERSION} --env VERSION=${CODE_VERSION} --env PYTHON_VERSIONS="${PYTHON_VERSIONS}" --env IS_DOCKER=1 ${CONTAINER} /home/omnitrace/scripts/build-release.sh ${@} + verbose-run docker run ${DOCKER_ARGS} --rm -v ${PWD}:/home/omnitrace --stop-signal "SIGINT" --env DISTRO=${OS} --env ROCM_VERSION=${ROCM_VERSION} --env VERSION=${CODE_VERSION} --env PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" --env IS_DOCKER=1 ${CONTAINER} /home/omnitrace/scripts/build-release.sh ${@} } reset-last() @@ -84,6 +102,7 @@ reset-last : ${ROCM_VERSIONS:=5.0 4.5 4.3} : ${MPI:=0} : ${PYTHON_VERSIONS:="6 7 8 9 10"} +: ${RETRY:=3} n=0 while [[ $# -gt 0 ]] @@ -118,6 +137,11 @@ do USER=${1} reset-last ;; + --retry|-r) + shift + RETRY=${1} + reset-last + ;; "--") shift SCRIPT_ARGS=${@} diff --git a/docker/build-docker.sh b/docker/build-docker.sh index 02b1557f61..52a8d32bb4 100755 --- a/docker/build-docker.sh +++ b/docker/build-docker.sh @@ -34,7 +34,7 @@ usage() print_default_option rocm-versions "[VERSION] [VERSION...]" "ROCm versions" "${ROCM_VERSIONS}" print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}" print_default_option "user -u" "[USERNAME]" "DockerHub username" "${USER}" - print_default_option "retry -r" "[N]" "Number of attempts to build (to account for network errors" "${RETRY}" + print_default_option "retry -r" "[N]" "Number of attempts to build (to account for network errors)" "${RETRY}" print_default_option push "" "Push the image to Dockerhub" "" #print_default_option lto "[on|off]" "Enable LTO" "${LTO}" } diff --git a/source/bin/omnitrace-critical-trace/critical-trace.cpp b/source/bin/omnitrace-critical-trace/critical-trace.cpp index 
bb33992142..5431083466 100644 --- a/source/bin/omnitrace-critical-trace/critical-trace.cpp +++ b/source/bin/omnitrace-critical-trace/critical-trace.cpp @@ -26,10 +26,12 @@ #include "library/config.hpp" #include "library/debug.hpp" #include "library/perfetto.hpp" +#include "library/tracing.hpp" #include #include +#include namespace config = omnitrace::config; namespace critical_trace = omnitrace::critical_trace; @@ -750,36 +752,42 @@ generate_perfetto(const std::vector& _data) // run in separate thread(s) so that it ends up in unique row if(_nrows < 1) _nrows = _data.size(); - std::string _dev = (DevT == Device::NONE) ? "" - : (DevT == Device::ANY) ? "CPU + GPU " - : (DevT == Device::CPU) ? "CPU " - : "GPU "; - std::string _cpname = _dev + "CritPath"; - auto _func = [&](size_t _idx, size_t _beg, size_t _end) { - if(DevT != Device::NONE) - { - if(_nrows != 1) - threading::set_thread_name(TIMEMORY_JOIN(" ", _cpname, _idx).c_str()); - else - threading::set_thread_name(_cpname.c_str()); - } - // ensure all hash ids exist - copy_hash_ids(); - std::set _used{}; + std::string _dev = (DevT == Device::NONE) ? "" + : (DevT == Device::ANY) ? "CPU + GPU " + : (DevT == Device::CPU) ? "CPU " + : "GPU "; + + using category_t = std::conditional_t< + DevT == Device::ANY, omnitrace::category::critical_trace, + std::conditional_t>; + + // ensure all hash ids exist + copy_hash_ids(); + std::set _used{}; + + auto _func = [&](size_t _idx, size_t _beg, size_t _end) { + auto&& _name_generator = [](auto _dev_type, auto _rows, auto _idx_v) { + return (_rows < 2) + ? TIMEMORY_JOIN(" ", std::to_string(_dev_type), "Critical Path") + : TIMEMORY_JOIN(" ", std::to_string(_dev_type), "Critical Path", + _idx_v); + }; + auto _track = + (DevT == Device::NONE) + ? 
::perfetto::ProcessTrack::Current() + : omnitrace::tracing::get_perfetto_track( + category_t{}, std::move(_name_generator), DevT, _nrows, _idx); + for(size_t i = _beg; i < _end; ++i) { if(i >= _data.size()) break; - _data.at(i).generate_perfetto(_used); + _data.at(i).generate_perfetto(_track, _used); } }; for(size_t i = 0; i < _data.size(); i += _nrows) - { - if(DevT == Device::NONE) - _func(i, i, i + _nrows); - else - std::thread{ _func, i, i, i + _nrows }.join(); - } + _func(i, i, i + _nrows); } template class ContainerT, typename... Args, diff --git a/source/lib/omnitrace/library/components/backtrace_metrics.cpp b/source/lib/omnitrace/library/components/backtrace_metrics.cpp index 634e55198a..9eca1133ca 100644 --- a/source/lib/omnitrace/library/components/backtrace_metrics.cpp +++ b/source/lib/omnitrace/library/components/backtrace_metrics.cpp @@ -137,6 +137,13 @@ backtrace_metrics::description() return "Records sampling data"; } +std::vector +backtrace_metrics::get_hw_counter_labels(int64_t _tid) +{ + auto& _v = get_papi_labels(_tid); + return (_v) ? 
*_v : std::vector{}; +} + void backtrace_metrics::start() {} @@ -192,10 +199,8 @@ backtrace_metrics::configure(bool _setup, int64_t _tid) OMNITRACE_DEBUG("HW COUNTER: starting...\n"); if(get_papi_vector(_tid)) { - using common_type_t = typename hw_counters::common_type; get_papi_vector(_tid)->start(); - *get_papi_labels(_tid) = - comp::papi_common::get_config()->labels; + *get_papi_labels(_tid) = get_papi_vector(_tid)->get_config()->labels; } } } diff --git a/source/lib/omnitrace/library/components/backtrace_metrics.hpp b/source/lib/omnitrace/library/components/backtrace_metrics.hpp index f727992d78..7eb6f9cfa7 100644 --- a/source/lib/omnitrace/library/components/backtrace_metrics.hpp +++ b/source/lib/omnitrace/library/components/backtrace_metrics.hpp @@ -71,9 +71,10 @@ struct backtrace_metrics backtrace_metrics& operator=(const backtrace_metrics&) = default; backtrace_metrics& operator=(backtrace_metrics&&) noexcept = default; - static void configure(bool, int64_t _tid = threading::get_id()); - static void init_perfetto(int64_t _tid); - static void fini_perfetto(int64_t _tid); + static void configure(bool, int64_t _tid = threading::get_id()); + static void init_perfetto(int64_t _tid); + static void fini_perfetto(int64_t _tid); + static std::vector get_hw_counter_labels(int64_t); static void start(); static void stop(); diff --git a/source/lib/omnitrace/library/components/exit_gotcha.cpp b/source/lib/omnitrace/library/components/exit_gotcha.cpp index fb3175e55e..39e3bc4a3f 100644 --- a/source/lib/omnitrace/library/components/exit_gotcha.cpp +++ b/source/lib/omnitrace/library/components/exit_gotcha.cpp @@ -50,6 +50,8 @@ exit_gotcha::configure() namespace { +auto _exit_info = exit_gotcha::exit_info{}; + template void invoke_exit_gotcha(const exit_gotcha::gotcha_data& _data, FuncT _func, Args... _args) @@ -87,6 +89,7 @@ invoke_exit_gotcha(const exit_gotcha::gotcha_data& _data, FuncT _func, Args... 
_ void exit_gotcha::operator()(const gotcha_data& _data, exit_func_t _func, int _ec) const { + _exit_info = { true, _data.tool_id.find("quick") != std::string::npos, _ec }; invoke_exit_gotcha(_data, _func, _ec); } @@ -96,5 +99,11 @@ exit_gotcha::operator()(const gotcha_data& _data, abort_func_t _func) const { invoke_exit_gotcha(_data, _func); } + +exit_gotcha::exit_info +exit_gotcha::get_exit_info() +{ + return _exit_info; +} } // namespace component } // namespace omnitrace diff --git a/source/lib/omnitrace/library/components/exit_gotcha.hpp b/source/lib/omnitrace/library/components/exit_gotcha.hpp index 685b018fb9..32489308f7 100644 --- a/source/lib/omnitrace/library/components/exit_gotcha.hpp +++ b/source/lib/omnitrace/library/components/exit_gotcha.hpp @@ -60,6 +60,14 @@ struct exit_gotcha : tim::component::base void operator()(const gotcha_data&, exit_func_t, int) const; // abort void operator()(const gotcha_data&, abort_func_t) const; + + struct exit_info + { + bool is_known = false; + bool is_quick = false; + int exit_code = EXIT_SUCCESS; + }; + static exit_info get_exit_info(); }; } // namespace component diff --git a/source/lib/omnitrace/library/critical_trace.cpp b/source/lib/omnitrace/library/critical_trace.cpp index 3388c9e72a..8d90070cc0 100644 --- a/source/lib/omnitrace/library/critical_trace.cpp +++ b/source/lib/omnitrace/library/critical_trace.cpp @@ -28,6 +28,8 @@ #include "library/ptl.hpp" #include "library/runtime.hpp" #include "library/thread_data.hpp" +#include "library/tracing.hpp" +#include "library/tracing/annotation.hpp" #include #include @@ -438,99 +440,82 @@ call_chain::get_top_chains() return _v; } -template <> +template void -call_chain::generate_perfetto(std::set& _used) const +call_chain::generate_perfetto(::perfetto::Track _track, std::set& _used) const { + OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); + + static std::set _static_strings{}; + static std::mutex _static_mutex{}; + for(const auto& itr : *this) { 
if(!_used.emplace(itr).second) continue; - if(itr.device == Device::CPU) + + auto&& _annotater = [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "begin_ns", itr.begin_ns); + tracing::add_perfetto_annotation(ctx, "end_ns", itr.end_ns); + } + }; + + if constexpr(DevT == Device::NONE) { - TRACE_EVENT_BEGIN("device-critical-trace", "CPU", - static_cast(itr.begin_ns), "begin_ns", - itr.begin_ns); + if(itr.device == Device::CPU) + { + tracing::push_perfetto_track(category::host_critical_trace{}, "CPU", + _track, itr.begin_ns, std::move(_annotater)); + tracing::pop_perfetto_track(category::host_critical_trace{}, "CPU", + _track, itr.end_ns); + } + else if(itr.device == Device::GPU) + { + tracing::push_perfetto_track(category::device_critical_trace{}, "GPU", + _track, itr.begin_ns, std::move(_annotater)); + tracing::pop_perfetto_track(category::device_critical_trace{}, "GPU", + _track, itr.end_ns); + } } - else if(itr.device == Device::GPU) + else { - TRACE_EVENT_BEGIN("device-critical-trace", "GPU", - static_cast(itr.begin_ns), "begin_ns", - itr.begin_ns); + using category_t = std::conditional_t< + DevT == Device::ANY, omnitrace::category::critical_trace, + std::conditional_t>; + + if constexpr(DevT != Device::ANY) + { + if(itr.device != DevT) continue; + } + + std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash)); + _static_mutex.lock(); + auto sitr = _static_strings.emplace(_name); + _static_mutex.unlock(); + + tracing::push_perfetto_track(category_t{}, sitr.first->c_str(), _track, + itr.begin_ns, std::move(_annotater)); + tracing::pop_perfetto_track(category_t{}, sitr.first->c_str(), _track, + itr.end_ns); } - TRACE_EVENT_END("device-critical-trace", static_cast(itr.end_ns), - "end_ns", itr.end_ns); } } -template <> -void -call_chain::generate_perfetto(std::set& _used) const -{ - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - static std::set _static_strings{}; - static 
std::mutex _static_mutex{}; - for(const auto& itr : *this) - { - if(!_used.emplace(itr).second) continue; - if(itr.device != Device::CPU) continue; - std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash)); - _static_mutex.lock(); - auto sitr = _static_strings.emplace(_name); - _static_mutex.unlock(); - TRACE_EVENT_BEGIN("host-critical-trace", - perfetto::StaticString{ sitr.first->c_str() }, - static_cast(itr.begin_ns), "begin_ns", - static_cast(itr.begin_ns)); - TRACE_EVENT_END("host-critical-trace", static_cast(itr.end_ns), - "end_ns", static_cast(itr.end_ns)); - } -} +// explicit instantiations +template void +call_chain::generate_perfetto(::perfetto::Track, std::set&) const; -template <> -void -call_chain::generate_perfetto(std::set& _used) const -{ - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - static std::set _static_strings{}; - static std::mutex _static_mutex{}; - for(const auto& itr : *this) - { - if(!_used.emplace(itr).second) continue; - if(itr.device != Device::GPU) continue; - std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash)); - _static_mutex.lock(); - auto sitr = _static_strings.emplace(_name); - _static_mutex.unlock(); - TRACE_EVENT_BEGIN("device-critical-trace", - perfetto::StaticString{ sitr.first->c_str() }, - static_cast(itr.begin_ns), "begin_ns", - static_cast(itr.begin_ns)); - TRACE_EVENT_END("device-critical-trace", static_cast(itr.end_ns), - "end_ns", static_cast(itr.end_ns)); - } -} +template void +call_chain::generate_perfetto(::perfetto::Track, std::set&) const; -template <> -void -call_chain::generate_perfetto(std::set& _used) const -{ - OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - static std::set _static_strings{}; - static std::mutex _static_mutex{}; - for(const auto& itr : *this) - { - if(!_used.emplace(itr).second) continue; - std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash)); - _static_mutex.lock(); - auto sitr = _static_strings.emplace(_name); - 
_static_mutex.unlock(); - TRACE_EVENT_BEGIN("critical-trace", perfetto::StaticString{ sitr.first->c_str() }, - static_cast(itr.begin_ns), "begin_ns", - static_cast(itr.begin_ns)); - TRACE_EVENT_END("critical-trace", static_cast(itr.end_ns), "end_ns", - static_cast(itr.end_ns)); - } -} +template void +call_chain::generate_perfetto(::perfetto::Track, std::set&) const; + +template void +call_chain::generate_perfetto(::perfetto::Track, std::set&) const; //--------------------------------------------------------------------------------------// // diff --git a/source/lib/omnitrace/library/critical_trace.hpp b/source/lib/omnitrace/library/critical_trace.hpp index 0fca34db04..460ff0c62c 100644 --- a/source/lib/omnitrace/library/critical_trace.hpp +++ b/source/lib/omnitrace/library/critical_trace.hpp @@ -25,6 +25,7 @@ #include "library/common.hpp" #include "library/config.hpp" #include "library/defines.hpp" +#include "library/perfetto.hpp" #include "library/runtime.hpp" #include "library/thread_data.hpp" @@ -245,7 +246,7 @@ struct call_chain : private std::vector } template - void generate_perfetto(std::set& _used) const; + void generate_perfetto(::perfetto::Track, std::set& _used) const; template bool query(FuncT&&) const; @@ -366,3 +367,20 @@ add_critical_trace(int32_t _targ_tid, size_t _cpu_cid, size_t _gpu_cid, _ts_val, _devid, _queue, _hash, _depth, _prio, num_mutexes); } } // namespace omnitrace + +namespace std +{ +inline std::string +to_string(::omnitrace::critical_trace::Device _v) +{ + using Device = ::omnitrace::critical_trace::Device; + switch(_v) + { + case Device::NONE: return std::string{}; + case Device::CPU: return std::string{ "CPU" }; + case Device::GPU: return std::string{ "GPU" }; + case Device::ANY: return std::string{ "CPU + GPU" }; + } + return std::string{ "Unknown Device" }; +} +} // namespace std diff --git a/source/lib/omnitrace/library/ompt.cpp b/source/lib/omnitrace/library/ompt.cpp index c5864cd9de..c833c320b7 100644 --- 
a/source/lib/omnitrace/library/ompt.cpp +++ b/source/lib/omnitrace/library/ompt.cpp @@ -60,6 +60,7 @@ namespace std::unique_ptr f_bundle = {}; bool _init_toolset_off = (trait::runtime_enabled::set(false), trait::runtime_enabled::set(false), true); +tim::ompt::finalize_tool_func_t f_finalize = nullptr; } // namespace void @@ -79,6 +80,9 @@ setup() void shutdown() { + static bool _protect = false; + if(_protect) return; + _protect = true; if(f_bundle) { f_bundle->stop(); @@ -86,21 +90,26 @@ shutdown() trait::runtime_enabled::set(false); trait::runtime_enabled::set(false); comp::user_ompt_bundle::reset(); + // call the OMPT finalize callback + if(f_finalize) (*f_finalize)(); } f_bundle.reset(); + _protect = false; } -} // namespace ompt -} // namespace omnitrace -extern "C" ompt_start_tool_result_t* -ompt_start_tool(unsigned int omp_version, const char* runtime_version) +namespace { - TIMEMORY_PRINTF(stderr, "OpenMP version: %u, runtime version: %s\n", omp_version, - runtime_version); - - OMNITRACE_METADATA("OMP_VERSION", omp_version); - OMNITRACE_METADATA("OMP_RUNTIME_VERSION", runtime_version); +bool& +use_tool() +{ + static bool _v = false; + return _v; +} +int +tool_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t* tool_data) +{ if(!omnitrace::settings_are_configured()) { OMNITRACE_BASIC_WARNING( @@ -111,25 +120,38 @@ ompt_start_tool(unsigned int omp_version, const char* runtime_version) omnitrace::configure_settings(); } - static bool _use_ompt = omnitrace::config::get_use_ompt(); - static auto ompt_initialize = [](ompt_function_lookup_t lookup, - int initial_device_num, - ompt_data_t* tool_data) -> int { - _use_ompt = omnitrace::config::get_use_ompt(); - if(_use_ompt) - { - TIMEMORY_PRINTF(stderr, "OpenMP-tools configuring for initial device %i\n\n", - initial_device_num); - tim::ompt::configure(lookup, initial_device_num, - tool_data); - } - return 1; // success - }; + use_tool() = omnitrace::config::get_use_ompt(); + 
if(use_tool()) + { + TIMEMORY_PRINTF(stderr, "OpenMP-tools configuring for initial device %i\n\n", + initial_device_num); + f_finalize = tim::ompt::configure( + lookup, initial_device_num, tool_data); + } + return 1; // success +} - static auto ompt_finalize = [](ompt_data_t*) {}; +void +tool_finalize(ompt_data_t*) +{ + shutdown(); +} +} // namespace +} // namespace ompt +} // namespace omnitrace - static auto data = ompt_start_tool_result_t{ ompt_initialize, ompt_finalize, { 0 } }; - return (ompt_start_tool_result_t*) &data; +extern "C" ompt_start_tool_result_t* +ompt_start_tool(unsigned int omp_version, const char* runtime_version) +{ + OMNITRACE_BASIC_VERBOSE_F(0, "OpenMP version: %u, runtime version: %s\n", omp_version, + runtime_version); + OMNITRACE_METADATA("OMP_VERSION", omp_version); + OMNITRACE_METADATA("OMP_RUNTIME_VERSION", runtime_version); + + static auto* data = new ompt_start_tool_result_t{ &omnitrace::ompt::tool_initialize, + &omnitrace::ompt::tool_finalize, + { 0 } }; + return data; } #else diff --git a/source/lib/omnitrace/library/sampling.cpp b/source/lib/omnitrace/library/sampling.cpp index 97f56596f4..0e8059f993 100644 --- a/source/lib/omnitrace/library/sampling.cpp +++ b/source/lib/omnitrace/library/sampling.cpp @@ -806,155 +806,166 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init, backtrace_metrics::fini_perfetto(_tid); } - auto _process_perfetto = [_tid, - _init](const std::vector& _data_v) { - thread_info::init(true); - OMNITRACE_VERBOSE(3 || get_debug_sampling(), - "[%li] Post-processing backtraces for perfetto...\n", _tid); + OMNITRACE_VERBOSE(3 || get_debug_sampling(), + "[%li] Post-processing backtraces for perfetto...\n", _tid); - const auto& _thread_info = thread_info::get(_tid, SequentTID); - OMNITRACE_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid); + const auto& _thread_info = thread_info::get(_tid, SequentTID); + OMNITRACE_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid); - 
if(!_thread_info) return; + if(!_thread_info) return; - uint64_t _beg_ns = _thread_info->get_start(); - uint64_t _end_ns = _thread_info->get_stop(); - uint64_t _last_ts = std::max( - _init->get()->get_timestamp(), _beg_ns); + uint64_t _beg_ns = _thread_info->get_start(); + uint64_t _end_ns = _thread_info->get_stop(); + uint64_t _last_ts = + std::max(_init->get()->get_timestamp(), _beg_ns); - tracing::push_perfetto_ts(category::sampling{}, "samples [omnitrace]", _beg_ns, - [&](perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation( - ctx, "begin_ns", _beg_ns); - } - }); + auto _track = tracing::get_perfetto_track( + category::sampling{}, + [](auto _seq_id, auto _sys_id) { + return TIMEMORY_JOIN(" ", "Thread", _seq_id, "(S)", _sys_id); + }, + _thread_info->index_data->sequent_value, _thread_info->index_data->system_value); - auto _as_hex = [](auto _v) { return JOIN("", "0x", std::hex, _v); }; + tracing::push_perfetto_track(category::sampling{}, "samples [omnitrace]", _track, + _beg_ns, [&](perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "begin_ns", + _beg_ns); + } + }); - for(const auto& itr : _data_v) + auto _as_hex = [](auto _v) { return JOIN("", "0x", std::hex, _v); }; + + auto _labels = backtrace_metrics::get_hw_counter_labels(_tid); + for(const auto& itr : _data) + { + const auto* _bt_ts = itr->get(); + const auto* _bt_cs = itr->get(); + const auto* _bt_mt = itr->get(); + + if(!_bt_ts || !_bt_cs || !_bt_mt) continue; + if(_bt_ts->get_tid() != _tid) continue; + + static std::set _static_strings{}; + const auto* _last = _init; + auto _patched_data = backtrace::filter_and_patch(_bt_cs->get()); + size_t _ncount = 0; + for(const auto& iitr : _patched_data) { - const auto* _bt_ts = itr->get(); - const auto* _bt_cs = itr->get(); + uint64_t _beg = _last_ts; + uint64_t _end = _bt_ts->get_timestamp(); + if(!_thread_info->is_valid_lifetime({ _beg, _end 
})) continue; - if(!_bt_ts || !_bt_cs) continue; - if(_bt_ts->get_tid() != _tid) continue; + auto _ncur = _ncount++; + // the begin/end + HW counters will be same for entire call-stack so only + // annotate + // the top and the bottom functions to keep the data consumption low + bool _include_common = (_ncur == 0 || _ncur + 1 == _patched_data.size()); - static std::set _static_strings{}; - for(const auto& iitr : backtrace::filter_and_patch(_bt_cs->get())) - { - uint64_t _beg = _last_ts; - uint64_t _end = _bt_ts->get_timestamp(); - if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue; + // Only annotate HW counters when: + // 1. when we can compute a difference from the last sample + // 2. when the number of HW counters b/t this sample and last are the same + bool _include_hw = + _include_common && (_last != nullptr) && + _bt_mt->get_hw_counters().size() == + _last->get()->get_hw_counters().size(); - if(get_sampling_include_inlines() && iitr.lineinfo) + // annotations common to both modes + auto _common_annotate = [&](::perfetto::EventContext& ctx, bool _is_last) { + if(_include_common && _is_last) { - auto _lines = iitr.lineinfo.lines; - std::reverse(_lines.begin(), _lines.end()); - size_t _n = 0; - for(const auto& litr : _lines) - { - const auto* _name = - _static_strings.emplace(demangle(litr.name)).first->c_str(); - auto _info = JOIN(':', litr.location, litr.line); - tracing::push_perfetto_ts( - category::sampling{}, _name, _beg, - [&](perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "begin_ns", - _beg); - tracing::add_perfetto_annotation(ctx, "lineinfo", - _info); - tracing::add_perfetto_annotation(ctx, "inlined", - (_n++ > 0)); - } - }); - tracing::pop_perfetto_ts( - category::sampling{}, _name, _end, - [&](perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "end_ns", _end); - } - }); - } + 
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg); + tracing::add_perfetto_annotation(ctx, "end_ns", _end); } - else + if(_include_hw && _is_last) { - const auto* _name = _static_strings.emplace(iitr.name).first->c_str(); - tracing::push_perfetto_ts( - category::sampling{}, _name, _beg, + // current values when read + auto _hw_cnt_vals = _bt_mt->get_hw_counters(); + // compute difference from last sample to provide the HW counters for + // this sample + tim::math::minus(_hw_cnt_vals, + _last->get()->get_hw_counters()); + for(size_t i = 0; i < _labels.size(); ++i) + tracing::add_perfetto_annotation(ctx, _labels.at(i), + _hw_cnt_vals.at(i)); + } + }; + + if(get_sampling_include_inlines() && iitr.lineinfo) + { + auto _lines = iitr.lineinfo.lines; + std::reverse(_lines.begin(), _lines.end()); + size_t _n = 0; + for(const auto& litr : _lines) + { + const auto* _name = + _static_strings.emplace(demangle(litr.name)).first->c_str(); + auto _info = JOIN(':', litr.location, litr.line); + tracing::push_perfetto_track( + category::sampling{}, _name, _track, _beg, [&](perfetto::EventContext ctx) { if(config::get_perfetto_annotations()) { - tracing::add_perfetto_annotation(ctx, "begin_ns", _beg); + _common_annotate(ctx, (_n == 0 && _ncur == 0) || + (_n + 1 == _lines.size())); tracing::add_perfetto_annotation(ctx, "file", iitr.location); - tracing::add_perfetto_annotation(ctx, "pc", - _as_hex(iitr.address)); - tracing::add_perfetto_annotation( - ctx, "line_address", _as_hex(iitr.line_address)); - if(iitr.lineinfo) - { - auto _lines = iitr.lineinfo.lines; - std::reverse(_lines.begin(), _lines.end()); - size_t _n = 0; - for(const auto& litr : _lines) - { - auto _label = JOIN('-', "lineinfo", _n++); - tracing::add_perfetto_annotation( - ctx, _label.c_str(), - JOIN('@', demangle(litr.name), - JOIN(':', litr.location, litr.line))); - } - } + tracing::add_perfetto_annotation(ctx, "lineinfo", _info); + tracing::add_perfetto_annotation(ctx, "inlined", + (_n++ > 0)); } }); - - 
tracing::pop_perfetto_ts(category::sampling{}, _name, _end, - [&](perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation( - ctx, "end_ns", _end); - } - }); + tracing::pop_perfetto_track(category::sampling{}, _name, _track, + _end); } } - _last_ts = _bt_ts->get_timestamp(); + else + { + const auto* _name = _static_strings.emplace(iitr.name).first->c_str(); + tracing::push_perfetto_track( + category::sampling{}, _name, _track, _beg, + [&](perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + _common_annotate(ctx, true); + tracing::add_perfetto_annotation(ctx, "file", iitr.location); + tracing::add_perfetto_annotation(ctx, "pc", + _as_hex(iitr.address)); + tracing::add_perfetto_annotation(ctx, "line_address", + _as_hex(iitr.line_address)); + if(iitr.lineinfo) + { + auto _lines = iitr.lineinfo.lines; + std::reverse(_lines.begin(), _lines.end()); + size_t _n = 0; + for(const auto& litr : _lines) + { + auto _label = JOIN('-', "lineinfo", _n++); + tracing::add_perfetto_annotation( + ctx, _label.c_str(), + JOIN('@', demangle(litr.name), + JOIN(':', litr.location, litr.line))); + } + } + } + }); + + tracing::pop_perfetto_track(category::sampling{}, _name, _track, _end); + } } + _last_ts = _bt_ts->get_timestamp(); + _last = itr; + } - tracing::pop_perfetto_ts(category::sampling{}, "samples [omnitrace]", _end_ns, - [&](perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "end_ns", - _end_ns); - } - }); - }; - - auto _processing_thread = threading::get_tid(); - auto _process_perfetto_wrapper = [&]() { - if(threading::get_tid() != _processing_thread) - threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid, "(S)").c_str()); - - try - { - _process_perfetto(_data); - } catch(std::runtime_error& _e) - { - OMNITRACE_PRINT("[sampling][post_process_perfetto] Exception: %s\n", - _e.what()); - OMNITRACE_CI_ABORT(true, 
"[sampling][post_process_perfetto] Exception: %s\n", - _e.what()); - } - }; - - OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false); - std::thread{ _process_perfetto_wrapper }.join(); + tracing::pop_perfetto_track(category::sampling{}, "samples [omnitrace]", _track, + _end_ns, [&](perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "end_ns", + _end_ns); + } + }); } void