Perfetto updates (#211)
* Retry support in build-docker-release.sh * critical-trace perfetto update - use ::perfetto::Track instead of threads to create rows - refactor call_chain::generate_perfetto * Fix backtrace_metrics for perfetto - get_papi_labels is now properly populated * Refactor sampling::post_process_perfetto - include HW counter delta in sample debug annotations - reduce the amount of debug annotation data stored in the call-stack - if the data is common to the entire stack, it is only annotated in the first and the last call-stack entry * exit_gotcha::exit_info * Improve OMPT shutdown - cause spurious test failures * Update source/lib/omnitrace/library/ompt.cpp
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
589a729702
Коммит
2ebfe3fc30
@@ -33,8 +33,8 @@ usage()
|
||||
print_default_option versions "[VERSION] [VERSION...]" "Ubuntu or OpenSUSE release" "${VERSIONS}"
|
||||
print_default_option rocm-versions "[VERSION] [VERSION...]" "ROCm versions" "${ROCM_VERSIONS}"
|
||||
print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}"
|
||||
print_default_option user "[USERNAME]" "DockerHub username" "${USER}"
|
||||
#print_default_option lto "[on|off]" "Enable LTO" "${LTO}"
|
||||
print_default_option "user -u" "[USERNAME]" "DockerHub username" "${USER}"
|
||||
print_default_option "retry -r" "[N]" "Number of attempts to build (to account for network errors)" "${RETRY}"
|
||||
|
||||
echo ""
|
||||
echo "Usage: ${BASH_SOURCE[0]} <OPTIONS> -- <build-release.sh OPTIONS>"
|
||||
@@ -52,8 +52,26 @@ send-error()
|
||||
|
||||
# Run a command, retrying it on failure.
#
# Arguments: the command line to run. It is re-parsed with `eval`, so callers
#            may pass escaped quotes to keep multi-word values together.
# Globals:   RETRY - maximum number of attempts (positive integer).
# Exits:     with the command's exit code once every attempt has failed, or
#            with 1 when RETRY is not a positive integer.
verbose-run()
{
    local i
    local RETC

    # `seq 1 1 N` prints nothing for N < 1 (or garbage for a non-number), which
    # would skip the command entirely and still report success
    if ! [[ "${RETRY}" =~ ^[1-9][0-9]*$ ]]; then
        echo -e "\n### Invalid retry count \"${RETRY}\" (expected a positive integer)... ###\n" 1>&2
        exit 1
    fi

    echo -e "\n### Executing \"${@}\" a maximum of ${RETRY} times... ###\n"
    for i in $(seq 1 1 ${RETRY})
    do
        # errexit is suspended so that a failing attempt can be inspected
        set +e
        eval "${@}"
        RETC=$?
        set -e
        if [ "${RETC}" -eq 0 ]; then
            break
        else
            echo -en "\n### Command failed with error code ${RETC}... "
            if [ "${i}" -ne "${RETRY}" ]; then
                echo -e "Retrying... ###\n"
                sleep 3
            else
                echo -e "Exiting... ###\n"
                exit ${RETC}
            fi
        fi
    done
}
|
||||
|
||||
build-release()
|
||||
@@ -68,7 +86,7 @@ build-release()
|
||||
shift
|
||||
local DOCKER_ARGS=""
|
||||
tty -s && DOCKER_ARGS="-it" || DOCKER_ARGS=""
|
||||
verbose-run docker run ${DOCKER_ARGS} --rm -v ${PWD}:/home/omnitrace --stop-signal "SIGINT" --env DISTRO=${OS} --env ROCM_VERSION=${ROCM_VERSION} --env VERSION=${CODE_VERSION} --env PYTHON_VERSIONS="${PYTHON_VERSIONS}" --env IS_DOCKER=1 ${CONTAINER} /home/omnitrace/scripts/build-release.sh ${@}
|
||||
verbose-run docker run ${DOCKER_ARGS} --rm -v ${PWD}:/home/omnitrace --stop-signal "SIGINT" --env DISTRO=${OS} --env ROCM_VERSION=${ROCM_VERSION} --env VERSION=${CODE_VERSION} --env PYTHON_VERSIONS=\"${PYTHON_VERSIONS}\" --env IS_DOCKER=1 ${CONTAINER} /home/omnitrace/scripts/build-release.sh ${@}
|
||||
}
|
||||
|
||||
reset-last()
|
||||
@@ -84,6 +102,7 @@ reset-last
|
||||
: ${ROCM_VERSIONS:=5.0 4.5 4.3}
|
||||
: ${MPI:=0}
|
||||
: ${PYTHON_VERSIONS:="6 7 8 9 10"}
|
||||
: ${RETRY:=3}
|
||||
|
||||
n=0
|
||||
while [[ $# -gt 0 ]]
|
||||
@@ -118,6 +137,11 @@ do
|
||||
USER=${1}
|
||||
reset-last
|
||||
;;
|
||||
--retry|-r)
|
||||
shift
|
||||
RETRY=${1}
|
||||
reset-last
|
||||
;;
|
||||
"--")
|
||||
shift
|
||||
SCRIPT_ARGS=${@}
|
||||
|
||||
@@ -34,7 +34,7 @@ usage()
|
||||
print_default_option rocm-versions "[VERSION] [VERSION...]" "ROCm versions" "${ROCM_VERSIONS}"
|
||||
print_default_option python-versions "[VERSION] [VERSION...]" "Python 3 minor releases" "${PYTHON_VERSIONS}"
|
||||
print_default_option "user -u" "[USERNAME]" "DockerHub username" "${USER}"
|
||||
print_default_option "retry -r" "[N]" "Number of attempts to build (to account for network errors" "${RETRY}"
|
||||
print_default_option "retry -r" "[N]" "Number of attempts to build (to account for network errors)" "${RETRY}"
|
||||
print_default_option push "" "Push the image to Dockerhub" ""
|
||||
#print_default_option lto "[on|off]" "Enable LTO" "${LTO}"
|
||||
}
|
||||
|
||||
@@ -26,10 +26,12 @@
|
||||
#include "library/config.hpp"
|
||||
#include "library/debug.hpp"
|
||||
#include "library/perfetto.hpp"
|
||||
#include "library/tracing.hpp"
|
||||
|
||||
#include <timemory/hash/types.hpp>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <type_traits>
|
||||
|
||||
namespace config = omnitrace::config;
|
||||
namespace critical_trace = omnitrace::critical_trace;
|
||||
@@ -750,36 +752,42 @@ generate_perfetto(const std::vector<call_chain>& _data)
|
||||
// run in separate thread(s) so that it ends up in unique row
|
||||
if(_nrows < 1) _nrows = _data.size();
|
||||
|
||||
std::string _dev = (DevT == Device::NONE) ? ""
|
||||
: (DevT == Device::ANY) ? "CPU + GPU "
|
||||
: (DevT == Device::CPU) ? "CPU "
|
||||
: "GPU ";
|
||||
std::string _cpname = _dev + "CritPath";
|
||||
auto _func = [&](size_t _idx, size_t _beg, size_t _end) {
|
||||
if(DevT != Device::NONE)
|
||||
{
|
||||
if(_nrows != 1)
|
||||
threading::set_thread_name(TIMEMORY_JOIN(" ", _cpname, _idx).c_str());
|
||||
else
|
||||
threading::set_thread_name(_cpname.c_str());
|
||||
}
|
||||
// ensure all hash ids exist
|
||||
copy_hash_ids();
|
||||
std::set<entry> _used{};
|
||||
std::string _dev = (DevT == Device::NONE) ? ""
|
||||
: (DevT == Device::ANY) ? "CPU + GPU "
|
||||
: (DevT == Device::CPU) ? "CPU "
|
||||
: "GPU ";
|
||||
|
||||
using category_t = std::conditional_t<
|
||||
DevT == Device::ANY, omnitrace::category::critical_trace,
|
||||
std::conditional_t<DevT == Device::CPU, omnitrace::category::host_critical_trace,
|
||||
omnitrace::category::device_critical_trace>>;
|
||||
|
||||
// ensure all hash ids exist
|
||||
copy_hash_ids();
|
||||
std::set<entry> _used{};
|
||||
|
||||
auto _func = [&](size_t _idx, size_t _beg, size_t _end) {
|
||||
auto&& _name_generator = [](auto _dev_type, auto _rows, auto _idx_v) {
|
||||
return (_rows < 2)
|
||||
? TIMEMORY_JOIN(" ", std::to_string(_dev_type), "Critical Path")
|
||||
: TIMEMORY_JOIN(" ", std::to_string(_dev_type), "Critical Path",
|
||||
_idx_v);
|
||||
};
|
||||
auto _track =
|
||||
(DevT == Device::NONE)
|
||||
? ::perfetto::ProcessTrack::Current()
|
||||
: omnitrace::tracing::get_perfetto_track(
|
||||
category_t{}, std::move(_name_generator), DevT, _nrows, _idx);
|
||||
|
||||
for(size_t i = _beg; i < _end; ++i)
|
||||
{
|
||||
if(i >= _data.size()) break;
|
||||
_data.at(i).generate_perfetto<DevT>(_used);
|
||||
_data.at(i).generate_perfetto<DevT>(_track, _used);
|
||||
}
|
||||
};
|
||||
|
||||
for(size_t i = 0; i < _data.size(); i += _nrows)
|
||||
{
|
||||
if(DevT == Device::NONE)
|
||||
_func(i, i, i + _nrows);
|
||||
else
|
||||
std::thread{ _func, i, i, i + _nrows }.join();
|
||||
}
|
||||
_func(i, i, i + _nrows);
|
||||
}
|
||||
|
||||
template <typename Tp, template <typename...> class ContainerT, typename... Args,
|
||||
|
||||
@@ -137,6 +137,13 @@ backtrace_metrics::description()
|
||||
return "Records sampling data";
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
backtrace_metrics::get_hw_counter_labels(int64_t _tid)
|
||||
{
|
||||
auto& _v = get_papi_labels(_tid);
|
||||
return (_v) ? *_v : std::vector<std::string>{};
|
||||
}
|
||||
|
||||
void
|
||||
backtrace_metrics::start()
|
||||
{}
|
||||
@@ -192,10 +199,8 @@ backtrace_metrics::configure(bool _setup, int64_t _tid)
|
||||
OMNITRACE_DEBUG("HW COUNTER: starting...\n");
|
||||
if(get_papi_vector(_tid))
|
||||
{
|
||||
using common_type_t = typename hw_counters::common_type;
|
||||
get_papi_vector(_tid)->start();
|
||||
*get_papi_labels(_tid) =
|
||||
comp::papi_common<common_type_t>::get_config()->labels;
|
||||
*get_papi_labels(_tid) = get_papi_vector(_tid)->get_config()->labels;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,9 +71,10 @@ struct backtrace_metrics
|
||||
backtrace_metrics& operator=(const backtrace_metrics&) = default;
|
||||
backtrace_metrics& operator=(backtrace_metrics&&) noexcept = default;
|
||||
|
||||
static void configure(bool, int64_t _tid = threading::get_id());
|
||||
static void init_perfetto(int64_t _tid);
|
||||
static void fini_perfetto(int64_t _tid);
|
||||
static void configure(bool, int64_t _tid = threading::get_id());
|
||||
static void init_perfetto(int64_t _tid);
|
||||
static void fini_perfetto(int64_t _tid);
|
||||
static std::vector<std::string> get_hw_counter_labels(int64_t);
|
||||
|
||||
static void start();
|
||||
static void stop();
|
||||
|
||||
@@ -50,6 +50,8 @@ exit_gotcha::configure()
|
||||
|
||||
namespace
|
||||
{
|
||||
auto _exit_info = exit_gotcha::exit_info{};
|
||||
|
||||
template <typename FuncT, typename... Args>
|
||||
void
|
||||
invoke_exit_gotcha(const exit_gotcha::gotcha_data& _data, FuncT _func, Args... _args)
|
||||
@@ -87,6 +89,7 @@ invoke_exit_gotcha(const exit_gotcha::gotcha_data& _data, FuncT _func, Args... _
|
||||
void
|
||||
exit_gotcha::operator()(const gotcha_data& _data, exit_func_t _func, int _ec) const
|
||||
{
|
||||
_exit_info = { true, _data.tool_id.find("quick") != std::string::npos, _ec };
|
||||
invoke_exit_gotcha(_data, _func, _ec);
|
||||
}
|
||||
|
||||
@@ -96,5 +99,11 @@ exit_gotcha::operator()(const gotcha_data& _data, abort_func_t _func) const
|
||||
{
|
||||
invoke_exit_gotcha(_data, _func);
|
||||
}
|
||||
|
||||
/// Return, by value, the file-local exit record that the exit overload of
/// operator() overwrites when it is invoked.
exit_gotcha::exit_info
exit_gotcha::get_exit_info()
{
    exit_info _snapshot = _exit_info;
    return _snapshot;
}
|
||||
} // namespace component
|
||||
} // namespace omnitrace
|
||||
|
||||
@@ -60,6 +60,14 @@ struct exit_gotcha : tim::component::base<exit_gotcha, void>
|
||||
void operator()(const gotcha_data&, exit_func_t, int) const;
|
||||
// abort
|
||||
void operator()(const gotcha_data&, abort_func_t) const;
|
||||
|
||||
/// Plain record describing how the process exited.
struct exit_info
{
    bool is_known{ false };          ///< defaults to false
    bool is_quick{ false };          ///< defaults to false
    int  exit_code{ EXIT_SUCCESS };  ///< defaults to EXIT_SUCCESS
};
|
||||
static exit_info get_exit_info();
|
||||
};
|
||||
} // namespace component
|
||||
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
#include "library/ptl.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include "library/tracing.hpp"
|
||||
#include "library/tracing/annotation.hpp"
|
||||
|
||||
#include <PTL/ThreadPool.hh>
|
||||
#include <timemory/backends/dmp.hpp>
|
||||
@@ -438,99 +440,82 @@ call_chain::get_top_chains()
|
||||
return _v;
|
||||
}
|
||||
|
||||
template <>
|
||||
template <Device DevT>
|
||||
void
|
||||
call_chain::generate_perfetto<Device::NONE>(std::set<entry>& _used) const
|
||||
call_chain::generate_perfetto(::perfetto::Track _track, std::set<entry>& _used) const
|
||||
{
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
|
||||
static std::set<std::string> _static_strings{};
|
||||
static std::mutex _static_mutex{};
|
||||
|
||||
for(const auto& itr : *this)
|
||||
{
|
||||
if(!_used.emplace(itr).second) continue;
|
||||
if(itr.device == Device::CPU)
|
||||
|
||||
auto&& _annotater = [&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns", itr.begin_ns);
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", itr.end_ns);
|
||||
}
|
||||
};
|
||||
|
||||
if constexpr(DevT == Device::NONE)
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device-critical-trace", "CPU",
|
||||
static_cast<uint64_t>(itr.begin_ns), "begin_ns",
|
||||
itr.begin_ns);
|
||||
if(itr.device == Device::CPU)
|
||||
{
|
||||
tracing::push_perfetto_track(category::host_critical_trace{}, "CPU",
|
||||
_track, itr.begin_ns, std::move(_annotater));
|
||||
tracing::pop_perfetto_track(category::host_critical_trace{}, "CPU",
|
||||
_track, itr.end_ns);
|
||||
}
|
||||
else if(itr.device == Device::GPU)
|
||||
{
|
||||
tracing::push_perfetto_track(category::device_critical_trace{}, "GPU",
|
||||
_track, itr.begin_ns, std::move(_annotater));
|
||||
tracing::pop_perfetto_track(category::device_critical_trace{}, "GPU",
|
||||
_track, itr.end_ns);
|
||||
}
|
||||
}
|
||||
else if(itr.device == Device::GPU)
|
||||
else
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device-critical-trace", "GPU",
|
||||
static_cast<uint64_t>(itr.begin_ns), "begin_ns",
|
||||
itr.begin_ns);
|
||||
using category_t = std::conditional_t<
|
||||
DevT == Device::ANY, omnitrace::category::critical_trace,
|
||||
std::conditional_t<DevT == Device::CPU,
|
||||
omnitrace::category::host_critical_trace,
|
||||
omnitrace::category::device_critical_trace>>;
|
||||
|
||||
if constexpr(DevT != Device::ANY)
|
||||
{
|
||||
if(itr.device != DevT) continue;
|
||||
}
|
||||
|
||||
std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash));
|
||||
_static_mutex.lock();
|
||||
auto sitr = _static_strings.emplace(_name);
|
||||
_static_mutex.unlock();
|
||||
|
||||
tracing::push_perfetto_track(category_t{}, sitr.first->c_str(), _track,
|
||||
itr.begin_ns, std::move(_annotater));
|
||||
tracing::pop_perfetto_track(category_t{}, sitr.first->c_str(), _track,
|
||||
itr.end_ns);
|
||||
}
|
||||
TRACE_EVENT_END("device-critical-trace", static_cast<uint64_t>(itr.end_ns),
|
||||
"end_ns", itr.end_ns);
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void
|
||||
call_chain::generate_perfetto<Device::CPU>(std::set<entry>& _used) const
|
||||
{
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
static std::set<std::string> _static_strings{};
|
||||
static std::mutex _static_mutex{};
|
||||
for(const auto& itr : *this)
|
||||
{
|
||||
if(!_used.emplace(itr).second) continue;
|
||||
if(itr.device != Device::CPU) continue;
|
||||
std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash));
|
||||
_static_mutex.lock();
|
||||
auto sitr = _static_strings.emplace(_name);
|
||||
_static_mutex.unlock();
|
||||
TRACE_EVENT_BEGIN("host-critical-trace",
|
||||
perfetto::StaticString{ sitr.first->c_str() },
|
||||
static_cast<uint64_t>(itr.begin_ns), "begin_ns",
|
||||
static_cast<uint64_t>(itr.begin_ns));
|
||||
TRACE_EVENT_END("host-critical-trace", static_cast<uint64_t>(itr.end_ns),
|
||||
"end_ns", static_cast<uint64_t>(itr.end_ns));
|
||||
}
|
||||
}
|
||||
// explicit instantiations
|
||||
template void
|
||||
call_chain::generate_perfetto<Device::NONE>(::perfetto::Track, std::set<entry>&) const;
|
||||
|
||||
template <>
|
||||
void
|
||||
call_chain::generate_perfetto<Device::GPU>(std::set<entry>& _used) const
|
||||
{
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
static std::set<std::string> _static_strings{};
|
||||
static std::mutex _static_mutex{};
|
||||
for(const auto& itr : *this)
|
||||
{
|
||||
if(!_used.emplace(itr).second) continue;
|
||||
if(itr.device != Device::GPU) continue;
|
||||
std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash));
|
||||
_static_mutex.lock();
|
||||
auto sitr = _static_strings.emplace(_name);
|
||||
_static_mutex.unlock();
|
||||
TRACE_EVENT_BEGIN("device-critical-trace",
|
||||
perfetto::StaticString{ sitr.first->c_str() },
|
||||
static_cast<uint64_t>(itr.begin_ns), "begin_ns",
|
||||
static_cast<uint64_t>(itr.begin_ns));
|
||||
TRACE_EVENT_END("device-critical-trace", static_cast<uint64_t>(itr.end_ns),
|
||||
"end_ns", static_cast<uint64_t>(itr.end_ns));
|
||||
}
|
||||
}
|
||||
template void
|
||||
call_chain::generate_perfetto<Device::CPU>(::perfetto::Track, std::set<entry>&) const;
|
||||
|
||||
template <>
|
||||
void
|
||||
call_chain::generate_perfetto<Device::ANY>(std::set<entry>& _used) const
|
||||
{
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
static std::set<std::string> _static_strings{};
|
||||
static std::mutex _static_mutex{};
|
||||
for(const auto& itr : *this)
|
||||
{
|
||||
if(!_used.emplace(itr).second) continue;
|
||||
std::string _name = tim::demangle(tim::get_hash_identifier(itr.hash));
|
||||
_static_mutex.lock();
|
||||
auto sitr = _static_strings.emplace(_name);
|
||||
_static_mutex.unlock();
|
||||
TRACE_EVENT_BEGIN("critical-trace", perfetto::StaticString{ sitr.first->c_str() },
|
||||
static_cast<uint64_t>(itr.begin_ns), "begin_ns",
|
||||
static_cast<uint64_t>(itr.begin_ns));
|
||||
TRACE_EVENT_END("critical-trace", static_cast<uint64_t>(itr.end_ns), "end_ns",
|
||||
static_cast<uint64_t>(itr.end_ns));
|
||||
}
|
||||
}
|
||||
template void
|
||||
call_chain::generate_perfetto<Device::GPU>(::perfetto::Track, std::set<entry>&) const;
|
||||
|
||||
template void
|
||||
call_chain::generate_perfetto<Device::ANY>(::perfetto::Track, std::set<entry>&) const;
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
//
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "library/common.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/defines.hpp"
|
||||
#include "library/perfetto.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
|
||||
@@ -245,7 +246,7 @@ struct call_chain : private std::vector<entry>
|
||||
}
|
||||
|
||||
template <Device DevT>
|
||||
void generate_perfetto(std::set<entry>& _used) const;
|
||||
void generate_perfetto(::perfetto::Track, std::set<entry>& _used) const;
|
||||
|
||||
template <bool BoolV = true, typename FuncT>
|
||||
bool query(FuncT&&) const;
|
||||
@@ -366,3 +367,20 @@ add_critical_trace(int32_t _targ_tid, size_t _cpu_cid, size_t _gpu_cid,
|
||||
_ts_val, _devid, _queue, _hash, _depth, _prio, num_mutexes);
|
||||
}
|
||||
} // namespace omnitrace
|
||||
|
||||
namespace std
|
||||
{
|
||||
inline std::string
|
||||
to_string(::omnitrace::critical_trace::Device _v)
|
||||
{
|
||||
using Device = ::omnitrace::critical_trace::Device;
|
||||
switch(_v)
|
||||
{
|
||||
case Device::NONE: return std::string{};
|
||||
case Device::CPU: return std::string{ "CPU" };
|
||||
case Device::GPU: return std::string{ "GPU" };
|
||||
case Device::ANY: return std::string{ "CPU + GPU" };
|
||||
}
|
||||
return std::string{ "Unknown Device" };
|
||||
}
|
||||
} // namespace std
|
||||
|
||||
@@ -60,6 +60,7 @@ namespace
|
||||
std::unique_ptr<ompt_bundle_t> f_bundle = {};
|
||||
bool _init_toolset_off = (trait::runtime_enabled<ompt_toolset_t>::set(false),
|
||||
trait::runtime_enabled<ompt_context_t>::set(false), true);
|
||||
tim::ompt::finalize_tool_func_t f_finalize = nullptr;
|
||||
} // namespace
|
||||
|
||||
void
|
||||
@@ -79,6 +80,9 @@ setup()
|
||||
void
|
||||
shutdown()
|
||||
{
|
||||
static bool _protect = false;
|
||||
if(_protect) return;
|
||||
_protect = true;
|
||||
if(f_bundle)
|
||||
{
|
||||
f_bundle->stop();
|
||||
@@ -86,21 +90,26 @@ shutdown()
|
||||
trait::runtime_enabled<ompt_toolset_t>::set(false);
|
||||
trait::runtime_enabled<ompt_context_t>::set(false);
|
||||
comp::user_ompt_bundle::reset();
|
||||
// call the OMPT finalize callback
|
||||
if(f_finalize) (*f_finalize)();
|
||||
}
|
||||
f_bundle.reset();
|
||||
_protect = false;
|
||||
}
|
||||
} // namespace ompt
|
||||
} // namespace omnitrace
|
||||
|
||||
extern "C" ompt_start_tool_result_t*
|
||||
ompt_start_tool(unsigned int omp_version, const char* runtime_version)
|
||||
namespace
|
||||
{
|
||||
TIMEMORY_PRINTF(stderr, "OpenMP version: %u, runtime version: %s\n", omp_version,
|
||||
runtime_version);
|
||||
|
||||
OMNITRACE_METADATA("OMP_VERSION", omp_version);
|
||||
OMNITRACE_METADATA("OMP_RUNTIME_VERSION", runtime_version);
|
||||
// Accessor for a function-local flag (initially false); returns a mutable
// reference so that callers can both read and assign it.
bool&
use_tool()
{
    static auto _enabled = bool{ false };
    return _enabled;
}
|
||||
|
||||
int
|
||||
tool_initialize(ompt_function_lookup_t lookup, int initial_device_num,
|
||||
ompt_data_t* tool_data)
|
||||
{
|
||||
if(!omnitrace::settings_are_configured())
|
||||
{
|
||||
OMNITRACE_BASIC_WARNING(
|
||||
@@ -111,25 +120,38 @@ ompt_start_tool(unsigned int omp_version, const char* runtime_version)
|
||||
omnitrace::configure_settings();
|
||||
}
|
||||
|
||||
static bool _use_ompt = omnitrace::config::get_use_ompt();
|
||||
static auto ompt_initialize = [](ompt_function_lookup_t lookup,
|
||||
int initial_device_num,
|
||||
ompt_data_t* tool_data) -> int {
|
||||
_use_ompt = omnitrace::config::get_use_ompt();
|
||||
if(_use_ompt)
|
||||
{
|
||||
TIMEMORY_PRINTF(stderr, "OpenMP-tools configuring for initial device %i\n\n",
|
||||
initial_device_num);
|
||||
tim::ompt::configure<TIMEMORY_OMPT_API_TAG>(lookup, initial_device_num,
|
||||
tool_data);
|
||||
}
|
||||
return 1; // success
|
||||
};
|
||||
use_tool() = omnitrace::config::get_use_ompt();
|
||||
if(use_tool())
|
||||
{
|
||||
TIMEMORY_PRINTF(stderr, "OpenMP-tools configuring for initial device %i\n\n",
|
||||
initial_device_num);
|
||||
f_finalize = tim::ompt::configure<TIMEMORY_OMPT_API_TAG>(
|
||||
lookup, initial_device_num, tool_data);
|
||||
}
|
||||
return 1; // success
|
||||
}
|
||||
|
||||
static auto ompt_finalize = [](ompt_data_t*) {};
|
||||
void
|
||||
tool_finalize(ompt_data_t*)
|
||||
{
|
||||
shutdown();
|
||||
}
|
||||
} // namespace
|
||||
} // namespace ompt
|
||||
} // namespace omnitrace
|
||||
|
||||
static auto data = ompt_start_tool_result_t{ ompt_initialize, ompt_finalize, { 0 } };
|
||||
return (ompt_start_tool_result_t*) &data;
|
||||
/// OMPT tool entry point: logs the OpenMP and runtime versions, records both as
/// metadata, and returns the initialize/finalize callbacks.
///
/// \param omp_version      OpenMP version number as passed in by the caller
/// \param runtime_version  runtime version string as passed in by the caller
/// \return pointer to a result struct that is heap-allocated on the first call and
///         reused (never freed) on every later call
extern "C" ompt_start_tool_result_t*
ompt_start_tool(unsigned int omp_version, const char* runtime_version)
{
    OMNITRACE_BASIC_VERBOSE_F(0, "OpenMP version: %u, runtime version: %s\n", omp_version,
                              runtime_version);

    OMNITRACE_METADATA("OMP_VERSION", omp_version);
    OMNITRACE_METADATA("OMP_RUNTIME_VERSION", runtime_version);

    // allocated exactly once (function-local static initializer)
    static ompt_start_tool_result_t* const _result = new ompt_start_tool_result_t{
        &omnitrace::ompt::tool_initialize, &omnitrace::ompt::tool_finalize, { 0 }
    };

    return _result;
}
|
||||
|
||||
#else
|
||||
|
||||
@@ -806,155 +806,166 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init,
|
||||
backtrace_metrics::fini_perfetto(_tid);
|
||||
}
|
||||
|
||||
auto _process_perfetto = [_tid,
|
||||
_init](const std::vector<sampling::bundle_t*>& _data_v) {
|
||||
thread_info::init(true);
|
||||
OMNITRACE_VERBOSE(3 || get_debug_sampling(),
|
||||
"[%li] Post-processing backtraces for perfetto...\n", _tid);
|
||||
OMNITRACE_VERBOSE(3 || get_debug_sampling(),
|
||||
"[%li] Post-processing backtraces for perfetto...\n", _tid);
|
||||
|
||||
const auto& _thread_info = thread_info::get(_tid, SequentTID);
|
||||
OMNITRACE_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
|
||||
const auto& _thread_info = thread_info::get(_tid, SequentTID);
|
||||
OMNITRACE_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
|
||||
|
||||
if(!_thread_info) return;
|
||||
if(!_thread_info) return;
|
||||
|
||||
uint64_t _beg_ns = _thread_info->get_start();
|
||||
uint64_t _end_ns = _thread_info->get_stop();
|
||||
uint64_t _last_ts = std::max<uint64_t>(
|
||||
_init->get<backtrace_timestamp>()->get_timestamp(), _beg_ns);
|
||||
uint64_t _beg_ns = _thread_info->get_start();
|
||||
uint64_t _end_ns = _thread_info->get_stop();
|
||||
uint64_t _last_ts =
|
||||
std::max<uint64_t>(_init->get<backtrace_timestamp>()->get_timestamp(), _beg_ns);
|
||||
|
||||
tracing::push_perfetto_ts(category::sampling{}, "samples [omnitrace]", _beg_ns,
|
||||
[&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, "begin_ns", _beg_ns);
|
||||
}
|
||||
});
|
||||
auto _track = tracing::get_perfetto_track(
|
||||
category::sampling{},
|
||||
[](auto _seq_id, auto _sys_id) {
|
||||
return TIMEMORY_JOIN(" ", "Thread", _seq_id, "(S)", _sys_id);
|
||||
},
|
||||
_thread_info->index_data->sequent_value, _thread_info->index_data->system_value);
|
||||
|
||||
auto _as_hex = [](auto _v) { return JOIN("", "0x", std::hex, _v); };
|
||||
tracing::push_perfetto_track(category::sampling{}, "samples [omnitrace]", _track,
|
||||
_beg_ns, [&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns",
|
||||
_beg_ns);
|
||||
}
|
||||
});
|
||||
|
||||
for(const auto& itr : _data_v)
|
||||
auto _as_hex = [](auto _v) { return JOIN("", "0x", std::hex, _v); };
|
||||
|
||||
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
|
||||
for(const auto& itr : _data)
|
||||
{
|
||||
const auto* _bt_ts = itr->get<backtrace_timestamp>();
|
||||
const auto* _bt_cs = itr->get<backtrace>();
|
||||
const auto* _bt_mt = itr->get<backtrace_metrics>();
|
||||
|
||||
if(!_bt_ts || !_bt_cs || !_bt_mt) continue;
|
||||
if(_bt_ts->get_tid() != _tid) continue;
|
||||
|
||||
static std::set<std::string> _static_strings{};
|
||||
const auto* _last = _init;
|
||||
auto _patched_data = backtrace::filter_and_patch(_bt_cs->get());
|
||||
size_t _ncount = 0;
|
||||
for(const auto& iitr : _patched_data)
|
||||
{
|
||||
const auto* _bt_ts = itr->get<backtrace_timestamp>();
|
||||
const auto* _bt_cs = itr->get<backtrace>();
|
||||
uint64_t _beg = _last_ts;
|
||||
uint64_t _end = _bt_ts->get_timestamp();
|
||||
if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue;
|
||||
|
||||
if(!_bt_ts || !_bt_cs) continue;
|
||||
if(_bt_ts->get_tid() != _tid) continue;
|
||||
auto _ncur = _ncount++;
|
||||
// the begin/end + HW counters will be same for entire call-stack so only
|
||||
// annotate
|
||||
// the top and the bottom functons to keep the data consumption low
|
||||
bool _include_common = (_ncur == 0 || _ncur + 1 == _patched_data.size());
|
||||
|
||||
static std::set<std::string> _static_strings{};
|
||||
for(const auto& iitr : backtrace::filter_and_patch(_bt_cs->get()))
|
||||
{
|
||||
uint64_t _beg = _last_ts;
|
||||
uint64_t _end = _bt_ts->get_timestamp();
|
||||
if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue;
|
||||
// Only annotate HW counters when:
|
||||
// 1. when we can compute a difference from the last sample
|
||||
// 2. when the number of HW counters b/t this sample and last are the same
|
||||
bool _include_hw =
|
||||
_include_common && (_last != nullptr) &&
|
||||
_bt_mt->get_hw_counters().size() ==
|
||||
_last->get<backtrace_metrics>()->get_hw_counters().size();
|
||||
|
||||
if(get_sampling_include_inlines() && iitr.lineinfo)
|
||||
// annotations common to both modes
|
||||
auto _common_annotate = [&](::perfetto::EventContext& ctx, bool _is_last) {
|
||||
if(_include_common && _is_last)
|
||||
{
|
||||
auto _lines = iitr.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
size_t _n = 0;
|
||||
for(const auto& litr : _lines)
|
||||
{
|
||||
const auto* _name =
|
||||
_static_strings.emplace(demangle(litr.name)).first->c_str();
|
||||
auto _info = JOIN(':', litr.location, litr.line);
|
||||
tracing::push_perfetto_ts(
|
||||
category::sampling{}, _name, _beg,
|
||||
[&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns",
|
||||
_beg);
|
||||
tracing::add_perfetto_annotation(ctx, "lineinfo",
|
||||
_info);
|
||||
tracing::add_perfetto_annotation(ctx, "inlined",
|
||||
(_n++ > 0));
|
||||
}
|
||||
});
|
||||
tracing::pop_perfetto_ts(
|
||||
category::sampling{}, _name, _end,
|
||||
[&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", _end);
|
||||
}
|
||||
});
|
||||
}
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg);
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", _end);
|
||||
}
|
||||
else
|
||||
if(_include_hw && _is_last)
|
||||
{
|
||||
const auto* _name = _static_strings.emplace(iitr.name).first->c_str();
|
||||
tracing::push_perfetto_ts(
|
||||
category::sampling{}, _name, _beg,
|
||||
// current values when read
|
||||
auto _hw_cnt_vals = _bt_mt->get_hw_counters();
|
||||
// compute difference from last sample to provide the HW counters for
|
||||
// this sample
|
||||
tim::math::minus(_hw_cnt_vals,
|
||||
_last->get<backtrace_metrics>()->get_hw_counters());
|
||||
for(size_t i = 0; i < _labels.size(); ++i)
|
||||
tracing::add_perfetto_annotation(ctx, _labels.at(i),
|
||||
_hw_cnt_vals.at(i));
|
||||
}
|
||||
};
|
||||
|
||||
if(get_sampling_include_inlines() && iitr.lineinfo)
|
||||
{
|
||||
auto _lines = iitr.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
size_t _n = 0;
|
||||
for(const auto& litr : _lines)
|
||||
{
|
||||
const auto* _name =
|
||||
_static_strings.emplace(demangle(litr.name)).first->c_str();
|
||||
auto _info = JOIN(':', litr.location, litr.line);
|
||||
tracing::push_perfetto_track(
|
||||
category::sampling{}, _name, _track, _beg,
|
||||
[&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg);
|
||||
_common_annotate(ctx, (_n == 0 && _ncur == 0) ||
|
||||
(_n + 1 == _lines.size()));
|
||||
tracing::add_perfetto_annotation(ctx, "file",
|
||||
iitr.location);
|
||||
tracing::add_perfetto_annotation(ctx, "pc",
|
||||
_as_hex(iitr.address));
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, "line_address", _as_hex(iitr.line_address));
|
||||
if(iitr.lineinfo)
|
||||
{
|
||||
auto _lines = iitr.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
size_t _n = 0;
|
||||
for(const auto& litr : _lines)
|
||||
{
|
||||
auto _label = JOIN('-', "lineinfo", _n++);
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, _label.c_str(),
|
||||
JOIN('@', demangle(litr.name),
|
||||
JOIN(':', litr.location, litr.line)));
|
||||
}
|
||||
}
|
||||
tracing::add_perfetto_annotation(ctx, "lineinfo", _info);
|
||||
tracing::add_perfetto_annotation(ctx, "inlined",
|
||||
(_n++ > 0));
|
||||
}
|
||||
});
|
||||
|
||||
tracing::pop_perfetto_ts(category::sampling{}, _name, _end,
|
||||
[&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, "end_ns", _end);
|
||||
}
|
||||
});
|
||||
tracing::pop_perfetto_track(category::sampling{}, _name, _track,
|
||||
_end);
|
||||
}
|
||||
}
|
||||
_last_ts = _bt_ts->get_timestamp();
|
||||
else
|
||||
{
|
||||
const auto* _name = _static_strings.emplace(iitr.name).first->c_str();
|
||||
tracing::push_perfetto_track(
|
||||
category::sampling{}, _name, _track, _beg,
|
||||
[&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
_common_annotate(ctx, true);
|
||||
tracing::add_perfetto_annotation(ctx, "file", iitr.location);
|
||||
tracing::add_perfetto_annotation(ctx, "pc",
|
||||
_as_hex(iitr.address));
|
||||
tracing::add_perfetto_annotation(ctx, "line_address",
|
||||
_as_hex(iitr.line_address));
|
||||
if(iitr.lineinfo)
|
||||
{
|
||||
auto _lines = iitr.lineinfo.lines;
|
||||
std::reverse(_lines.begin(), _lines.end());
|
||||
size_t _n = 0;
|
||||
for(const auto& litr : _lines)
|
||||
{
|
||||
auto _label = JOIN('-', "lineinfo", _n++);
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, _label.c_str(),
|
||||
JOIN('@', demangle(litr.name),
|
||||
JOIN(':', litr.location, litr.line)));
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
tracing::pop_perfetto_track(category::sampling{}, _name, _track, _end);
|
||||
}
|
||||
}
|
||||
_last_ts = _bt_ts->get_timestamp();
|
||||
_last = itr;
|
||||
}
|
||||
|
||||
tracing::pop_perfetto_ts(category::sampling{}, "samples [omnitrace]", _end_ns,
|
||||
[&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns",
|
||||
_end_ns);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
auto _processing_thread = threading::get_tid();
|
||||
auto _process_perfetto_wrapper = [&]() {
|
||||
if(threading::get_tid() != _processing_thread)
|
||||
threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid, "(S)").c_str());
|
||||
|
||||
try
|
||||
{
|
||||
_process_perfetto(_data);
|
||||
} catch(std::runtime_error& _e)
|
||||
{
|
||||
OMNITRACE_PRINT("[sampling][post_process_perfetto] Exception: %s\n",
|
||||
_e.what());
|
||||
OMNITRACE_CI_ABORT(true, "[sampling][post_process_perfetto] Exception: %s\n",
|
||||
_e.what());
|
||||
}
|
||||
};
|
||||
|
||||
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
std::thread{ _process_perfetto_wrapper }.join();
|
||||
tracing::pop_perfetto_track(category::sampling{}, "samples [omnitrace]", _track,
|
||||
_end_ns, [&](perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns",
|
||||
_end_ns);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
Ссылка в новой задаче
Block a user