1688a027d8
- additional miscellaneous tweaks to workflows and docker scripts, e.g. install perfetto python bindings - improves the stability of MPI finalization - reduces some debug messages within timemory when `OMNITRACE_DEBUG=ON` - fixes issue found in RHEL where libunwind is using mutex and omnitrace was not treating this as an internal mutex call - this may have been affecting the causal profiling slightly (tests seem a bit more stable now) - fix data race in timemory * Add RedHat CI and release packaging - additional miscellaneous tweaks to workflows and docker scripts, e.g. install perfetto python bindings * Fix URL for ROCm packages in redhat workflow * Fix dnf --enable-repo for ROCm perl packages * Dockerfile.rhel and redhat.yml updates - Fix dnf repo for ROCm PERL packages - Disable python in CI (interpreter segfaults) - Exclude parallel-overhead-locks tests due to inclusion of internal locks - This needs to be remedied in the future * Exclude _dl_relocate_static_pie from instrumentation * Testing updates - OMNITRACE_SAMPLING_KEEP_INTERNAL=OFF for parallel-overhead-locks * Fix redhat workflow * redhat.yml update - remove if condition on config/build/test step * Update timemory submodule - tweaks to verbosity messages * Set thread state before unw_step - on Redhat, unw_step calls mutex * Update timemory submodule - verbosity changes - gotcha uses spin_lock/spin_mutex * Remove using gsplit-dwarf unless OMNITRACE_BUILD_NUMBER > 2 * Re-enable parallel-overhead-locks tests in redhat workflow * Always disable timemory manager metadata auto output * testing updates - tweak parallel-overhead-locks-timemory to higher instruction count min - OMNITRACE_SAMPLING_KEEP_INTERNAL=OFF for parallel-overhead-locks-perfetto * Update timemory submodule - quiet realpath queries * omnitrace exe updates - detect text files - improved bin/lib locating * cmake format * test-install.sh and redhat workflow updates - handle testing when ls is script - re-enable python testing on redhat workflow 
- invoke test-install.sh in redhat workflow * Misc guards for finalization * omnitrace-exe, testing updates - test-install.sh: LS_EXEC -> LS_NAME - handle /usr/bin/ls being script in source/bin/tests - improve locating the binary * Fix mpi_gotcha compile error * omnitrace-exe updates - improve file locating * formatting * Misc fixes - remove -static-libstdc++ for RHEL packaging (rocky-linux doesn't distribute static lib) * omnitrace-exe paths * Replace realpath with absolute - using absolute path to symlink fixes issues with locating libdyninstAPI_RT at runtime * omnitrace exe updates - judicious use of realpath * Update timemory submodule - fix update main hash ids/aliases data race in merge * bin tests update - change working directory of omnitrace-exe-simulate-lib-basename * omnitrace exe updates - Update resolved exe/lib messaging * bin tests update - change working directory of omnitrace-exe-simulate-lib-basename
209 linhas
6.6 KiB
C++
209 linhas
6.6 KiB
C++
// MIT License
|
|
//
|
|
// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
|
|
#include "library/causal/components/backtrace.hpp"
|
|
#include "core/concepts.hpp"
|
|
#include "core/config.hpp"
|
|
#include "core/debug.hpp"
|
|
#include "core/state.hpp"
|
|
#include "core/utility.hpp"
|
|
#include "library/causal/data.hpp"
|
|
#include "library/causal/delay.hpp"
|
|
#include "library/causal/experiment.hpp"
|
|
#include "library/runtime.hpp"
|
|
#include "library/thread_data.hpp"
|
|
#include "library/thread_info.hpp"
|
|
#include "library/tracing.hpp"
|
|
|
|
#include <timemory/components/timing/backends.hpp>
|
|
#include <timemory/components/timing/wall_clock.hpp>
|
|
#include <timemory/mpl/concepts.hpp>
|
|
#include <timemory/mpl/type_traits.hpp>
|
|
#include <timemory/mpl/types.hpp>
|
|
#include <timemory/process/threading.hpp>
|
|
#include <timemory/units.hpp>
|
|
#include <timemory/utility/backtrace.hpp>
|
|
|
|
#include <atomic>
|
|
#include <ctime>
|
|
#include <type_traits>
|
|
|
|
namespace omnitrace
|
|
{
|
|
namespace causal
|
|
{
|
|
namespace component
|
|
{
|
|
namespace
|
|
{
|
|
using ::tim::backtrace::get_unw_signal_frame_stack_raw;
|
|
|
|
auto&
|
|
get_delay_statistics()
|
|
{
|
|
using thread_data_t =
|
|
thread_data<identity<tim::statistics<int64_t>>, category::sampling>;
|
|
|
|
static_assert(
|
|
use_placement_new_when_generating_unique_ptr<thread_data_t>::value,
|
|
"delay statistics thread data should use placement new to allocate unique_ptr");
|
|
|
|
static auto& _v = thread_data_t::instance(construct_on_init{});
|
|
return _v;
|
|
}
|
|
} // namespace
|
|
|
|
void
|
|
backtrace::start()
|
|
{
|
|
// do not delete these lines. The thread data needs to be allocated
|
|
// before it is called in sampler or else a deadlock will occur when
|
|
// the sample interrupts a malloc call
|
|
(void) get_delay_statistics();
|
|
}
|
|
|
|
void
|
|
backtrace::stop()
|
|
{}
|
|
|
|
void
|
|
sample_rate::sample(int _sig)
|
|
{
|
|
if(_sig != get_realtime_signal()) return;
|
|
|
|
// update the last sample for backtrace signal(s) even when in use
|
|
static thread_local int64_t _last_sample = 0;
|
|
|
|
auto _this_sample = tracing::now();
|
|
auto& _period_stat = get_delay_statistics()->at(threading::get_id());
|
|
if(_last_sample > 0) _period_stat += (_this_sample - _last_sample);
|
|
_last_sample = _this_sample;
|
|
}
|
|
|
|
void
|
|
backtrace::sample(int _sig)
|
|
{
|
|
constexpr size_t depth = ::omnitrace::causal::unwind_depth;
|
|
constexpr int64_t ignore_depth = ::omnitrace::causal::unwind_offset;
|
|
|
|
// update the last sample for backtrace signal(s) even when in use
|
|
static thread_local size_t _protect_flag = 0;
|
|
|
|
// sampling_guard _guard{};
|
|
|
|
if((_protect_flag & 1) == 1 ||
|
|
OMNITRACE_UNLIKELY(!trait::runtime_enabled<causal::component::backtrace>::get()))
|
|
{
|
|
return;
|
|
}
|
|
|
|
++_protect_flag;
|
|
// on RedHat, the unw_step within get_unw_signal_frame_stack_raw involves a mutex lock
|
|
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
|
m_index = causal::experiment::get_index();
|
|
m_stack = get_unw_signal_frame_stack_raw<depth, ignore_depth>();
|
|
|
|
// the batch handler timer delivers a signal according to the thread CPU
|
|
// clock, ensuring that setting the current selection and processing the
|
|
// delays only happens when the thread is active
|
|
if(_sig == get_cputime_signal())
|
|
{
|
|
if(!causal::experiment::is_active())
|
|
causal::set_current_selection(m_stack);
|
|
else
|
|
causal::delay::process();
|
|
}
|
|
else if(_sig == get_realtime_signal())
|
|
{
|
|
static thread_local auto _tid = threading::get_id();
|
|
auto& _period_stat = get_delay_statistics()->at(_tid);
|
|
|
|
if(causal::experiment::is_active() && causal::experiment::is_selected(m_stack))
|
|
{
|
|
m_selected = true;
|
|
causal::experiment::add_selected();
|
|
// compute the delay time based on the rate of taking samples,
|
|
// unless we have taken less than 10, in which case, we just
|
|
// use the pre-computed value.
|
|
auto _delay =
|
|
(_period_stat.get_count() < 10)
|
|
? causal::experiment::get_delay()
|
|
: (_period_stat.get_mean() * causal::experiment::get_delay_scaling());
|
|
causal::delay::get_local() += _delay;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
OMNITRACE_THROW("unhandled signal %i\n", _sig);
|
|
}
|
|
|
|
++_protect_flag;
|
|
}
|
|
|
|
template <typename Tp>
|
|
Tp
|
|
backtrace::get_period(uint64_t _units)
|
|
{
|
|
using cast_type = std::conditional_t<std::is_floating_point<Tp>::value, Tp, double>;
|
|
|
|
double _realtime_freq =
|
|
(get_use_sampling_realtime()) ? get_sampling_real_freq() : 0.0;
|
|
double _cputime_freq = (get_use_sampling_cputime()) ? get_sampling_cpu_freq() : 0.0;
|
|
|
|
auto _freq = std::max<double>(_realtime_freq, _cputime_freq);
|
|
double _period = 1.0 / _freq;
|
|
int64_t _period_nsec = static_cast<int64_t>(_period * units::sec) % units::sec;
|
|
return static_cast<Tp>(_period_nsec) / static_cast<cast_type>(_units);
|
|
}
|
|
|
|
/// Merges the per-thread sampling-period statistics into a single object.
///
/// Entries with a count of one or less are skipped.
///
/// \return the merged statistics; a default-constructed object when the
///         statistics container is not available
tim::statistics<int64_t>
backtrace::get_period_stats()
{
    auto _data = tim::statistics<int64_t>{};

    // look the container up once and bail out if it is not available
    auto& _stats = get_delay_statistics();
    if(!_stats) return _data;

    // entries are only read here, so bind by const reference instead of copying
    for(const auto& itr : *_stats)
    {
        if(itr.get_count() > 1) _data += itr;
    }
    return _data;
}
|
|
|
|
void
|
|
backtrace::reset_period_stats()
|
|
{
|
|
for(auto& itr : *get_delay_statistics())
|
|
{
|
|
itr.reset();
|
|
}
|
|
}
|
|
} // namespace component
|
|
} // namespace causal
|
|
} // namespace omnitrace
|
|
|
|
#define INSTANTIATE_BT_CAUSAL_PERIOD(TYPE) \
|
|
template TYPE omnitrace::causal::component::backtrace::get_period<TYPE>(uint64_t);
|
|
|
|
INSTANTIATE_BT_CAUSAL_PERIOD(float)
|
|
INSTANTIATE_BT_CAUSAL_PERIOD(double)
|
|
INSTANTIATE_BT_CAUSAL_PERIOD(int64_t)
|
|
INSTANTIATE_BT_CAUSAL_PERIOD(uint64_t)
|