3e2fa69a14
* Update perfetto args.gn.in - remove enable_perfetto_tools_trace_to_text (unused) * core timeout implementation - requires OMNITRACE_CI=ON - requires OMNITRACE_CI_TIMEOUT=<sec> - adds pthread_self and std::this_thread::get_id to thread info - pthread_create_gotcha stores native handles (pthread_self) * Testing updates - improve detection of segfault/failures with PASS_REGEX exists - add OMNITRACE_CI_TIMEOUT env variable to all tests * Line-info in releases - e.g. -g1 + more options to minimize size of debug info * Fix typo in config exit action message * OMNITRACE_UNLIKELY around debug/verbose messages * format fixes * Overflow tests + capability check * transpose example update - link to threads library * roctracer/rocprofiler update - in ROCm 5.5.0, cannot include rocprofiler.h and roctracer.h in same file due to conflicting enum defs - Moved HSA tracing setup/shutdown to component::roctracer * roctracer update - fix definition of roctracer::setup when disabled * Update fork example - detach threads on main PID - flush io outputs when printing info * Update overflow tests - pass regular expressions - overflow on PERF_COUNT_SW_CPU_CLOCK event * fork gotcha update - use getpid() instead of getppid() * update fork example - wait on threads calling fork * timeout update - wait on timeout thread to launch before proceeding
232 lines
8.3 KiB
C++
232 lines
8.3 KiB
C++
// MIT License
//
// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
|
|
|
|
#include "core/categories.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/locking.hpp"
#include "core/state.hpp"
#include "library/components/pthread_gotcha.hpp"
#include "library/runtime.hpp"
#include "library/thread_info.hpp"

#include <timemory/log/color.hpp>
#include <timemory/signals/types.hpp>
#include <timemory/unwind/backtrace.hpp>

#include <cerrno>
#include <chrono>
#include <future>
#include <sstream>
#include <thread>
|
|
|
|
namespace omnitrace
|
|
{
|
|
namespace timeout
|
|
{
|
|
void
|
|
setup() OMNITRACE_INTERNAL_API;
|
|
|
|
namespace
|
|
{
|
|
namespace unwind = ::tim::unwind;
|
|
namespace signals = ::tim::signals;
|
|
namespace log = ::tim::log;
|
|
|
|
constexpr auto timeout_signal = signals::sys_signal::Hangup;
|
|
constexpr auto timeout_signal_v = static_cast<int>(timeout_signal);
|
|
|
|
auto main_thread_native_handle = pthread_self();
|
|
bool ci_timeout_active = false;
|
|
auto ci_timeout_mutex = locking::atomic_mutex{};
|
|
uint64_t ci_timeout_backtrace_global_count = 1;
|
|
uint64_t ci_timeout_backtrace_global_done = 0;
|
|
thread_local uint64_t ci_timeout_backtrace_local_count = 0;
|
|
|
|
void
|
|
ci_timeout_backtrace(int)
|
|
{
|
|
if(ci_timeout_backtrace_local_count >= ci_timeout_backtrace_global_count) return;
|
|
++ci_timeout_backtrace_local_count;
|
|
|
|
auto _err = std::stringstream{};
|
|
auto _cfg = unwind::detailed_backtrace_config{};
|
|
_cfg.proc_pid_maps = false;
|
|
_cfg.unwind_lineinfo = false;
|
|
_cfg.force_color = !log::monochrome();
|
|
|
|
unwind::detailed_backtrace<0>(_err, _cfg);
|
|
|
|
static auto _mutex = locking::atomic_mutex{};
|
|
auto _lk = locking::atomic_lock{ _mutex };
|
|
OMNITRACE_PRINT("%s\n", _err.str().c_str());
|
|
|
|
++ci_timeout_backtrace_global_done;
|
|
}
|
|
|
|
void
|
|
ensure_ci_timeout_backtrace(double _ci_timeout_seconds,
|
|
std::promise<void> _ci_timeout_ready)
|
|
{
|
|
_ci_timeout_ready.set_value();
|
|
|
|
thread_info::init(true);
|
|
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Disabled);
|
|
|
|
auto _factor = 3.0;
|
|
while(_ci_timeout_seconds <= _factor)
|
|
_factor /= 1.25;
|
|
|
|
uint64_t _ci_timeout_nitr = 0;
|
|
int64_t _ci_timeout_nanosec = (_ci_timeout_seconds - _factor) * units::sec;
|
|
auto _ci_timeout_total_count =
|
|
get_env<uint64_t>("OMNITRACE_CI_TIMEOUT_COUNT", 1, false);
|
|
const auto root_pid =
|
|
get_env<pid_t>("OMNITRACE_ROOT_PROCESS", process::get_id(), false);
|
|
|
|
while(get_state() < State::Finalized && _ci_timeout_nitr < _ci_timeout_total_count)
|
|
{
|
|
// sleep until timeout reached
|
|
std::this_thread::sleep_for(std::chrono::nanoseconds{ _ci_timeout_nanosec });
|
|
|
|
// guard against thread in fork
|
|
if(process::get_id() != root_pid)
|
|
{
|
|
ci_timeout_active = false;
|
|
setup();
|
|
return;
|
|
}
|
|
|
|
auto _tids = pthread_gotcha::get_native_handles();
|
|
int64_t _ci_timeout_pause = (_factor * units::sec) / (3 * (_tids.size() + 1));
|
|
auto _kill_thread = [_ci_timeout_pause](auto _handle) {
|
|
// execute the pthread_kill and wait until ci_timeout_backtrace increments
|
|
// ci_timeout_backtrace_global_done (or 50 iterations pass) to avoid
|
|
// the backtraces overlapping output
|
|
auto _n = 0;
|
|
auto _done_v = ci_timeout_backtrace_global_done;
|
|
if(::pthread_kill(_handle, timeout_signal_v) != 0)
|
|
{
|
|
const auto& _info = thread_info::get(_handle);
|
|
if(_info)
|
|
{
|
|
OMNITRACE_WARNING_F(
|
|
0, "pthread_kill(%zu, %i) failed for thread %zi (info: %s)\n",
|
|
_handle, timeout_signal_v, _info->index_data->sequent_value,
|
|
_info->as_string().c_str());
|
|
}
|
|
else
|
|
{
|
|
OMNITRACE_WARNING_F(0,
|
|
"pthread_kill(%zu, %i) failed. executing generic "
|
|
"kill(%i, %i)...\n",
|
|
_handle, timeout_signal_v, process::get_id(),
|
|
timeout_signal_v);
|
|
}
|
|
|
|
::kill(process::get_id(), timeout_signal_v);
|
|
}
|
|
|
|
// wait until the signal has been delivered
|
|
while(ci_timeout_backtrace_global_done == _done_v && _n++ < 50)
|
|
std::this_thread::sleep_for(
|
|
std::chrono::nanoseconds{ _ci_timeout_pause });
|
|
};
|
|
|
|
_tids.erase(main_thread_native_handle);
|
|
OMNITRACE_WARNING_F(-127,
|
|
"timeout after %8.3f seconds... Generating backtraces for "
|
|
"%zu threads...\n",
|
|
_ci_timeout_seconds, _tids.size() + 1);
|
|
|
|
for(auto itr : _tids)
|
|
_kill_thread(itr);
|
|
|
|
_kill_thread(main_thread_native_handle);
|
|
|
|
::omnitrace::debug::flush();
|
|
::omnitrace::debug::lock _debug_lk{};
|
|
|
|
if(++_ci_timeout_nitr >= _ci_timeout_total_count)
|
|
{
|
|
// use SIGQUIT because it will generate a core dump
|
|
::kill(process::get_id(), SIGQUIT);
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
++ci_timeout_backtrace_global_count;
|
|
}
|
|
}
|
|
|
|
OMNITRACE_WARNING_F(0, "timeout thread exiting...\n");
|
|
}
|
|
} // namespace
|
|
|
|
void
|
|
setup()
|
|
{
|
|
// make sure there isn't any datarace for ci_timeout_active
|
|
auto _lk = locking::atomic_lock{ ci_timeout_mutex };
|
|
|
|
if(ci_timeout_active) return;
|
|
|
|
// in CI mode, if OMNITRACE_CI_TIMEOUT or OMNITRACE_CI_TIMEOUT_OVERRIDE is
|
|
// set, start a thread that will print out the backtrace for each thread
|
|
// before the timeout is hit (i.e. killed by CTest) so we can potentially
|
|
// diagnose where the code is stuck
|
|
auto _ci = get_env("OMNITRACE_CI", false, false);
|
|
if(_ci)
|
|
{
|
|
// set by CTest
|
|
auto _ci_timeout_default = get_env("OMNITRACE_CI_TIMEOUT", -1.0, false);
|
|
// allow override by user
|
|
auto _ci_timeout_seconds =
|
|
get_env("OMNITRACE_CI_TIMEOUT_OVERRIDE", _ci_timeout_default, false);
|
|
|
|
if(_ci_timeout_seconds > 0.0)
|
|
{
|
|
// lock served its purpose after setting to true
|
|
ci_timeout_active = true;
|
|
_lk.unlock();
|
|
|
|
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
|
OMNITRACE_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
|
|
|
// enable the signal handler for when the timeout is reached
|
|
struct sigaction _action = {};
|
|
sigemptyset(&_action.sa_mask);
|
|
_action.sa_flags = SA_RESTART;
|
|
_action.sa_handler = ci_timeout_backtrace;
|
|
sigaction(timeout_signal_v, &_action, nullptr);
|
|
|
|
// start a background thread that handles waiting for the timeout
|
|
auto _ci_timeout_ready = std::promise<void>{};
|
|
auto _ci_timeout_wait = _ci_timeout_ready.get_future();
|
|
std::thread{ ensure_ci_timeout_backtrace, _ci_timeout_seconds,
|
|
std::move(_ci_timeout_ready) }
|
|
.detach();
|
|
_ci_timeout_wait.wait_for(std::chrono::seconds{ 1 });
|
|
}
|
|
}
|
|
}
|
|
} // namespace timeout
|
|
} // namespace omnitrace
|