Files
rocm-systems/source/lib/omnitrace/library/causal/delay.cpp
T
Jonathan R. Madsen e7d3125459 restructure libomnitrace + tasking and omnitrace-causal updates (#237)
* restructured libomnitrace

- this is necessary to incorporate some of the binary analysis capabilities into omnitrace exe
- created libomnitrace-core (static)
- created libomnitrace-binary (static)
- created libomnitrace (static)
- omnitrace-avail links to libomnitrace.a
- omnitrace-critical-trace links to libomnitrace.a
- tweaked the testing
  - reduced verbosity on some of MPI tests
  - excluded trace-time-window from tests on Ubuntu 18.04
  - reduced causal e2e iterations
- minor tweak to tasking
  - manually create `PTL::UserTaskQueue` instance instead of relying on `PTL::ThreadPool` to create it

* Update formatting workflow

- source formatting uses ubuntu-22.04
- check-includes doesn't generate false positive for 'include "timemory.hpp"'

* omnitrace-causal --generate-configs

- fix config generation in omnitrace causal
- add test for omnitrace-causal + generating configs

* Fix omnitrace-object-library build

- accidentally included rocm sources in non-rocm builds

* Fix rocm compilation w/o rocprofiler

* update timemory submodule with mpi_get warning messages

* sampling offload file updates

- more verbose messages
- disable offload before stopping

* testing updates

- increase causal e2e iterations to 12
- increase lock_environment verbose to 2 (for sampling offload messages)
- fix return for omnitrace_add_validation_test
2023-02-04 10:59:50 -06:00

194 строки
5.5 KiB
C++

// MIT License
//
// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "library/causal/delay.hpp"
#include "core/state.hpp"
#include "core/utility.hpp"
#include "library/causal/experiment.hpp"
#include "library/runtime.hpp"
#include "library/thread_data.hpp"
#include "library/thread_info.hpp"
#include "library/tracing.hpp"
#include <timemory/components/macros.hpp>
#include <timemory/mpl/concepts.hpp>
#include <timemory/mpl/types.hpp>
#include <timemory/process/threading.hpp>
#include <atomic>
#include <chrono>
#include <random>
namespace omnitrace
{
namespace causal
{
namespace
{
auto&
get_delay_data()
{
using thread_data_t = thread_data<identity<int64_t>, delay>;
static auto& _v = thread_data_t::construct(
construct_on_init{}, []() { return delay::get_global().load(); });
return _v;
}
// Estimates the overhead of invoking std::this_thread::sleep_for.
//
// Performs a series of very short sleeps (a random request of 0 to 5
// nanoseconds each), measures the elapsed time around each call with
// tracing::now(), and accumulates (elapsed - requested) into a statistics
// object. The initial warm-up iterations are executed but not recorded.
//
// Side effects:
//   - logs the mean and standard deviation at verbosity level 2
//   - registers the statistics as metadata with the timemory manager
//   - calls get_delay_data() so the delay storage is constructed
//
// Returns the mean measured overhead, converted from double to int64_t.
// Raises via OMNITRACE_CONDITIONAL_THROW if a measured duration is shorter
// than the requested sleep.
int64_t
compute_sleep_for_overhead()
{
    using random_engine_t = std::mt19937_64;

    constexpr size_t _ntot  = 250;  // total number of timed sleeps
    constexpr size_t _nwarm = 50;   // leading iterations excluded from the stats

    auto _engine = random_engine_t{ std::random_device{}() };
    auto _dist   = std::uniform_int_distribution<int64_t>{ 0, 5 };
    auto _stats  = tim::statistics<double>{};

    for(size_t i = 0; i < _ntot; ++i)
    {
        auto    _val = _dist(_engine);
        int64_t _beg = tracing::now();
        std::this_thread::sleep_for(std::chrono::nanoseconds{ _val });
        int64_t _end = tracing::now();

        // warm-up iterations still sleep but do not contribute a sample
        if(i < _nwarm) continue;

        auto _diff = (_end - _beg);
        // _val and _diff are int64_t, so they are printed with a signed
        // conversion and an explicit cast rather than %zu (which expects size_t)
        OMNITRACE_CONDITIONAL_THROW(
            _diff < _val, "Error! sleep_for(%lld) [nanoseconds] >= %lld",
            static_cast<long long>(_val), static_cast<long long>(_diff));
        _stats += (_diff - _val);
    }

    OMNITRACE_BASIC_VERBOSE(2,
                            "[causal] overhead of std::this_thread::sleep_for(...) "
                            "invocation = %6.3f usec +/- %e\n",
                            _stats.get_mean() / units::usec,
                            _stats.get_stddev() / units::usec);

    // the statistics are captured by value so the callback owns its own copy
    tim::manager::instance()->add_metadata([_stats](auto& ar) {
        ar(tim::cereal::make_nvp("causal thread sleep overhead [nsec]", _stats));
    });

    // make sure the delay storage exists once the overhead has been computed
    (void) get_delay_data();

    // implicit double -> int64_t conversion of the mean
    return _stats.get_mean();
}
// Mean overhead of a std::this_thread::sleep_for call as measured by
// compute_sleep_for_overhead(); evaluated once, when this translation unit's
// namespace-scope variables are dynamically initialized.
int64_t sleep_for_overhead{ compute_sleep_for_overhead() };
} // namespace
// Reconciles the calling thread's local delay value with the global delay.
//
// Does nothing when the delay component is disabled at runtime or once the
// state has reached Finalized.
//
// While an experiment is active:
//   - local > global: the excess is added to the global value, but only when
//     it is larger than the measured sleep_for overhead
//   - local < global: the thread sleeps for the difference and the measured
//     elapsed time (not the requested time) is added to the local value
// When no experiment is active the local value is overwritten with the
// global value.
void
delay::process()
{
    if(!trait::runtime_enabled<delay>::get()) return;
    if(get_state() >= ::omnitrace::State::Finalized) return;

    // no experiment in progress: the local value just mirrors the global one
    if(!causal::experiment::is_active())
    {
        get_local() = get_global();
        return;
    }

    if(get_global() < get_local())
    {
        // differences no larger than the sleep_for overhead are not published
        auto _excess = (get_local() - get_global());
        if(_excess > sleep_for_overhead) get_global() += _excess;
    }
    else if(get_global() > get_local())
    {
        // pay back the outstanding delay and record how long it really took
        auto _start = tracing::now();
        std::this_thread::sleep_for(
            std::chrono::nanoseconds{ get_global() - get_local() });
        get_local() += (tracing::now() - _start);
    }
}
// Raises the calling thread's local delay by the amount the global delay is
// ahead of it, without sleeping. Nothing changes when the local value is
// already equal to or greater than the global value.
//
// Does nothing when the delay component is disabled at runtime or once the
// state has reached Finalized.
void
delay::credit()
{
    if(!trait::runtime_enabled<delay>::get()) return;
    if(get_state() >= ::omnitrace::State::Finalized) return;

    auto _behind = get_global() - get_local();
    if(_behind <= 0) return;

    get_local() += _behind;
}
// Adds any positive difference (global - local) to the calling thread's
// local delay; no sleep is performed.
// NOTE(review): judging by the name this is meant to run before a blocking
// operation -- confirm against the callers.
//
// Does nothing when the delay component is disabled at runtime or once the
// state has reached Finalized.
void
delay::preblock()
{
    if(!trait::runtime_enabled<delay>::get()) return;
    if(get_state() >= ::omnitrace::State::Finalized) return;

    auto _pending = get_global() - get_local();
    if(_pending > 0) get_local() += _pending;
}
// Adds to the calling thread's local delay the amount by which the global
// delay differs from _preblock_global_delay_value.
//
// _preblock_global_delay_value: a previously observed global delay value
// (presumably sampled before the thread blocked -- confirm against callers).
//
// Does nothing when the delay component is disabled at runtime or once the
// state has reached Finalized.
void
delay::postblock(int64_t _preblock_global_delay_value)
{
    if(!trait::runtime_enabled<delay>::get()) return;
    if(get_state() >= ::omnitrace::State::Finalized) return;

    // read the global value first, then apply the change to the local value
    const auto _accrued = get_global() - _preblock_global_delay_value;
    get_local() += _accrued;
}
// Reads the global delay (sequentially consistent load) and, when the delay
// storage handle is non-null, fills the storage with that value.
//
// Returns the global delay value that was read.
int64_t
delay::sync()
{
    const auto _snapshot = get_global().load(std::memory_order_seq_cst);

    // get_delay_data() always returns the same static handle, so bind it once
    auto& _storage = get_delay_data();
    if(_storage) _storage->fill(_snapshot);

    return _snapshot;
}
// Returns the global delay counter: a function-local static atomic that
// starts at zero and is shared by every caller.
std::atomic<int64_t>&
delay::get_global()
{
    static std::atomic<int64_t> _value{ 0 };
    return _value;
}
// Returns a reference to the delay value stored for thread id _tid.
//
// The first call made on each thread runs a one-time initializer that calls
// thread_data<...>::construct with the *calling* thread's id (not _tid) and
// the current global delay value.
//
// _tid: index into the delay storage; the lookup is done with at().
int64_t&
delay::get_local(int64_t _tid)
{
    auto& _data = get_delay_data();

    // thread_local: the lambda runs once per thread, the first time that
    // thread reaches this declaration
    static thread_local auto _thr_init = []() {
        using thread_data_t = thread_data<identity<int64_t>, delay>;
        thread_data_t::construct(construct_on_thread{ threading::get_id() },
                                 get_global().load());
        return true;
    }();

    // the flag only exists for its initializer's side effect; mark it as used
    // *before* returning (a statement after the return would be unreachable)
    (void) _thr_init;

    return _data->at(_tid);
}
// Returns the current global delay minus _baseline.
//
// The subtraction mixes int64_t (the global value) with uint64_t (_baseline),
// so it is carried out in unsigned 64-bit arithmetic.
uint64_t
delay::compute_total_delay(uint64_t _baseline)
{
    const auto _current = get_global().load();
    return _current - _baseline;
}
} // namespace causal
} // namespace omnitrace