Created push/pop system for whether sampling is enabled (#27)

- also permitted turning off sampling in sampling mode
- also fixed ambiguous rocm_smi namespace issue in roctracer

[ROCm/rocprofiler-systems commit: 3151dd3aeb]
Этот коммит содержится в:
Jonathan R. Madsen
2022-02-25 05:33:59 -06:00
коммит произвёл GitHub
родитель 2403bbde49
Коммит 1ad5529697
7 изменённых файлов: 102 добавлений и 33 удалений
+15 -2
Просмотреть файл
@@ -26,6 +26,7 @@
#include "library/defines.hpp"
#include "library/timemory.hpp"
#include <cstdint>
#include <future>
namespace omnitrace
@@ -59,12 +60,24 @@ struct pthread_gotcha : tim::component::base<pthread_gotcha, void>
static void configure();
static void shutdown();
// threads can set this to avoid starting sampling on child threads
static bool& enable_sampling_on_child_threads();
// query current value
static bool sampling_enabled_on_child_threads();
// use this to disable sampling in a region (e.g. right before thread creation)
static bool push_enable_sampling_on_child_threads(bool _v);
// use this to restore previous setting
static bool pop_enable_sampling_on_child_threads();
// make sure every newly created thread starts with this value
static void set_sampling_on_all_future_threads(bool _v);
// pthread_create
int operator()(pthread_t* thread, const pthread_attr_t* attr,
void* (*start_routine)(void*), void* arg) const;
private:
static bool& sampling_on_child_threads();
};
using pthread_gotcha_t = tim::component::gotcha<2, std::tuple<>, pthread_gotcha>;
+12 -5
Просмотреть файл
@@ -26,6 +26,7 @@
#include "library/components/functors.hpp"
#include "library/components/fwd.hpp"
#include "library/components/mpi_gotcha.hpp"
#include "library/components/pthread_gotcha.hpp"
#include "library/config.hpp"
#include "library/critical_trace.hpp"
#include "library/debug.hpp"
@@ -383,7 +384,7 @@ omnitrace_init_library_hidden()
OMNITRACE_CONDITIONAL_PRINT_F(get_verbose() >= 0,
"Disabling critical trace in %s mode...\n",
std::to_string(_mode).c_str());
get_use_sampling() = true;
get_use_sampling() = tim::get_env("OMNITRACE_USE_SAMPLING", true);
get_use_critical_trace() = false;
}
@@ -441,10 +442,11 @@ omnitrace_init_tooling_hidden()
auto _dtor = scope::destructor{ []() {
if(get_use_sampling())
{
pthread_gotcha::enable_sampling_on_child_threads() = false;
pthread_gotcha::push_enable_sampling_on_child_threads(false);
thread_sampler::setup();
sampling::setup();
pthread_gotcha::enable_sampling_on_child_threads() = true;
pthread_gotcha::pop_enable_sampling_on_child_threads();
pthread_gotcha::push_enable_sampling_on_child_threads(get_use_sampling());
sampling::unblock_signals();
}
get_main_bundle()->start();
@@ -453,7 +455,7 @@ omnitrace_init_tooling_hidden()
if(get_use_sampling())
{
pthread_gotcha::enable_sampling_on_child_threads() = false;
pthread_gotcha::push_enable_sampling_on_child_threads(false);
sampling::block_signals();
}
@@ -692,6 +694,10 @@ omnitrace_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _a
tim::set_env("OMNITRACE_MODE", _mode, 0);
config::is_binary_rewrite() = _is_binary_rewrite;
// set OMNITRACE_USE_SAMPLING to ON by default if mode is sampling
tim::set_env("OMNITRACE_USE_SAMPLING", (get_mode() == Mode::Sampling) ? "ON" : "OFF",
0);
// default to KokkosP enabled when sampling, otherwise default to off
tim::set_env("OMNITRACE_USE_KOKKOSP", (get_mode() == Mode::Sampling) ? "ON" : "OFF",
0);
@@ -738,7 +744,8 @@ omnitrace_finalize_hidden(void)
library_functors::configure([](const char*) {}, [](const char*) {});
pthread_gotcha::enable_sampling_on_child_threads() = false;
pthread_gotcha::push_enable_sampling_on_child_threads(false);
pthread_gotcha::set_sampling_on_all_future_threads(false);
auto _debug_init = get_debug_finalize();
auto _debug_value = get_debug();
+2 -3
Просмотреть файл
@@ -595,10 +595,9 @@ backtrace::post_process(int64_t _tid)
_process_perfetto(_data, false);
else
{
auto _v = pthread_gotcha::enable_sampling_on_child_threads();
pthread_gotcha::enable_sampling_on_child_threads() = false;
pthread_gotcha::push_enable_sampling_on_child_threads(false);
std::thread{ _process_perfetto, _data, true }.join();
pthread_gotcha::enable_sampling_on_child_threads() = _v;
pthread_gotcha::pop_enable_sampling_on_child_threads();
}
}
+67 -14
Просмотреть файл
@@ -26,6 +26,7 @@
#include "library/config.hpp"
#include "library/debug.hpp"
#include "library/sampling.hpp"
#include "library/thread_data.hpp"
#include <timemory/backends/threading.hpp>
#include <timemory/sampling/allocator.hpp>
@@ -83,6 +84,21 @@ stop_bundle(bundle_t& _bundle, int64_t _tid)
// exclude popping wall-clock
_bundle.pop(main_pw_t{}, _tid);
}
// Returns an index identifying the calling thread. Indices are handed out
// from a process-wide atomic counter starting at 0, in the order in which
// threads first call this function; every later call from the same thread
// yields the same value.
auto
get_thread_index()
{
    // shared by all threads: the next index that has not been handed out yet
    static std::atomic<int64_t> _next_index{ 0 };
    // initialized once per thread, on that thread's first call
    static thread_local const int64_t _my_index = _next_index.fetch_add(1);
    return _my_index;
}
auto&
get_sampling_on_child_threads_history(int64_t _idx = get_thread_index())
{
static auto _v = std::array<std::vector<bool>, OMNITRACE_MAX_THREADS>{};
return _v.at(_idx);
}
} // namespace
pthread_gotcha::wrapper::wrapper(routine_t _routine, void* _arg, bool _enable_sampling,
@@ -99,11 +115,10 @@ pthread_gotcha::wrapper::operator()() const
{
std::shared_ptr<bundle_t> _bundle{};
std::set<int> _signals{};
auto& _enable_sampling = pthread_gotcha::enable_sampling_on_child_threads();
auto _active = (get_state() == omnitrace::State::Active);
int64_t _tid = -1;
auto _is_sampling = false;
auto _dtor = scope::destructor{ [&]() {
auto _active = (get_state() == omnitrace::State::Active);
int64_t _tid = -1;
auto _is_sampling = false;
auto _dtor = scope::destructor{ [&]() {
if(_is_sampling)
{
sampling::block_signals(_signals);
@@ -121,7 +136,7 @@ pthread_gotcha::wrapper::operator()() const
if(_active) get_cpu_cid_stack(threading::get_id(), m_parent_tid);
if(m_enable_sampling && _enable_sampling && _active)
if(m_enable_sampling && _active)
{
_tid = threading::get_id();
threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid).c_str());
@@ -133,10 +148,10 @@ pthread_gotcha::wrapper::operator()() const
.first->second;
}
if(_bundle) start_bundle(*_bundle);
_is_sampling = true;
_enable_sampling = false;
_signals = sampling::setup();
_enable_sampling = true;
_is_sampling = true;
push_enable_sampling_on_child_threads(false);
_signals = sampling::setup();
pop_enable_sampling_on_child_threads();
sampling::unblock_signals();
}
@@ -187,10 +202,48 @@ pthread_gotcha::shutdown()
bundles.clear();
}
bool&
pthread_gotcha::enable_sampling_on_child_threads()
bool
pthread_gotcha::sampling_enabled_on_child_threads()
{
static thread_local bool _v = get_use_sampling();
return sampling_on_child_threads();
}
// Saves the calling thread's current "sampling on child threads" value on
// its history stack and then makes `_v` the current value.
//
// Returns the value that was in effect before the call (the one that the
// matching pop_enable_sampling_on_child_threads() will restore).
bool
pthread_gotcha::push_enable_sampling_on_child_threads(bool _v)
{
    auto&      _stack    = get_sampling_on_child_threads_history();
    bool&      _current  = sampling_on_child_threads();
    const bool _previous = _current;

    _stack.emplace_back(_previous);
    _current = _v;
    return _previous;
}
// Restores the most recently saved "sampling on child threads" value for the
// calling thread and removes it from the history stack. If nothing has been
// saved, the current value is left untouched.
//
// Returns the value in effect after the call.
bool
pthread_gotcha::pop_enable_sampling_on_child_threads()
{
    auto& _stack = get_sampling_on_child_threads_history();

    // unbalanced pop: nothing to restore, just report the current value
    if(_stack.empty()) return sampling_on_child_threads();

    const bool _previous = _stack.back();
    _stack.pop_back();
    return (sampling_on_child_threads() = _previous);
}
// Appends `_v` to the history stack of every thread slot, from index 0 up to
// (but not including) max_supported_threads. A thread whose first call to
// sampling_on_child_threads() happens afterwards picks up the last entry of
// its slot as its starting value.
//
// NOTE(review): this also appends to slots whose threads may already be
// running, and does so without any locking visible here -- confirm that
// callers only invoke it when no other thread is pushing/popping.
void
pthread_gotcha::set_sampling_on_all_future_threads(bool _v)
{
    for(size_t _slot = 0; _slot < max_supported_threads; ++_slot)
    {
        auto& _stack = get_sampling_on_child_threads_history(_slot);
        _stack.emplace_back(_v);
    }
}
// Returns a mutable reference to the calling thread's "sampling on child
// threads" flag.
//
// The flag is thread-local and is initialized on the thread's first call:
// it starts as the last entry of that thread's history stack, or false when
// the stack is empty.
bool&
pthread_gotcha::sampling_on_child_threads()
{
    static thread_local bool _flag = []() {
        auto& _stack = get_sampling_on_child_threads_history();
        return _stack.empty() ? false : static_cast<bool>(_stack.back());
    }();
    return _flag;
}
@@ -200,7 +253,7 @@ pthread_gotcha::operator()(pthread_t* thread, const pthread_attr_t* attr,
void* (*start_routine)(void*), void* arg) const
{
bundle_t _bundle{ "pthread_create" };
auto _enable_sampling = enable_sampling_on_child_threads();
auto _enable_sampling = sampling_enabled_on_child_threads();
auto _active = (get_state() == omnitrace::State::Active);
int64_t _tid = (_active) ? threading::get_id() : 0;
+2 -3
Просмотреть файл
@@ -347,8 +347,7 @@ setup()
if(is_initialized() || !get_use_rocm_smi()) return;
auto _enable_samp = pthread_gotcha::enable_sampling_on_child_threads();
pthread_gotcha::enable_sampling_on_child_threads() = false;
pthread_gotcha::push_enable_sampling_on_child_threads(false);
// assign the data value determined by rocm-smi
data::device_count = device_count();
@@ -387,7 +386,7 @@ setup()
data::setup();
pthread_gotcha::enable_sampling_on_child_threads() = _enable_samp;
pthread_gotcha::pop_enable_sampling_on_child_threads();
}
void
+2 -3
Просмотреть файл
@@ -30,7 +30,6 @@
#include "library/sampling.hpp"
#include "library/thread_data.hpp"
namespace rocm_smi = omnitrace::rocm_smi;
using namespace omnitrace;
namespace tim
@@ -204,7 +203,7 @@ extern "C"
bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names)
{
pthread_gotcha::enable_sampling_on_child_threads() = false;
pthread_gotcha::push_enable_sampling_on_child_threads(false);
OMNITRACE_CONDITIONAL_BASIC_PRINT(get_debug_env() || get_verbose_env() > 0,
"[%s]\n", __FUNCTION__);
tim::consume_parameters(table, runtime_version, failed_tool_count,
@@ -297,7 +296,7 @@ extern "C"
rocm_smi::set_state(State::Active);
comp::roctracer::setup();
pthread_gotcha::enable_sampling_on_child_threads() = true;
pthread_gotcha::pop_enable_sampling_on_child_threads();
return true;
}
+2 -3
Просмотреть файл
@@ -104,8 +104,7 @@ sampler::setup()
// shutdown if already running
shutdown();
auto _enable_samp = pthread_gotcha::enable_sampling_on_child_threads();
pthread_gotcha::enable_sampling_on_child_threads() = false;
pthread_gotcha::push_enable_sampling_on_child_threads(false);
if(get_use_rocm_smi())
{
@@ -142,7 +141,7 @@ sampler::setup()
_fut.wait();
pthread_gotcha::enable_sampling_on_child_threads() = _enable_samp;
pthread_gotcha::pop_enable_sampling_on_child_threads();
set_state(State::Active);
}