Trace thread config + paranoid level + preload (#176)
- OMNITRACE_TRACE_THREAD_BARRIERS config option
  - set to OFF to disable wrapping `pthread_barrier_wait`
- OMNITRACE_TRACE_THREAD_JOIN config option
  - set to OFF to disable wrapping `pthread_join`
- allow PAPI with perf_event_paranoid at level 2
- default to no PAPI events
- after preload, reset LD_PRELOAD (via setenv) so that it no longer includes libomnitrace
- closes #175
- bump version to 1.7.1
[ROCm/rocprofiler-systems commit: a3439d5bf2]
This commit is contained in:
committed by
GitHub
parent
45e5450bf2
commit
0ec0d18ac8
@@ -1 +1 @@
|
||||
1.7.0
|
||||
1.7.1
|
||||
|
||||
+1
-1
Submodule projects/rocprofiler-systems/external/timemory updated: 46f25fbb46...ddc49db2ec
@@ -96,13 +96,17 @@ variable to be enabled (i.e., `OMNITRACE_USE_ROCPROFILER=ON`).
|
||||
Example configuration for hardware counters:
|
||||
|
||||
```console
|
||||
# using papi identifiers
|
||||
OMNITRACE_PAPI_EVENTS = PAPI_TOT_CYC PAPI_TOT_INS
|
||||
|
||||
# using perf identifiers
|
||||
OMNITRACE_PAPI_EVENTS = perf::INSTRUCTIONS perf::CACHE-REFERENCES perf::CACHE-MISSES
|
||||
```
|
||||
|
||||
#### OMNITRACE_PAPI_EVENTS
|
||||
|
||||
In order to collect the majority of hardware counters via PAPI, you need to make sure that `/proc/sys/kernel/perf_event_paranoid`
|
||||
has a value of less than 2. If you have sudo access, you can use the following command to modify the value:
|
||||
has a value <= 2. If you have sudo access, you can use the following command to modify the value:
|
||||
|
||||
```shell
|
||||
echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
|
||||
|
||||
@@ -98,9 +98,32 @@ get_omnitrace_dl_env()
|
||||
// Reports whether omnitrace preloading is in effect.
//
// The result is true only when the OMNITRACE_PRELOAD environment variable
// (read via get_env with a fallback of true) is truthy AND the LD_PRELOAD
// environment variable contains the substring "libomnitrace-dl.so".
// The check is evaluated inside an immediately-invoked lambda whose result is
// stored in a function-local static, so later changes to either environment
// variable are not reflected by subsequent calls.
//
// NOTE(review): the first `auto&& _preload = get_env(...)` statement directly
// after the opening brace duplicates the one inside the lambda and looks like
// a removed line from the diff rendering rather than live code -- confirm
// against the actual source file.
inline bool
|
||||
get_omnitrace_preload()
|
||||
{
|
||||
auto&& _preload = get_env("OMNITRACE_PRELOAD", true);
|
||||
static bool _v = []() {
|
||||
auto&& _preload = get_env("OMNITRACE_PRELOAD", true);
|
||||
auto&& _preload_libs = get_env("LD_PRELOAD", std::string{});
|
||||
return (_preload &&
|
||||
_preload_libs.find("libomnitrace-dl.so") != std::string::npos);
|
||||
}();
|
||||
return _v;
|
||||
}
|
||||
|
||||
// Rewrites LD_PRELOAD so that it no longer lists any omnitrace library.
//
// Reads LD_PRELOAD (empty string when unset). If it contains
// "libomnitrace-dl.so", every entry produced by delimit(_preload_libs, ":")
// whose text contains "libomnitrace" is skipped, and the remaining entries are
// appended to a new string via common::join("", ":", itr) -- presumably
// yielding ":" followed by the entry; verify against common::join. A leading
// ':' is then stripped and the result is written back with
// setenv("LD_PRELOAD", ..., 1), i.e. overwriting the existing value. When all
// entries were omnitrace libraries LD_PRELOAD is set to the empty string
// (it is not unset).
//
// Note that the trigger condition matches only "libomnitrace-dl.so" while the
// filter drops any entry containing "libomnitrace".
//
// NOTE(review): the `return (_preload && ...)` statement below returns a value
// from a void function and references `_preload`, which is not declared here;
// it looks like a removed line from the diff rendering rather than live code
// -- confirm against the actual source file.
inline void
|
||||
reset_omnitrace_preload()
|
||||
{
|
||||
auto&& _preload_libs = get_env("LD_PRELOAD", std::string{});
|
||||
return (_preload && _preload_libs.find("libomnitrace-dl.so") != std::string::npos);
|
||||
if(_preload_libs.find("libomnitrace-dl.so") != std::string::npos)
|
||||
{
|
||||
auto _modified_preload = std::string{};
|
||||
for(const auto& itr : delimit(_preload_libs, ":"))
|
||||
{
|
||||
if(itr.find("libomnitrace") != std::string::npos) continue;
|
||||
_modified_preload += common::join("", ":", itr);
|
||||
}
|
||||
if(!_modified_preload.empty() && _modified_preload.find(':') == 0)
|
||||
_modified_preload = _modified_preload.substr(1);
|
||||
|
||||
setenv("LD_PRELOAD", _modified_preload.c_str(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
// environment priority:
|
||||
@@ -940,6 +963,7 @@ omnitrace_preload()
|
||||
|
||||
if(_preload)
|
||||
{
|
||||
reset_omnitrace_preload();
|
||||
omnitrace_preinit_library();
|
||||
OMNITRACE_DL_LOG(1, "[%s] invoking %s(%s)\n", __FUNCTION__, "omnitrace_init",
|
||||
::omnitrace::join(::omnitrace::QuoteStrings{}, ", ", "sampling",
|
||||
|
||||
+13
-4
@@ -71,6 +71,8 @@ pthread_mutex_gotcha::get_hashes()
|
||||
for(size_t i = 9; i < 12; ++i)
|
||||
_skip.emplace(i);
|
||||
}
|
||||
if(!config::get_trace_thread_barriers()) _skip.emplace(8);
|
||||
if(!config::get_trace_thread_join()) _skip.emplace(12);
|
||||
for(size_t i = 0; i < gotcha_capacity; ++i)
|
||||
{
|
||||
auto&& _id = _data.at(i).tool_id;
|
||||
@@ -132,8 +134,12 @@ pthread_mutex_gotcha::configure()
|
||||
"pthread_rwlock_unlock" });
|
||||
}
|
||||
|
||||
pthread_mutex_gotcha_t::configure(
|
||||
comp::gotcha_config<8, int, pthread_barrier_t*>{ "pthread_barrier_wait" });
|
||||
if(config::get_trace_thread_barriers())
|
||||
{
|
||||
pthread_mutex_gotcha_t::configure(
|
||||
comp::gotcha_config<8, int, pthread_barrier_t*>{
|
||||
"pthread_barrier_wait" });
|
||||
}
|
||||
|
||||
if(config::get_trace_thread_spin_locks())
|
||||
{
|
||||
@@ -149,8 +155,11 @@ pthread_mutex_gotcha::configure()
|
||||
"pthread_spin_unlock" });
|
||||
}
|
||||
|
||||
pthread_mutex_gotcha_t::configure(
|
||||
comp::gotcha_config<12, int, pthread_t, void**>{ "pthread_join" });
|
||||
if(config::get_trace_thread_join())
|
||||
{
|
||||
pthread_mutex_gotcha_t::configure(
|
||||
comp::gotcha_config<12, int, pthread_t, void**>{ "pthread_join" });
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -459,6 +459,14 @@ configure_settings(bool _init)
|
||||
"cause deadlocks with MPI distributions.",
|
||||
false, "backend", "parallelism", "gotcha", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_TRACE_THREAD_BARRIERS",
|
||||
"Enable tracing calls to pthread_barrier functions.", true,
|
||||
"backend", "parallelism", "gotcha", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_TRACE_THREAD_JOIN",
|
||||
"Enable tracing calls to pthread_join functions.", true,
|
||||
"backend", "parallelism", "gotcha", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
bool, "OMNITRACE_SAMPLING_KEEP_INTERNAL",
|
||||
"Configure whether the statistical samples should include call-stack entries "
|
||||
@@ -601,7 +609,6 @@ configure_settings(bool _init)
|
||||
_config->get_max_thread_bookmarks() = 1;
|
||||
_config->get_timing_units() = "sec";
|
||||
_config->get_memory_units() = "MB";
|
||||
_config->get_papi_events() = "PAPI_TOT_CYC";
|
||||
|
||||
// settings native to timemory but critically and/or extensively used by omnitrace
|
||||
auto _add_omnitrace_category = [&_config](auto itr) {
|
||||
@@ -685,21 +692,22 @@ configure_settings(bool _init)
|
||||
if(_fparanoid) _fparanoid >> _paranoid;
|
||||
}
|
||||
|
||||
if(_paranoid > 1)
|
||||
if(_paranoid > 2)
|
||||
{
|
||||
OMNITRACE_BASIC_VERBOSE(0,
|
||||
"/proc/sys/kernel/perf_event_paranoid has a value of %i. "
|
||||
"Disabling PAPI (requires a value <= 1)...\n",
|
||||
"Disabling PAPI (requires a value <= 2)...\n",
|
||||
_paranoid);
|
||||
OMNITRACE_BASIC_VERBOSE(0,
|
||||
"In order to enable PAPI support, run 'echo N | sudo tee "
|
||||
"/proc/sys/kernel/perf_event_paranoid' where N is < 2\n");
|
||||
tim::trait::runtime_enabled<comp::papi_common<void>>::set(false);
|
||||
tim::trait::runtime_enabled<comp::papi_array_t>::set(false);
|
||||
tim::trait::runtime_enabled<comp::papi_vector>::set(false);
|
||||
tim::trait::runtime_enabled<comp::cpu_roofline_flops>::set(false);
|
||||
tim::trait::runtime_enabled<comp::cpu_roofline_dp_flops>::set(false);
|
||||
tim::trait::runtime_enabled<comp::cpu_roofline_sp_flops>::set(false);
|
||||
OMNITRACE_BASIC_VERBOSE(
|
||||
0, "In order to enable PAPI support, run 'echo N | sudo tee "
|
||||
"/proc/sys/kernel/perf_event_paranoid' where N is <= 2\n");
|
||||
trait::runtime_enabled<comp::papi_config>::set(false);
|
||||
trait::runtime_enabled<comp::papi_common<void>>::set(false);
|
||||
trait::runtime_enabled<comp::papi_array_t>::set(false);
|
||||
trait::runtime_enabled<comp::papi_vector>::set(false);
|
||||
trait::runtime_enabled<comp::cpu_roofline_flops>::set(false);
|
||||
trait::runtime_enabled<comp::cpu_roofline_dp_flops>::set(false);
|
||||
trait::runtime_enabled<comp::cpu_roofline_sp_flops>::set(false);
|
||||
_config->get_papi_events() = std::string{};
|
||||
}
|
||||
else
|
||||
@@ -812,6 +820,14 @@ configure_settings(bool _init)
|
||||
tim::set_env(std::string{ _dl_verbose->first }, _dl_verbose->second->as_string(),
|
||||
0);
|
||||
|
||||
if(_config->get_papi_events().empty())
|
||||
{
|
||||
trait::runtime_enabled<comp::papi_config>::set(false);
|
||||
trait::runtime_enabled<comp::papi_common<void>>::set(false);
|
||||
trait::runtime_enabled<comp::papi_array_t>::set(false);
|
||||
trait::runtime_enabled<comp::papi_vector>::set(false);
|
||||
}
|
||||
|
||||
configure_mode_settings();
|
||||
configure_signal_handler();
|
||||
configure_disabled_settings();
|
||||
@@ -1886,6 +1902,20 @@ get_trace_thread_spin_locks()
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
// Returns the current boolean value of the "OMNITRACE_TRACE_THREAD_BARRIERS"
// setting.
//
// The setting is looked up with get_config()->find(...) and the resulting
// iterator is kept in a function-local static, so the lookup itself happens
// only on the first call; the value is read through the cached entry on every
// call by casting it to tim::tsettings<bool>.
//
// NOTE(review): the iterator is dereferenced without being compared against
// end() -- presumably the setting is always registered before this accessor
// is first called; confirm.
bool
|
||||
get_trace_thread_barriers()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_TRACE_THREAD_BARRIERS");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
// Returns the current boolean value of the "OMNITRACE_TRACE_THREAD_JOIN"
// setting.
//
// The setting is looked up with get_config()->find(...) and the resulting
// iterator is kept in a function-local static, so the lookup itself happens
// only on the first call; the value is read through the cached entry on every
// call by casting it to tim::tsettings<bool>.
//
// NOTE(review): the iterator is dereferenced without being compared against
// end() -- presumably the setting is always registered before this accessor
// is first called; confirm.
bool
|
||||
get_trace_thread_join()
|
||||
{
|
||||
static auto _v = get_config()->find("OMNITRACE_TRACE_THREAD_JOIN");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
}
|
||||
|
||||
bool
|
||||
get_debug_tid()
|
||||
{
|
||||
|
||||
@@ -322,6 +322,12 @@ get_trace_thread_rwlocks();
|
||||
bool
|
||||
get_trace_thread_spin_locks();
|
||||
|
||||
bool
|
||||
get_trace_thread_barriers();
|
||||
|
||||
bool
|
||||
get_trace_thread_join();
|
||||
|
||||
std::string
|
||||
get_rocm_events();
|
||||
} // namespace config
|
||||
|
||||
Reference in a new issue
Block a user