2
0

Option rename + minor fixes (#57)

- Set choices of OMNITRACE_BACKEND option
- rename OMNITRACE_SHMEM_SIZE_HINT_KB option
- rename OMNITRACE_BUFFER_SIZE_KB option
- rename OMNITRACE_COMBINE_PERFETTO_TRACES
- rename OMNITRACE_BACKEND option
- default to OMNITRACE_COLLAPSE_PROCESSES for combining perfetto traces
- OMNITRACE_PERFETTO_FILL_POLICY option
- fix unused variables due to constexpr in add_critical_trace
- rename perfetto config from "track_event" to "omnitrace"
- fix build-release.sh + python
- handle config file updating OMNITRACE_DL_VERBOSE in omnitrace-dl
- rename roctrace.cfg to omnitrace.cfg
- accept "on" and "off" for get_sampling_cpus()

[ROCm/rocprofiler-systems commit: 346f8cd0bc]
Este cometimento está contido em:
Jonathan R. Madsen
2022-05-10 17:30:45 -05:00
cometido por GitHub
ascendente 77721c2db5
cometimento 57ef312d26
11 ficheiros modificados com 141 adições e 93 eliminações
+6 -6
Ver ficheiro
@@ -55,7 +55,7 @@ or executable which loads the instrumented libraries normally, e.g.:
```
If you want to redefine certain settings to a new default in a binary rewrite, use the `--env` option. This `omnitrace` option
will set the environment variable to the given value but will not override it. E.g. the default value of `OMNITRACE_BUFFER_SIZE_KB`
will set the environment variable to the given value but will not override it. E.g. the default value of `OMNITRACE_PERFETTO_BUFFER_SIZE_KB`
is 1024000 KB (1 GiB):
```shell
@@ -64,17 +64,17 @@ omnitrace -o app.inst -- /path/to/app
./app.inst
```
Passing `--env OMNITRACE_BUFFER_SIZE_KB=5120000` will change the default value in `app.inst` to 5120000 KiB (5 GiB):
Passing `--env OMNITRACE_PERFETTO_BUFFER_SIZE_KB=5120000` will change the default value in `app.inst` to 5120000 KiB (5 GiB):
```shell
# defaults to 5 GiB buffer size
omnitrace -o app.inst --env OMNITRACE_BUFFER_SIZE_KB=5120000 -- /path/to/app
omnitrace -o app.inst --env OMNITRACE_PERFETTO_BUFFER_SIZE_KB=5120000 -- /path/to/app
./app.inst
```
```shell
# override default 5 GiB buffer size to 200 MB
export OMNITRACE_BUFFER_SIZE_KB=200000
export OMNITRACE_PERFETTO_BUFFER_SIZE_KB=200000
./app.inst
```
@@ -140,13 +140,13 @@ In a separate window run:
```shell
pkill traced
traced --background
perfetto --out ./htrace.out --txt -c ${OMNITRACE_ROOT}/share/roctrace.cfg
perfetto --out ./htrace.out --txt -c ${OMNITRACE_ROOT}/share/omnitrace.cfg
```
then in the window running the application, configure the omnitrace instrumentation to use the system backend:
```shell
export OMNITRACE_BACKEND=system
export OMNITRACE_PERFETTO_BACKEND=system
```
for the merge use the `htrace.out`:
@@ -1,4 +1,4 @@
# perfetto --out OUTPUT_FILE --txt -c roctrace.cfg
# perfetto --out OUTPUT_FILE --txt -c omnitrace.cfg
# 2 minute trace, but can be stopped prematurely.
duration_ms: 120000
write_into_file: true
@@ -14,9 +14,6 @@ buffers {
data_sources {
config {
name: "track_event"
track_event_config {
enabled_categories: "kernels,omnitrace"
}
name: "omnitrace"
}
}
+6 -2
Ver ficheiro
@@ -120,10 +120,14 @@ build-and-package-python()
for i in ${PYTHON_VERSIONS}
do
conda activate py3.${i}
_PYTHON_ENVS="${_PYTHON_ENVS}$(dirname $(dirname $(which python)));"
if [ -z "${_PYTHON_ENVS}" ]; then
_PYTHON_ENVS="$(dirname $(dirname $(which python)))"
else
_PYTHON_ENVS="${_PYTHON_ENVS};$(dirname $(dirname $(which python)))"
fi
conda deactivate
done
build-and-package-base ${DIR}-python $@ -DOMNITRACE_USE_PYTHON=ON -DOMNITRACE_BUILD_PYTHON=ON -DOMNITRACE_PYTHON_ENVS=\"${_PYTHON_ENVS}\"
build-and-package-base ${DIR}-python $@ -DOMNITRACE_USE_PYTHON=ON -DOMNITRACE_BUILD_PYTHON=ON -DOMNITRACE_PYTHON_ROOT_DIRS=\"${_PYTHON_ENVS}\"
}
build-and-package()
+3 -3
Ver ficheiro
@@ -70,8 +70,8 @@ $ omnitrace-avail -S -bd
| ENVIRONMENT VARIABLE | DESCRIPTION |
|-----------------------------------------|-----------------------------------------|
| OMNITRACE_ADD_SECONDARY | Enable/disable components adding sec... |
| OMNITRACE_BACKEND | Specify the perfetto backend to acti... |
| OMNITRACE_BUFFER_SIZE_KB | Size of perfetto buffer (in KB) |
| OMNITRACE_PERFETTO_BACKEND | Specify the perfetto backend to acti... |
| OMNITRACE_PERFETTO_BUFFER_SIZE_KB | Size of perfetto buffer (in KB) |
| OMNITRACE_COLLAPSE_PROCESSES | Enable/disable combining process-spe... |
| OMNITRACE_COLLAPSE_THREADS | Enable/disable combining thread-spec... |
| OMNITRACE_CONFIG_FILE | Configuration file for omnitrace |
@@ -124,7 +124,7 @@ $ omnitrace-avail -S -bd
| OMNITRACE_SAMPLING_FREQ | Number of software interrupts per se... |
| OMNITRACE_SCIENTIFIC | Set the global numerical reporting t... |
| OMNITRACE_SETTINGS_DESC | Provide descriptions when printing s... |
| OMNITRACE_SHMEM_SIZE_HINT_KB | Hint for shared-memory buffer size i... |
| OMNITRACE_PERFETTO_SHMEM_SIZE_HINT_KB | Hint for shared-memory buffer size i... |
| OMNITRACE_SUPPRESS_CONFIG | Disable processing of setting config... |
| OMNITRACE_SUPPRESS_PARSING | Disable parsing environment |
| OMNITRACE_TEXT_OUTPUT | Write text output files |
+10 -3
Ver ficheiro
@@ -67,14 +67,20 @@ get_omnitrace_env()
return get_env("OMNITRACE_VERBOSE", (_debug) ? 100 : 0);
}
/// Resolve the verbosity level for the omnitrace-dl library.
///
/// Returns 100 when OMNITRACE_DL_DEBUG is set to a true value. Otherwise
/// returns the value of OMNITRACE_DL_VERBOSE, using the result of
/// get_omnitrace_env() as the default when that variable is unset.
inline int
get_omnitrace_dl_env()
{
    // the dl-specific debug flag takes precedence over everything else
    if(get_env("OMNITRACE_DL_DEBUG", false)) return 100;

    // the general omnitrace level is only queried when the debug flag is off
    return get_env("OMNITRACE_DL_VERBOSE", get_omnitrace_env());
}
// environment priority:
// - OMNITRACE_DL_DEBUG
// - OMNITRACE_DL_VERBOSE
// - OMNITRACE_DEBUG
// - OMNITRACE_VERBOSE
int _omnitrace_dl_verbose = get_env("OMNITRACE_DL_DEBUG", false)
? 100
: get_env("OMNITRACE_DL_VERBOSE", get_omnitrace_env());
int _omnitrace_dl_verbose = get_omnitrace_dl_env();
// The docs for dlopen suggest that the combination of RTLD_LOCAL + RTLD_DEEPBIND
// (when available) helps ensure that the symbols in the instrumentation library
@@ -388,6 +394,7 @@ extern "C"
{
dl::get_active() = true;
dl::get_inited() = true;
dl::_omnitrace_dl_verbose = dl::get_omnitrace_dl_env();
}
}
+11 -4
Ver ficheiro
@@ -649,13 +649,16 @@ omnitrace_init_tooling_hidden()
auto shmem_size_hint = get_perfetto_shmem_size_hint();
auto buffer_size = get_perfetto_buffer_size();
auto _policy =
get_perfetto_fill_policy() == "discard"
? perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD
: perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER;
auto* buffer_config = cfg.add_buffers();
buffer_config->set_size_kb(buffer_size);
buffer_config->set_fill_policy(
perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
buffer_config->set_fill_policy(_policy);
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("track_event");
ds_cfg->set_name("omnitrace");
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
args.shmem_size_hint_kb = shmem_size_hint;
@@ -1260,8 +1263,12 @@ omnitrace_finalize_hidden(void)
else
{
// Write the trace into a file.
if(get_verbose() >= 0) fprintf(stderr, "Done\n");
ofs.write(&trace_data[0], trace_data.size());
if(get_verbose() >= 0) fprintf(stderr, "Done\n");
auto _manager = tim::manager::instance();
if(_manager)
_manager->add_file_output("protobuf", "perfetto",
get_perfetto_output_filename());
}
ofs.close();
if(get_verbose() >= 0) fprintf(stderr, "\n");
+2 -1
Ver ficheiro
@@ -118,9 +118,10 @@ add_critical_trace(int64_t _targ_tid, size_t _cpu_cid, size_t _gpu_cid,
if(_gpu_cid == 0 && _cpu_cid % _update_freq == (_update_freq - 1))
critical_trace::update(_targ_tid);
}
tim::consume_parameters(_lock);
}
tim::consume_parameters(_targ_tid, _cpu_cid, _gpu_cid, _parent_cid, _ts_beg, _ts_val,
_queue, _hash, _depth, _prio);
_queue, _hash, _depth, _prio, num_mutexes);
}
} // namespace omnitrace
@@ -142,7 +142,7 @@ configure_settings(bool _init)
auto _default_perfetto_v =
!tim::get_env<bool>("OMNITRACE_USE_TIMEMORY", false, false);
auto _system_backend = tim::get_env("OMNITRACE_BACKEND_SYSTEM", false, false);
auto _system_backend = tim::get_env("OMNITRACE_PERFETTO_BACKEND_SYSTEM", false, false);
auto _omnitrace_debug = _config->get<bool>("OMNITRACE_DEBUG");
if(_omnitrace_debug) tim::set_env("TIMEMORY_DEBUG_SETTINGS", "1", 0);
@@ -220,17 +220,20 @@ configure_settings(bool _init)
"", "thread_sampling");
auto _backend = tim::get_env_choice<std::string>(
"OMNITRACE_BACKEND",
"OMNITRACE_PERFETTO_BACKEND",
(_system_backend)
? "system" // if OMNITRACE_BACKEND_SYSTEM is true, default to system.
? "system" // if OMNITRACE_PERFETTO_BACKEND_SYSTEM is true, default to system.
: "inprocess", // Otherwise, default to inprocess
{ "inprocess", "system", "all" }, false);
OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_BACKEND",
OMNITRACE_CONFIG_SETTING(std::string, "OMNITRACE_PERFETTO_BACKEND",
"Specify the perfetto backend to activate. Options are: "
"'inprocess', 'system', or 'all'",
_backend, "perfetto");
_config->find("OMNITRACE_PERFETTO_BACKEND")
->second->set_choices({ "inprocess", "system", "all" });
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_CRITICAL_TRACE",
"Enable generation of the critical trace", false, "backend",
"critical_trace");
@@ -280,20 +283,32 @@ configure_settings(bool _init)
"Include names in serialization of critical trace (mainly for debugging)",
_omnitrace_debug, "debugging", "critical_trace");
OMNITRACE_CONFIG_SETTING(size_t, "OMNITRACE_SHMEM_SIZE_HINT_KB",
OMNITRACE_CONFIG_SETTING(size_t, "OMNITRACE_PERFETTO_SHMEM_SIZE_HINT_KB",
"Hint for shared-memory buffer size in perfetto (in KB)",
40960, "perfetto", "data");
OMNITRACE_CONFIG_SETTING(size_t, "OMNITRACE_BUFFER_SIZE_KB",
OMNITRACE_CONFIG_SETTING(size_t, "OMNITRACE_PERFETTO_BUFFER_SIZE_KB",
"Size of perfetto buffer (in KB)", 1024000, "perfetto",
"data");
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_COMBINE_PERFETTO_TRACES",
"Combine Perfetto traces", true, "perfetto", "data");
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_PERFETTO_COMBINE_TRACES",
"Combine Perfetto traces. If not explicitly set, it will "
"default to the value of OMNITRACE_COLLAPSE_PROCESSES",
_config->get<bool>("collapse_processes"), "perfetto",
"data");
// Register the policy applied when the perfetto buffer is full.
//
// The description documents two values: 'discard' and 'ring_buffer'. The
// registered choices must list those same names; previously the choices
// were { "fill", "discard" }, which omitted the documented 'ring_buffer'
// value and advertised an undocumented 'fill' value.
// NOTE(review): the consumer of this setting is expected to select the
// discard policy only for "discard" and the ring buffer otherwise --
// confirm against the perfetto setup code.
OMNITRACE_CONFIG_SETTING(
    std::string, "OMNITRACE_PERFETTO_FILL_POLICY",
    "Behavior when perfetto buffer is full. 'discard' will ignore new entries, "
    "'ring_buffer' will overwrite old entries",
    "discard", "perfetto", "data");
_config->find("OMNITRACE_PERFETTO_FILL_POLICY")
    ->second->set_choices({ "ring_buffer", "discard" });
OMNITRACE_CONFIG_SETTING(int64_t, "OMNITRACE_CRITICAL_TRACE_COUNT",
"Number of critical trace to export (0 == all)", 0, "data",
"critical_trace");
"critical_trace", "omnitrace-critical-trace");
OMNITRACE_CONFIG_SETTING(uint64_t, "OMNITRACE_CRITICAL_TRACE_BUFFER_COUNT",
"Number of critical trace records to store in thread-local "
@@ -423,6 +438,13 @@ configure_settings(bool _init)
_config->get_global_components() =
_config->get<std::string>("OMNITRACE_TIMEMORY_COMPONENTS");
auto _combine_perfetto_traces = _config->find("OMNITRACE_PERFETTO_COMBINE_TRACES");
if(!_combine_perfetto_traces->second->get_environ_updated() &&
_combine_perfetto_traces->second->get_config_updated())
{
_combine_perfetto_traces->second->set(_config->get<bool>("collapse_processes"));
}
scope::get_fields()[scope::flat::value] = _config->get_flat_profile();
scope::get_fields()[scope::timeline::value] = _config->get_timeline_profile();
@@ -535,9 +557,17 @@ configure_settings(bool _init)
_config->disable("profiler_components");
_config->disable("destructor_report");
_config->disable("stack_clearing");
_config->disable("auto_output");
_config->disable("file_output");
_config->disable("plot_output");
_config->disable("dart_output");
_config->disable("flamegraph_output");
_config->disable("separator_freq");
_config->disable("width");
_config->disable("max_width");
#if !defined(TIMEMORY_USE_MPI) || TIMEMORY_USE_MPI == 0
_config->disable("OMNITRACE_COMBINE_PERFETTO_TRACES");
_config->disable("OMNITRACE_PERFETTO_COMBINE_TRACES");
#endif
}
@@ -663,47 +693,8 @@ print_settings()
{
if(dmp::rank() > 0) return;
static std::set<tim::string_view_t> _sample_options = {
"OMNITRACE_SAMPLING_FREQ", "OMNITRACE_SAMPLING_DELAY",
"OMNITRACE_SAMPLING_CPUS", "OMNITRACE_FLAT_SAMPLING",
"OMNITRACE_TIMELINE_SAMPLING", "OMNITRACE_FLAT_SAMPLING",
"OMNITRACE_TIMELINE_SAMPLING",
};
static std::set<tim::string_view_t> _perfetto_options = {
"OMNITRACE_OUTPUT_FILE",
"OMNITRACE_BACKEND",
"OMNITRACE_SHMEM_SIZE_HINT_KB",
"OMNITRACE_BUFFER_SIZE_KB",
};
static std::set<tim::string_view_t> _timemory_options = {
"OMNITRACE_ROCTRACER_FLAT_PROFILE", "OMNITRACE_ROCTRACER_TIMELINE_PROFILE"
};
// generic filter for filtering relevant options
auto _is_omnitrace_option = [](const auto& _v, const auto& _c) {
if(!get_use_roctracer() && _v.find("OMNITRACE_ROCTRACER_") == 0) return false;
if(!get_use_critical_trace() && _v.find("OMNITRACE_CRITICAL_TRACE_") == 0)
return false;
if(!get_use_perfetto() && _perfetto_options.count(_v) > 0) return false;
if(!get_use_timemory() && _timemory_options.count(_v) > 0) return false;
if(!get_use_sampling() && !get_use_thread_sampling() &&
_sample_options.count(_v) > 0)
return false;
const auto npos = std::string::npos;
if(_v.find("WIDTH") != npos || _v.find("SEPARATOR_FREQ") != npos ||
_v.find("AUTO_OUTPUT") != npos || _v.find("DART_OUTPUT") != npos ||
_v.find("FILE_OUTPUT") != npos || _v.find("PLOT_OUTPUT") != npos ||
_v.find("FLAMEGRAPH_OUTPUT") != npos)
return false;
if(!_c.empty())
{
if(_c.find("omnitrace") != _c.end()) return true;
if(_c.find("debugging") != _c.end() && _v.find("DEBUG") != npos) return true;
if(_c.find("config") != _c.end()) return true;
if(_c.find("dart") != _c.end()) return false;
if(_c.find("io") != _c.end() && _v.find("_OUTPUT") != npos) return true;
if(_c.find("format") != _c.end()) return true;
return false;
}
auto _is_omnitrace_option = [](const auto& _v, const auto&) {
return (_v.find("OMNITRACE_") == 0);
};
@@ -988,14 +979,14 @@ get_critical_trace_per_row()
size_t
get_perfetto_shmem_size_hint()
{
static auto _v = get_config()->find("OMNITRACE_SHMEM_SIZE_HINT_KB");
static auto _v = get_config()->find("OMNITRACE_PERFETTO_SHMEM_SIZE_HINT_KB");
return static_cast<tim::tsettings<size_t>&>(*_v->second).get();
}
size_t
get_perfetto_buffer_size()
{
static auto _v = get_config()->find("OMNITRACE_BUFFER_SIZE_KB");
static auto _v = get_config()->find("OMNITRACE_PERFETTO_BUFFER_SIZE_KB");
return static_cast<tim::tsettings<size_t>&>(*_v->second).get();
}
@@ -1003,13 +994,20 @@ bool
get_perfetto_combined_traces()
{
#if defined(TIMEMORY_USE_MPI) && TIMEMORY_USE_MPI > 0
static auto _v = get_config()->find("OMNITRACE_COMBINE_PERFETTO_TRACES");
static auto _v = get_config()->find("OMNITRACE_PERFETTO_COMBINE_TRACES");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
#else
return false;
#endif
}
/// Return the current value of the OMNITRACE_PERFETTO_FILL_POLICY setting.
///
/// The lookup into the configuration is performed once and the resulting
/// iterator is cached in a function-local static; the value itself is read
/// on every call.
std::string
get_perfetto_fill_policy()
{
    static auto _fill_policy_itr =
        get_config()->find("OMNITRACE_PERFETTO_FILL_POLICY");
    auto& _fill_policy =
        static_cast<tim::tsettings<std::string>&>(*_fill_policy_itr->second);
    return _fill_policy.get();
}
uint64_t
get_critical_trace_update_freq()
{
@@ -1038,7 +1036,7 @@ std::string&
get_backend()
{
// select inprocess, system, or both (i.e. all)
static auto _v = get_config()->find("OMNITRACE_BACKEND");
static auto _v = get_config()->find("OMNITRACE_PERFETTO_BACKEND");
return static_cast<tim::tsettings<std::string>&>(*_v->second).get();
}
@@ -49,7 +49,7 @@ void
configure_settings(bool _init = true);
void
print_banner(std::ostream& _os = std::cout);
print_banner(std::ostream& _os = std::cerr);
void
print_settings(
@@ -202,6 +202,9 @@ get_perfetto_buffer_size();
bool
get_perfetto_combined_traces();
std::string
get_perfetto_fill_policy();
uint64_t
get_critical_trace_update_freq();
@@ -98,9 +98,8 @@ config()
_cpu_mhz_pos.emplace_back(_pos + 1);
_ifs >> _s;
if(!_ifs.good() || !_ifs) break;
OMNITRACE_CONDITIONAL_BASIC_PRINT(get_debug() || get_verbose() > 1,
"[%zu] %s %s (pos = %zu)\n", i,
_st.c_str(), _s.c_str(), _pos + 1);
OMNITRACE_BASIC_VERBOSE(3, "[%zu] %s %s (pos = %zu)\n", i,
_st.c_str(), _s.c_str(), _pos + 1);
break;
}
}
@@ -110,6 +109,12 @@ config()
_ifs.close();
auto _enabled_val = get_sampling_cpus();
for(auto& itr : _enabled_val)
itr = tolower(itr);
if(_enabled_val == "off")
_enabled_val = "none";
else if(_enabled_val == "on")
_enabled_val = "all";
if(_enabled_val != "none" && _enabled_val != "all")
{
auto _enabled = tim::delimit(_enabled_val, ",; \t");
@@ -145,9 +150,19 @@ config()
}
}
}
else if(_enabled_val == "all")
{
for(size_t i = 0; i < _ncpu; ++i)
enabled_cpu_frequencies.emplace(i);
}
else if(_enabled_val == "none")
{
enabled_cpu_frequencies.clear();
}
cpu_frequencies.resize(_ncpu);
cpu_mhz_pos = _cpu_mhz_pos;
ifs = std::make_unique<std::ifstream>("/proc/cpuinfo", std::ifstream::binary);
if(!enabled_cpu_frequencies.empty())
ifs = std::make_unique<std::ifstream>("/proc/cpuinfo", std::ifstream::binary);
}
void
@@ -166,11 +181,9 @@ sample()
};
auto _ts = tim::get_clock_real_now<size_t, std::nano>();
for(int64_t i = 0; i < ncpu; ++i)
for(const auto& itr : enabled_cpu_frequencies)
{
if(!enabled_cpu_frequencies.empty() && enabled_cpu_frequencies.count(i) == 0)
continue;
cpu_frequencies.at(i).emplace_back(_ts, _read_cpu_freq(i));
cpu_frequencies.at(itr).emplace_back(_ts, _read_cpu_freq(itr));
}
}
@@ -93,6 +93,8 @@ sampler::poll(std::atomic<State>* _state, nsec_t _interval, promise_t* _ready)
{
std::this_thread::sleep_until(_now);
if(_state->load() != State::Active) continue;
if(get_state() == State::Finalized) break;
if(get_state() != State::Active) continue;
get_sampler_is_sampling().store(true);
for(auto& itr : instances)
itr->sample();
@@ -100,6 +102,8 @@ sampler::poll(std::atomic<State>* _state, nsec_t _interval, promise_t* _ready)
while(_now < std::chrono::steady_clock::now())
_now += _interval;
}
// ensure this is always false
get_sampler_is_sampling().store(false);
OMNITRACE_CONDITIONAL_BASIC_PRINT(get_debug(),
"Thread sampler polling completed...\n");
@@ -165,30 +169,44 @@ sampler::setup()
void
sampler::shutdown()
{
// set the local sampler state to finalized
set_state(State::Finalized);
// shutdown all components
for(auto& itr : instances)
itr->shutdown();
auto& _thread = get_thread();
if(_thread)
{
set_state(State::Finalized);
size_t _nitr = 0;
constexpr size_t _nitr_max = 100;
uint64_t _freq = (1.0 / get_thread_sampling_freq()) * 1.0e3;
// wait until the sampler is no longer sampling
std::this_thread::sleep_for(msec_t{ _freq });
while(get_sampler_is_sampling().load())
{}
{
if(_nitr++ > _nitr_max) break;
}
// during CI, throw an error if polling_finished is not valid
OMNITRACE_CI_THROW(!polling_finished, "polling_finished is not valid\n");
if(polling_finished)
{
auto _fut = polling_finished->get_future();
uint64_t _freq = (1.0 / get_thread_sampling_freq()) * 1.0e3;
// wait for the thread to finish
auto _fut = polling_finished->get_future();
_fut.wait_for(msec_t{ 10 * _freq });
_thread->join();
}
else
{
uint64_t _freq = (1.0 / get_thread_sampling_freq()) * 1.0e3;
// cancel the thread and detach
std::this_thread::sleep_for(msec_t{ 10 * _freq });
pthread_cancel(_thread->native_handle());
_thread->detach();
}
_thread = std::unique_ptr<std::thread>{};
_thread = std::unique_ptr<std::thread>{ nullptr };
polling_finished = std::unique_ptr<promise_t>{};
}