2
0

Appropriately filter data based on -D and -H options (#163)

- Addresses the concern that device metric tracks are still shown in the Perfetto trace file even when only -H is specified to rocprof-sys-sample (and vice versa).
- Update sampling call-stack docs.

[ROCm/rocprofiler-systems commit: 8ae6651357]
Este cometimento está contido em:
Luca Bruni
2025-04-30 09:50:51 -04:00
cometido por GitHub
ascendente 35b8748c20
cometimento 579596dbba
6 ficheiros modificados com 52 adições e 11 eliminações
@@ -275,7 +275,24 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
ROCP_TOOL_LIB=/opt/rocprofiler-systems/lib/librocprof-sys.so.1.7.1
* The next snippet shows the environment updates when ``rocprof-sys-sample`` enables
profiling, tracing, host process-sampling, device process-sampling, and all the available backends:
profiling, tracing, and device process-sampling, but does not enable host process-sampling:
.. code-block:: shell
$ rocprof-sys-sample -PTD -- ./parallel-overhead-locks 30 4 100
LD_PRELOAD=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1
ROCPROFSYS_CPU_FREQ_ENABLED=false
ROCPROFSYS_PROFILE=true
ROCPROFSYS_TRACE=true
ROCPROFSYS_USE_AMD_SMI=true
ROCPROFSYS_USE_PROCESS_SAMPLING=true
ROCPROFSYS_USE_SAMPLING=true
OMP_TOOL_LIBRARIES=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1
ROCP_TOOL_LIB=/opt/rocprofiler-systems/lib/librocprof-sys.so.1.7.1
* The next snippet shows the environment updates when ``rocprof-sys-sample`` enables
profiling, tracing, device process-sampling, host process-sampling, and all the available backends:
.. code-block:: shell
@@ -302,7 +319,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
...
* The final snippet shows the environment updates when ``rocprof-sys-sample`` enables
profiling, tracing, host process-sampling, and device process-sampling,
profiling, tracing, device process-sampling, and host process-sampling,
sets the output path to ``rocprof-sys-output`` and the output prefix to ``%tag%``, and disables
all the available backends:
@@ -329,7 +346,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
ROCPROFSYS_PROFILE=true
...
An rocprof-sys-sample example
A rocprof-sys-sample example
========================================
Here is the full output from the previous
@@ -431,6 +431,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
auto _d = p.get<bool>("device");
update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d);
update_env(_env, "ROCPROFSYS_CPU_FREQ_ENABLED", _h);
if(_h) update_env(_env, "ROCPROFSYS_USE_AMD_SMI", _d);
});
parser
.add_argument({ "-D", "--device" },
@@ -442,6 +443,7 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
auto _d = p.get<bool>("device");
update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d);
update_env(_env, "ROCPROFSYS_USE_AMD_SMI", _d);
if(_d) update_env(_env, "ROCPROFSYS_CPU_FREQ_ENABLED", _h);
});
parser
.add_argument({ "-w", "--wait" },
@@ -457,6 +457,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
auto _d = p.get<bool>("device");
update_env(_data, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d);
update_env(_data, "ROCPROFSYS_CPU_FREQ_ENABLED", _h);
if(_h) update_env(_data, "ROCPROFSYS_USE_AMD_SMI", _d);
});
_data.processed_environs.emplace("host");
@@ -476,6 +477,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
auto _d = p.get<bool>("device");
update_env(_data, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d);
update_env(_data, "ROCPROFSYS_USE_AMD_SMI", _d);
if(_d) update_env(_data, "ROCPROFSYS_CPU_FREQ_ENABLED", _h);
});
_data.processed_environs.emplace("device");
+16 -2
Ver ficheiro
@@ -419,7 +419,7 @@ configure_settings(bool _init)
ROCPROFSYS_CONFIG_SETTING(
double, "ROCPROFSYS_SAMPLING_FREQ",
"Number of software interrupts per second when OMNITTRACE_USE_SAMPLING=ON", 300.0,
"Number of software interrupts per second when ROCPROFSYS_USE_SAMPLING=ON", 300.0,
"sampling", "process_sampling");
ROCPROFSYS_CONFIG_SETTING(double, "ROCPROFSYS_SAMPLING_CPUTIME_FREQ",
@@ -460,9 +460,15 @@ configure_settings(bool _init)
"If > 0.0, time (in seconds) to sample before stopping",
0.0, "sampling", "process_sampling");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_CPU_FREQ_ENABLED",
"Enable tracking for CPU frequency, memory usage, virtual "
"memory usage, peak memory, context switches, page faults, "
"user time, and kernel time",
false, "process_sampling");
ROCPROFSYS_CONFIG_SETTING(
double, "ROCPROFSYS_PROCESS_SAMPLING_FREQ",
"Number of measurements per second when OMNITTRACE_USE_PROCESS_SAMPLING=ON. If "
"Number of measurements per second when ROCPROFSYS_USE_PROCESS_SAMPLING=ON. If "
"set to zero, uses ROCPROFSYS_SAMPLING_FREQ value",
0.0, "process_sampling");
@@ -1168,6 +1174,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
_set("ROCPROFSYS_USE_SAMPLING", false);
_set("ROCPROFSYS_USE_PROCESS_SAMPLING", false);
_set("ROCPROFSYS_USE_CODE_COVERAGE", false);
_set("ROCPROFSYS_CPU_FREQ_ENABLED", false);
set_setting_value("ROCPROFSYS_TIMEMORY_COMPONENTS", std::string{});
set_setting_value("ROCPROFSYS_PAPI_EVENTS", std::string{});
}
@@ -1851,6 +1858,13 @@ get_use_process_sampling()
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool&
get_cpu_freq_enabled()
{
static auto _v = get_config()->find("ROCPROFSYS_CPU_FREQ_ENABLED");
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
}
bool&
get_use_pid()
{
+3
Ver ficheiro
@@ -216,6 +216,9 @@ get_use_sampling() ROCPROFSYS_HOT;
bool&
get_use_process_sampling() ROCPROFSYS_HOT;
// Returns a mutable reference to the value of the ROCPROFSYS_CPU_FREQ_ENABLED
// setting.
bool&
get_cpu_freq_enabled();
bool&
get_use_pid();
@@ -150,12 +150,15 @@ sampler::setup()
_amd_smi->sample = []() { amd_smi::sample(); };
}
auto& _cpu_freq = instances.emplace_back(std::make_unique<instance>());
_cpu_freq->setup = []() { cpu_freq::setup(); };
_cpu_freq->shutdown = []() { cpu_freq::shutdown(); };
_cpu_freq->post_process = []() { cpu_freq::post_process(); };
_cpu_freq->config = []() { cpu_freq::config(); };
_cpu_freq->sample = []() { cpu_freq::sample(); };
if(get_cpu_freq_enabled())
{
auto& _cpu_freq = instances.emplace_back(std::make_unique<instance>());
_cpu_freq->setup = []() { cpu_freq::setup(); };
_cpu_freq->shutdown = []() { cpu_freq::shutdown(); };
_cpu_freq->post_process = []() { cpu_freq::post_process(); };
_cpu_freq->config = []() { cpu_freq::config(); };
_cpu_freq->sample = []() { cpu_freq::sample(); };
}
for(auto& itr : instances)
itr->setup();