diff --git a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst index e5e06ce46e..a32bab666a 100644 --- a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst +++ b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst @@ -275,7 +275,24 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ ROCP_TOOL_LIB=/opt/rocprofiler-systems/lib/librocprof-sys.so.1.7.1 * The next snippet shows the environment updates when ``rocprof-sys-sample`` enables - profiling, tracing, host process-sampling, device process-sampling, and all the available backends: + profiling, tracing, and device process-sampling, but does not enable host process-sampling: + + .. code-block:: shell + + $ rocprof-sys-sample -PTD -- ./parallel-overhead-locks 30 4 100 + + LD_PRELOAD=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1 + ROCPROFSYS_CPU_FREQ_ENABLED=false + ROCPROFSYS_PROFILE=true + ROCPROFSYS_TRACE=true + ROCPROFSYS_USE_AMD_SMI=true + ROCPROFSYS_USE_PROCESS_SAMPLING=true + ROCPROFSYS_USE_SAMPLING=true + OMP_TOOL_LIBRARIES=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1 + ROCP_TOOL_LIB=/opt/rocprofiler-systems/lib/librocprof-sys.so.1.7.1 + +* The next snippet shows the environment updates when ``rocprof-sys-sample`` enables + profiling, tracing, device process-sampling, host process-sampling, and all the available backends: .. code-block:: shell @@ -302,7 +319,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ ... 
* The final snippet shows the environment updates when ``rocprof-sys-sample`` enables - profiling, tracing, host process-sampling, and device process-sampling, + profiling, tracing, device process-sampling, and host process-sampling, sets the output path to ``rocprof-sys-output`` and the output prefix to ``%tag%``, and disables all the available backends: @@ -329,7 +346,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ ROCPROFSYS_PROFILE=true ... -An rocprof-sys-sample example +A rocprof-sys-sample example ======================================== Here is the full output from the previous diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp index 1e34a95b45..e80b0f0f27 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp @@ -431,6 +431,7 @@ parse_args(int argc, char** argv, std::vector& _env) auto _d = p.get("device"); update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d); update_env(_env, "ROCPROFSYS_CPU_FREQ_ENABLED", _h); + if(_h) update_env(_env, "ROCPROFSYS_USE_AMD_SMI", _d); }); parser .add_argument({ "-D", "--device" }, @@ -442,6 +443,7 @@ parse_args(int argc, char** argv, std::vector& _env) auto _d = p.get("device"); update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d); update_env(_env, "ROCPROFSYS_USE_AMD_SMI", _d); + if(_d) update_env(_env, "ROCPROFSYS_CPU_FREQ_ENABLED", _h); }); parser .add_argument({ "-w", "--wait" }, diff --git a/projects/rocprofiler-systems/source/lib/core/argparse.cpp b/projects/rocprofiler-systems/source/lib/core/argparse.cpp index 0cc74dcc86..409fe9b1a3 100644 --- a/projects/rocprofiler-systems/source/lib/core/argparse.cpp +++ b/projects/rocprofiler-systems/source/lib/core/argparse.cpp @@ -457,6 +457,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) auto _d = 
p.get("device"); update_env(_data, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d); update_env(_data, "ROCPROFSYS_CPU_FREQ_ENABLED", _h); + if(_h) update_env(_data, "ROCPROFSYS_USE_AMD_SMI", _d); }); _data.processed_environs.emplace("host"); @@ -476,6 +477,7 @@ add_core_arguments(parser_t& _parser, parser_data& _data) auto _d = p.get("device"); update_env(_data, "ROCPROFSYS_USE_PROCESS_SAMPLING", _h || _d); update_env(_data, "ROCPROFSYS_USE_AMD_SMI", _d); + if(_d) update_env(_data, "ROCPROFSYS_CPU_FREQ_ENABLED", _h); }); _data.processed_environs.emplace("device"); diff --git a/projects/rocprofiler-systems/source/lib/core/config.cpp b/projects/rocprofiler-systems/source/lib/core/config.cpp index 9964045916..d7e9187961 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.cpp +++ b/projects/rocprofiler-systems/source/lib/core/config.cpp @@ -419,7 +419,7 @@ configure_settings(bool _init) ROCPROFSYS_CONFIG_SETTING( double, "ROCPROFSYS_SAMPLING_FREQ", - "Number of software interrupts per second when OMNITTRACE_USE_SAMPLING=ON", 300.0, + "Number of software interrupts per second when ROCPROFSYS_USE_SAMPLING=ON", 300.0, "sampling", "process_sampling"); ROCPROFSYS_CONFIG_SETTING(double, "ROCPROFSYS_SAMPLING_CPUTIME_FREQ", @@ -460,9 +460,15 @@ configure_settings(bool _init) "If > 0.0, time (in seconds) to sample before stopping", 0.0, "sampling", "process_sampling"); + ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_CPU_FREQ_ENABLED", + "Enable tracking for CPU frequency, memory usage, virtual " + "memory usage, peak memory, context switches, page faults, " + "user time, and kernel time", + false, "process_sampling"); + ROCPROFSYS_CONFIG_SETTING( double, "ROCPROFSYS_PROCESS_SAMPLING_FREQ", - "Number of measurements per second when OMNITTRACE_USE_PROCESS_SAMPLING=ON. If " + "Number of measurements per second when ROCPROFSYS_USE_PROCESS_SAMPLING=ON. 
If " "set to zero, uses ROCPROFSYS_SAMPLING_FREQ value", 0.0, "process_sampling"); @@ -1168,6 +1174,7 @@ configure_mode_settings(const std::shared_ptr& _config) _set("ROCPROFSYS_USE_SAMPLING", false); _set("ROCPROFSYS_USE_PROCESS_SAMPLING", false); _set("ROCPROFSYS_USE_CODE_COVERAGE", false); + _set("ROCPROFSYS_CPU_FREQ_ENABLED", false); set_setting_value("ROCPROFSYS_TIMEMORY_COMPONENTS", std::string{}); set_setting_value("ROCPROFSYS_PAPI_EVENTS", std::string{}); } @@ -1851,6 +1858,13 @@ get_use_process_sampling() return static_cast&>(*_v->second).get(); } +bool& +get_cpu_freq_enabled() +{ + static auto _v = get_config()->find("ROCPROFSYS_CPU_FREQ_ENABLED"); + return static_cast&>(*_v->second).get(); +} + bool& get_use_pid() { diff --git a/projects/rocprofiler-systems/source/lib/core/config.hpp b/projects/rocprofiler-systems/source/lib/core/config.hpp index 5dc28a7a25..a7e9a8f384 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.hpp +++ b/projects/rocprofiler-systems/source/lib/core/config.hpp @@ -216,6 +216,9 @@ get_use_sampling() ROCPROFSYS_HOT; bool& get_use_process_sampling() ROCPROFSYS_HOT; +bool& +get_cpu_freq_enabled(); + bool& get_use_pid(); diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/process_sampler.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/process_sampler.cpp index 66c7d0495c..66fe64a8f3 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/process_sampler.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/process_sampler.cpp @@ -150,12 +150,15 @@ sampler::setup() _amd_smi->sample = []() { amd_smi::sample(); }; } - auto& _cpu_freq = instances.emplace_back(std::make_unique()); - _cpu_freq->setup = []() { cpu_freq::setup(); }; - _cpu_freq->shutdown = []() { cpu_freq::shutdown(); }; - _cpu_freq->post_process = []() { cpu_freq::post_process(); }; - _cpu_freq->config = []() { cpu_freq::config(); }; - _cpu_freq->sample = []() { cpu_freq::sample(); }; + 
if(get_cpu_freq_enabled()) + { + auto& _cpu_freq = instances.emplace_back(std::make_unique()); + _cpu_freq->setup = []() { cpu_freq::setup(); }; + _cpu_freq->shutdown = []() { cpu_freq::shutdown(); }; + _cpu_freq->post_process = []() { cpu_freq::post_process(); }; + _cpu_freq->config = []() { cpu_freq::config(); }; + _cpu_freq->sample = []() { cpu_freq::sample(); }; + } for(auto& itr : instances) itr->setup();