Remove redundant ROCPROFSYS_TRACE_CACHED variable from the code (#2434)

This commit is contained in:
marantic-amd
2025-12-25 13:36:04 +01:00
zatwierdzone przez GitHub
rodzic c3132773c8
commit bb83791b17
23 zmienionych plików z 88 dodań i 111 usunięć
+10 -7
Wyświetl plik
@@ -8,20 +8,23 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
### Added
- Documentation for `ROCPROFSYS_TRACE_CACHED` configuration option and its performance benefits.
- Documentation for `--trace-legacy` / `-L` CLI flag for direct tracing mode.
### Changed
- `ROCPROFSYS_TRACE_CACHED` is now the default perfetto tracing mode for improved performance.
- Renamed `ROCPROFSYS_TRACE` to `ROCPROFSYS_TRACE_LEGACY` (with backward compatibility).
- `--trace` / `-T` CLI flag now uses cached mode by default.
- Added `--trace-legacy` / `-L` CLI flag for direct tracing mode.
- `ROCPROFSYS_TRACE` now controls whether perfetto tracing is enabled (default: true in trace mode).
- `ROCPROFSYS_TRACE_LEGACY` controls whether to use legacy direct mode (true) or cached mode (false, default).
- By default, tracing uses deferred trace generation (cached mode) for improved performance and minimal runtime overhead.
- `--trace` / `-T` CLI flag enables tracing with cached mode by default.
- `--trace-legacy` / `-L` CLI flag enables legacy direct mode for tracing.
### Removed
- `ROCPROFSYS_TRACE_CACHED` environment variable (tracing now uses cached mode by default when `ROCPROFSYS_TRACE_LEGACY=false`).
### Deprecated
- `ROCPROFSYS_TRACE` environment variable (use `ROCPROFSYS_TRACE_LEGACY` for direct mode).
- `ROCPROFSYS_USE_PERFETTO` environment variable (use `ROCPROFSYS_TRACE_LEGACY`).
- `ROCPROFSYS_USE_PERFETTO` environment variable (use `ROCPROFSYS_TRACE`).
## ROCm Systems Profiler 1.3.0 for ROCm 7.2.0
@@ -176,18 +176,14 @@ Primary collection modes
Trace mode (default)
^^^^^^^^^^^^^^^^^^^^^^^^
Tracing mode generates comprehensive, deterministic traces of every event and measurement during application execution. This mode can be enabled using ``ROCPROFSYS_MODE=trace`` or by enabling one of the trace backend options.
Tracing mode generates comprehensive, deterministic traces of every event and measurement during application execution. This mode can be enabled using ``ROCPROFSYS_TRACE=true``, ``ROCPROFSYS_MODE=trace``, or by using the ``--trace`` / ``-T`` CLI flag.
ROCm Systems Profiler provides two trace backend modes:
ROCm Systems Profiler provides two tracing implementations:
- **Cached Mode (default, recommended)**: ``ROCPROFSYS_TRACE_CACHED=true`` or ``--trace`` / ``-T`` enables deferred trace generation with minimal runtime overhead. Trace data is buffered during execution and written after the application completes, significantly reducing performance impact during profiling.
- **Cached Mode (default)**: By default, when tracing is enabled, ROCm Systems Profiler uses deferred trace generation with minimal runtime overhead. Trace data is buffered during execution and written after the application completes, significantly reducing performance impact during profiling.
- **Legacy Mode**: ``ROCPROFSYS_TRACE_LEGACY=true`` or ``--trace-legacy`` / ``-L`` enables direct mode where trace data is written immediately during execution. This mode provides real-time trace generation but has higher runtime overhead compared to cached mode.
.. note::
The ``ROCPROFSYS_TRACE`` environment variable is deprecated and has been renamed to ``ROCPROFSYS_TRACE_LEGACY``. For new workflows, use ``ROCPROFSYS_TRACE_CACHED`` (default) or ``ROCPROFSYS_TRACE_LEGACY`` explicitly.
Additional configuration options to control the tracing behavior include:
- ``ROCPROFSYS_TRACE_DELAY`` (``--trace-wait``): Delay before starting trace collection (in seconds).
@@ -34,8 +34,8 @@ and tweak the default sampling values.
.. code-block:: shell
# ...
ROCPROFSYS_TRACE_CACHED = true # Recommended: deferred trace generation for minimal overhead
# ROCPROFSYS_TRACE_LEGACY = false # Alternative: direct mode with higher overhead
ROCPROFSYS_TRACE = true
# ROCPROFSYS_TRACE_LEGACY = false # Set to true for direct mode (higher overhead)
ROCPROFSYS_PROFILE = true
ROCPROFSYS_USE_SAMPLING = true
ROCPROFSYS_USE_PROCESS_SAMPLING = true
@@ -340,7 +340,7 @@ Generating a default configuration file
ROCPROFSYS_CONFIG_FILE =
ROCPROFSYS_MODE = trace
ROCPROFSYS_TRACE_CACHED = true
ROCPROFSYS_TRACE = true
ROCPROFSYS_TRACE_LEGACY = false
ROCPROFSYS_PROFILE = false
ROCPROFSYS_USE_SAMPLING = false
@@ -499,9 +499,8 @@ Viewing the setting descriptions
| ROCPROFSYS_USE_CODE_COVERAGE | Enable support for code coverage |
| ROCPROFSYS_USE_KOKKOSP | Enable support for Kokkos Tools |
| ROCPROFSYS_USE_OMPT | Enable support for OpenMP-Tools |
| ROCPROFSYS_TRACE_CACHED | Enable perfetto backend with deferred...|
| ROCPROFSYS_TRACE_LEGACY | Enable perfetto backend (legacy, dir... |
| ROCPROFSYS_TRACE | [DEPRECATED] Renamed to ROCPROFSYS_T... |
| ROCPROFSYS_TRACE | Enable perfetto backend for tracing |
| ROCPROFSYS_TRACE_LEGACY | Use legacy direct mode for tracing |
| ROCPROFSYS_USE_PID | Enable tagging filenames with proces... |
| ROCPROFSYS_USE_AMD_SMI | Enable sampling GPU power, temp, uti... |
| ROCPROFSYS_USE_ROCM | Enable ROCM tracing |
@@ -1349,8 +1348,8 @@ but do not override an existing value for the environment variable.
$SAMPLE = OFF
# use fields
ROCPROFSYS_TRACE_CACHED = $ENABLE # Recommended: deferred trace generation
ROCPROFSYS_TRACE_LEGACY = OFF # Legacy direct mode (higher overhead)
ROCPROFSYS_TRACE = $ENABLE
# ROCPROFSYS_TRACE_LEGACY = OFF # Set to ON for direct mode (higher overhead)
ROCPROFSYS_PROFILE = $ENABLE
ROCPROFSYS_USE_SAMPLING = $SAMPLE
ROCPROFSYS_USE_PROCESS_SAMPLING = $SAMPLE
@@ -75,7 +75,7 @@ The configuration parameter settings can be saved in a configuration file. Here
ROCPROFSYS_SAMPLING_DELAY=0.05
ROCPROFSYS_SAMPLING_CPUS=0-9
ROCPROFSYS_SAMPLING_GPUS=$env:HIP_VISIBLE_DEVICES
ROCPROFSYS_TRACE_CACHED=ON
ROCPROFSYS_TRACE=ON
ROCPROFSYS_PROFILE=ON
ROCPROFSYS_USE_SAMPLING=ON
ROCPROFSYS_USE_PROCESS_SAMPLING=OFF
@@ -284,7 +284,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
LD_PRELOAD=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1
ROCPROFSYS_CPU_FREQ_ENABLED=false
ROCPROFSYS_PROFILE=true
ROCPROFSYS_TRACE_CACHED=true
ROCPROFSYS_TRACE=true
ROCPROFSYS_USE_AMD_SMI=true
ROCPROFSYS_USE_PROCESS_SAMPLING=true
ROCPROFSYS_USE_SAMPLING=true
@@ -307,7 +307,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
ROCPROFSYS_USE_KOKKOSP=true
ROCPROFSYS_USE_MPIP=true
ROCPROFSYS_USE_OMPT=true
ROCPROFSYS_TRACE_CACHED=true
ROCPROFSYS_TRACE=true
ROCPROFSYS_USE_PROCESS_SAMPLING=true
ROCPROFSYS_USE_RCCLP=true
ROCPROFSYS_USE_AMD_SMI=true
@@ -337,7 +337,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ
ROCPROFSYS_USE_KOKKOSP=false
ROCPROFSYS_USE_MPIP=false
ROCPROFSYS_USE_OMPT=false
ROCPROFSYS_TRACE_CACHED=true
ROCPROFSYS_TRACE=true
ROCPROFSYS_USE_PROCESS_SAMPLING=true
ROCPROFSYS_USE_RCCLP=false
ROCPROFSYS_USE_AMD_SMI=false
@@ -362,7 +362,7 @@ Here is the full output from the previous
ROCPROFSYS_OUTPUT_PATH=rocprof-sys-output
ROCPROFSYS_OUTPUT_PREFIX=%tag%
ROCPROFSYS_PROFILE=true
ROCPROFSYS_TRACE_CACHED=true
ROCPROFSYS_TRACE=true
ROCPROFSYS_TRACE_THREAD_LOCKS=false
ROCPROFSYS_TRACE_THREAD_RW_LOCKS=false
ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS=false
@@ -17,7 +17,7 @@ For example, starting with the following base configuration:
export ROCPROFSYS_TIME_OUTPUT=ON
export ROCPROFSYS_USE_PID=OFF
export ROCPROFSYS_PROFILE=ON
export ROCPROFSYS_TRACE_CACHED=ON
export ROCPROFSYS_TRACE=ON
.. code-block:: shell
@@ -192,7 +192,7 @@ First, instrument and run the program.
ROCPROFSYS: LD_PRELOAD=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0
ROCPROFSYS: OMP_TOOL_LIBRARIES=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0
ROCPROFSYS: ROCPROFSYS_PROFILE=true
ROCPROFSYS: ROCPROFSYS_TRACE_CACHED=true
ROCPROFSYS: ROCPROFSYS_TRACE=true
ROCPROFSYS: ROCPROFSYS_VERBOSE=0
[rocprof-sys][dl][1827155] rocprofsys_main
[rocprof-sys][1827155][rocprofsys_init_tooling] Instrumentation mode: Trace
@@ -339,7 +339,7 @@ generate_config(std::string _config_file, const std::set<std::string>& _config_f
if(_lomni && !_romni) return true;
if(_romni && !_lomni) return false;
for(const auto* itr :
{ "ROCPROFSYS_CONFIG", "ROCPROFSYS_MODE", "ROCPROFSYS_TRACE_CACHED",
{ "ROCPROFSYS_CONFIG", "ROCPROFSYS_MODE", "ROCPROFSYS_TRACE",
"ROCPROFSYS_TRACE_LEGACY", "ROCPROFSYS_PROFILE",
"ROCPROFSYS_USE_SAMPLING", "ROCPROFSYS_USE_PROCESS_SAMPLING",
"ROCPROFSYS_USE_ROCM", "ROCPROFSYS_USE_AMD_SMI",
@@ -186,10 +186,9 @@ get_initial_environment()
update_env(_env, "ROCPROFSYS_MODE", "causal");
update_env(_env, "ROCPROFSYS_USE_CAUSAL", true);
update_env(_env, "ROCPROFSYS_USE_SAMPLING", false);
update_env(_env, "ROCPROFSYS_TRACE_CACHED", false);
update_env(_env, "ROCPROFSYS_TRACE", false);
update_env(_env, "ROCPROFSYS_PROFILE", false);
update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", false);
update_env(_env, "ROCPROFSYS_TRACE_LEGACY", false);
update_env(_env, "ROCPROFSYS_THREAD_POOL_SIZE",
get_env<int>("ROCPROFSYS_THREAD_POOL_SIZE", 0));
update_env(_env, "ROCPROFSYS_LAUNCHER", "rocprof-sys-causal");
@@ -339,19 +339,17 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
original_envs);
});
parser
.add_argument(
{ "-T", "--trace" },
"Generate a detailed trace with deferred trace generation (perfetto output)")
.add_argument({ "-T", "--trace" }, "Generate a detailed trace (perfetto output)")
.max_count(1)
.action([&](parser_t& p) {
rocprofsys::common::update_env(_env, "ROCPROFSYS_TRACE_CACHED",
p.get<bool>("trace"), update_mode::REPLACE,
":", updated_envs, original_envs);
rocprofsys::common::update_env(_env, "ROCPROFSYS_TRACE", p.get<bool>("trace"),
update_mode::REPLACE, ":", updated_envs,
original_envs);
});
parser
.add_argument(
{ "-L", "--trace-legacy" },
"Generate a detailed trace with direct mode (perfetto output, legacy)")
.add_argument({ "-L", "--trace-legacy" },
"Use legacy direct mode for tracing instead of deferred trace "
"generation (higher overhead)")
.max_count(1)
.action([&](parser_t& p) {
rocprofsys::common::update_env(
@@ -301,17 +301,17 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
if(_data.environ_filter("trace", _data))
{
_parser
.add_argument({ "-T", "--trace" }, "Generate a detailed trace with deferred "
"trace generation (perfetto output)")
.add_argument({ "-T", "--trace" },
"Generate a detailed trace (perfetto output)")
.max_count(1)
.action([&](parser_t& p) {
update_env(_data, "ROCPROFSYS_TRACE_CACHED", p.get<bool>("trace"));
update_env(_data, "ROCPROFSYS_TRACE", p.get<bool>("trace"));
});
_parser
.add_argument(
{ "-L", "--trace-legacy" },
"Generate a detailed trace with direct mode (perfetto output, legacy)")
.add_argument({ "-L", "--trace-legacy" },
"Use legacy direct mode for tracing instead of deferred trace "
"generation (higher overhead)")
.max_count(1)
.action([&](parser_t& p) {
update_env(_data, "ROCPROFSYS_TRACE_LEGACY", p.get<bool>("trace-legacy"));
@@ -299,32 +299,27 @@ configure_settings(bool _init)
get_env<size_t>("ROCPROFSYS_NUM_THREADS", 1), "threading", "performance",
"sampling", "parallelism", "advanced");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE_CACHED",
"Enable perfetto backend with deferred trace generation "
"for minimal runtime overhead",
_default_perfetto_v, "backend", "perfetto_caching");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE_LEGACY",
"Enable perfetto backend (legacy, direct mode)", false,
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE",
"Enable perfetto backend for tracing", _default_perfetto_v,
"backend", "perfetto");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE",
"[DEPRECATED] Renamed to ROCPROFSYS_TRACE_LEGACY", false,
"backend", "perfetto", "deprecated");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE_LEGACY",
"Use legacy direct mode for perfetto tracing instead of "
"deferred trace generation. When false (default), uses "
"cached mode with minimal runtime overhead.",
false, "backend", "perfetto");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_PERFETTO",
"[DEPRECATED] Renamed to ROCPROFSYS_TRACE_LEGACY", false,
"[DEPRECATED] Renamed to ROCPROFSYS_TRACE", false,
"backend", "perfetto", "deprecated");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_PROFILE", "Enable timemory backend",
!(_config->get<bool>("ROCPROFSYS_TRACE_LEGACY") ||
_config->get<bool>("ROCPROFSYS_TRACE_CACHED")),
"backend", "timemory");
!_config->get<bool>("ROCPROFSYS_TRACE"), "backend",
"timemory");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_TIMEMORY",
"[DEPRECATED] Renamed to ROCPROFSYS_PROFILE",
!_config->get<bool>("ROCPROFSYS_TRACE_LEGACY"), "backend",
"timemory", "deprecated");
ROCPROFSYS_CONFIG_SETTING(
bool, "ROCPROFSYS_USE_TIMEMORY", "[DEPRECATED] Renamed to ROCPROFSYS_PROFILE",
!_config->get<bool>("ROCPROFSYS_TRACE"), "backend", "timemory", "deprecated");
ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_CAUSAL",
"Enable causal profiling analysis", false, "backend",
@@ -1080,8 +1075,7 @@ configure_settings(bool _init)
handle_deprecated_setting("ROCPROFSYS_USE_THREAD_SAMPLING",
"ROCPROFSYS_USE_PROCESS_SAMPLING");
handle_deprecated_setting("ROCPROFSYS_OUTPUT_FILE", "ROCPROFSYS_PERFETTO_FILE");
handle_deprecated_setting("ROCPROFSYS_USE_PERFETTO", "ROCPROFSYS_TRACE_LEGACY");
handle_deprecated_setting("ROCPROFSYS_TRACE", "ROCPROFSYS_TRACE_LEGACY");
handle_deprecated_setting("ROCPROFSYS_USE_PERFETTO", "ROCPROFSYS_TRACE");
handle_deprecated_setting("ROCPROFSYS_USE_TIMEMORY", "ROCPROFSYS_PROFILE");
scope::get_fields()[scope::flat::value] = _config->get_flat_profile();
@@ -1148,8 +1142,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
if(get_mode() == Mode::Coverage)
{
set_default_setting_value("ROCPROFSYS_USE_CODE_COVERAGE", true);
_set("ROCPROFSYS_TRACE_LEGACY", false);
_set("ROCPROFSYS_TRACE_CACHED", false);
_set("ROCPROFSYS_TRACE", false);
_set("ROCPROFSYS_PROFILE", false);
_set("ROCPROFSYS_USE_CAUSAL", false);
_set("ROCPROFSYS_USE_AMD_SMI", false);
@@ -1162,8 +1155,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
else if(get_mode() == Mode::Causal)
{
_set("ROCPROFSYS_USE_CAUSAL", true);
_set("ROCPROFSYS_TRACE_LEGACY", false);
_set("ROCPROFSYS_TRACE_CACHED", false);
_set("ROCPROFSYS_TRACE", false);
_set("ROCPROFSYS_PROFILE", false);
_set("ROCPROFSYS_USE_SAMPLING", false);
_set("ROCPROFSYS_USE_PROCESS_SAMPLING", false);
@@ -1859,8 +1851,12 @@ get_verbose()
// Reports whether perfetto tracing runs in legacy (direct) mode, i.e. both
// ROCPROFSYS_TRACE and ROCPROFSYS_TRACE_LEGACY are currently true.
//
// The combined flag is recomputed on every call. A plain function-local
// `static bool _v = _trace && _legacy;` is initialised only once, so it would
// freeze whatever the two settings held at the first call and ignore later
// updates to the configuration.
//
// The returned reference refers to a per-thread scratch value: writing through
// it does NOT modify either underlying setting, and the value is refreshed by
// the next call on the same thread.
bool&
get_use_perfetto()
{
    // The setting handles themselves are looked up once and reused.
    static auto _trace_setting  = get_config()->at("ROCPROFSYS_TRACE");
    static auto _legacy_setting = get_config()->at("ROCPROFSYS_TRACE_LEGACY");
    auto&       _trace  = static_cast<tim::tsettings<bool>&>(*_trace_setting).get();
    auto&       _legacy = static_cast<tim::tsettings<bool>&>(*_legacy_setting).get();
    // thread_local so the per-call refresh is not a write to storage shared
    // between threads.
    static thread_local bool _v = false;
    _v                          = _trace && _legacy;
    return _v;
}
bool&
@@ -2564,8 +2560,12 @@ get_use_rocpd()
// Reports whether perfetto tracing runs in cached (deferred) mode, i.e.
// ROCPROFSYS_TRACE is currently true and ROCPROFSYS_TRACE_LEGACY is currently
// false.
//
// The combined flag is recomputed on every call. A plain function-local
// `static bool _v = _trace && !_legacy;` is initialised only once, so it would
// freeze whatever the two settings held at the first call and ignore later
// updates to the configuration.
//
// The returned reference refers to a per-thread scratch value: writing through
// it does NOT modify either underlying setting, and the value is refreshed by
// the next call on the same thread.
bool&
get_caching_perfetto()
{
    // The setting handles themselves are looked up once and reused.
    static auto _trace_setting  = get_config()->at("ROCPROFSYS_TRACE");
    static auto _legacy_setting = get_config()->at("ROCPROFSYS_TRACE_LEGACY");
    auto&       _trace  = static_cast<tim::tsettings<bool>&>(*_trace_setting).get();
    auto&       _legacy = static_cast<tim::tsettings<bool>&>(*_legacy_setting).get();
    // thread_local so the per-call refresh is not a write to storage shared
    // between threads.
    static thread_local bool _v = false;
    _v                          = _trace && !_legacy;
    return _v;
}
int
@@ -36,7 +36,6 @@ if(
)
set(_annotate_environment
"${_base_environment}"
"ROCPROFSYS_TRACE_CACHED=OFF"
"ROCPROFSYS_TRACE_LEGACY=ON"
"ROCPROFSYS_TIMEMORY_COMPONENTS=thread_cpu_clock papi_array"
"ROCPROFSYS_PAPI_EVENTS=perf::PERF_COUNT_SW_CPU_CLOCK"
@@ -84,7 +83,6 @@ if(
else()
set(_annotate_environment
"${_base_environment}"
"ROCPROFSYS_TRACE_CACHED=OFF"
"ROCPROFSYS_TRACE_LEGACY=ON"
"ROCPROFSYS_TIMEMORY_COMPONENTS=thread_cpu_clock"
"ROCPROFSYS_USE_SAMPLING=OFF"
@@ -233,7 +233,7 @@ rocprofiler_systems_add_bin_test(
TIMEOUT 45
PASS_REGEX
"ENVIRONMENT VARIABLE,[ \n]+ROCPROFSYS_CI_SKIP_PUSH_POP_CHECK,[ \n]+ROCPROFSYS_THREAD_POOL_SIZE,[ \n]+ROCPROFSYS_USE_PID,[ \n]+"
FAIL_REGEX "ROCPROFSYS_TRACE_LEGACY|ROCPROFSYS_TRACE_CACHED|ROCPROFSYS_ABORT_FAIL_REGEX"
FAIL_REGEX "ROCPROFSYS_TRACE|ROCPROFSYS_ABORT_FAIL_REGEX"
)
string(
@@ -270,7 +270,7 @@ rocprofiler_systems_add_bin_test(
txt json xml --force
TIMEOUT 45
LABELS "rocprofiler-systems-avail"
ENVIRONMENT "ROCPROFSYS_TRACE_LEGACY=OFF;ROCPROFSYS_TRACE_CACHED=OFF;ROCPROFSYS_PROFILE=ON"
ENVIRONMENT "ROCPROFSYS_TRACE=OFF;ROCPROFSYS_PROFILE=ON"
PASS_REGEX
"Outputting JSON configuration file '${_AVAIL_CFG_PATH}tweak\\\.json'(.*)Outputting XML configuration file '${_AVAIL_CFG_PATH}tweak\\\.xml'(.*)Outputting text configuration file '${_AVAIL_CFG_PATH}tweak\\\.cfg'(.*)"
)
@@ -28,7 +28,7 @@
# Use legacy trace mode for AMD SMI counters - cached mode doesn't support real-time counter tracking
set(_gpu_connect_environment
"ROCPROFSYS_TRACE_CACHED=OFF"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_TRACE_LEGACY=ON"
"ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api"
"ROCPROFSYS_AMD_SMI_METRICS=busy,temp,power,xgmi,pcie"
@@ -74,5 +74,5 @@ rocprofiler_systems_add_test(
REWRITE_ARGS -e -v 2 --min-instructions=8
RUN_ARGS 10 4 1000
ENVIRONMENT
"${_lock_environment};ROCPROFSYS_FLAT_PROFILE=ON;ROCPROFSYS_PROFILE=OFF;ROCPROFSYS_TRACE_LEGACY=OFF;ROCPROFSYS_TRACE_CACHED=ON;ROCPROFSYS_SAMPLING_KEEP_INTERNAL=OFF"
"${_lock_environment};ROCPROFSYS_FLAT_PROFILE=ON;ROCPROFSYS_PROFILE=OFF;ROCPROFSYS_TRACE=ON;ROCPROFSYS_SAMPLING_KEEP_INTERNAL=OFF"
)
@@ -73,7 +73,7 @@ rocprofiler_systems_add_test(
--min-instructions
0
ENVIRONMENT
"${_base_environment};ROCPROFSYS_VERBOSE=1;ROCPROFSYS_TRACE_CACHED=OFF;ROCPROFSYS_TRACE_LEGACY=ON;ROCPROFSYS_PERFETTO_COMBINE_TRACES=ON"
"${_base_environment};ROCPROFSYS_VERBOSE=1;ROCPROFSYS_TRACE_LEGACY=ON;ROCPROFSYS_PERFETTO_COMBINE_TRACES=ON"
REWRITE_RUN_PASS_REGEX
"Successfully executed: .+rocprof-sys-merge-output.sh.*"
REWRITE_RUN_FAIL_REGEX
@@ -126,8 +126,7 @@ rocprofiler_systems_add_test(
)
set(_mpip_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=OFF"
"ROCPROFSYS_USE_PROCESS_SAMPLING=OFF"
@@ -142,8 +141,7 @@ set(_mpip_environment
)
set(_mpip_all2all_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=OFF"
"ROCPROFSYS_USE_PROCESS_SAMPLING=OFF"
@@ -28,7 +28,6 @@ message(STATUS "The list of all PAPI network events is ${_event_list}")
# Use legacy trace mode for network stats - cached mode doesn't support real-time counter tracking
set(_nic_perf_environment
"${_base_environment}"
"ROCPROFSYS_TRACE_CACHED=OFF"
"ROCPROFSYS_TRACE_LEGACY=ON"
"ROCPROFSYS_OUTPUT_PATH=${PROJECT_BINARY_DIR}/rocprof-sys-tests-output/nic-performance"
"ROCPROFSYS_USE_PID=OFF"
@@ -22,8 +22,7 @@ if(NOT EXISTS "${ROCM_LLVM_LIB_PATH}/libomptarget.so" AND ROCPROFSYS_USE_ROCM)
endif()
set(_ompt_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_TIME_OUTPUT=OFF"
"ROCPROFSYS_USE_OMPT=ON"
@@ -34,7 +34,7 @@ rocprofiler_systems_add_test(
RUNTIME_ARGS -e -i 256
RUN_ARGS 30 4 1000
ENVIRONMENT
"${_lock_environment};ROCPROFSYS_PROFILE=ON;ROCPROFSYS_TRACE_LEGACY=OFF;ROCPROFSYS_TRACE_CACHED=ON;ROCPROFSYS_COLLAPSE_THREADS=OFF;ROCPROFSYS_SAMPLING_REALTIME=ON;ROCPROFSYS_SAMPLING_REALTIME_FREQ=10;ROCPROFSYS_SAMPLING_REALTIME_TIDS=0;ROCPROFSYS_SAMPLING_KEEP_INTERNAL=OFF"
"${_lock_environment};ROCPROFSYS_PROFILE=ON;ROCPROFSYS_TRACE=ON;ROCPROFSYS_COLLAPSE_THREADS=OFF;ROCPROFSYS_SAMPLING_REALTIME=ON;ROCPROFSYS_SAMPLING_REALTIME_FREQ=10;ROCPROFSYS_SAMPLING_REALTIME_TIDS=0;ROCPROFSYS_SAMPLING_KEEP_INTERNAL=OFF"
REWRITE_RUN_PASS_REGEX
"wall_clock .*\\|_pthread_create .* 4 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000"
RUNTIME_PASS_REGEX
@@ -41,7 +41,6 @@ endif()
set(_roctx_environment
"${_base_environment}"
"ROCPROFSYS_TRACE_LEGACY=ON"
"ROCPROFSYS_TRACE_CACHED=OFF"
"ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,marker_api,kernel_dispatch"
)
@@ -103,8 +103,7 @@ endif()
set(_test_openmp_env "OMP_PROC_BIND=spread" "OMP_PLACES=threads" "OMP_NUM_THREADS=2")
set(_base_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=ON"
"ROCPROFSYS_USE_PROCESS_SAMPLING=ON"
@@ -115,8 +114,7 @@ set(_base_environment
)
set(_flat_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_TIME_OUTPUT=OFF"
"ROCPROFSYS_COUT_OUTPUT=ON"
@@ -146,8 +144,7 @@ set(_lock_environment
)
set(_perfetto_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=OFF"
"ROCPROFSYS_USE_SAMPLING=ON"
"ROCPROFSYS_USE_PROCESS_SAMPLING=ON"
@@ -159,8 +156,7 @@ set(_perfetto_environment
)
set(_timemory_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=OFF"
"ROCPROFSYS_TRACE=OFF"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=ON"
"ROCPROFSYS_USE_PROCESS_SAMPLING=ON"
@@ -181,8 +177,7 @@ set(_causal_environment
)
set(_python_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=OFF"
"ROCPROFSYS_USE_PROCESS_SAMPLING=ON"
@@ -195,8 +190,7 @@ set(_python_environment
)
set(_attach_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=OFF"
"ROCPROFSYS_USE_PROCESS_SAMPLING=ON"
@@ -210,8 +204,7 @@ set(_attach_environment
)
set(_rccl_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=OFF"
"ROCPROFSYS_USE_PROCESS_SAMPLING=ON"
@@ -224,8 +217,7 @@ set(_rccl_environment
)
set(_window_environment
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=OFF"
"ROCPROFSYS_USE_PROCESS_SAMPLING=OFF"
@@ -1478,8 +1470,7 @@ function(ROCPROFILER_SYSTEMS_ADD_BIN_TEST)
if(NOT TEST_ENVIRONMENT)
set(TEST_ENVIRONMENT
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_TRACE=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_USE_SAMPLING=ON"
"ROCPROFSYS_TIME_OUTPUT=OFF"
@@ -32,8 +32,6 @@ endif()
set(_thread_limit_environment
"${_base_environment}"
"ROCPROFSYS_TRACE_LEGACY=OFF"
"ROCPROFSYS_TRACE_CACHED=ON"
"ROCPROFSYS_PROFILE=ON"
"ROCPROFSYS_COUT_OUTPUT=ON"
"ROCPROFSYS_USE_SAMPLING=ON"