diff --git a/projects/rocprofiler-systems/CHANGELOG.md b/projects/rocprofiler-systems/CHANGELOG.md index 1697e4b2a3..9938a6b49e 100644 --- a/projects/rocprofiler-systems/CHANGELOG.md +++ b/projects/rocprofiler-systems/CHANGELOG.md @@ -8,20 +8,23 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs. ### Added -- Documentation for `ROCPROFSYS_TRACE_CACHED` configuration option and its performance benefits. - Documentation for `--trace-legacy` / `-L` CLI flag for direct tracing mode. ### Changed -- `ROCPROFSYS_TRACE_CACHED` is now the default perfetto tracing mode for improved performance. -- Renamed `ROCPROFSYS_TRACE` to `ROCPROFSYS_TRACE_LEGACY` (with backward compatibility). -- `--trace` / `-T` CLI flag now uses cached mode by default. -- Added `--trace-legacy` / `-L` CLI flag for direct tracing mode. +- `ROCPROFSYS_TRACE` now controls whether perfetto tracing is enabled (default: true in trace mode). +- `ROCPROFSYS_TRACE_LEGACY` controls whether to use legacy direct mode (true) or cached mode (false, default). +- By default, tracing uses deferred trace generation (cached mode) for improved performance and minimal runtime overhead. +- `--trace` / `-T` CLI flag enables tracing with cached mode by default. +- `--trace-legacy` / `-L` CLI flag enables legacy direct mode for tracing. + +### Removed + +- `ROCPROFSYS_TRACE_CACHED` environment variable (cached mode is now used whenever tracing is enabled and `ROCPROFSYS_TRACE_LEGACY` is false, which is the default). ### Deprecated -- `ROCPROFSYS_TRACE` environment variable (use `ROCPROFSYS_TRACE_LEGACY` for direct mode). -- `ROCPROFSYS_USE_PERFETTO` environment variable (use `ROCPROFSYS_TRACE_LEGACY`). +- `ROCPROFSYS_USE_PERFETTO` environment variable (use `ROCPROFSYS_TRACE`). 
## ROCm Systems Profiler 1.3.0 for ROCm 7.2.0 diff --git a/projects/rocprofiler-systems/docs/conceptual/data-collection-modes.rst b/projects/rocprofiler-systems/docs/conceptual/data-collection-modes.rst index 65dc06ed69..c42e3da0ef 100644 --- a/projects/rocprofiler-systems/docs/conceptual/data-collection-modes.rst +++ b/projects/rocprofiler-systems/docs/conceptual/data-collection-modes.rst @@ -176,18 +176,14 @@ Primary collection modes Trace mode (default) ^^^^^^^^^^^^^^^^^^^^^^^^ -Tracing mode generates comprehensive, deterministic traces of every event and measurement during application execution. This mode can be enabled using ``ROCPROFSYS_MODE=trace`` or by enabling one of the trace backend options. +Tracing mode generates comprehensive, deterministic traces of every event and measurement during application execution. This mode can be enabled using ``ROCPROFSYS_TRACE=true``, ``ROCPROFSYS_MODE=trace``, or by using the ``--trace`` / ``-T`` CLI flag. -ROCm Systems Profiler provides two trace backend modes: +ROCm Systems Profiler provides two tracing implementations: -- **Cached Mode (default, recommended)**: ``ROCPROFSYS_TRACE_CACHED=true`` or ``--trace`` / ``-T`` enables deferred trace generation with minimal runtime overhead. Trace data is buffered during execution and written after the application completes, significantly reducing performance impact during profiling. +- **Cached Mode (default)**: By default, when tracing is enabled, ROCm Systems Profiler uses deferred trace generation with minimal runtime overhead. Trace data is buffered during execution and written after the application completes, significantly reducing performance impact during profiling. - **Legacy Mode**: ``ROCPROFSYS_TRACE_LEGACY=true`` or ``--trace-legacy`` / ``-L`` enables direct mode where trace data is written immediately during execution. This mode provides real-time trace generation but has higher runtime overhead compared to cached mode. -.. 
note:: - - The ``ROCPROFSYS_TRACE`` environment variable is deprecated and has been renamed to ``ROCPROFSYS_TRACE_LEGACY``. For new workflows, use ``ROCPROFSYS_TRACE_CACHED`` (default) or ``ROCPROFSYS_TRACE_LEGACY`` explicitly. - Additional configuration options to control the tracing behavior include: - ``ROCPROFSYS_TRACE_DELAY`` (``--trace-wait``): Delay before starting trace collection (in seconds). diff --git a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst index a2b614add1..0f26bc0e1b 100644 --- a/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst +++ b/projects/rocprofiler-systems/docs/how-to/configuring-runtime-options.rst @@ -34,8 +34,8 @@ and tweak the default sampling values. .. code-block:: shell # ... - ROCPROFSYS_TRACE_CACHED = true # Recommended: deferred trace generation for minimal overhead - # ROCPROFSYS_TRACE_LEGACY = false # Alternative: direct mode with higher overhead + ROCPROFSYS_TRACE = true + # ROCPROFSYS_TRACE_LEGACY = false # Set to true for direct mode (higher overhead) ROCPROFSYS_PROFILE = true ROCPROFSYS_USE_SAMPLING = true ROCPROFSYS_USE_PROCESS_SAMPLING = true @@ -340,7 +340,7 @@ Generating a default configuration file ROCPROFSYS_CONFIG_FILE = ROCPROFSYS_MODE = trace - ROCPROFSYS_TRACE_CACHED = true + ROCPROFSYS_TRACE = true ROCPROFSYS_TRACE_LEGACY = false ROCPROFSYS_PROFILE = false ROCPROFSYS_USE_SAMPLING = false @@ -499,9 +499,8 @@ Viewing the setting descriptions | ROCPROFSYS_USE_CODE_COVERAGE | Enable support for code coverage | | ROCPROFSYS_USE_KOKKOSP | Enable support for Kokkos Tools | | ROCPROFSYS_USE_OMPT | Enable support for OpenMP-Tools | - | ROCPROFSYS_TRACE_CACHED | Enable perfetto backend with deferred...| - | ROCPROFSYS_TRACE_LEGACY | Enable perfetto backend (legacy, dir... | - | ROCPROFSYS_TRACE | [DEPRECATED] Renamed to ROCPROFSYS_T... 
| + | ROCPROFSYS_TRACE | Enable perfetto backend for tracing | + | ROCPROFSYS_TRACE_LEGACY | Use legacy direct mode for tracing | | ROCPROFSYS_USE_PID | Enable tagging filenames with proces... | | ROCPROFSYS_USE_AMD_SMI | Enable sampling GPU power, temp, uti... | | ROCPROFSYS_USE_ROCM | Enable ROCM tracing | @@ -1349,8 +1348,8 @@ but do not override an existing value for the environment variable. $SAMPLE = OFF # use fields - ROCPROFSYS_TRACE_CACHED = $ENABLE # Recommended: deferred trace generation - ROCPROFSYS_TRACE_LEGACY = OFF # Legacy direct mode (higher overhead) + ROCPROFSYS_TRACE = $ENABLE + # ROCPROFSYS_TRACE_LEGACY = OFF # Set to ON for direct mode (higher overhead) ROCPROFSYS_PROFILE = $ENABLE ROCPROFSYS_USE_SAMPLING = $SAMPLE ROCPROFSYS_USE_PROCESS_SAMPLING = $SAMPLE diff --git a/projects/rocprofiler-systems/docs/how-to/nic-profiling.rst b/projects/rocprofiler-systems/docs/how-to/nic-profiling.rst index 0e91f5a0d2..8c7f0bfb29 100644 --- a/projects/rocprofiler-systems/docs/how-to/nic-profiling.rst +++ b/projects/rocprofiler-systems/docs/how-to/nic-profiling.rst @@ -75,7 +75,7 @@ The configuration parameter settings can be saved in a configuration file. 
Here ROCPROFSYS_SAMPLING_DELAY=0.05 ROCPROFSYS_SAMPLING_CPUS=0-9 ROCPROFSYS_SAMPLING_GPUS=$env:HIP_VISIBLE_DEVICES - ROCPROFSYS_TRACE_CACHED=ON + ROCPROFSYS_TRACE=ON ROCPROFSYS_PROFILE=ON ROCPROFSYS_USE_SAMPLING=ON ROCPROFSYS_USE_PROCESS_SAMPLING=OFF diff --git a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst index 97d01da86f..3b0cd6ebaa 100644 --- a/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst +++ b/projects/rocprofiler-systems/docs/how-to/sampling-call-stack.rst @@ -284,7 +284,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ LD_PRELOAD=/opt/rocprofiler-systems/lib/librocprof-sys-dl.so.1.7.1 ROCPROFSYS_CPU_FREQ_ENABLED=false ROCPROFSYS_PROFILE=true - ROCPROFSYS_TRACE_CACHED=true + ROCPROFSYS_TRACE=true ROCPROFSYS_USE_AMD_SMI=true ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_SAMPLING=true @@ -307,7 +307,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ ROCPROFSYS_USE_KOKKOSP=true ROCPROFSYS_USE_MPIP=true ROCPROFSYS_USE_OMPT=true - ROCPROFSYS_TRACE_CACHED=true + ROCPROFSYS_TRACE=true ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_RCCLP=true ROCPROFSYS_USE_AMD_SMI=true @@ -337,7 +337,7 @@ The following snippets show how ``rocprof-sys-sample`` runs with various environ ROCPROFSYS_USE_KOKKOSP=false ROCPROFSYS_USE_MPIP=false ROCPROFSYS_USE_OMPT=false - ROCPROFSYS_TRACE_CACHED=true + ROCPROFSYS_TRACE=true ROCPROFSYS_USE_PROCESS_SAMPLING=true ROCPROFSYS_USE_RCCLP=false ROCPROFSYS_USE_AMD_SMI=false @@ -362,7 +362,7 @@ Here is the full output from the previous ROCPROFSYS_OUTPUT_PATH=rocprof-sys-output ROCPROFSYS_OUTPUT_PREFIX=%tag% ROCPROFSYS_PROFILE=true - ROCPROFSYS_TRACE_CACHED=true + ROCPROFSYS_TRACE=true ROCPROFSYS_TRACE_THREAD_LOCKS=false ROCPROFSYS_TRACE_THREAD_RW_LOCKS=false ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS=false diff --git 
a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst index a7d9d85f7a..1f77e74e6b 100644 --- a/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst +++ b/projects/rocprofiler-systems/docs/how-to/understanding-rocprof-sys-output.rst @@ -17,7 +17,7 @@ For example, starting with the following base configuration: export ROCPROFSYS_TIME_OUTPUT=ON export ROCPROFSYS_USE_PID=OFF export ROCPROFSYS_PROFILE=ON - export ROCPROFSYS_TRACE_CACHED=ON + export ROCPROFSYS_TRACE=ON .. code-block:: shell diff --git a/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst b/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst index 1d491ef78c..639f87cd78 100644 --- a/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst +++ b/projects/rocprofiler-systems/docs/how-to/using-rocprof-sys-api.rst @@ -192,7 +192,7 @@ First, instrument and run the program. 
ROCPROFSYS: LD_PRELOAD=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0 ROCPROFSYS: OMP_TOOL_LIBRARIES=/home/rocm-dev/code/rocprofiler-systems/build/ubuntu/22.04/lib/librocprof-sys-dl.so.1.0.0 ROCPROFSYS: ROCPROFSYS_PROFILE=true - ROCPROFSYS: ROCPROFSYS_TRACE_CACHED=true + ROCPROFSYS: ROCPROFSYS_TRACE=true ROCPROFSYS: ROCPROFSYS_VERBOSE=0 [rocprof-sys][dl][1827155] rocprofsys_main [rocprof-sys][1827155][rocprofsys_init_tooling] Instrumentation mode: Trace diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/generate_config.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/generate_config.cpp index 68901a6503..ea629c3f84 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/generate_config.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-avail/generate_config.cpp @@ -339,7 +339,7 @@ generate_config(std::string _config_file, const std::set& _config_f if(_lomni && !_romni) return true; if(_romni && !_lomni) return false; for(const auto* itr : - { "ROCPROFSYS_CONFIG", "ROCPROFSYS_MODE", "ROCPROFSYS_TRACE_CACHED", + { "ROCPROFSYS_CONFIG", "ROCPROFSYS_MODE", "ROCPROFSYS_TRACE", "ROCPROFSYS_TRACE_LEGACY", "ROCPROFSYS_PROFILE", "ROCPROFSYS_USE_SAMPLING", "ROCPROFSYS_USE_PROCESS_SAMPLING", "ROCPROFSYS_USE_ROCM", "ROCPROFSYS_USE_AMD_SMI", diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-causal/impl.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-causal/impl.cpp index b68ec8ffef..16490dbbbd 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-causal/impl.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-causal/impl.cpp @@ -186,10 +186,9 @@ get_initial_environment() update_env(_env, "ROCPROFSYS_MODE", "causal"); update_env(_env, "ROCPROFSYS_USE_CAUSAL", true); update_env(_env, "ROCPROFSYS_USE_SAMPLING", false); - update_env(_env, "ROCPROFSYS_TRACE_CACHED", false); + update_env(_env, "ROCPROFSYS_TRACE", false); update_env(_env, 
"ROCPROFSYS_PROFILE", false); update_env(_env, "ROCPROFSYS_USE_PROCESS_SAMPLING", false); - update_env(_env, "ROCPROFSYS_TRACE_LEGACY", false); update_env(_env, "ROCPROFSYS_THREAD_POOL_SIZE", get_env("ROCPROFSYS_THREAD_POOL_SIZE", 0)); update_env(_env, "ROCPROFSYS_LAUNCHER", "rocprof-sys-causal"); diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp index 0614c38b60..124217b102 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp @@ -339,19 +339,17 @@ parse_args(int argc, char** argv, std::vector& _env) original_envs); }); parser - .add_argument( - { "-T", "--trace" }, - "Generate a detailed trace with deferred trace generation (perfetto output)") + .add_argument({ "-T", "--trace" }, "Generate a detailed trace (perfetto output)") .max_count(1) .action([&](parser_t& p) { - rocprofsys::common::update_env(_env, "ROCPROFSYS_TRACE_CACHED", - p.get("trace"), update_mode::REPLACE, - ":", updated_envs, original_envs); + rocprofsys::common::update_env(_env, "ROCPROFSYS_TRACE", p.get("trace"), + update_mode::REPLACE, ":", updated_envs, + original_envs); }); parser - .add_argument( - { "-L", "--trace-legacy" }, - "Generate a detailed trace with direct mode (perfetto output, legacy)") + .add_argument({ "-L", "--trace-legacy" }, + "Use legacy direct mode for tracing instead of deferred trace " + "generation (higher overhead)") .max_count(1) .action([&](parser_t& p) { rocprofsys::common::update_env( diff --git a/projects/rocprofiler-systems/source/lib/core/argparse.cpp b/projects/rocprofiler-systems/source/lib/core/argparse.cpp index 1e511c6604..443c217bc5 100644 --- a/projects/rocprofiler-systems/source/lib/core/argparse.cpp +++ b/projects/rocprofiler-systems/source/lib/core/argparse.cpp @@ -301,17 +301,17 @@ add_core_arguments(parser_t& _parser, parser_data& _data) 
if(_data.environ_filter("trace", _data)) { _parser - .add_argument({ "-T", "--trace" }, "Generate a detailed trace with deferred " - "trace generation (perfetto output)") + .add_argument({ "-T", "--trace" }, + "Generate a detailed trace (perfetto output)") .max_count(1) .action([&](parser_t& p) { - update_env(_data, "ROCPROFSYS_TRACE_CACHED", p.get("trace")); + update_env(_data, "ROCPROFSYS_TRACE", p.get("trace")); }); _parser - .add_argument( - { "-L", "--trace-legacy" }, - "Generate a detailed trace with direct mode (perfetto output, legacy)") + .add_argument({ "-L", "--trace-legacy" }, + "Use legacy direct mode for tracing instead of deferred trace " + "generation (higher overhead)") .max_count(1) .action([&](parser_t& p) { update_env(_data, "ROCPROFSYS_TRACE_LEGACY", p.get("trace-legacy")); diff --git a/projects/rocprofiler-systems/source/lib/core/config.cpp b/projects/rocprofiler-systems/source/lib/core/config.cpp index 26deb03581..ffbb4dd46e 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.cpp +++ b/projects/rocprofiler-systems/source/lib/core/config.cpp @@ -299,32 +299,27 @@ configure_settings(bool _init) get_env("ROCPROFSYS_NUM_THREADS", 1), "threading", "performance", "sampling", "parallelism", "advanced"); - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE_CACHED", - "Enable perfetto backend with deferred trace generation " - "for minimal runtime overhead", - _default_perfetto_v, "backend", "perfetto_caching"); - - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE_LEGACY", - "Enable perfetto backend (legacy, direct mode)", false, + ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE", + "Enable perfetto backend for tracing", _default_perfetto_v, "backend", "perfetto"); - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE", - "[DEPRECATED] Renamed to ROCPROFSYS_TRACE_LEGACY", false, - "backend", "perfetto", "deprecated"); + ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_TRACE_LEGACY", + "Use legacy direct mode for perfetto tracing instead of " + 
"deferred trace generation. When false (default), uses " + "cached mode with minimal runtime overhead.", + false, "backend", "perfetto"); ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_PERFETTO", - "[DEPRECATED] Renamed to ROCPROFSYS_TRACE_LEGACY", false, + "[DEPRECATED] Renamed to ROCPROFSYS_TRACE", false, "backend", "perfetto", "deprecated"); ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_PROFILE", "Enable timemory backend", - !(_config->get("ROCPROFSYS_TRACE_LEGACY") || - _config->get("ROCPROFSYS_TRACE_CACHED")), - "backend", "timemory"); + !_config->get("ROCPROFSYS_TRACE"), "backend", + "timemory"); - ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_TIMEMORY", - "[DEPRECATED] Renamed to ROCPROFSYS_PROFILE", - !_config->get("ROCPROFSYS_TRACE_LEGACY"), "backend", - "timemory", "deprecated"); + ROCPROFSYS_CONFIG_SETTING( + bool, "ROCPROFSYS_USE_TIMEMORY", "[DEPRECATED] Renamed to ROCPROFSYS_PROFILE", + !_config->get("ROCPROFSYS_TRACE"), "backend", "timemory", "deprecated"); ROCPROFSYS_CONFIG_SETTING(bool, "ROCPROFSYS_USE_CAUSAL", "Enable causal profiling analysis", false, "backend", @@ -1080,8 +1075,7 @@ configure_settings(bool _init) handle_deprecated_setting("ROCPROFSYS_USE_THREAD_SAMPLING", "ROCPROFSYS_USE_PROCESS_SAMPLING"); handle_deprecated_setting("ROCPROFSYS_OUTPUT_FILE", "ROCPROFSYS_PERFETTO_FILE"); - handle_deprecated_setting("ROCPROFSYS_USE_PERFETTO", "ROCPROFSYS_TRACE_LEGACY"); - handle_deprecated_setting("ROCPROFSYS_TRACE", "ROCPROFSYS_TRACE_LEGACY"); + handle_deprecated_setting("ROCPROFSYS_USE_PERFETTO", "ROCPROFSYS_TRACE"); handle_deprecated_setting("ROCPROFSYS_USE_TIMEMORY", "ROCPROFSYS_PROFILE"); scope::get_fields()[scope::flat::value] = _config->get_flat_profile(); @@ -1148,8 +1142,7 @@ configure_mode_settings(const std::shared_ptr& _config) if(get_mode() == Mode::Coverage) { set_default_setting_value("ROCPROFSYS_USE_CODE_COVERAGE", true); - _set("ROCPROFSYS_TRACE_LEGACY", false); - _set("ROCPROFSYS_TRACE_CACHED", false); + _set("ROCPROFSYS_TRACE", 
false); _set("ROCPROFSYS_PROFILE", false); _set("ROCPROFSYS_USE_CAUSAL", false); _set("ROCPROFSYS_USE_AMD_SMI", false); @@ -1162,8 +1155,7 @@ configure_mode_settings(const std::shared_ptr& _config) else if(get_mode() == Mode::Causal) { _set("ROCPROFSYS_USE_CAUSAL", true); - _set("ROCPROFSYS_TRACE_LEGACY", false); - _set("ROCPROFSYS_TRACE_CACHED", false); + _set("ROCPROFSYS_TRACE", false); _set("ROCPROFSYS_PROFILE", false); _set("ROCPROFSYS_USE_SAMPLING", false); _set("ROCPROFSYS_USE_PROCESS_SAMPLING", false); @@ -1859,8 +1851,16 @@ get_verbose() bool& get_use_perfetto() { - static auto _v = get_config()->at("ROCPROFSYS_TRACE_LEGACY"); - return static_cast&>(*_v).get(); + static auto _trace_setting = get_config()->at("ROCPROFSYS_TRACE"); + static auto _legacy_setting = get_config()->at("ROCPROFSYS_TRACE_LEGACY"); + auto& _trace = static_cast&>(*_trace_setting).get(); + auto& _legacy = static_cast&>(*_legacy_setting).get(); + // Legacy (direct) perfetto: tracing enabled and legacy mode requested. + // Recomputed per call so later setting changes are not hidden by a + // one-time static initializer; thread_local avoids cross-thread writes. + thread_local bool _v = false; + _v = _trace && _legacy; + return _v; } bool& @@ -2564,8 +2564,16 @@ get_use_rocpd() bool& get_caching_perfetto() { - static auto _v = get_config()->at("ROCPROFSYS_TRACE_CACHED"); - return static_cast&>(*_v).get(); + static auto _trace_setting = get_config()->at("ROCPROFSYS_TRACE"); + static auto _legacy_setting = get_config()->at("ROCPROFSYS_TRACE_LEGACY"); + auto& _trace = static_cast&>(*_trace_setting).get(); + auto& _legacy = static_cast&>(*_legacy_setting).get(); + // Cached (deferred) perfetto: tracing enabled and legacy mode not requested. + // Recomputed per call so later setting changes are not hidden by a + // one-time static initializer; thread_local avoids cross-thread writes. + thread_local bool _v = false; + _v = _trace && !_legacy; + return _v; } int diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-annotate-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-annotate-tests.cmake index a6ffdfb465..3c43960887 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-annotate-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-annotate-tests.cmake @@ -36,7 +36,6 @@ if( ) set(_annotate_environment "${_base_environment}" - "ROCPROFSYS_TRACE_CACHED=OFF" "ROCPROFSYS_TRACE_LEGACY=ON" "ROCPROFSYS_TIMEMORY_COMPONENTS=thread_cpu_clock 
papi_array" "ROCPROFSYS_PAPI_EVENTS=perf::PERF_COUNT_SW_CPU_CLOCK" @@ -84,7 +83,6 @@ if( else() set(_annotate_environment "${_base_environment}" - "ROCPROFSYS_TRACE_CACHED=OFF" "ROCPROFSYS_TRACE_LEGACY=ON" "ROCPROFSYS_TIMEMORY_COMPONENTS=thread_cpu_clock" "ROCPROFSYS_USE_SAMPLING=OFF" diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-binary-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-binary-tests.cmake index 2d566e601b..786f527ebc 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-binary-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-binary-tests.cmake @@ -233,7 +233,7 @@ rocprofiler_systems_add_bin_test( TIMEOUT 45 PASS_REGEX "ENVIRONMENT VARIABLE,[ \n]+ROCPROFSYS_CI_SKIP_PUSH_POP_CHECK,[ \n]+ROCPROFSYS_THREAD_POOL_SIZE,[ \n]+ROCPROFSYS_USE_PID,[ \n]+" - FAIL_REGEX "ROCPROFSYS_TRACE_LEGACY|ROCPROFSYS_TRACE_CACHED|ROCPROFSYS_ABORT_FAIL_REGEX" + FAIL_REGEX "ROCPROFSYS_TRACE|ROCPROFSYS_ABORT_FAIL_REGEX" ) string( @@ -270,7 +270,7 @@ rocprofiler_systems_add_bin_test( txt json xml --force TIMEOUT 45 LABELS "rocprofiler-systems-avail" - ENVIRONMENT "ROCPROFSYS_TRACE_LEGACY=OFF;ROCPROFSYS_TRACE_CACHED=OFF;ROCPROFSYS_PROFILE=ON" + ENVIRONMENT "ROCPROFSYS_TRACE=OFF;ROCPROFSYS_PROFILE=ON" PASS_REGEX "Outputting JSON configuration file '${_AVAIL_CFG_PATH}tweak\\\.json'(.*)Outputting XML configuration file '${_AVAIL_CFG_PATH}tweak\\\.xml'(.*)Outputting text configuration file '${_AVAIL_CFG_PATH}tweak\\\.cfg'(.*)" ) diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-gpu-connect-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-gpu-connect-tests.cmake index d9c1cc9ffe..cab8fcb8c6 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-gpu-connect-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-gpu-connect-tests.cmake @@ -28,7 +28,7 @@ # Use legacy trace mode for AMD SMI counters - cached mode doesn't support real-time counter tracking set(_gpu_connect_environment - 
"ROCPROFSYS_TRACE_CACHED=OFF" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_TRACE_LEGACY=ON" "ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api" "ROCPROFSYS_AMD_SMI_METRICS=busy,temp,power,xgmi,pcie" diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-instrument-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-instrument-tests.cmake index 1eb01a5a51..407eb1a401 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-instrument-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-instrument-tests.cmake @@ -74,5 +74,5 @@ rocprofiler_systems_add_test( REWRITE_ARGS -e -v 2 --min-instructions=8 RUN_ARGS 10 4 1000 ENVIRONMENT - "${_lock_environment};ROCPROFSYS_FLAT_PROFILE=ON;ROCPROFSYS_PROFILE=OFF;ROCPROFSYS_TRACE_LEGACY=OFF;ROCPROFSYS_TRACE_CACHED=ON;ROCPROFSYS_SAMPLING_KEEP_INTERNAL=OFF" + "${_lock_environment};ROCPROFSYS_FLAT_PROFILE=ON;ROCPROFSYS_PROFILE=OFF;ROCPROFSYS_TRACE=ON;ROCPROFSYS_SAMPLING_KEEP_INTERNAL=OFF" ) diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-mpi-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-mpi-tests.cmake index 425e7c64f4..4a8592301d 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-mpi-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-mpi-tests.cmake @@ -73,7 +73,7 @@ rocprofiler_systems_add_test( --min-instructions 0 ENVIRONMENT - "${_base_environment};ROCPROFSYS_VERBOSE=1;ROCPROFSYS_TRACE_CACHED=OFF;ROCPROFSYS_TRACE_LEGACY=ON;ROCPROFSYS_PERFETTO_COMBINE_TRACES=ON" + "${_base_environment};ROCPROFSYS_VERBOSE=1;ROCPROFSYS_TRACE_LEGACY=ON;ROCPROFSYS_PERFETTO_COMBINE_TRACES=ON" REWRITE_RUN_PASS_REGEX "Successfully executed: .+rocprof-sys-merge-output.sh.*" REWRITE_RUN_FAIL_REGEX @@ -126,8 +126,7 @@ rocprofiler_systems_add_test( ) set(_mpip_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_USE_SAMPLING=OFF" "ROCPROFSYS_USE_PROCESS_SAMPLING=OFF" @@ -142,8 +141,7 @@ set(_mpip_environment ) 
set(_mpip_all2all_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_USE_SAMPLING=OFF" "ROCPROFSYS_USE_PROCESS_SAMPLING=OFF" diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-nic-perf.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-nic-perf.cmake index 268bb3fc70..81fd6f29d5 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-nic-perf.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-nic-perf.cmake @@ -28,7 +28,6 @@ message(STATUS "The list of all PAPI network events is ${_event_list}") # Use legacy trace mode for network stats - cached mode doesn't support real-time counter tracking set(_nic_perf_environment "${_base_environment}" - "ROCPROFSYS_TRACE_CACHED=OFF" "ROCPROFSYS_TRACE_LEGACY=ON" "ROCPROFSYS_OUTPUT_PATH=${PROJECT_BINARY_DIR}/rocprof-sys-tests-output/nic-performance" "ROCPROFSYS_USE_PID=OFF" diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake index 698281c9dc..89eb86df9a 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake @@ -22,8 +22,7 @@ if(NOT EXISTS "${ROCM_LLVM_LIB_PATH}/libomptarget.so" AND ROCPROFSYS_USE_ROCM) endif() set(_ompt_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_TIME_OUTPUT=OFF" "ROCPROFSYS_USE_OMPT=ON" diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-pthread-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-pthread-tests.cmake index 256f3b8386..3730659855 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-pthread-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-pthread-tests.cmake @@ -34,7 +34,7 @@ rocprofiler_systems_add_test( RUNTIME_ARGS -e -i 256 RUN_ARGS 30 4 1000 ENVIRONMENT - 
"${_lock_environment};ROCPROFSYS_PROFILE=ON;ROCPROFSYS_TRACE_LEGACY=OFF;ROCPROFSYS_TRACE_CACHED=ON;ROCPROFSYS_COLLAPSE_THREADS=OFF;ROCPROFSYS_SAMPLING_REALTIME=ON;ROCPROFSYS_SAMPLING_REALTIME_FREQ=10;ROCPROFSYS_SAMPLING_REALTIME_TIDS=0;ROCPROFSYS_SAMPLING_KEEP_INTERNAL=OFF" + "${_lock_environment};ROCPROFSYS_PROFILE=ON;ROCPROFSYS_TRACE=ON;ROCPROFSYS_COLLAPSE_THREADS=OFF;ROCPROFSYS_SAMPLING_REALTIME=ON;ROCPROFSYS_SAMPLING_REALTIME_FREQ=10;ROCPROFSYS_SAMPLING_REALTIME_TIDS=0;ROCPROFSYS_SAMPLING_KEEP_INTERNAL=OFF" REWRITE_RUN_PASS_REGEX "wall_clock .*\\|_pthread_create .* 4 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000 .*\\|_pthread_mutex_lock .* 1000 .*\\|_pthread_mutex_unlock .* 1000" RUNTIME_PASS_REGEX diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake index 986826cc87..ed311496d1 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake @@ -41,7 +41,6 @@ endif() set(_roctx_environment "${_base_environment}" "ROCPROFSYS_TRACE_LEGACY=ON" - "ROCPROFSYS_TRACE_CACHED=OFF" "ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,marker_api,kernel_dispatch" ) diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake index b1b585ad90..7421ee3132 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake @@ -103,8 +103,7 @@ endif() set(_test_openmp_env "OMP_PROC_BIND=spread" "OMP_PLACES=threads" "OMP_NUM_THREADS=2") set(_base_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_USE_SAMPLING=ON" 
"ROCPROFSYS_USE_PROCESS_SAMPLING=ON" @@ -115,8 +114,7 @@ set(_base_environment ) set(_flat_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_TIME_OUTPUT=OFF" "ROCPROFSYS_COUT_OUTPUT=ON" @@ -146,8 +144,7 @@ set(_lock_environment ) set(_perfetto_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=OFF" "ROCPROFSYS_USE_SAMPLING=ON" "ROCPROFSYS_USE_PROCESS_SAMPLING=ON" @@ -159,8 +156,7 @@ set(_perfetto_environment ) set(_timemory_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=OFF" + "ROCPROFSYS_TRACE=OFF" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_USE_SAMPLING=ON" "ROCPROFSYS_USE_PROCESS_SAMPLING=ON" @@ -181,8 +177,7 @@ set(_causal_environment ) set(_python_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_USE_SAMPLING=OFF" "ROCPROFSYS_USE_PROCESS_SAMPLING=ON" @@ -195,8 +190,7 @@ set(_python_environment ) set(_attach_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_USE_SAMPLING=OFF" "ROCPROFSYS_USE_PROCESS_SAMPLING=ON" @@ -210,8 +204,7 @@ set(_attach_environment ) set(_rccl_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_USE_SAMPLING=OFF" "ROCPROFSYS_USE_PROCESS_SAMPLING=ON" @@ -224,8 +217,7 @@ set(_rccl_environment ) set(_window_environment - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_USE_SAMPLING=OFF" "ROCPROFSYS_USE_PROCESS_SAMPLING=OFF" @@ -1478,8 +1470,7 @@ function(ROCPROFILER_SYSTEMS_ADD_BIN_TEST) if(NOT TEST_ENVIRONMENT) set(TEST_ENVIRONMENT - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" + "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=ON" 
"ROCPROFSYS_USE_SAMPLING=ON" "ROCPROFSYS_TIME_OUTPUT=OFF" diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-thread-limit-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-thread-limit-tests.cmake index e515374178..b81f9f86f6 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-thread-limit-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-thread-limit-tests.cmake @@ -32,8 +32,6 @@ endif() set(_thread_limit_environment "${_base_environment}" - "ROCPROFSYS_TRACE_LEGACY=OFF" - "ROCPROFSYS_TRACE_CACHED=ON" "ROCPROFSYS_PROFILE=ON" "ROCPROFSYS_COUT_OUTPUT=ON" "ROCPROFSYS_USE_SAMPLING=ON"