From 3318c540eaff87171c0ae207a899cadc76b251ec Mon Sep 17 00:00:00 2001 From: habajpai-amd Date: Fri, 23 Jan 2026 10:17:43 +0530 Subject: [PATCH] fix roctx range markers not paired correctly in rocpd output (#2793) ## Motivation Fix roctx range markers (Push/Pop, Start/Stop) not being displayed correctly in rocpd output. The Visualizer was showing only Stop/Pop events as instant markers instead of proper duration ranges with labels, while Perfetto output displayed them correctly. ## Technical Details In `tool_tracing_callback_stop()`, the rocpd/database output was using `user_data->value` (timestamp of the Pop/Stop event) instead of `begin_ts` (corrected timestamp from the corresponding Push/Start event) when calling `cache_region()`. The Perfetto output already used `begin_ts` correctly (line 818). This change aligns the rocpd output with the Perfetto behavior by using `begin_ts` instead of `user_data->value` (line 887). Updated the rocpd validation rules: added checks that no `roctxRangePop`/`roctxRangeStop` entries appear in the `regions` table, and lowered the amd-smi sample-count thresholds (`min_rows` 100 -> 20, per-metric `expected_result` 10 -> 5). --- projects/rocprofiler-systems/CHANGELOG.md | 1 + .../lib/rocprof-sys/library/rocprofiler-sdk.cpp | 2 +- .../roctx/amd-smi-rules.json | 10 +++++----- .../roctx/sdk-metrics-rules.json | 14 ++++++++++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/projects/rocprofiler-systems/CHANGELOG.md b/projects/rocprofiler-systems/CHANGELOG.md index aeb6dffc4c..a2cfaeb620 100644 --- a/projects/rocprofiler-systems/CHANGELOG.md +++ b/projects/rocprofiler-systems/CHANGELOG.md @@ -30,6 +30,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs. ### Resolved issues - Fixed application termination with segfault when thread creation surpasses ROCPROFSYS_MAX_THREADS configuration. +- Fixed how `roctxRange` markers are handled in the `rocpd` output. The "push" and "pop" markers are now shown as a single event. 
### Removed diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp index 0a337b2400..bdf94326fd 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp @@ -884,7 +884,7 @@ tool_tracing_callback_stop( record, iterate_args_callback, 2, &args); auto call_stack = get_backtrace(_bt_data); - uint64_t _beg_ts = user_data->value; + uint64_t _beg_ts = begin_ts; uint64_t _end_ts = ts; { diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/amd-smi-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/amd-smi-rules.json index 128f6a25dd..1562bc0b75 100644 --- a/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/amd-smi-rules.json +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/amd-smi-rules.json @@ -24,7 +24,7 @@ }, { "_comment": "The actual number of samples will vary depending on the GPU. 
This validates presence of samples, not the actual number of samples.", - "min_rows": 100, + "min_rows": 20, "name_prefix": "rocpd_pmc_event_", "required_columns": [ "event_id", @@ -36,28 +36,28 @@ "comparison": "greater_than", "description": "Check for amd-smi monitoring busy times", "error_message": "Less than expected number of captured amd-smi-busy samples!", - "expected_result": 10, + "expected_result": 5, "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_busy_mm'" }, { "comparison": "greater_than", "description": "Check for amd-smi monitoring GPU temperature", "error_message": "Less than expected number of captured amd-smi-temperature samples!", - "expected_result": 10, + "expected_result": 5, "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_temp'" }, { "comparison": "greater_than", "description": "Check for amd-smi monitoring GPU power consumption", "error_message": "Less than expected number of captured amd-smi-power samples!", - "expected_result": 10, + "expected_result": 5, "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_power'" }, { "comparison": "greater_than", "description": "Check for amd-smi monitoring GPU memory usage", "error_message": "Less than expected number of captured amd-smi-memory-usage samples!", - "expected_result": 10, + "expected_result": 5, "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_memory_usage'" } ] diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/sdk-metrics-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/sdk-metrics-rules.json index 7c8eee6b27..164940f9ed 100644 --- 
a/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/sdk-metrics-rules.json +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/sdk-metrics-rules.json @@ -78,6 +78,20 @@ "error_message": "Expected 2 roctxRangeStart marker entries in `regions` table", "expected_result": 2, "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name LIKE 'roctxRangeStart_%';" + }, + { + "comparison": "equals", + "description": "Verify that roctxRangePop markers do not appear in table 'regions'", + "error_message": "Found unexpected roctxRangePop marker entries in `regions` table", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name LIKE 'roctxRangePop%';" + }, + { + "comparison": "equals", + "description": "Verify that roctxRangeStop markers do not appear in table 'regions'", + "error_message": "Found unexpected roctxRangeStop marker entries in `regions` table", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name LIKE 'roctxRangeStop%';" } ] },