fix roctx range markers not paired correctly in rocpd output (#2793)
## Motivation

Fix roctx range markers (Push/Pop, Start/Stop) not being displayed correctly in rocpd output. The Visualizer was showing only Stop/Pop events as instant markers instead of proper duration ranges with labels, while Perfetto output displayed them correctly.

## Technical Details

In `tool_tracing_callback_stop()`, the rocpd/database output was using `user_data->value` (timestamp of the Pop/Stop event) instead of `begin_ts` (corrected timestamp from the corresponding Push/Start event) when calling `cache_region()`. The Perfetto output already used `begin_ts` correctly (line 818). This change aligns the rocpd output with the Perfetto behavior by using `begin_ts` instead of `user_data->value` (line 887).

Updated rocpd validation rules.
This commit is contained in:
@@ -30,6 +30,7 @@ Full documentation for ROCm Systems Profiler is available at [https://rocm.docs.
|
||||
### Resolved issues
|
||||
|
||||
- Fixed application termination with segfault when thread creation surpasses ROCPROFSYS_MAX_THREADS configuration.
|
||||
- Fixed how `roctxRange` markers are handled in the `rocpd` output. The "push" and "pop" markers are now shown as a single event.
|
||||
|
||||
### Removed
|
||||
|
||||
|
||||
@@ -884,7 +884,7 @@ tool_tracing_callback_stop(
|
||||
record, iterate_args_callback, 2, &args);
|
||||
|
||||
auto call_stack = get_backtrace(_bt_data);
|
||||
uint64_t _beg_ts = user_data->value;
|
||||
uint64_t _beg_ts = begin_ts;
|
||||
uint64_t _end_ts = ts;
|
||||
|
||||
{
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
},
|
||||
{
|
||||
"_comment": "The actual number of samples will vary depending on the GPU. This validates presence of samples, not the actual number of samples.",
|
||||
"min_rows": 100,
|
||||
"min_rows": 20,
|
||||
"name_prefix": "rocpd_pmc_event_",
|
||||
"required_columns": [
|
||||
"event_id",
|
||||
@@ -36,28 +36,28 @@
|
||||
"comparison": "greater_than",
|
||||
"description": "Check for amd-smi monitoring busy times",
|
||||
"error_message": "Less than expected number of captured amd-smi-busy samples!",
|
||||
"expected_result": 10,
|
||||
"expected_result": 5,
|
||||
"query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_busy_mm'"
|
||||
},
|
||||
{
|
||||
"comparison": "greater_than",
|
||||
"description": "Check for amd-smi monitoring GPU temperature",
|
||||
"error_message": "Less than expected number of captured amd-smi-temperature samples!",
|
||||
"expected_result": 10,
|
||||
"expected_result": 5,
|
||||
"query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_temp'"
|
||||
},
|
||||
{
|
||||
"comparison": "greater_than",
|
||||
"description": "Check for amd-smi monitoring GPU power consumption",
|
||||
"error_message": "Less than expected number of captured amd-smi-power samples!",
|
||||
"expected_result": 10,
|
||||
"expected_result": 5,
|
||||
"query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_power'"
|
||||
},
|
||||
{
|
||||
"comparison": "greater_than",
|
||||
"description": "Check for amd-smi monitoring GPU memory usage",
|
||||
"error_message": "Less than expected number of captured amd-smi-memory-usage samples!",
|
||||
"expected_result": 10,
|
||||
"expected_result": 5,
|
||||
"query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_memory_usage'"
|
||||
}
|
||||
]
|
||||
|
||||
+14
@@ -78,6 +78,20 @@
|
||||
"error_message": "Expected 2 roctxRangeStart marker entries in `regions` table",
|
||||
"expected_result": 2,
|
||||
"query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name LIKE 'roctxRangeStart_%';"
|
||||
},
|
||||
{
|
||||
"comparison": "equals",
|
||||
"description": "Verify that roctxRangePop markers do not appear in table 'regions'",
|
||||
"error_message": "Found unexpected roctxRangePop marker entries in `regions` table",
|
||||
"expected_result": 0,
|
||||
"query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name LIKE 'roctxRangePop%';"
|
||||
},
|
||||
{
|
||||
"comparison": "equals",
|
||||
"description": "Verify that roctxRangeStop markers do not appear in table 'regions'",
|
||||
"error_message": "Found unexpected roctxRangeStop marker entries in `regions` table",
|
||||
"expected_result": 0,
|
||||
"query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name LIKE 'roctxRangeStop%';"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user