SWDEV-500520: Updated documentation for hang issue (#79)

* SWDEV-500520: Updated documentation for hang issue

* Avoid fatal error when invalid metric is found

* removing invalid metrics

* clang formatting

[ROCm/rocprofiler-sdk commit: dfee6489b1]
This commit is contained in:
Jakaraddi, Manjunath
2025-01-16 02:14:22 -08:00
committed by GitHub
parent ee959cc591
commit 324c57ede1
2 changed files with 12 additions and 13 deletions
@@ -789,22 +789,23 @@ Properties
$ cat input.json
{
"jobs": [
{
"jobs": [
{
"pmc": ["SQ_WAVES", "GRBM_COUNT", "GRBM_GUI_ACTIVE"]
},
{
},
{
"pmc": ["FETCH_SIZE", "WRITE_SIZE"],
"kernel_include_regex": ".*_kernel",
"kernel_exclude_regex": "multiply",
"kernel_iteration_range": "[1-2]","[3-4]"
"kernel_iteration_range": "[1-2],[3-4]",
"output_file": "out",
"output_format": [
"csv",
"json"
"csv",
"json"
],
"truncate_kernels": true
]
}
]
}
.. code-block:: shell
@@ -816,8 +817,6 @@ Properties
- SQ_WAVES
- GRBM_COUNT
- GRBM_GUI_ACTIVE
- 'TCC_HIT[1]'
- 'TCC_HIT[2]'
- pmc:
- FETCH_SIZE
- WRITE_SIZE
@@ -838,9 +838,9 @@ get_device_counting_service(rocprofiler_agent_id_t agent_id)
", "));
auto found_counters =
fmt::format("{}", fmt::join(found_v.begin(), found_v.end(), ", "));
LOG(FATAL) << "Unable to find all counters for agent " << agent_v->node_id
<< " (gpu-" << agent_v->gpu_index << ", " << agent_v->name << ") in ["
<< requested_counters << "]. Found: [" << found_counters << "]";
ROCP_WARNING << "Unable to find all counters for agent " << agent_v->node_id
<< " (gpu-" << agent_v->gpu_index << ", " << agent_v->name << ") in ["
<< requested_counters << "]. Found: [" << found_counters << "]";
}
if(!counters_v.empty())