[SWDEV-474474] - Changed Monitor PCIE_REPLAY count to use gpu_metrics, falling back to the sysfs replay counter when gpu_metrics is unavailable

Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Change-Id: I4351a23e8412875bb4b23b30747ac6d0bf3d3c56


[ROCm/amdsmi commit: 5b7be3bf99]
这个提交包含在:
Maisam Arif
2024-07-17 17:10:37 -05:00
父节点 a7ea536524
当前提交 ad3ba88ba5
+11 -3
查看文件
@@ -4300,11 +4300,19 @@ class AMDSMICommands():
self.logger.table_header += 'DOUBLE_ECC'.rjust(12)
try:
pcie_replay = amdsmi_interface.amdsmi_get_gpu_pci_replay_counter(args.gpu)
monitor_values['pcie_replay'] = pcie_replay
pcie_metric = amdsmi_interface.amdsmi_get_pcie_info(args.gpu)['pcie_metric']
logging.debug("PCIE Metric for %s | %s", gpu_id, pcie_metric)
monitor_values['pcie_replay'] = pcie_metric['pcie_replay_count']
except amdsmi_exception.AmdSmiLibraryException as e:
monitor_values['pcie_replay'] = "N/A"
logging.debug("Failed to get pcie replay counter on gpu %s | %s", gpu_id, e.get_error_info())
logging.debug("Failed to get gpu_metrics pcie replay counter on gpu %s | %s", gpu_id, e.get_error_info())
if monitor_values['pcie_replay'] == "N/A":
try:
pcie_replay = amdsmi_interface.amdsmi_get_gpu_pci_replay_counter(args.gpu)
monitor_values['pcie_replay'] = pcie_replay
except amdsmi_exception.AmdSmiLibraryException as e:
logging.debug("Failed to get sysfs pcie replay counter on gpu %s | %s", gpu_id, e.get_error_info())
self.logger.table_header += 'PCIE_REPLAY'.rjust(13)
if args.vram_usage: