From e8a812211122a25bf57a3d45cf706320ccefffdd Mon Sep 17 00:00:00 2001 From: "Arif, Maisam" Date: Thu, 6 Feb 2025 19:33:49 -0600 Subject: [PATCH] [SWDEV-494072] Added Fallback to metric command for pcie replay_counter (#99) Change-Id: I5392e8f881b1e69d9a76b01813a66b08fb70e006 Signed-off-by: Maisam Arif [ROCm/amdsmi commit: 548ed781c7410b85b3b5604b04cfcdd7462157d3] --- projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 0c4a33aa7f..622d1a5254 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -1542,7 +1542,15 @@ class AMDSMICommands(): pcie_dict['speed'] = pcie_speed_GTs_value pcie_dict['bandwidth'] = pcie_metric['pcie_bandwidth'] + pcie_dict['replay_count'] = pcie_metric['pcie_replay_count'] + if pcie_dict['replay_count'] == "N/A": + try: + pcie_replay = amdsmi_interface.amdsmi_get_gpu_pci_replay_counter(args.gpu) + pcie_dict['replay_count'] = pcie_replay + except amdsmi_exception.AmdSmiLibraryException as e: + logging.debug("Failed to get sysfs pcie replay counter on gpu %s | %s", gpu_id, e.get_error_info()) + pcie_dict['l0_to_recovery_count'] = pcie_metric['pcie_l0_to_recovery_count'] pcie_dict['replay_roll_over_count'] = pcie_metric['pcie_replay_roll_over_count'] pcie_dict['nak_received_count'] = pcie_metric['pcie_nak_received_count']