From db33cda0c1aab631cd72e5b069077089dc0d8aaf Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Thu, 15 Feb 2024 12:41:50 -0600 Subject: [PATCH] Unify the amdsmi_get_pcie_info python interface Make the python interface consistent with the C interface. Change-Id: Idda08f888947c757e475d5a024b0ec3d8e1d846a --- amdsmi_cli/amdsmi_commands.py | 15 +++++++++------ example/amd_smi_drm_example.cc | 4 ++-- include/amd_smi/amdsmi.h | 4 ++-- py-interface/README.md | 4 ++-- py-interface/amdsmi_interface.py | 21 ++++++++++++++++----- py-interface/amdsmi_wrapper.py | 6 +++--- src/amd_smi/amd_smi.cc | 4 ++-- 7 files changed, 36 insertions(+), 22 deletions(-) diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index bb717e498d..6b62982f7c 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -353,7 +353,10 @@ class AMDSMICommands(): try: link_caps = amdsmi_interface.amdsmi_get_pcie_info(args.gpu) - bus_info.update(link_caps) + bus_info['max_pcie_speed'] = link_caps['pcie_static']['max_pcie_speed'] + bus_info['pcie_slot_type'] = link_caps['pcie_static']['slot_type'] + bus_info['pcie_interface_version'] = link_caps['pcie_static']['pcie_interface_version'] + if bus_info['max_pcie_speed'] % 1000 != 0: pcie_speed_GTs_value = round(bus_info['max_pcie_speed'] / 1000, 1) else: @@ -1396,17 +1399,17 @@ class AMDSMICommands(): try: pcie_link_status = amdsmi_interface.amdsmi_get_pcie_info(args.gpu) - if pcie_link_status['pcie_speed'] % 1000 != 0: - pcie_speed_GTs_value = round(pcie_link_status['pcie_speed'] / 1000, 1) + if pcie_link_status['pcie_metric']['pcie_speed'] % 1000 != 0: + pcie_speed_GTs_value = round(pcie_link_status['pcie_metric']['pcie_speed'] / 1000, 1) else: - pcie_speed_GTs_value = round(pcie_link_status['pcie_speed'] / 1000) + pcie_speed_GTs_value = round(pcie_link_status['pcie_metric']['pcie_speed'] / 1000) pcie_dict['current_speed'] = pcie_speed_GTs_value - pcie_dict['current_lanes'] = pcie_link_status['pcie_lanes'] + 
pcie_dict['current_lanes'] = pcie_link_status['pcie_metric']['pcie_width'] if self.logger.is_human_readable_format(): unit = 'GT/s' - pcie_dict['current_lanes'] = f"{pcie_link_status['pcie_lanes']} lanes" + pcie_dict['current_lanes'] = f"{pcie_link_status['pcie_metric']['pcie_width']} lanes" pcie_dict['current_speed'] = f"{pcie_dict['current_speed']} GT/s" except amdsmi_exception.AmdSmiLibraryException as e: diff --git a/example/amd_smi_drm_example.cc b/example/amd_smi_drm_example.cc index e18fc1cfce..ea28b8ebba 100644 --- a/example/amd_smi_drm_example.cc +++ b/example/amd_smi_drm_example.cc @@ -403,12 +403,12 @@ int main() { ret = amdsmi_get_pcie_info(processor_handles[j], &pcie_info); CHK_AMDSMI_RET(ret) printf(" Output of amdsmi_get_pcie_info:\n"); - printf("\tCurrent PCIe lanes: %d\n", pcie_info.pcie_metric.pcie_lanes); + printf("\tCurrent PCIe lanes: %d\n", pcie_info.pcie_metric.pcie_width); printf("\tCurrent PCIe speed: %d\n", pcie_info.pcie_metric.pcie_speed); printf("\tCurrent PCIe Interface Version: %d\n", pcie_info.pcie_static.pcie_interface_version); printf("\tPCIe slot type: %d\n", pcie_info.pcie_static.slot_type); - printf("\tPCIe max lanes: %d\n", pcie_info.pcie_static.max_pcie_lanes); + printf("\tPCIe max lanes: %d\n", pcie_info.pcie_static.max_pcie_width); printf("\tPCIe max speed: %d\n", pcie_info.pcie_static.max_pcie_speed); // Get VRAM temperature limit diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index 1b599d5632..7a4b209124 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -500,15 +500,15 @@ typedef enum { typedef struct { struct pcie_static_ { - uint16_t max_pcie_lanes; //!< maximum number of PCIe lanes + uint16_t max_pcie_width; //!< maximum number of PCIe lanes uint32_t max_pcie_speed; //!< maximum PCIe speed uint32_t pcie_interface_version; //!< PCIe interface version amdsmi_card_form_factor_t slot_type; //!< card form factor uint64_t reserved[10]; } pcie_static; struct pcie_metric_ { + uint16_t 
pcie_width; //!< current PCIe width uint32_t pcie_speed; //!< current PCIe speed in MT/s - uint16_t pcie_lanes; //!< current PCIe width uint32_t pcie_bandwidth; //!< current PCIe bandwidth Mb/s uint64_t pcie_replay_count; //!< total number of the replays issued on the PCIe link uint64_t pcie_l0_to_recovery_count; //!< total number of times the PCIe link transitioned from L0 to the recovery state diff --git a/py-interface/README.md b/py-interface/README.md index a1747f31a6..d812dc7375 100644 --- a/py-interface/README.md +++ b/py-interface/README.md @@ -791,7 +791,6 @@ Output: Dictionary with fields Field | Description ---|--- -`pcie_lanes`| pcie lanes in use -`pcie_speed`| current pcie speed -`pcie_interface_version`| current pcie generation +`pcie_static`| dictionary of static PCIe properties: `max_pcie_width`, `max_pcie_speed`, `pcie_interface_version`, `slot_type` +`pcie_metric`| dictionary of current PCIe metrics: `pcie_width`, `pcie_speed`, `pcie_bandwidth`, `pcie_replay_count`, `pcie_l0_to_recovery_count`, `pcie_replay_roll_over_count`, `pcie_nak_sent_count`, `pcie_nak_received_count` @@ -811,7 +810,7 @@ try: else: for device in devices: pcie_link_status = amdsmi_get_pcie_info(device) - print(pcie_link_status["pcie_lanes"]) - print(pcie_link_status["pcie_speed"]) - print(pcie_link_status["pcie_interface_version"]) + print(pcie_link_status["pcie_metric"]["pcie_width"]) + print(pcie_link_status["pcie_metric"]["pcie_speed"]) + print(pcie_link_status["pcie_static"]["pcie_interface_version"]) except AmdSmiException as e: diff --git a/py-interface/amdsmi_interface.py b/py-interface/amdsmi_interface.py index 6b22359404..a7ebc41f08 100644 --- a/py-interface/amdsmi_interface.py +++ b/py-interface/amdsmi_interface.py @@ -2134,13 +2134,24 @@ def amdsmi_get_pcie_info( ) ) - return {"pcie_speed": pcie_info.pcie_metric.pcie_speed, - "pcie_lanes": pcie_info.pcie_metric.pcie_lanes, - "pcie_interface_version": pcie_info.pcie_static.pcie_interface_version, + return { + "pcie_static": { + "max_pcie_width": pcie_info.pcie_static.max_pcie_width, "max_pcie_speed": pcie_info.pcie_static.max_pcie_speed, - "max_pcie_lanes": pcie_info.pcie_static.max_pcie_lanes, "pcie_interface_version": pcie_info.pcie_static.pcie_interface_version, - "pcie_slot_type": pcie_info.pcie_static.slot_type} + "slot_type": pcie_info.pcie_static.slot_type, + }, + "pcie_metric": { + "pcie_width": pcie_info.pcie_metric.pcie_width, + "pcie_speed": 
pcie_info.pcie_metric.pcie_speed, + "pcie_bandwidth": pcie_info.pcie_metric.pcie_bandwidth, + "pcie_replay_count": pcie_info.pcie_metric.pcie_replay_count, + "pcie_l0_to_recovery_count": pcie_info.pcie_metric.pcie_l0_to_recovery_count, + "pcie_replay_roll_over_count": pcie_info.pcie_metric.pcie_replay_roll_over_count, + "pcie_nak_sent_count": pcie_info.pcie_metric.pcie_nak_sent_count, + "pcie_nak_received_count": pcie_info.pcie_metric.pcie_nak_received_count, + } + } def amdsmi_get_processor_handle_from_bdf(bdf): diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index 7656b9fa8b..eafde217c7 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -751,9 +751,9 @@ class struct_pcie_metric_(Structure): struct_pcie_metric_._pack_ = 1 # source:False struct_pcie_metric_._fields_ = [ - ('pcie_speed', ctypes.c_uint32), - ('pcie_lanes', ctypes.c_uint16), + ('pcie_width', ctypes.c_uint16), ('PADDING_0', ctypes.c_ubyte * 2), + ('pcie_speed', ctypes.c_uint32), ('pcie_bandwidth', ctypes.c_uint32), ('PADDING_1', ctypes.c_ubyte * 4), ('pcie_replay_count', ctypes.c_uint64), @@ -769,7 +769,7 @@ class struct_pcie_static_(Structure): struct_pcie_static_._pack_ = 1 # source:False struct_pcie_static_._fields_ = [ - ('max_pcie_lanes', ctypes.c_uint16), + ('max_pcie_width', ctypes.c_uint16), ('PADDING_0', ctypes.c_ubyte * 2), ('max_pcie_speed', ctypes.c_uint32), ('pcie_interface_version', ctypes.c_uint32), diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index 2f68823540..e576f67226 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -1964,7 +1964,7 @@ amdsmi_status_t amdsmi_get_pcie_info(amdsmi_processor_handle processor_handle, a printf("Failed to open file: %s \n", path_max_link_width.c_str()); return AMDSMI_STATUS_API_FAILED; } - info->pcie_static.max_pcie_lanes = (uint16_t)pcie_width; + info->pcie_static.max_pcie_width = (uint16_t)pcie_width; std::string path_max_link_speed = "/sys/class/drm/" + 
gpu_device->get_gpu_path() + "/device/max_link_speed"; @@ -2028,7 +2028,7 @@ amdsmi_status_t amdsmi_get_pcie_info(amdsmi_processor_handle processor_handle, a if (status != AMDSMI_STATUS_SUCCESS) return status; - info->pcie_metric.pcie_lanes = metric_info.pcie_link_width; + info->pcie_metric.pcie_width = metric_info.pcie_link_width; // gpu metrics is inconsistent with pcie_speed values, if 0-6 then it needs to be translated if (metric_info.pcie_link_speed <= 6) { status = smi_amdgpu_get_pcie_speed_from_pcie_type(metric_info.pcie_link_speed, &info->pcie_metric.pcie_speed); // mapping to MT/s