diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 27152afcb5..35af8395a4 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -361,11 +361,11 @@ class AMDSMICommands(): logging.debug("Failed to get bdf for gpu %s | %s", gpu_id, e.get_error_info()) try: - link_caps = amdsmi_interface.amdsmi_get_pcie_info(args.gpu) - bus_info['max_pcie_width'] = link_caps['pcie_static']['max_pcie_width'] - bus_info['max_pcie_speed'] = link_caps['pcie_static']['max_pcie_speed'] - bus_info['pcie_interface_version'] = link_caps['pcie_static']['pcie_interface_version'] - + pcie_static = amdsmi_interface.amdsmi_get_pcie_info(args.gpu)['pcie_static'] + bus_info['max_pcie_width'] = pcie_static['max_pcie_width'] + bus_info['max_pcie_speed'] = pcie_static['max_pcie_speed'] + bus_info['pcie_interface_version'] = pcie_static['pcie_interface_version'] + bus_info['slot_type'] = pcie_static['slot_type'] if bus_info['max_pcie_speed'] % 1000 != 0: pcie_speed_GTs_value = round(bus_info['max_pcie_speed'] / 1000, 1) else: @@ -373,14 +373,6 @@ class AMDSMICommands(): bus_info['max_pcie_speed'] = pcie_speed_GTs_value - slot_type = link_caps['pcie_static']['slot_type'] - if isinstance(slot_type, int): - slot_types = amdsmi_interface.amdsmi_wrapper.amdsmi_card_form_factor_t__enumvalues - if slot_type in slot_types: - bus_info['slot_type'] = slot_types[slot_type].replace("AMDSMI_CARD_FORM_FACTOR_", "") - else: - bus_info['slot_type'] = "Unknown" - if bus_info['pcie_interface_version'] > 0: bus_info['pcie_interface_version'] = f"Gen {bus_info['pcie_interface_version']}" @@ -636,7 +628,7 @@ class AMDSMICommands(): except amdsmi_exception.AmdSmiLibraryException as e: policy_info = "N/A" logging.debug("Failed to get policy info for gpu %s | %s", gpu_id, e.get_error_info()) - + static_dict['dpm_policy'] = policy_info if 'numa' in current_platform_args: if args.numa: @@ -1460,6 +1452,7 @@ class AMDSMICommands(): if args.pcie: pcie_dict = {"width": "N/A", 
"speed": "N/A", + "bandwidth": "N/A", "replay_count" : "N/A", "l0_to_recovery_count" : "N/A", "replay_roll_over_count" : "N/A", @@ -1470,65 +1463,43 @@ class AMDSMICommands(): "max_packet_size": "N/A"} try: - pcie_link_status = amdsmi_interface.amdsmi_get_pcie_info(args.gpu) + pcie_metric = amdsmi_interface.amdsmi_get_pcie_info(args.gpu)['pcie_metric'] + logging.debug("PCIE Metric for %s | %s", gpu_id, pcie_metric) - if pcie_link_status['pcie_metric']['pcie_speed'] % 1000 != 0: - pcie_speed_GTs_value = round(pcie_link_status['pcie_metric']['pcie_speed'] / 1000, 1) - else: - pcie_speed_GTs_value = round(pcie_link_status['pcie_metric']['pcie_speed'] / 1000) + pcie_dict['width'] = pcie_metric['pcie_width'] - pcie_dict['width'] = pcie_link_status['pcie_metric']['pcie_width'] - pcie_dict['speed'] = pcie_speed_GTs_value + if pcie_metric['pcie_speed'] != "N/A": + if pcie_metric['pcie_speed'] % 1000 != 0: + pcie_speed_GTs_value = round(pcie_metric['pcie_speed'] / 1000, 1) + else: + pcie_speed_GTs_value = round(pcie_metric['pcie_speed'] / 1000) + pcie_dict['speed'] = pcie_speed_GTs_value + + pcie_dict['bandwidth'] = pcie_metric['pcie_bandwidth'] + pcie_dict['replay_count'] = pcie_metric['pcie_replay_count'] + pcie_dict['l0_to_recovery_count'] = pcie_metric['pcie_l0_to_recovery_count'] + pcie_dict['replay_roll_over_count'] = pcie_metric['pcie_replay_roll_over_count'] + pcie_dict['nak_received_count'] = pcie_metric['pcie_nak_received_count'] + pcie_dict['nak_sent_count'] = pcie_metric['pcie_nak_sent_count'] pcie_speed_unit = 'GT/s' + pcie_bw_unit = 'Mb/s' if self.logger.is_human_readable_format(): - pcie_dict['speed'] = f"{pcie_dict['speed']} {pcie_speed_unit}" + if pcie_dict['speed'] != "N/A": + pcie_dict['speed'] = f"{pcie_dict['speed']} {pcie_speed_unit}" + if pcie_dict['bandwidth'] != "N/A": + pcie_dict['bandwidth'] = f"{pcie_dict['bandwidth']} {pcie_bw_unit}" if self.logger.is_json_format(): - pcie_dict['speed'] = {"value" : pcie_dict['speed'], - "unit" : 
pcie_speed_unit} + if pcie_dict['speed'] != "N/A": + pcie_dict['speed'] = {"value" : pcie_dict['speed'], + "unit" : pcie_speed_unit} + if pcie_dict['bandwidth'] != "N/A": + pcie_dict['bandwidth'] = {"value" : pcie_dict['bandwidth'], + "unit" : pcie_bw_unit} except amdsmi_exception.AmdSmiLibraryException as e: logging.debug("Failed to get pcie link status for gpu %s | %s", gpu_id, e.get_error_info()) - try: - pci_replay_counter = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu)['pcie_replay_count_acc'] - if pci_replay_counter == "N/A": - # raising exception here to fall back to sysfs - raise amdsmi_exception.AmdSmiLibraryException(amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED) - pcie_dict['replay_count'] = pci_replay_counter - except amdsmi_exception.AmdSmiLibraryException as e: - logging.debug("Failed to get pci replay counter for gpu %s | %s", gpu_id, e.get_error_info()) - logging.debug("Falling back to sysfs pci replay counter for gpu %s | %s", gpu_id, e.get_error_info()) - try: - pci_replay_counter = amdsmi_interface.amdsmi_get_gpu_pci_replay_counter(args.gpu) - pcie_dict['replay_count'] = pci_replay_counter - except amdsmi_exception.AmdSmiLibraryException as err: - pcie_dict['replay_count'] = "N/A" - logging.debug("Failed to get sysfs fallback pci replay counter for gpu %s | %s", gpu_id, err.get_error_info()) - - try: - l0_to_recovery_counter = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu)['pcie_l0_to_recov_count_acc'] - pcie_dict['l0_to_recovery_count'] = l0_to_recovery_counter - except amdsmi_exception.AmdSmiLibraryException as e: - pcie_dict['l0_to_recovery_count'] = "N/A" - logging.debug("Failed to get pcie l0 to recovery counter for gpu %s | %s", gpu_id, e.get_error_info()) - - try: - pci_replay_rollover_counter = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu)['pcie_replay_rover_count_acc'] - pcie_dict['replay_roll_over_count'] = pci_replay_rollover_counter - except amdsmi_exception.AmdSmiLibraryException as e: - 
pcie_dict['replay_roll_over_count'] = "N/A" - logging.debug("Failed to get pcie replay rollover counter for gpu %s | %s", gpu_id, e.get_error_info()) - - try: - gpu_metrics_info = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu) - pcie_dict['nak_sent_count'] = gpu_metrics_info['pcie_nak_sent_count_acc'] - pcie_dict['nak_received_count'] = gpu_metrics_info['pcie_nak_rcvd_count_acc'] - except amdsmi_exception.AmdSmiLibraryException as e: - pcie_dict['nak_sent_count'] = "N/A" - pcie_dict['nak_received_count'] = "N/A" - logging.debug("Failed to get pcie nak info for gpu %s | %s", gpu_id, e.get_error_info()) - try: pcie_bw = amdsmi_interface.amdsmi_get_gpu_pci_throughput(args.gpu) sent = pcie_bw['sent'] * pcie_bw['max_pkt_sz'] @@ -4134,14 +4105,14 @@ class AMDSMICommands(): } try: - pcie_info = amdsmi_interface.amdsmi_get_pcie_info(src_gpu)['pcie_static'] - if pcie_info['max_pcie_speed'] % 1000 != 0: - pcie_speed_GTs_value = round(pcie_info['max_pcie_speed'] / 1000, 1) + pcie_static = amdsmi_interface.amdsmi_get_pcie_info(src_gpu)['pcie_static'] + if pcie_static['max_pcie_speed'] % 1000 != 0: + pcie_speed_GTs_value = round(pcie_static['max_pcie_speed'] / 1000, 1) else: - pcie_speed_GTs_value = round(pcie_info['max_pcie_speed'] / 1000) + pcie_speed_GTs_value = round(pcie_static['max_pcie_speed'] / 1000) bitrate = pcie_speed_GTs_value - max_bandwidth = bitrate * pcie_info['max_pcie_width'] + max_bandwidth = bitrate * pcie_static['max_pcie_width'] except amdsmi_exception.AmdSmiLibraryException as e: logging.debug("Failed to get bitrate and bandwidth for GPU %s | %s", src_gpu_id, e.get_error_info()) diff --git a/example/amd_smi_drm_example.cc b/example/amd_smi_drm_example.cc index ea28b8ebba..cd9a3a1f36 100644 --- a/example/amd_smi_drm_example.cc +++ b/example/amd_smi_drm_example.cc @@ -411,6 +411,14 @@ int main() { printf("\tPCIe max lanes: %d\n", pcie_info.pcie_static.max_pcie_width); printf("\tPCIe max speed: %d\n", pcie_info.pcie_static.max_pcie_speed); + // 
additional pcie related metrics + printf("\tPCIe bandwidth: %d\n", pcie_info.pcie_metric.pcie_bandwidth); + printf("\tPCIe replay count: %d\n", pcie_info.pcie_metric.pcie_replay_count); + printf("\tPCIe L0 recovery count: %d\n", pcie_info.pcie_metric.pcie_l0_to_recovery_count); + printf("\tPCIe rollover count: %d\n", pcie_info.pcie_metric.pcie_replay_roll_over_count); + printf("\tPCIe nak received count: %d\n", pcie_info.pcie_metric.pcie_nak_received_count); + printf("\tPCIe nak sent count: %d\n", pcie_info.pcie_metric.pcie_nak_sent_count); + // Get VRAM temperature limit int64_t temperature = 0; ret = amdsmi_get_temp_metric( diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index ef58a6ce3d..861709b98d 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -509,7 +509,7 @@ typedef struct { struct pcie_metric_ { uint16_t pcie_width; //!< current PCIe width uint32_t pcie_speed; //!< current PCIe speed in MT/s - uint32_t pcie_bandwidth; //!< current PCIe bandwidth Mb/s + uint32_t pcie_bandwidth; //!< current instantaneous PCIe bandwidth in Mb/s uint64_t pcie_replay_count; //!< total number of the replays issued on the PCIe link uint64_t pcie_l0_to_recovery_count; //!< total number of times the PCIe link transitioned from L0 to the recovery state uint64_t pcie_replay_roll_over_count; //!< total number of replay rollovers issued on the PCIe link diff --git a/include/amd_smi/impl/amd_smi_utils.h b/include/amd_smi/impl/amd_smi_utils.h index 7d2df9b115..30897b340f 100644 --- a/include/amd_smi/impl/amd_smi_utils.h +++ b/include/amd_smi/impl/amd_smi_utils.h @@ -21,6 +21,9 @@ #ifndef AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_ #define AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_ +#include +#include + #include "amd_smi/amdsmi.h" #include "amd_smi/impl/amd_smi_gpu_device.h" #include "rocm_smi/rocm_smi_utils.h" @@ -45,4 +48,55 @@ amdsmi_status_t smi_amdgpu_get_pcie_speed_from_pcie_type(uint16_t pcie_type, uin amdsmi_status_t 
smi_amdgpu_get_market_name_from_dev_id(uint32_t device_id, char *market_name); amdsmi_status_t smi_amdgpu_is_gpu_power_management_enabled(amd::smi::AMDSmiGPUDevice* device, bool *enabled); + +template +constexpr bool is_dependent_false_v = false; + +template +inline constexpr bool is_supported_type_v = ( + std::is_same_v>, std::uint8_t> || + std::is_same_v>, std::uint16_t> || + std::is_same_v>, std::uint32_t> || + std::is_same_v>, std::uint64_t> +); + +template +constexpr T get_std_num_limit() +{ + if constexpr (is_supported_type_v) { + return std::numeric_limits::max(); + } + else { + return std::numeric_limits::min(); + static_assert(is_dependent_false_v, "Error: Type not supported..."); + } +} + +template +constexpr bool is_std_num_limit(T value) +{ + return (value == get_std_num_limit()); +} + +template +constexpr T translate_umax_or_assign_value(U source_value, V target_value) +{ + T result{}; + if constexpr (is_supported_type_v && is_supported_type_v) { + // If the source value is uint::max(), then return is uint::max() + if (is_std_num_limit(source_value)) { + result = get_std_num_limit(); + } else { + result = static_cast(target_value); + } + + return result; + } + else { + static_assert(is_dependent_false_v, "Error: Type not supported..."); + } + + return result; +} + #endif // diff --git a/py-interface/README.md b/py-interface/README.md index fbc5450c61..7d9fd5908e 100644 --- a/py-interface/README.md +++ b/py-interface/README.md @@ -580,7 +580,7 @@ Output: Dictionary with fields Field | Description ---|--- -`fw_list`| List of dictionaries that contain information about a certain firmware block +`fw_list` | List of dictionaries that contain information about a certain firmware block Exceptions that can be thrown by `amdsmi_get_fw_info` function: @@ -619,7 +619,7 @@ Output: Dictionary of activites to their respective usage percentage or 'N/A' if Field | Description ---|--- -`gfx_activity`| graphics engine usage percentage (0 - 100) +`gfx_activity` | 
graphics engine usage percentage (0 - 100) `umc_activity` | memory engine usage percentage (0 - 100) `mm_activity` | average multimedia engine usages in percentage (0 - 100) @@ -659,7 +659,7 @@ Output: Dictionary with fields Field | Description ---|--- -`average_socket_power`| average socket power +`average_socket_power` | average socket power `gfx_voltage` | voltage gfx `power_limit` | power limit @@ -699,7 +699,7 @@ Output: Dictionary with fields Field | Description ---|--- `vram_total` | VRAM total -`vram_used`| VRAM currently in use +`vram_used` | VRAM currently in use Exceptions that can be thrown by `amdsmi_get_gpu_vram_usage` function: @@ -751,7 +751,7 @@ Output: Dictionary with fields Field | Description ---|--- -`cur_clk`| Current clock for given clock type +`cur_clk` | Current clock for given clock type `max_clk` | Maximum clock for given clock type `min_clk` | Minimum clock for given clock type @@ -780,20 +780,19 @@ except AmdSmiException as e: ### amdsmi_get_pcie_info -Description: Returns the pcie link status for the given GPU. +Description: Returns the pcie metric and static information for the given GPU. It is not supported on virtual machine guest Input parameters: * `processor_handle` device which to query -Output: Dictionary with fields +Output: Dictionary with 2 fields `pcie_static` and `pcie_metric` -Field | Description +Fields | Description ---|--- -`pcie_width`| pcie lanes in use -`pcie_speed`| current pcie speed -`pcie_interface_version`| current pcie generation +`pcie_static` |
Subfield Description
`max_pcie_width`Maximum number of pcie lanes available
`max_pcie_speed`Maximum capable pcie speed in MT/s
`pcie_interface_version`PCIe generation, e.g. 3, 4, 5, ...
`slot_type`The type of form factor of the slot: PCIE, OAM, or Unknown
+`pcie_metric` |
Subfield Description
`pcie_width`Current number of pcie lanes in use
`pcie_speed`Current pcie speed in MT/s
`pcie_bandwidth`Current instantaneous bandwidth usage in Mb/s
`pcie_replay_count`Total number of PCIe replays (NAKs)
`pcie_l0_to_recovery_count`PCIe L0 to recovery state transition accumulated count
`pcie_replay_roll_over_count`PCIe replay rollover accumulated count
`pcie_nak_sent_count`PCIe NAK sent accumulated count
`pcie_nak_received_count`PCIe NAK received accumulated count
Exceptions that can be thrown by `amdsmi_get_pcie_info` function: @@ -810,10 +809,9 @@ try: print("No GPUs on machine") else: for device in devices: - pcie_link_status = amdsmi_get_pcie_info(device) - print(pcie_link_status["pcie_width"]) - print(pcie_link_status["pcie_speed"]) - print(pcie_link_status["pcie_interface_version"]) + pcie_info = amdsmi_get_pcie_info(device) + print(pcie_info["pcie_static"]) + print(pcie_info["pcie_metric"]) except AmdSmiException as e: print(e) ``` @@ -949,8 +947,8 @@ Output: Dictionary with fields Field | Description ---|--- -`correctable_count`| Correctable ECC error count -`uncorrectable_count`| Uncorrectable ECC error count +`correctable_count` | Correctable ECC error count +`uncorrectable_count` | Uncorrectable ECC error count Exceptions that can be thrown by `amdsmi_get_gpu_total_ecc_count` function: @@ -2021,9 +2019,9 @@ Output: Dictionary with fields Field | Description ---|--- -`num_supported`| The number of supported frequencies -`current`| The current frequency index -`frequency`| List of frequencies, only the first num_supported frequencies are valid +`num_supported` | The number of supported frequencies +`current` | The current frequency index +`frequency` | List of frequencies, only the first num_supported frequencies are valid Exceptions that can be thrown by `amdsmi_get_clk_freq` function: @@ -2062,8 +2060,8 @@ Field | Description `curr_mclk_range` |
Subfield Description
`lower_bound`lower bound mclk range
`upper_bound`upper bound mclk range
`sclk_freq_limits` |
Subfield Description
`lower_bound`lower bound sclk range limit
`upper_bound`upper bound sclk range limit
`mclk_freq_limits` |
Subfield Description
`lower_bound`lower bound mclk range limit
`upper_bound`upper bound mclk range limit
-`curve.vc_points`| The number of supported frequencies -`num_regions`| The current frequency index +`curve.vc_points` | The number of supported frequencies +`num_regions` | The current frequency index Exceptions that can be thrown by `amdsmi_get_gpu_od_volt_info` function: @@ -2228,9 +2226,9 @@ Output: Dictionary with fields Field | Description ---|--- -`available_profiles`| Which profiles are supported by this system -`current`| Which power profile is currently active -`num_profiles`| How many power profiles are available +`available_profiles` | Which profiles are supported by this system +`current` | Which power profile is currently active +`num_profiles` | How many power profiles are available Exceptions that can be thrown by `amdsmi_get_gpu_power_profile_presets` function: @@ -2391,9 +2389,9 @@ Output: Dictionary with fields Field | Description ---|--- -`value`| Counter value -`time_enabled`| Time that the counter was enabled in nanoseconds -`time_running`| Time that the counter was running in nanoseconds +`value` | Counter value +`time_enabled` | Time that the counter was enabled in nanoseconds +`time_running` | Time that the counter was running in nanoseconds Exceptions that can be thrown by `amdsmi_gpu_read_counter` function: @@ -2661,8 +2659,8 @@ Output: Dict containing information about error counts Field | Description ---|--- -`correctable_count`| Count of correctable errors -`uncorrectable_count`| Count of uncorrectable errors +`correctable_count` | Count of correctable errors +`uncorrectable_count` | Count of uncorrectable errors Exceptions that can be thrown by `amdsmi_get_gpu_ecc_count` function: diff --git a/py-interface/amdsmi_interface.py b/py-interface/amdsmi_interface.py index 7eb501bb64..e27451dab4 100644 --- a/py-interface/amdsmi_interface.py +++ b/py-interface/amdsmi_interface.py @@ -2134,7 +2134,7 @@ def amdsmi_get_pcie_info( ) ) - return { + pcie_info_dict = { "pcie_static": { "max_pcie_width": pcie_info.pcie_static.max_pcie_width, 
"max_pcie_speed": pcie_info.pcie_static.max_pcie_speed, @@ -2153,6 +2153,49 @@ def amdsmi_get_pcie_info( } } + # Check pcie static values for uint max + if pcie_info_dict['pcie_static']['max_pcie_width'] == 0xFFFF: + pcie_info_dict['pcie_static']['max_pcie_width'] = "N/A" + if pcie_info_dict['pcie_static']['max_pcie_speed'] == 0xFFFFFFFF: + pcie_info_dict['pcie_static']['max_pcie_speed'] = "N/A" + if pcie_info_dict['pcie_static']['pcie_interface_version'] == 0xFFFFFFFF: + pcie_info_dict['pcie_static']['pcie_interface_version'] = "N/A" + + slot_type = pcie_info_dict['pcie_static']['slot_type'] + if isinstance(slot_type, int): + slot_types = amdsmi_wrapper.amdsmi_card_form_factor_t__enumvalues + if slot_type in slot_types: + pcie_info_dict['pcie_static']['slot_type'] = slot_types[slot_type].replace("AMDSMI_CARD_FORM_FACTOR_", "") + else: + pcie_info_dict['pcie_static']['slot_type'] = "Unknown" + else: + pcie_info_dict['pcie_static']['slot_type'] = "N/A" + + # Check pcie metric values for uint max + if pcie_info_dict['pcie_metric']['pcie_width'] == 0xFFFF: + pcie_info_dict['pcie_metric']['pcie_width'] = "N/A" + if pcie_info_dict['pcie_metric']['pcie_speed'] == 0xFFFFFFFF: + pcie_info_dict['pcie_metric']['pcie_speed'] = "N/A" + if pcie_info_dict['pcie_metric']['pcie_bandwidth'] == 0xFFFFFFFF: + pcie_info_dict['pcie_metric']['pcie_bandwidth'] = "N/A" + + # TODO Just Navi 21 has a different uint max size for pcie_bandwidth + # if pcie_info_dict['pcie_metric']['pcie_bandwidth'] == 0xFFFFFFFF: + # pcie_info_dict['pcie_metric']['pcie_bandwidth'] = "N/A" + + if pcie_info_dict['pcie_metric']['pcie_replay_count'] == 0xFFFFFFFFFFFFFFFF: + pcie_info_dict['pcie_metric']['pcie_replay_count'] = "N/A" + if pcie_info_dict['pcie_metric']['pcie_l0_to_recovery_count'] == 0xFFFFFFFFFFFFFFFF: + pcie_info_dict['pcie_metric']['pcie_l0_to_recovery_count'] = "N/A" + if pcie_info_dict['pcie_metric']['pcie_replay_roll_over_count'] == 0xFFFFFFFFFFFFFFFF: + 
pcie_info_dict['pcie_metric']['pcie_replay_roll_over_count'] = "N/A" + if pcie_info_dict['pcie_metric']['pcie_nak_sent_count'] == 0xFFFFFFFFFFFFFFFF: + pcie_info_dict['pcie_metric']['pcie_nak_sent_count'] = "N/A" + if pcie_info_dict['pcie_metric']['pcie_nak_received_count'] == 0xFFFFFFFFFFFFFFFF: + pcie_info_dict['pcie_metric']['pcie_nak_received_count'] = "N/A" + + return pcie_info_dict + def amdsmi_get_processor_handle_from_bdf(bdf): bdf = _parse_bdf(bdf) @@ -3275,7 +3318,7 @@ def amdsmi_get_dpm_policy( processor_handle, ctypes.byref(policy) ) ) - + polices = [] for i in range(0, policy.num_supported): id = policy.policies[i].policy_id diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index 392b6188ca..e57ae30cbb 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -2052,9 +2052,33 @@ amdsmi_status_t amdsmi_get_pcie_info(amdsmi_processor_handle processor_handle, a status = smi_amdgpu_get_pcie_speed_from_pcie_type(metric_info.pcie_link_speed, &info->pcie_metric.pcie_speed); // mapping to MT/s } else { // gpu metrics returns pcie link speed in .1 GT/s ex. 
160 vs 16 - info->pcie_metric.pcie_speed = metric_info.pcie_link_speed * 100; + info->pcie_metric.pcie_speed = translate_umax_or_assign_valuepcie_metric.pcie_speed)> + (metric_info.pcie_link_speed, (metric_info.pcie_link_speed * 100)); } + // additional pcie related metrics + /** + * pcie_metric.pcie_bandwidth: MB/s (uint32_t) + * metric_info.pcie_bandwidth_inst: GB/s (uint64_t) + */ + info->pcie_metric.pcie_bandwidth = translate_umax_or_assign_valuepcie_metric.pcie_bandwidth)> + (metric_info.pcie_bandwidth_inst, metric_info.pcie_bandwidth_inst); + info->pcie_metric.pcie_replay_count = metric_info.pcie_replay_count_acc; + info->pcie_metric.pcie_l0_to_recovery_count = metric_info.pcie_l0_to_recov_count_acc; + info->pcie_metric.pcie_replay_roll_over_count = metric_info.pcie_replay_rover_count_acc; + /** + * pcie_metric.pcie_nak_received_count: (uint64_t) + * metric_info.pcie_nak_rcvd_count_acc: (uint32_t) + */ + info->pcie_metric.pcie_nak_received_count = translate_umax_or_assign_valuepcie_metric.pcie_nak_received_count)> + (metric_info.pcie_nak_rcvd_count_acc, (metric_info.pcie_nak_rcvd_count_acc)); + /** + * pcie_metric.pcie_nak_sent_count: (uint64_t) + * metric_info.pcie_nak_sent_count_acc: (uint32_t) + */ + info->pcie_metric.pcie_nak_sent_count = translate_umax_or_assign_valuepcie_metric.pcie_nak_sent_count)> + (metric_info.pcie_nak_sent_count_acc, (metric_info.pcie_nak_sent_count_acc)); + return AMDSMI_STATUS_SUCCESS; }