Refactor gpu_metrics usage in CLI

Signed-off-by: Maisam Arif <maisarif@amd.com>
Change-Id: I599878971ab94a768d008f046f2d303ad76fdb3b
This commit is contained in:
Maisam Arif
2023-11-22 03:32:55 -06:00
bovenliggende d790ebc62b
commit 5b36b438b7
6 gewijzigde bestanden met toevoegingen van 165 en 80 verwijderingen
+67 -22
Bestand weergeven
@@ -505,12 +505,16 @@ class AMDSMICommands():
ras_info = amdsmi_interface.amdsmi_get_gpu_ras_feature_info(args.gpu)
for key, value in ras_info.items():
if isinstance(value, int):
if value == 65535 or value == 0:
if value == 65535:
logging.debug(f"Failed to get ras {key} for gpu {gpu_id}")
ras_info[key] = "N/A"
continue
if self.logger.is_human_readable_format():
ras_info[key] = f"{value}"
if key != "eeprom_version":
if value:
ras_info[key] = "ENABLED"
else:
ras_info[key] = "DISABLED"
ras_dict.update(ras_info)
except amdsmi_exception.AmdSmiLibraryException as e:
logging.debug("Failed to get ras info for gpu %s | %s", gpu_id, e.get_error_info())
@@ -981,12 +985,13 @@ class AMDSMICommands():
'current_soc_voltage': "N/A",
'current_mem_voltage': "N/A",
'power_limit': "N/A",
'power_management': "N/A"}
'power_management': "N/A",
'throttle_status': "N/A"}
try:
power_info = amdsmi_interface.amdsmi_get_power_info(args.gpu)
for key, value in power_info.items():
if value == 0xFFFFFFFF:
if value == 0xFFFF:
power_info[key] = "N/A"
elif self.logger.is_human_readable_format():
if "voltage" in key:
@@ -994,7 +999,11 @@ class AMDSMICommands():
elif "power" in key:
power_info[key] = f"{value} W"
power_dict['current_power'] = power_info['average_socket_power']
power_dict['current_power'] = power_info['current_socket_power']
if power_dict['current_power'] == "N/A":
power_dict['current_power'] = power_info['average_socket_power']
power_dict['current_gfx_voltage'] = power_info['gfx_voltage']
power_dict['current_soc_voltage'] = power_info['soc_voltage']
power_dict['current_mem_voltage'] = power_info['mem_voltage']
@@ -1012,6 +1021,16 @@ class AMDSMICommands():
except amdsmi_exception.AmdSmiLibraryException as e:
logging.debug("Failed to get power management status for gpu %s | %s", gpu_id, e.get_error_info())
try:
throttle_status = amdsmi_interface.amdsmi_get_gpu_metrics_throttle_status(args.gpu)
if throttle_status:
power_dict['throttle_status'] = "THROTTLED"
else:
power_dict['throttle_status'] = "UNTHROTTLED"
except amdsmi_exception.AmdSmiLibraryException as e:
logging.debug("Failed to get throttle status for gpu %s | %s", gpu_id, e.get_error_info())
values_dict['power'] = power_dict
if "clock" in current_platform_args:
if args.clock:
@@ -1060,8 +1079,12 @@ class AMDSMICommands():
logging.debug("Failed to get %s clock info for gpu %s | %s", clock_name, gpu_id, e.get_error_info())
try:
# is_clk_locked = amdsmi_interface.amdsmi_is_clk_locked(args.gpu, amdsmi_interface.AmdSmiClkType.GFX)
is_clk_locked = "N/A"
is_clk_locked = amdsmi_interface.amdsmi_get_gpu_metrics_gfxclk_lock_status(args.gpu)
if self.logger.is_human_readable_format():
if is_clk_locked:
is_clk_locked = "LOCKED"
else:
is_clk_locked = "UNLOCKED"
except amdsmi_exception.AmdSmiLibraryException as e:
is_clk_locked = "N/A"
logging.debug("Failed to get gfx clock lock status info for gpu %s | %s", gpu_id, e.get_error_info())
@@ -1114,7 +1137,7 @@ class AMDSMICommands():
if self.logger.is_human_readable_format():
unit = '\N{DEGREE SIGN}C'
for temperature_key, temperature_value in temperatures.items():
if 'AMD_SMI_STATUS' not in str(temperature_value):
if 'N/A' not in str(temperature_value):
temperatures[temperature_key] = f"{temperature_value} {unit}"
values_dict['temperature'] = temperatures
@@ -1123,12 +1146,26 @@ class AMDSMICommands():
ecc_count = {}
try:
ecc_count = amdsmi_interface.amdsmi_get_gpu_total_ecc_count(args.gpu)
ecc_count['correctable'] = ecc_count.pop('correctable_count')
ecc_count['uncorrectable'] = ecc_count.pop('uncorrectable_count')
ecc_count['total_correctable'] = ecc_count.pop('correctable_count')
ecc_count['total_uncorrectable'] = ecc_count.pop('uncorrectable_count')
except amdsmi_exception.AmdSmiLibraryException as e:
ecc_count['correctable'] = "N/A"
ecc_count['uncorrectable'] = "N/A"
logging.debug("Failed to get ecc count for gpu %s | %s", gpu_id, e.get_error_info())
ecc_count['total_correctable'] = "N/A"
ecc_count['total_uncorrectable'] = "N/A"
ecc_count['cache_correctable'] = "N/A"
ecc_count['cache_uncorrectable'] = "N/A"
logging.debug("Failed to get total ecc count for gpu %s | %s", gpu_id, e.get_error_info())
if ecc_count['total_correctable'] != "N/A":
# Get the UMC error count for getting total cache correctable errors
umc_block = amdsmi_interface.AmdSmiGpuBlock['UMC']
try:
umc_count = amdsmi_interface.amdsmi_get_gpu_ecc_count(args.gpu, umc_block)
ecc_count['cache_correctable'] = ecc_count['total_correctable'] - umc_count['correctable_count']
ecc_count['cache_uncorrectable'] = ecc_count['total_uncorrectable'] - umc_count['uncorrectable_count']
except amdsmi_exception.AmdSmiLibraryException as e:
ecc_count['cache_correctable'] = "N/A"
ecc_count['cache_uncorrectable'] = "N/A"
logging.debug("Failed to get cache ecc count for gpu %s at block %s | %s", gpu_id, umc_block, e.get_error_info())
values_dict['ecc'] = ecc_count
if "pcie" in current_platform_args:
@@ -1162,23 +1199,28 @@ class AMDSMICommands():
logging.debug("Failed to get pcie link status for gpu %s | %s", gpu_id, e.get_error_info())
try:
pci_replay_counter = amdsmi_interface.amdsmi_get_gpu_pci_replay_counter(args.gpu)
pci_replay_counter = amdsmi_interface.amdsmi_get_gpu_metrics_pcie_replay_count_acc(args.gpu)
pcie_dict['replay_count'] = pci_replay_counter
except amdsmi_exception.AmdSmiLibraryException as e:
logging.debug("Failed to get pci replay counter for gpu %s | %s", gpu_id, e.get_error_info())
logging.debug("Falling back to sysfs pci replay counter for gpu %s | %s", gpu_id, e.get_error_info())
try:
pci_replay_counter = amdsmi_interface.amdsmi_get_gpu_pci_replay_counter(args.gpu)
pcie_dict['replay_count'] = pci_replay_counter
except amdsmi_exception.AmdSmiLibraryException as err:
pcie_dict['replay_count'] = "N/A"
logging.debug("Failed to get sysfs fallback pci replay counter for gpu %s | %s", gpu_id, err.get_error_info())
try:
# l0_to_recovery_counter = amdsmi_interface.amdsmi_get_gpu_pci_l0_to_recovery_counter(args.gpu)
# pcie_dict['l0_to_recovery_count'] = l0_to_recovery_counter
pcie_dict['l0_to_recovery_count'] = "N/A"
l0_to_recovery_counter = amdsmi_interface.amdsmi_get_gpu_metrics_pcie_l0_recov_count_acc(args.gpu)
pcie_dict['l0_to_recovery_count'] = l0_to_recovery_counter
except amdsmi_exception.AmdSmiLibraryException as e:
pcie_dict['l0_to_recovery_count'] = "N/A"
logging.debug("Failed to get pcie l0 to recovery counter for gpu %s | %s", gpu_id, e.get_error_info())
try:
# pci_replay_rollover_counter = amdsmi_interface.amdsmi_get_gpu_pci_replay_rollover_counter(args.gpu)
# pcie_dict['replay_roll_over_count'] = pci_replay_rollover_counter
pcie_dict['replay_roll_over_count'] = "N/A"
pci_replay_rollover_counter = amdsmi_interface.amdsmi_get_gpu_metrics_pcie_replay_rover_count_acc(args.gpu)
pcie_dict['replay_rollover_count'] = pci_replay_rollover_counter
except amdsmi_exception.AmdSmiLibraryException as e:
pcie_dict['replay_roll_over_count'] = "N/A"
logging.debug("Failed to get pcie replay rollover counter for gpu %s | %s", gpu_id, e.get_error_info())
@@ -1702,7 +1744,10 @@ class AMDSMICommands():
try:
dest_gpu_link_status = amdsmi_interface.amdsmi_is_P2P_accessible(src_gpu, dest_gpu)
src_gpu_links[dest_gpu_key] = bool(dest_gpu_link_status)
if dest_gpu_link_status:
src_gpu_links[dest_gpu_key] = "ENABLED"
else:
src_gpu_links[dest_gpu_key] = "DISABLED"
except amdsmi_exception.AmdSmiLibraryException as e:
src_gpu_links[dest_gpu_key] = "N/A"
logging.debug("Failed to get link status for %s to %s | %s",
+1 -1
Bestand weergeven
@@ -248,7 +248,7 @@ class AMDSMIParser(argparse.ArgumentParser):
command_modifier_group.add_argument('--file', action=self._check_output_file_path(), type=str, required=False, help=file_help)
# Placing loglevel outside the subcommands so it can be used with any subcommand
command_modifier_group.add_argument('--loglevel', action='store', required=False, help=loglevel_help, default='ERROR', metavar='LEVEL',
command_modifier_group.add_argument('--loglevel', action='store', type=str.upper, required=False, help=loglevel_help, default='ERROR', metavar='LEVEL',
choices=loglevel_choices)
+8 -7
Bestand weergeven
@@ -525,6 +525,7 @@ typedef struct {
} amdsmi_board_info_t;
typedef struct {
uint32_t current_socket_power;
uint32_t average_socket_power;
uint32_t gfx_voltage; // GFX voltage measurement in mV
uint32_t soc_voltage; // SOC voltage measurement in mV
@@ -2705,16 +2706,16 @@ amdsmi_status_t amdsmi_get_gpu_metrics_info(amdsmi_processor_handle processor_ha
* @brief Get the pm metrics table with provided device index.
*
* @details Given a device handle @p processor_handle, @p pm_metrics pointer,
* and @p num_of_metrics pointer,
* and @p num_of_metrics pointer,
* this function will write the pm metrics name value pair
* to the array at @p pm_metrics and the number of metrics retrieved to @p num_of_metrics
* Note: the library allocated memory for pm_metrics, and user must call
* free(pm_metrics) to free it after use.
*
*
* @param[in] processor_handle a processor handle
*
* @param[inout] pm_metrics A pointer to an array to hold multiple PM metrics. On success,
* the library will allocate memory of pm_metrics and write metrics to this array.
* the library will allocate memory of pm_metrics and write metrics to this array.
* The caller must free this memory after usage to avoid memory leak.
*
* @param[inout] num_of_metrics a pointer to uint32_t to which the number of
@@ -2739,18 +2740,18 @@ amdsmi_status_t amdsmi_get_gpu_pm_metrics_info(
* @brief Get the register metrics table with provided device index and register type.
*
* @details Given a device handle @p processor_handle, @p reg_type, @p reg_metrics pointer,
* and @p num_of_metrics pointer,
* and @p num_of_metrics pointer,
* this function will write the register metrics name value pair
* to the array at @p reg_metrics and the number of metrics retrieved to @p num_of_metrics
* Note: the library allocated memory for reg_metrics, and user must call
* free(reg_metrics) to free it after use.
*
*
* @param[in] processor_handle a processor handle
*
*
* @param[in] reg_type The register type
*
* @param[inout] reg_metrics A pointer to an array to hold multiple register metrics. On success,
* the library will allocate memory of reg_metrics and write metrics to this array.
* the library will allocate memory of reg_metrics and write metrics to this array.
* The caller must free this memory after usage to avoid memory leak.
*
* @param[inout] num_of_metrics a pointer to uint32_t to which the number of
+4
Bestand weergeven
@@ -1320,6 +1320,9 @@ def amdsmi_get_gpu_cache_info(
"cpu_cache": cpu_cache,
"simd_cache": simd_cache}
if cache_info_dict == {}:
raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_NO_DATA)
return cache_info_dict
@@ -1642,6 +1645,7 @@ def amdsmi_get_power_info(
)
return {
"current_socket_power": power_measure.current_socket_power,
"average_socket_power": power_measure.average_socket_power,
"gfx_voltage": power_measure.gfx_voltage,
"soc_voltage": power_measure.soc_voltage,
+75 -39
Bestand weergeven
@@ -866,6 +866,7 @@ class struct_amdsmi_power_info_t(Structure):
struct_amdsmi_power_info_t._pack_ = 1 # source:False
struct_amdsmi_power_info_t._fields_ = [
('current_socket_power', ctypes.c_uint32),
('average_socket_power', ctypes.c_uint32),
('gfx_voltage', ctypes.c_uint32),
('soc_voltage', ctypes.c_uint32),
@@ -1514,6 +1515,31 @@ struct_amdsmi_gpu_metrics_t._fields_ = [
]
amdsmi_gpu_metrics_t = struct_amdsmi_gpu_metrics_t
class struct_amdsmi_name_value_t(Structure):
pass
struct_amdsmi_name_value_t._pack_ = 1 # source:False
struct_amdsmi_name_value_t._fields_ = [
('name', ctypes.c_char * 64),
('value', ctypes.c_uint64),
]
amdsmi_name_value_t = struct_amdsmi_name_value_t
# values for enumeration 'amdsmi_reg_type_t'
amdsmi_reg_type_t__enumvalues = {
0: 'AMDSMI_REG_XGMI',
1: 'AMDSMI_REG_WAFL',
2: 'AMDSMI_REG_PCIE',
3: 'AMDSMI_REG_USR',
4: 'AMDSMI_REG_USR1',
}
AMDSMI_REG_XGMI = 0
AMDSMI_REG_WAFL = 1
AMDSMI_REG_PCIE = 2
AMDSMI_REG_USR = 3
AMDSMI_REG_USR1 = 4
amdsmi_reg_type_t = ctypes.c_uint32 # enum
class struct_amdsmi_ras_feature_t(Structure):
pass
@@ -1824,6 +1850,12 @@ amdsmi_get_gpu_od_volt_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(
amdsmi_get_gpu_metrics_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_metrics_info
amdsmi_get_gpu_metrics_info.restype = amdsmi_status_t
amdsmi_get_gpu_metrics_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_gpu_metrics_t)]
amdsmi_get_gpu_pm_metrics_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_pm_metrics_info
amdsmi_get_gpu_pm_metrics_info.restype = amdsmi_status_t
amdsmi_get_gpu_pm_metrics_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.POINTER(struct_amdsmi_name_value_t)), ctypes.POINTER(ctypes.c_uint32)]
amdsmi_get_gpu_reg_table_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_reg_table_info
amdsmi_get_gpu_reg_table_info.restype = amdsmi_status_t
amdsmi_get_gpu_reg_table_info.argtypes = [amdsmi_processor_handle, amdsmi_reg_type_t, ctypes.POINTER(ctypes.POINTER(struct_amdsmi_name_value_t)), ctypes.POINTER(ctypes.c_uint32)]
amdsmi_set_gpu_clk_range = _libraries['libamd_smi.so'].amdsmi_set_gpu_clk_range
amdsmi_set_gpu_clk_range.restype = amdsmi_status_t
amdsmi_set_gpu_clk_range.argtypes = [amdsmi_processor_handle, uint64_t, uint64_t, amdsmi_clk_type_t]
@@ -2349,7 +2381,9 @@ __all__ = \
'AMDSMI_RAS_ERR_STATE_INVALID', 'AMDSMI_RAS_ERR_STATE_LAST',
'AMDSMI_RAS_ERR_STATE_MULT_UC', 'AMDSMI_RAS_ERR_STATE_NONE',
'AMDSMI_RAS_ERR_STATE_PARITY', 'AMDSMI_RAS_ERR_STATE_POISON',
'AMDSMI_RAS_ERR_STATE_SING_C', 'AMDSMI_SLOT_TYPE__CEM',
'AMDSMI_RAS_ERR_STATE_SING_C', 'AMDSMI_REG_PCIE',
'AMDSMI_REG_USR', 'AMDSMI_REG_USR1', 'AMDSMI_REG_WAFL',
'AMDSMI_REG_XGMI', 'AMDSMI_SLOT_TYPE__CEM',
'AMDSMI_SLOT_TYPE__OAM', 'AMDSMI_SLOT_TYPE__PCIE',
'AMDSMI_SLOT_TYPE__RESERVED', 'AMDSMI_STATUS_ADDRESS_FAULT',
'AMDSMI_STATUS_AMDGPU_RESTART_ERR', 'AMDSMI_STATUS_API_FAILED',
@@ -2414,12 +2448,12 @@ __all__ = \
'FW_ID_DRV_CAP', 'FW_ID_FIRST', 'FW_ID_IMU_DRAM',
'FW_ID_IMU_IRAM', 'FW_ID_ISP', 'FW_ID_MC', 'FW_ID_MES_KIQ',
'FW_ID_MES_STACK', 'FW_ID_MES_THREAD1', 'FW_ID_MES_THREAD1_STACK',
'FW_ID_MMSCH', 'FW_ID_PPTABLE', 'FW_ID_PSP_BL', 'FW_ID_PSP_DBG',
'FW_ID_PSP_INTF', 'FW_ID_PSP_KEYDB', 'FW_ID_PSP_SOC',
'FW_ID_PSP_SOSDRV', 'FW_ID_PSP_SPL', 'FW_ID_PSP_SYSDRV',
'FW_ID_PSP_TOC', 'FW_ID_REG_ACCESS_WHITELIST', 'FW_ID_RLC',
'FW_ID_RLCV_LX7', 'FW_ID_RLC_P', 'FW_ID_RLC_RESTORE_LIST_CNTL',
'FW_ID_RLC_RESTORE_LIST_GPM_MEM',
'FW_ID_MMSCH', 'FW_ID_PM', 'FW_ID_PPTABLE', 'FW_ID_PSP_BL',
'FW_ID_PSP_DBG', 'FW_ID_PSP_INTF', 'FW_ID_PSP_KEYDB',
'FW_ID_PSP_SOC', 'FW_ID_PSP_SOSDRV', 'FW_ID_PSP_SPL',
'FW_ID_PSP_SYSDRV', 'FW_ID_PSP_TOC', 'FW_ID_REG_ACCESS_WHITELIST',
'FW_ID_RLC', 'FW_ID_RLCV_LX7', 'FW_ID_RLC_P',
'FW_ID_RLC_RESTORE_LIST_CNTL', 'FW_ID_RLC_RESTORE_LIST_GPM_MEM',
'FW_ID_RLC_RESTORE_LIST_SRM_MEM', 'FW_ID_RLC_SAVE_RESTORE_LIST',
'FW_ID_RLC_SRLG', 'FW_ID_RLC_SRLS', 'FW_ID_RLC_V', 'FW_ID_RLX6',
'FW_ID_RLX6_CORE1', 'FW_ID_RLX6_DRAM_BOOT',
@@ -2431,34 +2465,34 @@ __all__ = \
'FW_ID_RS64_PFP_P1_DATA', 'FW_ID_SDMA0', 'FW_ID_SDMA1',
'FW_ID_SDMA2', 'FW_ID_SDMA3', 'FW_ID_SDMA4', 'FW_ID_SDMA5',
'FW_ID_SDMA6', 'FW_ID_SDMA7', 'FW_ID_SDMA_TH0', 'FW_ID_SDMA_TH1',
'FW_ID_SEC_POLICY_STAGE2', 'FW_ID_PM', 'FW_ID_SMU',
'FW_ID_TA_RAS', 'FW_ID_TA_XGMI', 'FW_ID_UVD', 'FW_ID_VCE',
'FW_ID_VCN', 'FW_ID__MAX', 'MEMORY_PARTITION_NPS1',
'MEMORY_PARTITION_NPS2', 'MEMORY_PARTITION_NPS4',
'MEMORY_PARTITION_NPS8', 'MEMORY_PARTITION_UNKNOWN',
'NON_AMD_CPU', 'NON_AMD_GPU', 'RD_BW0', 'TEMPERATURE_TYPE_EDGE',
'TEMPERATURE_TYPE_FIRST', 'TEMPERATURE_TYPE_HBM_0',
'TEMPERATURE_TYPE_HBM_1', 'TEMPERATURE_TYPE_HBM_2',
'TEMPERATURE_TYPE_HBM_3', 'TEMPERATURE_TYPE_HOTSPOT',
'TEMPERATURE_TYPE_JUNCTION', 'TEMPERATURE_TYPE_PLX',
'TEMPERATURE_TYPE_VRAM', 'TEMPERATURE_TYPE__MAX', 'UNKNOWN',
'VRAM_TYPE_DDR2', 'VRAM_TYPE_DDR3', 'VRAM_TYPE_DDR4',
'VRAM_TYPE_GDDR1', 'VRAM_TYPE_GDDR3', 'VRAM_TYPE_GDDR4',
'VRAM_TYPE_GDDR5', 'VRAM_TYPE_GDDR6', 'VRAM_TYPE_HBM',
'VRAM_TYPE_UNKNOWN', 'VRAM_TYPE__MAX', 'WR_BW0',
'amd_metrics_table_header_t', 'amdsmi_asic_info_t',
'amdsmi_bdf_t', 'amdsmi_bit_field_t', 'amdsmi_board_info_t',
'amdsmi_cache_flags_type_t', 'amdsmi_clk_info_t',
'amdsmi_clk_type_t', 'amdsmi_compute_partition_type_t',
'amdsmi_container_types_t', 'amdsmi_counter_command_t',
'amdsmi_counter_value_t', 'amdsmi_cpu_apb_disable',
'amdsmi_cpu_apb_enable', 'amdsmi_cpusocket_handle',
'amdsmi_ddr_bw_metrics_t', 'amdsmi_dev_perf_level_t',
'amdsmi_dimm_power_t', 'amdsmi_dimm_thermal_t',
'amdsmi_dpm_level_t', 'amdsmi_driver_info_t',
'amdsmi_engine_usage_t', 'amdsmi_error_count_t',
'amdsmi_event_group_t', 'amdsmi_event_handle_t',
'amdsmi_event_type_t', 'amdsmi_evt_notification_data_t',
'FW_ID_SEC_POLICY_STAGE2', 'FW_ID_SMU', 'FW_ID_TA_RAS',
'FW_ID_TA_XGMI', 'FW_ID_UVD', 'FW_ID_VCE', 'FW_ID_VCN',
'FW_ID__MAX', 'MEMORY_PARTITION_NPS1', 'MEMORY_PARTITION_NPS2',
'MEMORY_PARTITION_NPS4', 'MEMORY_PARTITION_NPS8',
'MEMORY_PARTITION_UNKNOWN', 'NON_AMD_CPU', 'NON_AMD_GPU',
'RD_BW0', 'TEMPERATURE_TYPE_EDGE', 'TEMPERATURE_TYPE_FIRST',
'TEMPERATURE_TYPE_HBM_0', 'TEMPERATURE_TYPE_HBM_1',
'TEMPERATURE_TYPE_HBM_2', 'TEMPERATURE_TYPE_HBM_3',
'TEMPERATURE_TYPE_HOTSPOT', 'TEMPERATURE_TYPE_JUNCTION',
'TEMPERATURE_TYPE_PLX', 'TEMPERATURE_TYPE_VRAM',
'TEMPERATURE_TYPE__MAX', 'UNKNOWN', 'VRAM_TYPE_DDR2',
'VRAM_TYPE_DDR3', 'VRAM_TYPE_DDR4', 'VRAM_TYPE_GDDR1',
'VRAM_TYPE_GDDR3', 'VRAM_TYPE_GDDR4', 'VRAM_TYPE_GDDR5',
'VRAM_TYPE_GDDR6', 'VRAM_TYPE_HBM', 'VRAM_TYPE_UNKNOWN',
'VRAM_TYPE__MAX', 'WR_BW0', 'amd_metrics_table_header_t',
'amdsmi_asic_info_t', 'amdsmi_bdf_t', 'amdsmi_bit_field_t',
'amdsmi_board_info_t', 'amdsmi_cache_flags_type_t',
'amdsmi_clk_info_t', 'amdsmi_clk_type_t',
'amdsmi_compute_partition_type_t', 'amdsmi_container_types_t',
'amdsmi_counter_command_t', 'amdsmi_counter_value_t',
'amdsmi_cpu_apb_disable', 'amdsmi_cpu_apb_enable',
'amdsmi_cpusocket_handle', 'amdsmi_ddr_bw_metrics_t',
'amdsmi_dev_perf_level_t', 'amdsmi_dimm_power_t',
'amdsmi_dimm_thermal_t', 'amdsmi_dpm_level_t',
'amdsmi_driver_info_t', 'amdsmi_engine_usage_t',
'amdsmi_error_count_t', 'amdsmi_event_group_t',
'amdsmi_event_handle_t', 'amdsmi_event_type_t',
'amdsmi_evt_notification_data_t',
'amdsmi_evt_notification_type_t',
'amdsmi_first_online_core_on_cpu_socket', 'amdsmi_freq_ind_t',
'amdsmi_freq_volt_region_t', 'amdsmi_frequencies_t',
@@ -2561,10 +2595,12 @@ __all__ = \
'amdsmi_get_gpu_pci_bandwidth',
'amdsmi_get_gpu_pci_replay_counter',
'amdsmi_get_gpu_pci_throughput', 'amdsmi_get_gpu_perf_level',
'amdsmi_get_gpu_pm_metrics_info',
'amdsmi_get_gpu_power_profile_presets',
'amdsmi_get_gpu_process_info', 'amdsmi_get_gpu_process_list',
'amdsmi_get_gpu_ras_block_features_enabled',
'amdsmi_get_gpu_ras_feature_info', 'amdsmi_get_gpu_revision',
'amdsmi_get_gpu_ras_feature_info',
'amdsmi_get_gpu_reg_table_info', 'amdsmi_get_gpu_revision',
'amdsmi_get_gpu_subsystem_id', 'amdsmi_get_gpu_subsystem_name',
'amdsmi_get_gpu_topo_numa_affinity',
'amdsmi_get_gpu_total_ecc_count', 'amdsmi_get_gpu_vbios_info',
@@ -2590,7 +2626,7 @@ __all__ = \
'amdsmi_is_gpu_power_management_enabled',
'amdsmi_link_id_bw_type_t', 'amdsmi_memory_page_status_t',
'amdsmi_memory_partition_type_t', 'amdsmi_memory_type_t',
'amdsmi_mm_ip_t', 'amdsmi_od_vddc_point_t',
'amdsmi_mm_ip_t', 'amdsmi_name_value_t', 'amdsmi_od_vddc_point_t',
'amdsmi_od_volt_curve_t', 'amdsmi_od_volt_freq_data_t',
'amdsmi_pcie_bandwidth_t', 'amdsmi_pcie_info_t',
'amdsmi_pcie_slot_type_t', 'amdsmi_power_cap_info_t',
@@ -2599,7 +2635,7 @@ __all__ = \
'amdsmi_proc_info_t', 'amdsmi_process_handle_t',
'amdsmi_process_info_t', 'amdsmi_processor_handle',
'amdsmi_range_t', 'amdsmi_ras_err_state_t',
'amdsmi_ras_feature_t', 'amdsmi_reset_gpu',
'amdsmi_ras_feature_t', 'amdsmi_reg_type_t', 'amdsmi_reset_gpu',
'amdsmi_reset_gpu_compute_partition', 'amdsmi_reset_gpu_fan',
'amdsmi_reset_gpu_memory_partition',
'amdsmi_reset_gpu_xgmi_error', 'amdsmi_retired_page_record_t',
@@ -2646,7 +2682,7 @@ __all__ = \
'struct_amdsmi_freq_volt_region_t', 'struct_amdsmi_frequencies_t',
'struct_amdsmi_frequency_range_t', 'struct_amdsmi_fw_info_t',
'struct_amdsmi_gpu_cache_info_t', 'struct_amdsmi_gpu_metrics_t',
'struct_amdsmi_link_id_bw_type_t',
'struct_amdsmi_link_id_bw_type_t', 'struct_amdsmi_name_value_t',
'struct_amdsmi_od_vddc_point_t', 'struct_amdsmi_od_volt_curve_t',
'struct_amdsmi_od_volt_freq_data_t',
'struct_amdsmi_pcie_bandwidth_t', 'struct_amdsmi_pcie_info_t',
+10 -11
Bestand weergeven
@@ -1859,22 +1859,21 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_inf
if (status != AMDSMI_STATUS_SUCCESS)
return status;
info->average_socket_power = 0xFFFFFFFF;
info->gfx_voltage = 0xFFFFFFFF;
info->soc_voltage = 0xFFFFFFFF; // Not implmented yet
info->mem_voltage = 0xFFFFFFFF; // Not implmented yet
info->power_limit = 0xFFFFFFFF;
info->current_socket_power = 0xFFFF;
info->average_socket_power = 0xFFFF;
info->gfx_voltage = 0xFFFF;
info->soc_voltage = 0xFFFF;
info->mem_voltage = 0xFFFF;
info->power_limit = 0xFFFF;
amdsmi_gpu_metrics_t metrics = {};
status = amdsmi_get_gpu_metrics_info(processor_handle, &metrics);
if (status == AMDSMI_STATUS_SUCCESS) {
info->current_socket_power = metrics.current_socket_power;
info->average_socket_power = metrics.average_socket_power;
}
int64_t voltage_read = 0;
status = amdsmi_get_gpu_volt_metric(processor_handle, AMDSMI_VOLT_TYPE_VDDGFX, AMDSMI_VOLT_CURRENT, &voltage_read);
if (status == AMDSMI_STATUS_SUCCESS) {
info->gfx_voltage = voltage_read;
info->gfx_voltage = metrics.voltage_gfx;
info->soc_voltage = metrics.voltage_soc;
info->mem_voltage = metrics.voltage_mem;
}
int power_limit = 0;