Revert Major ABI break for amdsmi_get_violation_status()

Changes:
- This restores the original struct field naming for ROCm 7.0, which removes
any major ABI breakage from updates in the 7.0 release.
- A minor ABI break is still required because there were additions to the
header. Refer to the changelog for details on these updates.

Change-Id: If35af74eac6beac8c267d05ce789b7761ed24bff
Signed-off-by: Charis Poag <Charis.Poag@amd.com>


[ROCm/amdsmi commit: d3b73fac82]
Этот коммит содержится в:
Charis Poag
2025-08-12 15:05:46 -05:00
коммит произвёл Arif, Maisam
родитель 2ebf71976e
Коммит 7ab967ec69
5 изменённых файлов: 49 добавлений и 49 удалений
+4 -4
Просмотреть файл
@@ -2569,7 +2569,7 @@ class AMDSMICommands():
throttle_status['hbm_thermal_accumulated'] = violation_status['acc_hbm_thrm']
throttle_status['gfx_clk_below_host_limit_accumulated'] = violation_status['acc_gfx_clk_below_host_limit'] #deprecated
throttle_status['gfx_clk_below_host_limit_power_accumulated'] = self.build_xcp_dict('acc_gfx_clk_below_host_limit_pwr', violation_status, num_partition)
throttle_status['gfx_clk_below_host_limit_thermal_accumulated'] = self.build_xcp_dict('acc_gfx_clk_below_host_limit_thrm', violation_status, num_partition)
throttle_status['gfx_clk_below_host_limit_thermal_accumulated'] = self.build_xcp_dict('acc_gfx_clk_below_host_limit_thm', violation_status, num_partition)
throttle_status['total_gfx_clk_below_host_limit_accumulated'] = self.build_xcp_dict('acc_gfx_clk_below_host_limit_total', violation_status, num_partition)
throttle_status['low_utilization_accumulated'] = self.build_xcp_dict('acc_low_utilization', violation_status, num_partition)
throttle_status['prochot_violation_status'] = self.build_xcp_dict('active_prochot_thrm', violation_status, num_partition)
@@ -2579,7 +2579,7 @@ class AMDSMICommands():
throttle_status['hbm_thermal_violation_status'] = self.build_xcp_dict('active_hbm_thrm', violation_status, num_partition)
throttle_status['gfx_clk_below_host_limit_violation_status'] = self.build_xcp_dict('active_gfx_clk_below_host_limit', violation_status, num_partition) # deprecated
throttle_status['gfx_clk_below_host_limit_power_violation_status'] = self.build_xcp_dict('active_gfx_clk_below_host_limit_pwr', violation_status, num_partition)
throttle_status['gfx_clk_below_host_limit_thermal_violation_status'] = self.build_xcp_dict('active_gfx_clk_below_host_limit_thrm', violation_status, num_partition)
throttle_status['gfx_clk_below_host_limit_thermal_violation_status'] = self.build_xcp_dict('active_gfx_clk_below_host_limit_thm', violation_status, num_partition)
throttle_status['total_gfx_clk_below_host_limit_violation_status'] = self.build_xcp_dict('active_gfx_clk_below_host_limit_total', violation_status, num_partition)
throttle_status['low_utilization_violation_status'] = self.build_xcp_dict('active_low_utilization', violation_status, num_partition)
throttle_status['prochot_violation_activity'] = violation_status['per_prochot_thrm']
@@ -2589,7 +2589,7 @@ class AMDSMICommands():
throttle_status['hbm_thermal_violation_activity'] = violation_status['per_hbm_thrm']
throttle_status['gfx_clk_below_host_limit_violation_activity'] = violation_status['per_gfx_clk_below_host_limit'] # deprecated
throttle_status['gfx_clk_below_host_limit_power_violation_activity'] = self.build_xcp_dict('per_gfx_clk_below_host_limit_pwr', violation_status, num_partition)
throttle_status['gfx_clk_below_host_limit_thermal_violation_activity'] = self.build_xcp_dict('per_gfx_clk_below_host_limit_thrm', violation_status, num_partition)
throttle_status['gfx_clk_below_host_limit_thermal_violation_activity'] = self.build_xcp_dict('per_gfx_clk_below_host_limit_thm', violation_status, num_partition)
throttle_status['total_gfx_clk_below_host_limit_violation_activity'] = self.build_xcp_dict('per_gfx_clk_below_host_limit_total', violation_status, num_partition)
throttle_status['low_utilization_violation_activity'] = self.build_xcp_dict('per_low_utilization', violation_status, num_partition)
@@ -6035,7 +6035,7 @@ class AMDSMICommands():
violation_status['hbm_tviol'] = violations['per_hbm_thrm']
violation_status['gfx_clkviol'] = violations['per_gfx_clk_below_host_limit']
violation_status['gfxclk_pviol'] = violations['per_gfx_clk_below_host_limit_pwr']
violation_status['gfxclk_tviol'] = violations['per_gfx_clk_below_host_limit_thrm']
violation_status['gfxclk_tviol'] = violations['per_gfx_clk_below_host_limit_thm']
violation_status['gfxclk_totalviol'] = violations['per_gfx_clk_below_host_limit_total']
violation_status['low_utilviol'] = violations['per_low_utilization']
except amdsmi_exception.AmdSmiLibraryException as e:
+3 -3
Просмотреть файл
@@ -714,17 +714,17 @@ typedef struct {
Gfx clock below host limit violation; 1 = active 0 = not active; Max uint8 means unsupported.*/
//GPU metrics 1.8 violations
uint64_t acc_gfx_clk_below_host_limit_pwr[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Current gfx clock below host limit power count; Max uint64 means unsupported
uint64_t acc_gfx_clk_below_host_limit_thrm[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Current gfx clock below host limit thermal count; Max uint64 means unsupported
uint64_t acc_gfx_clk_below_host_limit_thm[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Current gfx clock below host limit thermal count; Max uint64 means unsupported
uint64_t acc_low_utilization[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Current low utilization count; Max uint64 means unsupported
uint64_t acc_gfx_clk_below_host_limit_total[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Current gfx clock below host limit total count; Max uint64 means unsupported
uint64_t per_gfx_clk_below_host_limit_pwr[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Gfx clock below host limit power violation % (greater than 0% is a violation); Max uint64 means unsupported
uint64_t per_gfx_clk_below_host_limit_thrm[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Gfx clock below host limit violation % (greater than 0% is a violation); Max uint64 means unsupported
uint64_t per_gfx_clk_below_host_limit_thm[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Gfx clock below host limit violation % (greater than 0% is a violation); Max uint64 means unsupported
uint64_t per_low_utilization[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Low utilization violation % (greater than 0% is a violation); Max uint64 means unsupported
uint64_t per_gfx_clk_below_host_limit_total[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Any Gfx clock below host limit violation % (greater than 0% is a violation); Max uint64 means unsupported
uint8_t active_gfx_clk_below_host_limit_pwr[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Gfx clock below host limit power violation; 1 = active 0 = not active; Max uint8 means unsupported
uint8_t active_gfx_clk_below_host_limit_thrm[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Gfx clock below host limit thermal violation; 1 = active 0 = not active; Max uint8 means unsupported
uint8_t active_gfx_clk_below_host_limit_thm[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Gfx clock below host limit thermal violation; 1 = active 0 = not active; Max uint8 means unsupported
uint8_t active_low_utilization[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; //!< New Driver 1.8 fields: Low utilization violation; 1 = active 0 = not active; Max uint8 means unsupported
uint8_t active_gfx_clk_below_host_limit_total[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC];//!< New Driver 1.8 fields: Any Gfx clock host limit violation; 1 = active 0 = not active; Max uint8 means unsupported
uint64_t reserved[AMDSMI_MAX_NUM_XCP][AMDSMI_MAX_NUM_XCC]; // reserved for new violation info
+12 -12
Просмотреть файл
@@ -2448,7 +2448,7 @@ def amdsmi_get_violation_status(
"acc_hbm_thrm": _validate_if_max_uint(violation_status.acc_hbm_thrm, MaxUIntegerTypes.UINT64_T),
"acc_gfx_clk_below_host_limit": _validate_if_max_uint(violation_status.acc_gfx_clk_below_host_limit, MaxUIntegerTypes.UINT64_T),
"acc_gfx_clk_below_host_limit_pwr": list(violation_status.acc_gfx_clk_below_host_limit_pwr),
"acc_gfx_clk_below_host_limit_thrm": list(violation_status.acc_gfx_clk_below_host_limit_thrm),
"acc_gfx_clk_below_host_limit_thm": list(violation_status.acc_gfx_clk_below_host_limit_thm),
"acc_gfx_clk_below_host_limit_total": list(violation_status.acc_gfx_clk_below_host_limit_total),
"acc_low_utilization": list(violation_status.acc_low_utilization),
"per_prochot_thrm": _validate_if_max_uint(violation_status.per_prochot_thrm, MaxUIntegerTypes.UINT64_T, isActivity=True),
@@ -2458,7 +2458,7 @@ def amdsmi_get_violation_status(
"per_hbm_thrm": _validate_if_max_uint(violation_status.per_hbm_thrm, MaxUIntegerTypes.UINT64_T, isActivity=True),
"per_gfx_clk_below_host_limit": _validate_if_max_uint(violation_status.per_gfx_clk_below_host_limit, MaxUIntegerTypes.UINT64_T, isActivity=True),
"per_gfx_clk_below_host_limit_pwr": list(violation_status.per_gfx_clk_below_host_limit_pwr),
"per_gfx_clk_below_host_limit_thrm": list(violation_status.per_gfx_clk_below_host_limit_thrm),
"per_gfx_clk_below_host_limit_thm": list(violation_status.per_gfx_clk_below_host_limit_thm),
"per_gfx_clk_below_host_limit_total": list(violation_status.per_gfx_clk_below_host_limit_total),
"per_low_utilization": list(violation_status.per_low_utilization),
"active_prochot_thrm": _validate_if_max_uint(violation_status.active_prochot_thrm, MaxUIntegerTypes.UINT8_T, isBool=True),
@@ -2468,7 +2468,7 @@ def amdsmi_get_violation_status(
"active_hbm_thrm": _validate_if_max_uint(violation_status.active_hbm_thrm, MaxUIntegerTypes.UINT8_T, isBool=True),
"active_gfx_clk_below_host_limit": _validate_if_max_uint(violation_status.active_gfx_clk_below_host_limit, MaxUIntegerTypes.UINT8_T, isBool=True),
"active_gfx_clk_below_host_limit_pwr": list(violation_status.active_gfx_clk_below_host_limit_pwr),
"active_gfx_clk_below_host_limit_thrm": list(violation_status.active_gfx_clk_below_host_limit_thrm),
"active_gfx_clk_below_host_limit_thm": list(violation_status.active_gfx_clk_below_host_limit_thm),
"active_gfx_clk_below_host_limit_total": list(violation_status.active_gfx_clk_below_host_limit_total),
"active_low_utilization": list(violation_status.active_low_utilization),
}
@@ -2480,12 +2480,12 @@ def amdsmi_get_violation_status(
for val in xcp_metrics:
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T))
dict_return['acc_gfx_clk_below_host_limit_pwr'][xcp_index] = xcp_detail
if 'acc_gfx_clk_below_host_limit_thrm' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['acc_gfx_clk_below_host_limit_thrm']):
if 'acc_gfx_clk_below_host_limit_thm' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['acc_gfx_clk_below_host_limit_thm']):
xcp_detail = []
for val in xcp_metrics:
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T))
dict_return['acc_gfx_clk_below_host_limit_thrm'][xcp_index] = xcp_detail
dict_return['acc_gfx_clk_below_host_limit_thm'][xcp_index] = xcp_detail
if 'acc_low_utilization' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['acc_low_utilization']):
xcp_detail = []
@@ -2505,12 +2505,12 @@ def amdsmi_get_violation_status(
for val in xcp_metrics:
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T, isActivity=True))
dict_return['per_gfx_clk_below_host_limit_pwr'][xcp_index] = xcp_detail
if 'per_gfx_clk_below_host_limit_thrm' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['per_gfx_clk_below_host_limit_thrm']):
if 'per_gfx_clk_below_host_limit_thm' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['per_gfx_clk_below_host_limit_thm']):
xcp_detail = []
for val in xcp_metrics:
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T, isActivity=True))
dict_return['per_gfx_clk_below_host_limit_thrm'][xcp_index] = xcp_detail
dict_return['per_gfx_clk_below_host_limit_thm'][xcp_index] = xcp_detail
if 'per_low_utilization' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['per_low_utilization']):
xcp_detail = []
@@ -2530,12 +2530,12 @@ def amdsmi_get_violation_status(
for val in xcp_metrics:
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT8_T, isBool=True))
dict_return['active_gfx_clk_below_host_limit_pwr'][xcp_index] = xcp_detail
if 'active_gfx_clk_below_host_limit_thrm' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['active_gfx_clk_below_host_limit_thrm']):
if 'active_gfx_clk_below_host_limit_thm' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['active_gfx_clk_below_host_limit_thm']):
xcp_detail = []
for val in xcp_metrics:
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT8_T, isBool=True))
dict_return['active_gfx_clk_below_host_limit_thrm'][xcp_index] = xcp_detail
dict_return['active_gfx_clk_below_host_limit_thm'][xcp_index] = xcp_detail
if 'active_low_utilization' in dict_return:
for xcp_index, xcp_metrics in enumerate(dict_return['active_low_utilization']):
xcp_detail = []
+24 -24
Просмотреть файл
@@ -871,15 +871,15 @@ struct_amdsmi_violation_status_t._fields_ = [
('active_gfx_clk_below_host_limit', ctypes.c_ubyte),
('PADDING_0', ctypes.c_ubyte * 2),
('acc_gfx_clk_below_host_limit_pwr', ctypes.c_uint64 * 8 * 8),
('acc_gfx_clk_below_host_limit_thrm', ctypes.c_uint64 * 8 * 8),
('acc_gfx_clk_below_host_limit_thm', ctypes.c_uint64 * 8 * 8),
('acc_low_utilization', ctypes.c_uint64 * 8 * 8),
('acc_gfx_clk_below_host_limit_total', ctypes.c_uint64 * 8 * 8),
('per_gfx_clk_below_host_limit_pwr', ctypes.c_uint64 * 8 * 8),
('per_gfx_clk_below_host_limit_thrm', ctypes.c_uint64 * 8 * 8),
('per_gfx_clk_below_host_limit_thm', ctypes.c_uint64 * 8 * 8),
('per_low_utilization', ctypes.c_uint64 * 8 * 8),
('per_gfx_clk_below_host_limit_total', ctypes.c_uint64 * 8 * 8),
('active_gfx_clk_below_host_limit_pwr', ctypes.c_ubyte * 8 * 8),
('active_gfx_clk_below_host_limit_thrm', ctypes.c_ubyte * 8 * 8),
('active_gfx_clk_below_host_limit_thm', ctypes.c_ubyte * 8 * 8),
('active_low_utilization', ctypes.c_ubyte * 8 * 8),
('active_gfx_clk_below_host_limit_total', ctypes.c_ubyte * 8 * 8),
('reserved', ctypes.c_uint64 * 8 * 8),
@@ -902,22 +902,22 @@ amdsmi_frequency_range_t = struct_amdsmi_frequency_range_t
class union_amdsmi_bdf_t(Union):
pass
class struct_amdsmi_bdf_t(Structure):
class struct_bdf_(Structure):
pass
struct_amdsmi_bdf_t._pack_ = 1 # source:False
struct_amdsmi_bdf_t._fields_ = [
struct_bdf_._pack_ = 1 # source:False
struct_bdf_._fields_ = [
('function_number', ctypes.c_uint64, 3),
('device_number', ctypes.c_uint64, 5),
('bus_number', ctypes.c_uint64, 8),
('domain_number', ctypes.c_uint64, 48),
]
class struct_bdf_(Structure):
class struct_amdsmi_bdf_t(Structure):
pass
struct_bdf_._pack_ = 1 # source:False
struct_bdf_._fields_ = [
struct_amdsmi_bdf_t._pack_ = 1 # source:False
struct_amdsmi_bdf_t._fields_ = [
('function_number', ctypes.c_uint64, 3),
('device_number', ctypes.c_uint64, 5),
('bus_number', ctypes.c_uint64, 8),
@@ -962,6 +962,21 @@ amdsmi_card_form_factor_t = ctypes.c_uint32 # enum
class struct_amdsmi_pcie_info_t(Structure):
pass
class struct_pcie_static_(Structure):
pass
struct_pcie_static_._pack_ = 1 # source:False
struct_pcie_static_._fields_ = [
('max_pcie_width', ctypes.c_uint16),
('PADDING_0', ctypes.c_ubyte * 2),
('max_pcie_speed', ctypes.c_uint32),
('pcie_interface_version', ctypes.c_uint32),
('slot_type', amdsmi_card_form_factor_t),
('max_pcie_interface_version', ctypes.c_uint32),
('PADDING_1', ctypes.c_ubyte * 4),
('reserved', ctypes.c_uint64 * 9),
]
class struct_pcie_metric_(Structure):
pass
@@ -982,21 +997,6 @@ struct_pcie_metric_._fields_ = [
('reserved', ctypes.c_uint64 * 12),
]
class struct_pcie_static_(Structure):
pass
struct_pcie_static_._pack_ = 1 # source:False
struct_pcie_static_._fields_ = [
('max_pcie_width', ctypes.c_uint16),
('PADDING_0', ctypes.c_ubyte * 2),
('max_pcie_speed', ctypes.c_uint32),
('pcie_interface_version', ctypes.c_uint32),
('slot_type', amdsmi_card_form_factor_t),
('max_pcie_interface_version', ctypes.c_uint32),
('PADDING_1', ctypes.c_ubyte * 4),
('reserved', ctypes.c_uint64 * 9),
]
struct_amdsmi_pcie_info_t._pack_ = 1 # source:False
struct_amdsmi_pcie_info_t._fields_ = [
('pcie_static', struct_pcie_static_),
+6 -6
Просмотреть файл
@@ -1054,7 +1054,7 @@ amdsmi_status_t amdsmi_get_violation_status(amdsmi_processor_handle processor_ha
fill_2d_array(violation_status->acc_gfx_clk_below_host_limit_pwr,
std::numeric_limits<uint64_t>::max());
fill_2d_array(violation_status->acc_gfx_clk_below_host_limit_thrm,
fill_2d_array(violation_status->acc_gfx_clk_below_host_limit_thm,
std::numeric_limits<uint64_t>::max());
fill_2d_array(violation_status->acc_low_utilization,
std::numeric_limits<uint64_t>::max());
@@ -1063,7 +1063,7 @@ amdsmi_status_t amdsmi_get_violation_status(amdsmi_processor_handle processor_ha
fill_2d_array(violation_status->per_gfx_clk_below_host_limit_pwr,
std::numeric_limits<uint64_t>::max());
fill_2d_array(violation_status->per_gfx_clk_below_host_limit_thrm,
fill_2d_array(violation_status->per_gfx_clk_below_host_limit_thm,
std::numeric_limits<uint64_t>::max());
fill_2d_array(violation_status->per_low_utilization,
std::numeric_limits<uint64_t>::max());
@@ -1072,7 +1072,7 @@ amdsmi_status_t amdsmi_get_violation_status(amdsmi_processor_handle processor_ha
fill_2d_array(violation_status->active_gfx_clk_below_host_limit_pwr,
std::numeric_limits<uint8_t>::max());
fill_2d_array(violation_status->active_gfx_clk_below_host_limit_thrm,
fill_2d_array(violation_status->active_gfx_clk_below_host_limit_thm,
std::numeric_limits<uint8_t>::max());
fill_2d_array(violation_status->active_low_utilization,
std::numeric_limits<uint8_t>::max());
@@ -1184,7 +1184,7 @@ amdsmi_status_t amdsmi_get_violation_status(amdsmi_processor_handle processor_ha
};
copy_xcp_metric(metric_info_b.xcp_stats, violation_status->acc_gfx_clk_below_host_limit_pwr,
&amdsmi_gpu_xcp_metrics_t::gfx_below_host_limit_ppt_acc);
copy_xcp_metric(metric_info_b.xcp_stats, violation_status->acc_gfx_clk_below_host_limit_thrm,
copy_xcp_metric(metric_info_b.xcp_stats, violation_status->acc_gfx_clk_below_host_limit_thm,
&amdsmi_gpu_xcp_metrics_t::gfx_below_host_limit_thm_acc);
copy_xcp_metric(metric_info_b.xcp_stats, violation_status->acc_low_utilization,
&amdsmi_gpu_xcp_metrics_t::gfx_low_utilization_acc);
@@ -1428,13 +1428,13 @@ amdsmi_status_t amdsmi_get_violation_status(amdsmi_processor_handle processor_ha
metric_members,
{
std::ref(violation_status->per_gfx_clk_below_host_limit_pwr),
std::ref(violation_status->per_gfx_clk_below_host_limit_thrm),
std::ref(violation_status->per_gfx_clk_below_host_limit_thm),
std::ref(violation_status->per_low_utilization),
std::ref(violation_status->per_gfx_clk_below_host_limit_total)
},
{
std::ref(violation_status->active_gfx_clk_below_host_limit_pwr),
std::ref(violation_status->active_gfx_clk_below_host_limit_thrm),
std::ref(violation_status->active_gfx_clk_below_host_limit_thm),
std::ref(violation_status->active_low_utilization),
std::ref(violation_status->active_gfx_clk_below_host_limit_total)
});