Fix powercap default to enum for sensor_ind (#2004)
* Fix powercap default to enum for sensor_ind Signed-off-by: Maisam Arif <Maisam.Arif@amd.com> * [SWDEV-559965] Refactor amdsmi set power cap Modified power cap set to accept args with optional power_cap type. Added power_cap helper validate_and_set_power_cap(). Fixed JSON output format. Signed-off-by: Bindhiya Kanangot Balakrishnan <Bindhiya.KanangotBalakrishnan@amd.com> --------- Signed-off-by: Maisam Arif <Maisam.Arif@amd.com> Signed-off-by: Bindhiya Kanangot Balakrishnan <Bindhiya.KanangotBalakrishnan@amd.com> Co-authored-by: Bindhiya Kanangot Balakrishnan <Bindhiya.KanangotBalakrishnan@amd.com>
このコミットが含まれているのは:
@@ -4983,50 +4983,23 @@ class AMDSMICommands():
|
||||
# Universal args
|
||||
if isinstance(args.power_cap, tuple):
|
||||
pwr_type = args.power_cap.pwr_type
|
||||
pwr_type_as_int = (0 if pwr_type == "ppt0" else 1 if pwr_type == "ppt1" else None)
|
||||
pwr_type = pwr_type.upper()
|
||||
requested_power_cap = args.power_cap.watts
|
||||
try:
|
||||
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu, pwr_type_as_int)
|
||||
logging.debug(f"Power cap info for gpu {gpu_id} | {power_cap_info}")
|
||||
min_power_cap = power_cap_info["min_power_cap"]
|
||||
min_power_cap = self.helpers.convert_SI_unit(min_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
max_power_cap = power_cap_info["max_power_cap"]
|
||||
max_power_cap = self.helpers.convert_SI_unit(max_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
current_power_cap = power_cap_info["power_cap"]
|
||||
current_power_cap = self.helpers.convert_SI_unit(current_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
min_power_cap = "N/A"
|
||||
max_power_cap = "N/A"
|
||||
current_power_cap = "N/A"
|
||||
self.logger.store_output(args.gpu, 'powercap', f"[{e.get_error_info(detailed=False)}] Unable to set {pwr_type} power cap to {requested_power_cap}W")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
if requested_power_cap == current_power_cap:
|
||||
self.logger.store_output(args.gpu, 'powercap', f"{pwr_type} power cap is already set to {requested_power_cap}W")
|
||||
elif current_power_cap == 0:
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Unable to set {pwr_type} power cap to {requested_power_cap}W, current value is {current_power_cap}W")
|
||||
elif requested_power_cap >= min_power_cap and requested_power_cap <= max_power_cap and requested_power_cap > 0:
|
||||
try:
|
||||
new_power_cap = self.helpers.convert_SI_unit(requested_power_cap, AMDSMIHelpers.SI_Unit.BASE,
|
||||
AMDSMIHelpers.SI_Unit.MICRO)
|
||||
amdsmi_interface.amdsmi_set_power_cap(args.gpu, pwr_type_as_int, new_power_cap)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
self.logger.store_output(args.gpu, 'powercap', f"[{e.get_error_info(detailed=False)}] Unable to set {pwr_type} power cap to {requested_power_cap}W")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Successfully set {pwr_type} power cap to {requested_power_cap}W")
|
||||
# If pwr_type is None, default to ppt0 (legacy behavior)
|
||||
if pwr_type is None:
|
||||
pwr_type = "ppt0"
|
||||
pwr_type_as_int = 0
|
||||
else:
|
||||
# setting power cap to 0 will return the current power cap so the technical minimum value is 1
|
||||
if min_power_cap == 0:
|
||||
min_power_cap = 1
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Power cap must be between {min_power_cap}W and {max_power_cap}W")
|
||||
pwr_type_as_int = 0 if pwr_type == "ppt0" else 1
|
||||
|
||||
# Set the power cap for the specified sensor
|
||||
pwr_type_upper = pwr_type.upper()
|
||||
result = self.helpers.validate_and_set_power_cap(
|
||||
args.gpu, pwr_type_as_int, pwr_type_upper, requested_power_cap, self.logger)
|
||||
self.logger.store_output(args.gpu, 'powercap', result)
|
||||
if multiple_devices:
|
||||
self.logger.store_multiple_device_output()
|
||||
return # Skip printing when there are multiple devices
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
@@ -2042,3 +2042,84 @@ class AMDSMIHelpers():
|
||||
type_name, gpu_id, e.get_error_info())
|
||||
|
||||
return base_board_temp_dict
|
||||
|
||||
def validate_and_set_power_cap(self, device_handle, power_type, power_type_key, requested_power_cap, logger):
|
||||
"""Validate and set power cap for a specific sensor.
|
||||
|
||||
Args:
|
||||
device_handle: GPU device handle
|
||||
power_type: Sensor ID (0 for ppt0, 1 for ppt1)
|
||||
power_type_key: Display name for the sensor (e.g., "PPT0")
|
||||
requested_power_cap: Requested power cap value in watts
|
||||
logger: AMDSMILogger instance for format-aware output
|
||||
|
||||
Returns:
|
||||
dict or str: Structured data for JSON/CSV or formatted string for human-readable output
|
||||
"""
|
||||
try:
|
||||
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(device_handle, power_type)
|
||||
gpu_id = self.get_gpu_id_from_device_handle(device_handle)
|
||||
logging.debug(f"Power cap info for gpu {gpu_id} {power_type_key} | {power_cap_info}")
|
||||
|
||||
min_power_cap = self.convert_SI_unit(power_cap_info["min_power_cap"], AMDSMIHelpers.SI_Unit.MICRO)
|
||||
max_power_cap = self.convert_SI_unit(power_cap_info["max_power_cap"], AMDSMIHelpers.SI_Unit.MICRO)
|
||||
current_power_cap = self.convert_SI_unit(power_cap_info["power_cap"], AMDSMIHelpers.SI_Unit.MICRO)
|
||||
|
||||
# Return structured data for JSON/CSV or formatted string for human-readable
|
||||
if requested_power_cap == current_power_cap:
|
||||
if logger.is_json_format() or logger.is_csv_format():
|
||||
return {
|
||||
"status": "already_set",
|
||||
"sensor": power_type_key,
|
||||
"requested_power_cap": self.unit_format(logger, requested_power_cap, "W"),
|
||||
"current_power_cap": self.unit_format(logger, current_power_cap, "W"),
|
||||
"message": f"{power_type_key} power cap is already set to {requested_power_cap}W"
|
||||
}
|
||||
return f"{power_type_key} power cap is already set to {requested_power_cap}W"
|
||||
elif current_power_cap == 0:
|
||||
if logger.is_json_format() or logger.is_csv_format():
|
||||
return {
|
||||
"status": "error",
|
||||
"sensor": power_type_key,
|
||||
"requested_power_cap": self.unit_format(logger, requested_power_cap, "W"),
|
||||
"current_power_cap": self.unit_format(logger, current_power_cap, "W"),
|
||||
"message": f"Unable to set {power_type_key} power cap to {requested_power_cap}W, current value is {current_power_cap}W"
|
||||
}
|
||||
return f"Unable to set {power_type_key} power cap to {requested_power_cap}W, current value is {current_power_cap}W"
|
||||
elif not (min_power_cap < requested_power_cap <= max_power_cap and requested_power_cap > 0):
|
||||
# setting power cap to 0 will return the current power cap so the technical minimum value is 1
|
||||
min_cap_display = 1 if min_power_cap == 0 else min_power_cap
|
||||
if logger.is_json_format() or logger.is_csv_format():
|
||||
return {
|
||||
"status": "error",
|
||||
"sensor": power_type_key,
|
||||
"requested_power_cap": self.unit_format(logger, requested_power_cap, "W"),
|
||||
"min_power_cap": self.unit_format(logger, min_cap_display, "W"),
|
||||
"max_power_cap": self.unit_format(logger, max_power_cap, "W"),
|
||||
"message": f"Power cap must be between {min_cap_display}W and {max_power_cap}W"
|
||||
}
|
||||
return f"Power cap must be between {min_cap_display}W and {max_power_cap}W"
|
||||
# Set the power cap
|
||||
new_power_cap = self.convert_SI_unit(requested_power_cap, AMDSMIHelpers.SI_Unit.BASE, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
amdsmi_interface.amdsmi_set_power_cap(device_handle, power_type, new_power_cap)
|
||||
if logger.is_json_format() or logger.is_csv_format():
|
||||
return {
|
||||
"status": "success",
|
||||
"sensor": power_type_key,
|
||||
"power_cap": self.unit_format(logger, requested_power_cap, "W"),
|
||||
"message": f"Successfully set {power_type_key} power cap to {requested_power_cap}W"
|
||||
}
|
||||
return f"Successfully set {power_type_key} power cap to {requested_power_cap}W"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
error_msg = f"[{e.get_error_info(detailed=False)}] Unable to set {power_type_key} power cap to {requested_power_cap}W"
|
||||
if logger.is_json_format() or logger.is_csv_format():
|
||||
return {
|
||||
"status": "error",
|
||||
"sensor": power_type_key,
|
||||
"requested_power_cap": self.unit_format(logger, requested_power_cap, "W"),
|
||||
"error": e.get_error_info(detailed=False),
|
||||
"message": error_msg
|
||||
}
|
||||
return error_msg
|
||||
|
||||
@@ -300,15 +300,19 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
class AMDSMIPowerCapArgs(argparse.Action):
|
||||
def __call__(self, parser: AMDSMIParser, namespace: argparse.Namespace,
|
||||
values: list, option_string: Optional[str] = None) -> None:
|
||||
if len(values) != 2:
|
||||
if len(values) == 1:
|
||||
# Only wattage provided - set all available power cap types
|
||||
power_cap_value = values[0]
|
||||
power_cap_type = None # None means all available sensors
|
||||
elif len(values) == 2:
|
||||
# Both power type and wattage provided
|
||||
power_cap_value = values[0]
|
||||
power_cap_type = values[1]
|
||||
if power_cap_type not in ['ppt0', 'ppt1']:
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterException(sys.argv[1], power_cap_type, output_format)
|
||||
else:
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterException(sys.argv[1], values, output_format)
|
||||
|
||||
power_cap_type = values[0]
|
||||
power_cap_value = values[1]
|
||||
|
||||
if power_cap_type not in ['ppt0', 'ppt1']:
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterException(sys.argv[1], power_cap_type, output_format)
|
||||
|
||||
if not power_cap_value.isdigit():
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(sys.argv[1], power_cap_value, output_format)
|
||||
|
||||
@@ -1320,7 +1324,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
ptl_format_help_choices_str = ", ".join(self.helpers.get_ptl_values()[0][0:-1])
|
||||
set_ptl_format_help = f"Set the PTL format on a GPU processor. For example, --ptl-format I8,F32\n\tSet to one of the following PTL formats: {ptl_format_help_choices_str}"
|
||||
ppt0_power_cap_min, ppt0_power_cap_max, ppt1_power_cap_min, ppt1_power_cap_max = self.helpers.get_power_caps()
|
||||
set_power_cap_help = f"Set either PPT0 or PPT1 power capacity limit:\n\tEx: `amd-smi set -o ppt0 1300`\n\tPPT0 min cap: {ppt0_power_cap_min}, PPT0 max cap: {ppt0_power_cap_max}\n\tPPT1 min cap: {ppt1_power_cap_min}, PPT1 max cap: {ppt1_power_cap_max}"
|
||||
set_power_cap_help = f"Set either PPT0 or PPT1 power capacity limit:\n\tEx: `amd-smi set -o 1300 ppt0`\n\tPPT0 min cap: {ppt0_power_cap_min}, PPT0 max cap: {ppt0_power_cap_max}\n\tPPT1 min cap: {ppt1_power_cap_min}, PPT1 max cap: {ppt1_power_cap_max}"
|
||||
set_clk_limit_help = "Sets the sclk (aka gfxclk) or mclk minimum and maximum frequencies. \n\tex: amd-smi set -L (sclk | mclk) (min | max) value"
|
||||
set_process_isolation_help = "Enable or disable the GPU process isolation on a per partition basis:\n 0 for disable and 1 for enable.\n"
|
||||
|
||||
@@ -1359,7 +1363,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
required=False, help=set_compute_partition_help, metavar=('TYPE/INDEX'))
|
||||
set_value_exclusive_group.add_argument('-M', '--memory-partition', action='store', choices=self.helpers.get_memory_partition_types(), type=str.upper, required=False, help=set_memory_partition_help, metavar='PARTITION')
|
||||
# Power cap is enabled on guest, maintain order
|
||||
set_value_exclusive_group.add_argument('-o', '--power-cap', action=self._power_cap_options(), nargs=2, required=False, help=set_power_cap_help, metavar=('PWR_TYPE', 'WATTS'))
|
||||
set_value_exclusive_group.add_argument('-o', '--power-cap', action=self._power_cap_options(), nargs='+', required=False, help=set_power_cap_help, metavar=('WATTS', '[PWR_TYPE]'))
|
||||
if self.helpers.is_baremetal():
|
||||
set_value_exclusive_group.add_argument('-p', '--soc-pstate', action='store', required=False, type=lambda value: self._not_negative_int(value, '--soc-pstate'), help=set_soc_pstate_help, metavar='POLICY_ID')
|
||||
set_value_exclusive_group.add_argument('-x', '--xgmi-plpd', action='store', required=False, type=lambda value: self._not_negative_int(value, '--xgmi-plpd'), help=set_xgmi_plpd_help, metavar='POLICY_ID')
|
||||
|
||||
@@ -2191,7 +2191,7 @@ def amdsmi_get_supported_power_cap(
|
||||
|
||||
def amdsmi_get_power_cap_info(
|
||||
processor_handle: processor_handle_t,
|
||||
sensor_ind: int = 0
|
||||
sensor_ind: int = AmdSmiPowerCapType.PPT0
|
||||
) -> Dict[str, Any]:
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
raise AmdSmiParameterException(
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする