[SWDEV-540665] Enable the `set --power-cap` option on Linux guests (#626)
Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Change-Id: I3c8d707681c141390b40521231e0d638c81cdeaf
[ROCm/amdsmi commit: 2d5accd000]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
7ab967ec69
Коммит
4e568b2eea
@@ -4463,8 +4463,9 @@ class AMDSMICommands():
|
||||
command = " ".join(sys.argv[1:])
|
||||
raise AmdSmiRequiredCommandException(command, self.logger.format)
|
||||
else:
|
||||
if not any([args.process_isolation is not None,
|
||||
args.clk_limit is not None]):
|
||||
if not any([args.power_cap is not None,
|
||||
args.clk_limit is not None,
|
||||
args.process_isolation is not None]):
|
||||
command = " ".join(sys.argv[1:])
|
||||
raise AmdSmiRequiredCommandException(command, self.logger.format)
|
||||
|
||||
@@ -4590,7 +4591,6 @@ class AMDSMICommands():
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
if args.memory_partition:
|
||||
####################################################################
|
||||
# Get current and available memory partition modes #
|
||||
@@ -4634,52 +4634,6 @@ class AMDSMICommands():
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
if isinstance(args.power_cap, int):
|
||||
try:
|
||||
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
logging.debug(f"Power cap info for gpu {gpu_id} | {power_cap_info}")
|
||||
min_power_cap = power_cap_info["min_power_cap"]
|
||||
min_power_cap = self.helpers.convert_SI_unit(min_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
max_power_cap = power_cap_info["max_power_cap"]
|
||||
max_power_cap = self.helpers.convert_SI_unit(max_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
current_power_cap = power_cap_info["power_cap"]
|
||||
current_power_cap = self.helpers.convert_SI_unit(current_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
min_power_cap = "N/A"
|
||||
max_power_cap = "N/A"
|
||||
current_power_cap = "N/A"
|
||||
self.logger.store_output(args.gpu, 'powercap', f"[{e.get_error_info(detailed=False)}] Unable to set power cap to {args.power_cap}W")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
if args.power_cap == current_power_cap:
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Power cap is already set to {args.power_cap}W")
|
||||
elif current_power_cap == 0:
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Unable to set power cap to {args.power_cap}W, current value is {current_power_cap}W")
|
||||
elif args.power_cap >= min_power_cap and args.power_cap <= max_power_cap:
|
||||
try:
|
||||
new_power_cap = self.helpers.convert_SI_unit(args.power_cap, AMDSMIHelpers.SI_Unit.BASE,
|
||||
AMDSMIHelpers.SI_Unit.MICRO)
|
||||
amdsmi_interface.amdsmi_set_power_cap(args.gpu, 0, new_power_cap)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
self.logger.store_output(args.gpu, 'powercap', f"[{e.get_error_info(detailed=False)}] Unable to set power cap to {args.power_cap}W")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Successfully set power cap to {args.power_cap}W")
|
||||
else:
|
||||
# setting power cap to 0 will return the current power cap so the technical minimum value is 1
|
||||
if min_power_cap == 0:
|
||||
min_power_cap = 1
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Power cap must be between {min_power_cap}W and {max_power_cap}W")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
if isinstance(args.soc_pstate, int):
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_soc_pstate(args.gpu, args.soc_pstate)
|
||||
@@ -4819,7 +4773,52 @@ class AMDSMICommands():
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
# Universal args
|
||||
if isinstance(args.power_cap, int):
|
||||
try:
|
||||
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
logging.debug(f"Power cap info for gpu {gpu_id} | {power_cap_info}")
|
||||
min_power_cap = power_cap_info["min_power_cap"]
|
||||
min_power_cap = self.helpers.convert_SI_unit(min_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
max_power_cap = power_cap_info["max_power_cap"]
|
||||
max_power_cap = self.helpers.convert_SI_unit(max_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
current_power_cap = power_cap_info["power_cap"]
|
||||
current_power_cap = self.helpers.convert_SI_unit(current_power_cap, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
min_power_cap = "N/A"
|
||||
max_power_cap = "N/A"
|
||||
current_power_cap = "N/A"
|
||||
self.logger.store_output(args.gpu, 'powercap', f"[{e.get_error_info(detailed=False)}] Unable to set power cap to {args.power_cap}W")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
if args.power_cap == current_power_cap:
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Power cap is already set to {args.power_cap}W")
|
||||
elif current_power_cap == 0:
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Unable to set power cap to {args.power_cap}W, current value is {current_power_cap}W")
|
||||
elif args.power_cap >= min_power_cap and args.power_cap <= max_power_cap:
|
||||
try:
|
||||
new_power_cap = self.helpers.convert_SI_unit(args.power_cap, AMDSMIHelpers.SI_Unit.BASE,
|
||||
AMDSMIHelpers.SI_Unit.MICRO)
|
||||
amdsmi_interface.amdsmi_set_power_cap(args.gpu, 0, new_power_cap)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
self.logger.store_output(args.gpu, 'powercap', f"[{e.get_error_info(detailed=False)}] Unable to set power cap to {args.power_cap}W")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Successfully set power cap to {args.power_cap}W")
|
||||
else:
|
||||
# setting power cap to 0 will return the current power cap so the technical minimum value is 1
|
||||
if min_power_cap == 0:
|
||||
min_power_cap = 1
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Power cap must be between {min_power_cap}W and {max_power_cap}W")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
if isinstance(args.clk_limit, tuple):
|
||||
clk_type = args.clk_limit.clk_type
|
||||
lim_type = args.clk_limit.lim_type
|
||||
@@ -4886,7 +4885,6 @@ class AMDSMICommands():
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
if isinstance(args.process_isolation, int):
|
||||
status_string = "Enabled" if args.process_isolation else "Disabled"
|
||||
result = f"Requested process isolation to {status_string}" # This should not print out
|
||||
|
||||
@@ -821,6 +821,7 @@ class AMDSMIHelpers():
|
||||
power_cap_min = power_cap_info['min_power_cap']
|
||||
except amdsmi_interface.AmdSmiLibraryException as e:
|
||||
logging.debug(f"AMDSMIHelpers.get_power_caps - Unable to get power cap info for device {dev}: {str(e)}")
|
||||
power_cap_min, power_cap_max = "N/A", "N/A"
|
||||
continue
|
||||
return (power_cap_min, power_cap_max)
|
||||
|
||||
|
||||
@@ -1220,15 +1220,17 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
memory_partition_choices_str = ", ".join(self.helpers.get_memory_partition_types())
|
||||
set_compute_partition_help = f"Set one of the following the accelerator TYPE or profile INDEX:\n\t{accelerator_set_choices}.\n\tUse `sudo amd-smi partition --accelerator` to find acceptable values."
|
||||
set_memory_partition_help = f"Set one of the following the memory partition modes:\n\t{memory_partition_choices_str}"
|
||||
power_cap_min, power_cap_max = self.helpers.get_power_caps()
|
||||
power_cap_max = self.helpers.convert_SI_unit(power_cap_max, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
power_cap_min = self.helpers.convert_SI_unit(power_cap_min, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
set_power_cap_help = f"Set power capacity limit:\n\tmin cap: {power_cap_min} W, max cap: {power_cap_max} W"
|
||||
soc_pstate_help_info = ", ".join(self.helpers.get_soc_pstates())
|
||||
set_soc_pstate_help = f"Set the GPU soc pstate policy using policy id, an integer. Valid id's include:\n\t{soc_pstate_help_info}"
|
||||
xgmi_plpd_help_info = ", ".join(self.helpers.get_xgmi_plpd_policies())
|
||||
set_xgmi_plpd_help = f"Set the GPU XGMI per-link power down policy using policy id, an integer. Valid id's include:\n\t{xgmi_plpd_help_info}"
|
||||
set_clock_freq_help = "Set one or more sclk (aka gfxclk), mclk, fclk, pcie, or socclk frequency levels.\n\tUse `amd-smi static --clock` to find acceptable levels."
|
||||
power_cap_min, power_cap_max = self.helpers.get_power_caps()
|
||||
if power_cap_max != "N/A":
|
||||
power_cap_max = self.helpers.convert_SI_unit(power_cap_max, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
if power_cap_min != "N/A":
|
||||
power_cap_min = self.helpers.convert_SI_unit(power_cap_min, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
set_power_cap_help = f"Set power capacity limit:\n\tmin cap: {power_cap_min} W, max cap: {power_cap_max} W"
|
||||
set_clk_limit_help = "Sets the sclk (aka gfxclk) or mclk minimum and maximum frequencies. \n\tex: amd-smi set -L (sclk | mclk) (min | max) value"
|
||||
set_process_isolation_help = "Enable or disable the GPU process isolation on a per partition basis: 0 for disable and 1 for enable.\n"
|
||||
|
||||
@@ -1266,7 +1268,9 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_value_exclusive_group.add_argument('-C', '--compute-partition', action='store', choices=accelerator_set_choices, type=lambda value: self._is_command_supported(value, accelerator_set_choices, '--compute-partition'),
|
||||
required=False, help=set_compute_partition_help, metavar=('TYPE/INDEX'))
|
||||
set_value_exclusive_group.add_argument('-M', '--memory-partition', action='store', choices=self.helpers.get_memory_partition_types(), type=str.upper, required=False, help=set_memory_partition_help, metavar='PARTITION')
|
||||
set_value_exclusive_group.add_argument('-o', '--power-cap', action='store', type=lambda value: self._positive_int(value, '--power-cap'), required=False, help=set_power_cap_help, metavar='WATTS')
|
||||
# Power cap is enabled on guest, maintain order
|
||||
set_value_exclusive_group.add_argument('-o', '--power-cap', action='store', type=lambda value: self._positive_int(value, '--power-cap'), required=False, help=set_power_cap_help, metavar='WATTS')
|
||||
if self.helpers.is_baremetal():
|
||||
set_value_exclusive_group.add_argument('-p', '--soc-pstate', action='store', required=False, type=lambda value: self._not_negative_int(value, '--soc-pstate'), help=set_soc_pstate_help, metavar='POLICY_ID')
|
||||
set_value_exclusive_group.add_argument('-x', '--xgmi-plpd', action='store', required=False, type=lambda value: self._not_negative_int(value, '--xgmi-plpd'), help=set_xgmi_plpd_help, metavar='POLICY_ID')
|
||||
set_value_exclusive_group.add_argument('-c', '--clk-level', action=self._level_select(), nargs='+', required=False, help=set_clock_freq_help, metavar=('CLK_TYPE', 'FREQ_LEVELS'))
|
||||
|
||||
Ссылка в новой задаче
Block a user