[SWDEV-565483] Add power profile set/get to amd-smi CLI (#1905)

* Fix exception handling in power profile commands
* Update CHANGELOG.md
* Update amdsmi_parser.py to use -o as the single-character short option for --profile

---------

Co-authored-by: Koushik Billakanti <Koushik.Billakanti@amd.com>
Co-authored-by: gabrpham <Gabriel.Pham@amd.com>
Co-authored-by: Maisam Arif <Maisam.Arif@amd.com>
Šī revīzija ir iekļauta:
systems-assistant[bot]
2026-01-28 22:00:18 -06:00
revīziju iesūtīja GitHub
vecāks 680a92769c
revīzija 27be824745
4 mainīti faili ar 147 papildinājumiem un 9 dzēšanām
+35
Parādīt failu
@@ -8,6 +8,41 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
### Added
- **Added Power Profile set/get/reset to amd-smi CLI**.
  - New `amd-smi static --profile` command to display current and available power profiles.
  - New `amd-smi set --profile <PROFILE>` command to set the power profile.
  - New `amd-smi reset --profile` command to reset the power profile back to default (bootup default).
  - Available profiles: CUSTOM, VIDEO, POWER_SAVING, COMPUTE, VR, 3D_FULL_SCREEN, BOOTUP_DEFAULT.
```console
$ amd-smi static --profile
GPU: 0
    POWER_PROFILE:
        CURRENT: COMPUTE
        NUM_PROFILES: 7
        PROFILES:
            CUSTOM
            VIDEO
            POWER_SAVING
            COMPUTE
            VR
            3D_FULL_SCREEN
            BOOTUP_DEFAULT
```
```console
$ sudo amd-smi set --profile VIDEO
GPU: 0
    PROFILE: Successfully set power profile to VIDEO
```
```console
$ sudo amd-smi reset --profile
GPU: 0
    RESET_PROFILE:
        POWER_PROFILE: Successfully reset Power Profile to default (bootup default)
```
- **Added `os_kernel_version` to `amd-smi static --driver` and `amd-smi` output**.
- Displays the Linux kernel version from `os.uname().release`.
@@ -393,7 +393,7 @@ class AMDSMICommands():
def static_gpu(self, args, multiple_devices=False, gpu=None, asic=None, bus=None, vbios=None,
limit=None, driver=None, ras=None, board=None, numa=None, vram=None,
cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None,
soc_pstate=None, xgmi_plpd=None, process_isolation=None, clock=None):
soc_pstate=None, xgmi_plpd=None, process_isolation=None, clock=None, profile=None):
"""Get Static information for target gpu
Args:
@@ -485,8 +485,10 @@ class AMDSMICommands():
args.soc_pstate = soc_pstate
if xgmi_plpd:
args.xgmi_plpd = xgmi_plpd
current_platform_args += ["ras", "limit", "soc_pstate", "xgmi_plpd"]
current_platform_values += [args.ras, args.limit, args.soc_pstate, args.xgmi_plpd]
if profile:
args.profile = profile
current_platform_args += ["ras", "limit", "soc_pstate", "xgmi_plpd", "profile"]
current_platform_values += [args.ras, args.limit, args.soc_pstate, args.xgmi_plpd, args.profile]
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
if numa:
@@ -956,6 +958,30 @@ class AMDSMICommands():
static_dict['policies'] = policies_str
else:
static_dict['xgmi_plpd'] = policy_info
if 'profile' in current_platform_args:
if args.profile:
try:
profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0)
# Parse available profiles from bitfield
available_profiles = self.helpers.parse_available_profiles(
profile_status['available_profiles']
)
# Get current profile name
current_profile = self.helpers.get_profile_name_from_mask(
profile_status['current']
)
# Store output
static_dict['profile'] = {
'available_profiles': available_profiles,
'current': current_profile,
'num_profiles': profile_status['num_profiles']
}
except amdsmi_exception.AmdSmiLibraryException as e:
static_dict['profile'] = e.get_error_info()
logging.debug("Failed to get power profile info for gpu %s | %s", gpu_id, e.get_error_info())
if 'process_isolation' in current_platform_args:
if args.process_isolation:
try:
@@ -1265,7 +1291,7 @@ class AMDSMICommands():
board=None, numa=None, vram=None, cache=None, partition=None,
dfc_ucode=None, fb_info=None, num_vf=None, cpu=None,
interface_ver=None, soc_pstate=None, xgmi_plpd = None, process_isolation=None,
clock=None):
clock=None, profile=None):
"""Get Static information for target gpu and cpu
Args:
@@ -1317,7 +1343,7 @@ class AMDSMICommands():
gpu_attributes = ["asic", "bus", "vbios", "limit", "driver", "ras",
"board", "numa", "vram", "cache", "partition",
"dfc_ucode", "fb_info", "num_vf", "soc_pstate", "xgmi_plpd",
"process_isolation", "clock"]
"process_isolation", "clock", "profile"]
for attr in gpu_attributes:
if hasattr(args, attr):
if getattr(args, attr):
@@ -1347,8 +1373,8 @@ class AMDSMICommands():
self.static_gpu(args, multiple_devices, gpu, asic,
bus, vbios, limit, driver, ras,
board, numa, vram, cache, partition,
dfc_ucode, fb_info, num_vf, soc_pstate,
process_isolation, clock)
dfc_ucode, fb_info, num_vf, soc_pstate, xgmi_plpd,
process_isolation, clock, profile)
elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized
if args.cpu == None:
args.cpu = self.cpu_handles
@@ -1363,7 +1389,7 @@ class AMDSMICommands():
bus, vbios, limit, driver, ras,
board, numa, vram, cache, partition,
dfc_ucode, fb_info, num_vf, soc_pstate, xgmi_plpd,
process_isolation, clock)
process_isolation, clock, profile)
if self.logger.is_json_format():
self.logger.combine_arrays_to_json()
@@ -4778,7 +4804,54 @@ class AMDSMICommands():
self.logger.clear_multiple_devices_output()
return
if args.profile:
self.logger.store_output(args.gpu, 'profile', "Not Yet Implemented")
try:
# Parse profile input (name or number)
profile_input = args.profile.upper()
name_mapping = self.helpers.get_power_profile_name_mapping()
if profile_input in name_mapping:
profile_mask = name_mapping[profile_input]
else:
# Invalid profile - show available ones
try:
profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0)
available = self.helpers.parse_available_profiles(profile_status['available_profiles'])
available_str = ", ".join(available)
except amdsmi_exception.AmdSmiLibraryException as e:
available_str = "Unable to fetch available profiles"
logging.debug(f"Failed to fetch available profiles: {e.get_error_info()}")
self.logger.store_output(args.gpu, 'profile',
f"Invalid profile: {args.profile}\n\nAvailable profiles: {available_str}")
self.logger.print_output()
self.logger.clear_multiple_devices_output()
return
# Set the profile
amdsmi_interface.amdsmi_set_gpu_power_profile(args.gpu, 0, profile_mask)
self.logger.store_output(args.gpu, 'profile',
f"Successfully set power profile to {profile_input}")
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
# Get available profiles for error message
try:
profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0)
available = self.helpers.parse_available_profiles(profile_status['available_profiles'])
available_str = ", ".join(available)
except amdsmi_exception.AmdSmiLibraryException as get_error:
available_str = "Unable to fetch available profiles"
logging.debug(f"Failed to fetch available profiles: {get_error.get_error_info()}")
error_msg = f"[{e.get_error_info(detailed=False)}] Unable to set power profile to {args.profile}"
self.logger.store_output(args.gpu, 'profile', error_msg)
print(f"\nAvailable Power Profiles:\n\t{available_str}\n")
self.logger.print_output()
self.logger.clear_multiple_devices_output()
return
self.logger.print_output()
self.logger.clear_multiple_devices_output()
return
@@ -890,6 +890,34 @@ class AMDSMIHelpers():
return power_profiles_str
def get_power_profile_name_mapping(self):
    """Return the mapping from CLI power-profile names to preset masks.

    Returns:
        dict: Upper-case profile name (e.g. 'COMPUTE') mapped to the
        corresponding member of
        ``amdsmi_interface.AmdSmiPowerProfilePresetMasks``.
    """
    presets = amdsmi_interface.AmdSmiPowerProfilePresetMasks
    # Insertion order matters: parse_available_profiles() iterates this
    # dict, so this is the order in which profile names are listed.
    return {
        'CUSTOM': presets.CUSTOM_MASK,
        'VIDEO': presets.VIDEO_MASK,
        'POWER_SAVING': presets.POWER_SAVING_MASK,
        'COMPUTE': presets.COMPUTE_MASK,
        'VR': presets.VR_MASK,
        '3D_FULL_SCREEN': presets.THREE_D_FULL_SCR_MASK,
        # Unlike its siblings, this enum member carries no _MASK suffix.
        'BOOTUP_DEFAULT': presets.BOOTUP_DEFAULT,
    }
def get_profile_name_from_mask(self, mask):
    """Convert a single power-profile mask value to its friendly name.

    Args:
        mask: Mask value to look up. It must be hashable and compare equal
            to one of the values returned by
            get_power_profile_name_mapping() to be recognised.

    Returns:
        str: The matching profile name, or 'UNKNOWN' when the mask matches
        no known profile.
    """
    # Invert name -> mask into mask -> name. If two names ever shared a
    # mask, the later entry in the mapping would win.
    reverse_mapping = {
        value: name
        for name, value in self.get_power_profile_name_mapping().items()
    }
    return reverse_mapping.get(mask, 'UNKNOWN')
def parse_available_profiles(self, available_profiles_bitfield):
    """Extract the list of profile names whose bit is set in a bitfield.

    Args:
        available_profiles_bitfield: Value supporting ``&`` with each mask
            returned by get_power_profile_name_mapping().

    Returns:
        list: Profile names whose mask overlaps the bitfield, in the
        iteration order of get_power_profile_name_mapping(). Empty when no
        known bit is set.
    """
    return [
        name
        for name, mask in self.get_power_profile_name_mapping().items()
        if available_profiles_bitfield & mask
    ]
def get_perf_det_levels(self):
perf_det_level_str = [level.name for level in amdsmi_interface.AmdSmiDevPerfLevel]
if 'UNKNOWN' in perf_det_level_str:
@@ -908,6 +908,7 @@ class AMDSMIParser(argparse.ArgumentParser):
soc_pstate_help = "The available soc pstate policy"
xgmi_plpd_help = "The available XGMI per-link power down policy"
process_isolation_help = "The process isolation status"
profile_help = "Display current and available power profiles"
clk_options = self.helpers.get_clock_types()[0]
clk_options.remove('PCIE')
clk_option_str = ", ".join(clk_options) + ", ALL"
@@ -958,6 +959,7 @@ class AMDSMIParser(argparse.ArgumentParser):
static_parser.add_argument('-l', '--limit', action='store_true', required=False, help=limit_help)
static_parser.add_argument('-P', '--soc-pstate', action='store_true', required=False, help=soc_pstate_help)
static_parser.add_argument('-x', '--xgmi-plpd', action='store_true', required=False, help=xgmi_plpd_help)
static_parser.add_argument('-o', '--profile', action='store_true', required=False, help=profile_help)
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
static_parser.add_argument('-u', '--numa', action='store_true', required=False, help=numa_help)