[SWDEV-565483] Add power profile set/get to amd-smi CLI (#1905)
* Fix exception handling in power profile commands
* Update CHANGELOG.md
* Update amdsmi_parser.py to use -o as the single-character argument for --profile

---------

Co-authored-by: Koushik Billakanti <Koushik.Billakanti@amd.com>
Co-authored-by: gabrpham <Gabriel.Pham@amd.com>
Co-authored-by: Maisam Arif <Maisam.Arif@amd.com>
Este commit está contenido en:
cometido por
GitHub
padre
680a92769c
commit
27be824745
@@ -8,6 +8,41 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
|
||||
### Added
|
||||
|
||||
- **Added Power Profile set/get/reset to amd-smi CLI**.
|
||||
- New `amd-smi static --profile` command to display current and available power profiles.
|
||||
- New `amd-smi set --profile <PROFILE>` command to set the power profile.
|
||||
- New `amd-smi reset --profile` command to reset power profile back to default (bootup default).
|
||||
- Available profiles: CUSTOM, VIDEO, POWER_SAVING, COMPUTE, VR, 3D_FULL_SCREEN, BOOTUP_DEFAULT.
|
||||
|
||||
```console
|
||||
$ amd-smi static --profile
|
||||
GPU: 0
|
||||
POWER_PROFILE:
|
||||
CURRENT: COMPUTE
|
||||
NUM_PROFILES: 7
|
||||
PROFILES:
|
||||
CUSTOM
|
||||
VIDEO
|
||||
POWER_SAVING
|
||||
COMPUTE
|
||||
VR
|
||||
3D_FULL_SCREEN
|
||||
BOOTUP_DEFAULT
|
||||
```
|
||||
|
||||
```console
|
||||
$ sudo amd-smi set --profile VIDEO
|
||||
GPU: 0
|
||||
PROFILE: Successfully set power profile to VIDEO
|
||||
```
|
||||
|
||||
```console
|
||||
$ sudo amd-smi reset --profile
|
||||
GPU: 0
|
||||
RESET_PROFILE:
|
||||
POWER_PROFILE: Successfully reset Power Profile to default (bootup default)
|
||||
```
|
||||
|
||||
- **Added `os_kernel_version` to `amd-smi static --driver` and `amd-smi` output**.
|
||||
- Displays the Linux kernel version from `os.uname().release`.
|
||||
|
||||
|
||||
@@ -393,7 +393,7 @@ class AMDSMICommands():
|
||||
def static_gpu(self, args, multiple_devices=False, gpu=None, asic=None, bus=None, vbios=None,
|
||||
limit=None, driver=None, ras=None, board=None, numa=None, vram=None,
|
||||
cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None,
|
||||
soc_pstate=None, xgmi_plpd=None, process_isolation=None, clock=None):
|
||||
soc_pstate=None, xgmi_plpd=None, process_isolation=None, clock=None, profile=None):
|
||||
"""Get Static information for target gpu
|
||||
|
||||
Args:
|
||||
@@ -485,8 +485,10 @@ class AMDSMICommands():
|
||||
args.soc_pstate = soc_pstate
|
||||
if xgmi_plpd:
|
||||
args.xgmi_plpd = xgmi_plpd
|
||||
current_platform_args += ["ras", "limit", "soc_pstate", "xgmi_plpd"]
|
||||
current_platform_values += [args.ras, args.limit, args.soc_pstate, args.xgmi_plpd]
|
||||
if profile:
|
||||
args.profile = profile
|
||||
current_platform_args += ["ras", "limit", "soc_pstate", "xgmi_plpd", "profile"]
|
||||
current_platform_values += [args.ras, args.limit, args.soc_pstate, args.xgmi_plpd, args.profile]
|
||||
|
||||
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
|
||||
if numa:
|
||||
@@ -956,6 +958,30 @@ class AMDSMICommands():
|
||||
static_dict['policies'] = policies_str
|
||||
else:
|
||||
static_dict['xgmi_plpd'] = policy_info
|
||||
if 'profile' in current_platform_args:
|
||||
if args.profile:
|
||||
try:
|
||||
profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0)
|
||||
|
||||
# Parse available profiles from bitfield
|
||||
available_profiles = self.helpers.parse_available_profiles(
|
||||
profile_status['available_profiles']
|
||||
)
|
||||
|
||||
# Get current profile name
|
||||
current_profile = self.helpers.get_profile_name_from_mask(
|
||||
profile_status['current']
|
||||
)
|
||||
|
||||
# Store output
|
||||
static_dict['profile'] = {
|
||||
'available_profiles': available_profiles,
|
||||
'current': current_profile,
|
||||
'num_profiles': profile_status['num_profiles']
|
||||
}
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
static_dict['profile'] = e.get_error_info()
|
||||
logging.debug("Failed to get power profile info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
if 'process_isolation' in current_platform_args:
|
||||
if args.process_isolation:
|
||||
try:
|
||||
@@ -1265,7 +1291,7 @@ class AMDSMICommands():
|
||||
board=None, numa=None, vram=None, cache=None, partition=None,
|
||||
dfc_ucode=None, fb_info=None, num_vf=None, cpu=None,
|
||||
interface_ver=None, soc_pstate=None, xgmi_plpd = None, process_isolation=None,
|
||||
clock=None):
|
||||
clock=None, profile=None):
|
||||
"""Get Static information for target gpu and cpu
|
||||
|
||||
Args:
|
||||
@@ -1317,7 +1343,7 @@ class AMDSMICommands():
|
||||
gpu_attributes = ["asic", "bus", "vbios", "limit", "driver", "ras",
|
||||
"board", "numa", "vram", "cache", "partition",
|
||||
"dfc_ucode", "fb_info", "num_vf", "soc_pstate", "xgmi_plpd",
|
||||
"process_isolation", "clock"]
|
||||
"process_isolation", "clock", "profile"]
|
||||
for attr in gpu_attributes:
|
||||
if hasattr(args, attr):
|
||||
if getattr(args, attr):
|
||||
@@ -1347,8 +1373,8 @@ class AMDSMICommands():
|
||||
self.static_gpu(args, multiple_devices, gpu, asic,
|
||||
bus, vbios, limit, driver, ras,
|
||||
board, numa, vram, cache, partition,
|
||||
dfc_ucode, fb_info, num_vf, soc_pstate,
|
||||
process_isolation, clock)
|
||||
dfc_ucode, fb_info, num_vf, soc_pstate, xgmi_plpd,
|
||||
process_isolation, clock, profile)
|
||||
elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized
|
||||
if args.cpu == None:
|
||||
args.cpu = self.cpu_handles
|
||||
@@ -1363,7 +1389,7 @@ class AMDSMICommands():
|
||||
bus, vbios, limit, driver, ras,
|
||||
board, numa, vram, cache, partition,
|
||||
dfc_ucode, fb_info, num_vf, soc_pstate, xgmi_plpd,
|
||||
process_isolation, clock)
|
||||
process_isolation, clock, profile)
|
||||
if self.logger.is_json_format():
|
||||
self.logger.combine_arrays_to_json()
|
||||
|
||||
@@ -4778,7 +4804,54 @@ class AMDSMICommands():
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
if args.profile:
|
||||
self.logger.store_output(args.gpu, 'profile', "Not Yet Implemented")
|
||||
try:
|
||||
# Parse profile input (name or number)
|
||||
profile_input = args.profile.upper()
|
||||
name_mapping = self.helpers.get_power_profile_name_mapping()
|
||||
|
||||
if profile_input in name_mapping:
|
||||
profile_mask = name_mapping[profile_input]
|
||||
else:
|
||||
# Invalid profile - show available ones
|
||||
try:
|
||||
profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0)
|
||||
available = self.helpers.parse_available_profiles(profile_status['available_profiles'])
|
||||
available_str = ", ".join(available)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
available_str = "Unable to fetch available profiles"
|
||||
logging.debug(f"Failed to fetch available profiles: {e.get_error_info()}")
|
||||
|
||||
self.logger.store_output(args.gpu, 'profile',
|
||||
f"Invalid profile: {args.profile}\n\nAvailable profiles: {available_str}")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
# Set the profile
|
||||
amdsmi_interface.amdsmi_set_gpu_power_profile(args.gpu, 0, profile_mask)
|
||||
|
||||
self.logger.store_output(args.gpu, 'profile',
|
||||
f"Successfully set power profile to {profile_input}")
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
|
||||
# Get available profiles for error message
|
||||
try:
|
||||
profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0)
|
||||
available = self.helpers.parse_available_profiles(profile_status['available_profiles'])
|
||||
available_str = ", ".join(available)
|
||||
except amdsmi_exception.AmdSmiLibraryException as get_error:
|
||||
available_str = "Unable to fetch available profiles"
|
||||
logging.debug(f"Failed to fetch available profiles: {get_error.get_error_info()}")
|
||||
|
||||
error_msg = f"[{e.get_error_info(detailed=False)}] Unable to set power profile to {args.profile}"
|
||||
self.logger.store_output(args.gpu, 'profile', error_msg)
|
||||
print(f"\nAvailable Power Profiles:\n\t{available_str}\n")
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_output()
|
||||
return
|
||||
|
||||
@@ -890,6 +890,34 @@ class AMDSMIHelpers():
|
||||
return power_profiles_str
|
||||
|
||||
|
||||
def get_power_profile_name_mapping(self):
    """Build the table that maps friendly profile names to preset enum values.

    Returns:
        dict: upper-case profile name (as typed on the command line) mapped
            to the matching member of
            ``amdsmi_interface.AmdSmiPowerProfilePresetMasks``.
    """
    preset_masks = amdsmi_interface.AmdSmiPowerProfilePresetMasks
    # (friendly name, enum member name) pairs. The friendly names are not
    # always derivable from the member names (e.g. 3D_FULL_SCREEN), so the
    # pairing is spelled out explicitly, in display order.
    friendly_to_member = (
        ('CUSTOM', 'CUSTOM_MASK'),
        ('VIDEO', 'VIDEO_MASK'),
        ('POWER_SAVING', 'POWER_SAVING_MASK'),
        ('COMPUTE', 'COMPUTE_MASK'),
        ('VR', 'VR_MASK'),
        ('3D_FULL_SCREEN', 'THREE_D_FULL_SCR_MASK'),
        ('BOOTUP_DEFAULT', 'BOOTUP_DEFAULT'),
    )
    return {
        friendly: getattr(preset_masks, member)
        for friendly, member in friendly_to_member
    }
|
||||
|
||||
|
||||
def get_profile_name_from_mask(self, mask):
    """Translate a profile mask value into its friendly name.

    Args:
        mask: value to look up among the values returned by
            ``get_power_profile_name_mapping``.

    Returns:
        str: the friendly name mapped to ``mask``, or ``'UNKNOWN'`` when no
            entry of the name mapping has that value.
    """
    name_by_mask = {}
    # Invert name -> mask into mask -> name; if two names shared a mask the
    # later entry would win.
    for profile_name, profile_mask in self.get_power_profile_name_mapping().items():
        name_by_mask[profile_mask] = profile_name

    if mask in name_by_mask:
        return name_by_mask[mask]
    return 'UNKNOWN'
|
||||
|
||||
|
||||
def parse_available_profiles(self, available_profiles_bitfield):
    """List the friendly names of every profile flagged in a bitfield.

    Args:
        available_profiles_bitfield: value that is bitwise-ANDed with each
            mask from ``get_power_profile_name_mapping``.

    Returns:
        list: friendly profile names whose mask has a non-zero AND with the
            bitfield, in the iteration order of the name mapping.
    """
    return [
        profile_name
        for profile_name, profile_mask in self.get_power_profile_name_mapping().items()
        if available_profiles_bitfield & profile_mask
    ]
|
||||
|
||||
|
||||
def get_perf_det_levels(self):
|
||||
perf_det_level_str = [level.name for level in amdsmi_interface.AmdSmiDevPerfLevel]
|
||||
if 'UNKNOWN' in perf_det_level_str:
|
||||
|
||||
@@ -908,6 +908,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
soc_pstate_help = "The available soc pstate policy"
|
||||
xgmi_plpd_help = "The available XGMI per-link power down policy"
|
||||
process_isolation_help = "The process isolation status"
|
||||
profile_help = "Display current and available power profiles"
|
||||
clk_options = self.helpers.get_clock_types()[0]
|
||||
clk_options.remove('PCIE')
|
||||
clk_option_str = ", ".join(clk_options) + ", ALL"
|
||||
@@ -958,6 +959,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
static_parser.add_argument('-l', '--limit', action='store_true', required=False, help=limit_help)
|
||||
static_parser.add_argument('-P', '--soc-pstate', action='store_true', required=False, help=soc_pstate_help)
|
||||
static_parser.add_argument('-x', '--xgmi-plpd', action='store_true', required=False, help=xgmi_plpd_help)
|
||||
static_parser.add_argument('-o', '--profile', action='store_true', required=False, help=profile_help)
|
||||
|
||||
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
|
||||
static_parser.add_argument('-u', '--numa', action='store_true', required=False, help=numa_help)
|
||||
|
||||
Referencia en una nueva incidencia
Block a user