diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index f2ccdb1338..c1d26e3f55 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -8,6 +8,41 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Added +- **Added Power Profile set/get/reset to amd-smi CLI**. + - New `amd-smi static --profile` command to display current and available power profiles. + - New `amd-smi set --profile ` command to set the power profile. + - New `amd-smi reset --profile` command to reset power profile back to default (bootup default). + - Available profiles: CUSTOM, VIDEO, POWER_SAVING, COMPUTE, VR, 3D_FULL_SCREEN, BOOTUP_DEFAULT. + + ```console + $ amd-smi static --profile + GPU: 0 + POWER_PROFILE: + CURRENT: COMPUTE + NUM_PROFILES: 7 + PROFILES: + CUSTOM + VIDEO + POWER_SAVING + COMPUTE + VR + 3D_FULL_SCREEN + BOOTUP_DEFAULT + ``` + + ```console + $ sudo amd-smi set --profile VIDEO + GPU: 0 + PROFILE: Successfully set power profile to VIDEO + ``` + + ```console + $ sudo amd-smi reset --profile + GPU: 0 + RESET_PROFILE: + POWER_PROFILE: Successfully reset Power Profile to default (bootup default) + ``` + - **Added `os_kernel_version` to `amd-smi static --driver` and `amd-smi` output**. - Displays the Linux kernel version from `os.uname().release`. diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index e1a2222362..5806324168 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -393,7 +393,7 @@ class AMDSMICommands(): def static_gpu(self, args, multiple_devices=False, gpu=None, asic=None, bus=None, vbios=None, limit=None, driver=None, ras=None, board=None, numa=None, vram=None, cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None, - soc_pstate=None, xgmi_plpd=None, process_isolation=None, clock=None): + soc_pstate=None, xgmi_plpd=None, process_isolation=None, clock=None, profile=None): """Get Static information for target gpu Args: @@ -485,8 +485,10 @@ class AMDSMICommands(): args.soc_pstate = soc_pstate if xgmi_plpd: args.xgmi_plpd = xgmi_plpd - current_platform_args += ["ras", "limit", "soc_pstate", "xgmi_plpd"] - current_platform_values += [args.ras, args.limit, args.soc_pstate, args.xgmi_plpd] + if profile: + args.profile = profile + current_platform_args += ["ras", "limit", "soc_pstate", "xgmi_plpd", "profile"] + current_platform_values += [args.ras, args.limit, args.soc_pstate, args.xgmi_plpd, args.profile] if self.helpers.is_linux() and not self.helpers.is_virtual_os(): if numa: @@ -956,6 +958,30 @@ class AMDSMICommands(): static_dict['policies'] = policies_str else: static_dict['xgmi_plpd'] = policy_info + if 'profile' in current_platform_args: + if args.profile: + try: + profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0) + + # Parse available profiles from bitfield + available_profiles = self.helpers.parse_available_profiles( + profile_status['available_profiles'] + ) + + # Get current profile name + current_profile = self.helpers.get_profile_name_from_mask( + profile_status['current'] + ) + + # Store output + static_dict['profile'] = { + 'available_profiles': available_profiles, + 'current': current_profile, + 'num_profiles': profile_status['num_profiles'] + } + except amdsmi_exception.AmdSmiLibraryException as e: + static_dict['profile'] = e.get_error_info() + logging.debug("Failed to get power profile info for gpu %s | %s", gpu_id, e.get_error_info()) if 'process_isolation' in current_platform_args: if args.process_isolation: try: @@ -1265,7 +1291,7 @@ class AMDSMICommands(): board=None, numa=None, vram=None, cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None, cpu=None, interface_ver=None, soc_pstate=None, xgmi_plpd = None, process_isolation=None, - clock=None): + clock=None, profile=None): """Get Static information for target gpu and cpu Args: @@ -1317,7 +1343,7 @@ class AMDSMICommands(): gpu_attributes = ["asic", "bus", "vbios", "limit", "driver", "ras", "board", "numa", "vram", "cache", "partition", "dfc_ucode", "fb_info", "num_vf", "soc_pstate", "xgmi_plpd", - "process_isolation", "clock"] + "process_isolation", "clock", "profile"] for attr in gpu_attributes: if hasattr(args, attr): if getattr(args, attr): @@ -1347,8 +1373,8 @@ class AMDSMICommands(): self.static_gpu(args, multiple_devices, gpu, asic, bus, vbios, limit, driver, ras, board, numa, vram, cache, partition, - dfc_ucode, fb_info, num_vf, soc_pstate, - process_isolation, clock) + dfc_ucode, fb_info, num_vf, soc_pstate, xgmi_plpd, + process_isolation, clock, profile) elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized if args.cpu == None: args.cpu = self.cpu_handles @@ -1363,7 +1389,7 @@ class AMDSMICommands(): bus, vbios, limit, driver, ras, board, numa, vram, cache, partition, dfc_ucode, fb_info, num_vf, soc_pstate, xgmi_plpd, - process_isolation, clock) + process_isolation, clock, profile) if self.logger.is_json_format(): self.logger.combine_arrays_to_json() @@ -4778,7 +4804,54 @@ class AMDSMICommands(): self.logger.clear_multiple_devices_output() return if args.profile: - self.logger.store_output(args.gpu, 'profile', "Not Yet Implemented") + try: + # Parse profile input (name or number) + profile_input = args.profile.upper() + name_mapping = self.helpers.get_power_profile_name_mapping() + + if profile_input in name_mapping: + profile_mask = name_mapping[profile_input] + else: + # Invalid profile - show available ones + try: + profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0) + available = self.helpers.parse_available_profiles(profile_status['available_profiles']) + available_str = ", ".join(available) + except amdsmi_exception.AmdSmiLibraryException as e: + available_str = "Unable to fetch available profiles" + logging.debug(f"Failed to fetch available profiles: {e.get_error_info()}") + + self.logger.store_output(args.gpu, 'profile', + f"Invalid profile: {args.profile}\n\nAvailable profiles: {available_str}") + self.logger.print_output() + self.logger.clear_multiple_devices_output() + return + + # Set the profile + amdsmi_interface.amdsmi_set_gpu_power_profile(args.gpu, 0, profile_mask) + + self.logger.store_output(args.gpu, 'profile', + f"Successfully set power profile to {profile_input}") + except amdsmi_exception.AmdSmiLibraryException as e: + if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM: + raise PermissionError('Command requires elevation') from e + + # Get available profiles for error message + try: + profile_status = amdsmi_interface.amdsmi_get_gpu_power_profile_presets(args.gpu, 0) + available = self.helpers.parse_available_profiles(profile_status['available_profiles']) + available_str = ", ".join(available) + except amdsmi_exception.AmdSmiLibraryException as get_error: + available_str = "Unable to fetch available profiles" + logging.debug(f"Failed to fetch available profiles: {get_error.get_error_info()}") + + error_msg = f"[{e.get_error_info(detailed=False)}] Unable to set power profile to {args.profile}" + self.logger.store_output(args.gpu, 'profile', error_msg) + print(f"\nAvailable Power Profiles:\n\t{available_str}\n") + self.logger.print_output() + self.logger.clear_multiple_devices_output() + return + self.logger.print_output() self.logger.clear_multiple_devices_output() return diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py index 9f46c0caf4..d2e4d25a3e 100755 --- a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py @@ -890,6 +890,34 @@ class AMDSMIHelpers(): return power_profiles_str + def get_power_profile_name_mapping(self): + """Returns dict mapping friendly names to enum values""" + return { + 'CUSTOM': amdsmi_interface.AmdSmiPowerProfilePresetMasks.CUSTOM_MASK, + 'VIDEO': amdsmi_interface.AmdSmiPowerProfilePresetMasks.VIDEO_MASK, + 'POWER_SAVING': amdsmi_interface.AmdSmiPowerProfilePresetMasks.POWER_SAVING_MASK, + 'COMPUTE': amdsmi_interface.AmdSmiPowerProfilePresetMasks.COMPUTE_MASK, + 'VR': amdsmi_interface.AmdSmiPowerProfilePresetMasks.VR_MASK, + '3D_FULL_SCREEN': amdsmi_interface.AmdSmiPowerProfilePresetMasks.THREE_D_FULL_SCR_MASK, + 'BOOTUP_DEFAULT': amdsmi_interface.AmdSmiPowerProfilePresetMasks.BOOTUP_DEFAULT, + } + + + def get_profile_name_from_mask(self, mask): + """Convert mask value to friendly name""" + reverse_mapping = {v: k for k, v in self.get_power_profile_name_mapping().items()} + return reverse_mapping.get(mask, 'UNKNOWN') + + + def parse_available_profiles(self, available_profiles_bitfield): + """Extract list of profile names from bitfield""" + profiles = [] + for name, mask in self.get_power_profile_name_mapping().items(): + if available_profiles_bitfield & mask: + profiles.append(name) + return profiles + + def get_perf_det_levels(self): perf_det_level_str = [level.name for level in amdsmi_interface.AmdSmiDevPerfLevel] if 'UNKNOWN' in perf_det_level_str: diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index f172c3a3ae..5b57e44cd5 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -908,6 +908,7 @@ class AMDSMIParser(argparse.ArgumentParser): soc_pstate_help = "The available soc pstate policy" xgmi_plpd_help = "The available XGMI per-link power down policy" process_isolation_help = "The process isolation status" + profile_help = "Display current and available power profiles" clk_options = self.helpers.get_clock_types()[0] clk_options.remove('PCIE') clk_option_str = ", ".join(clk_options) + ", ALL" @@ -958,6 +959,7 @@ class AMDSMIParser(argparse.ArgumentParser): static_parser.add_argument('-l', '--limit', action='store_true', required=False, help=limit_help) static_parser.add_argument('-P', '--soc-pstate', action='store_true', required=False, help=soc_pstate_help) static_parser.add_argument('-x', '--xgmi-plpd', action='store_true', required=False, help=xgmi_plpd_help) + static_parser.add_argument('-o', '--profile', action='store_true', required=False, help=profile_help) if self.helpers.is_linux() and not self.helpers.is_virtual_os(): static_parser.add_argument('-u', '--numa', action='store_true', required=False, help=numa_help)