From 0c4db0414043f00c6bcedec0cf583ed3480e760f Mon Sep 17 00:00:00 2001
From: "Pham, Gabriel"
Date: Fri, 20 Dec 2024 16:32:10 -0500
Subject: [PATCH] [SWDEV-476303] Exposed valid values for set command (#8)

Updated amd-smi set help text

---------

Signed-off-by: gabrpham
Signed-off-by: Pham, Gabriel

[ROCm/amdsmi commit: 93a027ec951b90e7a543fac62d6b0cacb3bd444e]
---
 projects/amdsmi/CHANGELOG.md                 |  9 +++
 projects/amdsmi/amdsmi_cli/amdsmi_helpers.py | 74 ++++++++++++++++++++
 projects/amdsmi/amdsmi_cli/amdsmi_parser.py  | 27 ++++---
 3 files changed, 101 insertions(+), 9 deletions(-)

diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md
index 18d30fdeb0..ba8036dc0b 100644
--- a/projects/amdsmi/CHANGELOG.md
+++ b/projects/amdsmi/CHANGELOG.md
@@ -198,6 +198,15 @@ GPU: 1
 
 ### Optimized
 
+- **Added additional help information to `amd-smi set --help` command**.
+  - sub commands now detail what values are acceptable as input. These include:
+    - `amd-smi set --perf-level` with performance levels
+    - `amd-smi set --profile` with power profiles
+    - `amd-smi set --perf-determinism` with preset GPU frequency limits
+    - `amd-smi set --power-cap` with valid power cap values
+    - `amd-smi set --soc-pstate` with soc pstate policy ids
+    - `amd-smi set --xgmi-plpd` with xgmi per link power down policy ids
+
 - **Modified `amd-smi` CLI to allow case insensitive arguments if the argument does not begin with a single dash**.
   - With this change `amd-smi version` and `amd-smi VERSION` will now yield the same output.
   - `amd-smi static --bus` and `amd-smi STATIC --BUS` will produce identical results.
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py
index 5c53583df3..8f4801a46d 100644
--- a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py
+++ b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py
@@ -701,6 +701,80 @@ class AMDSMIHelpers():
         return clock_types_str, clock_types_int
 
 
+    def get_power_profiles(self):
+        """Return the names of all power profile presets, excluding 'UNKNOWN'."""
+        power_profiles_str = [profile.name for profile in amdsmi_interface.AmdSmiPowerProfilePresetMasks]
+        if 'UNKNOWN' in power_profiles_str:
+            power_profiles_str.remove('UNKNOWN')
+        return power_profiles_str
+
+
+    def get_perf_det_levels(self):
+        """Return the names of all performance levels, excluding 'UNKNOWN'."""
+        perf_det_level_str = [level.name for level in amdsmi_interface.AmdSmiDevPerfLevel]
+        if 'UNKNOWN' in perf_det_level_str:
+            perf_det_level_str.remove('UNKNOWN')
+        return perf_det_level_str
+
+
+    def get_power_caps(self):
+        """Return (min, max): the lowest minimum and highest maximum power cap across all devices.
+
+        Devices whose power cap info cannot be read are skipped. If no device
+        reports any limits the initial sentinels (UINT64 max, 0) are returned.
+        """
+        device_handles = amdsmi_interface.amdsmi_get_processor_handles()
+        power_cap_min = amdsmi_interface.MaxUIntegerTypes.UINT64_T # start out at max and min and then find real min and max
+        power_cap_max = 0
+        for dev in device_handles:
+            try:
+                power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(dev)
+            except amdsmi_interface.AmdSmiLibraryException:
+                continue
+            if power_cap_info['max_power_cap'] > power_cap_max:
+                power_cap_max = power_cap_info['max_power_cap']
+            # Compare against the running minimum (not the maximum) so the smallest value wins.
+            if power_cap_info['min_power_cap'] < power_cap_min:
+                power_cap_min = power_cap_info['min_power_cap']
+        return (power_cap_min, power_cap_max)
+
+
+    def get_soc_pstates(self):
+        """Return unique 'policy_id: policy_description' strings from all devices, or ['N/A']."""
+        device_handles = amdsmi_interface.amdsmi_get_processor_handles()
+        soc_pstate_profile_list = []
+        for dev in device_handles:
+            try:
+                soc_pstate_info = amdsmi_interface.amdsmi_get_soc_pstate(dev)
+            except amdsmi_interface.AmdSmiLibraryException:
+                continue
+            for policy in soc_pstate_info['policies']:
+                policy_string = f"{policy['policy_id']}: {policy['policy_description']}"
+                if policy_string not in soc_pstate_profile_list:
+                    soc_pstate_profile_list.append(policy_string)
+        if not soc_pstate_profile_list:
+            soc_pstate_profile_list.append("N/A")
+        return soc_pstate_profile_list
+
+
+    def get_xgmi_plpd_policies(self):
+        """Return unique 'policy_id: policy_description' strings from all devices, or ['N/A']."""
+        device_handles = amdsmi_interface.amdsmi_get_processor_handles()
+        xgmi_plpd_profile_list = []
+        for dev in device_handles:
+            try:
+                xgmi_plpd_info = amdsmi_interface.amdsmi_get_xgmi_plpd(dev)
+            except amdsmi_interface.AmdSmiLibraryException:
+                continue
+            for policy in xgmi_plpd_info['plpds']:
+                policy_string = f"{policy['policy_id']}: {policy['policy_description']}"
+                if policy_string not in xgmi_plpd_profile_list:
+                    xgmi_plpd_profile_list.append(policy_string)
+        if not xgmi_plpd_profile_list:
+            xgmi_plpd_profile_list.append("N/A")
+        return xgmi_plpd_profile_list
+
+
     def validate_clock_type(self, input_clock_type):
         valid_clock_types_str, valid_clock_types_int = self.get_clock_types()
 
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py
index c178d3041c..8371b1348f 100644
--- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py
+++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py
@@ -25,6 +25,7 @@ import os
 import sys
 import time
 import collections
+from amdsmi import amdsmi_interface
 
 from typing import Optional
 from typing import Union
@@ -1072,19 +1073,27 @@ class AMDSMIParser(argparse.ArgumentParser):
 
         # Help text for Arguments only on BM platforms
         set_fan_help = "Set GPU fan speed (0-255 or 0-100%%)"
-        set_perf_level_help = "Set performance level"
-        set_profile_help = "Set power profile level (#) or a quoted string of custom profile attributes"
-        set_perf_det_help = "Set GPU clock frequency limit and performance level to determinism\n to get minimal performance variation"
+        perf_level_help_choices_str = ", ".join(self.helpers.get_perf_levels()[0][0:-1])
+        set_perf_level_help = f"Set one of the following performance levels:\n\t{perf_level_help_choices_str}"
+        power_profile_choices_str = ", ".join(self.helpers.get_power_profiles()[0:-1])
+        set_profile_help = f"Set power profile level (#) or choose one of available profiles:\n\t{power_profile_choices_str}"
+        perf_det_choices_str = ", ".join(self.helpers.get_perf_det_levels())
+        set_perf_det_help = f"Set performance determinism and select one of the corresponding performance levels:\n\t{perf_det_choices_str}"
         compute_partition_choices_str = ", ".join(self.helpers.get_compute_partition_types())
         memory_partition_choices_str = ", ".join(self.helpers.get_memory_partition_types())
         set_compute_partition_help = f"Set one of the following the compute partition modes:\n\t{compute_partition_choices_str}"
         set_memory_partition_help = f"Set one of the following the memory partition modes:\n\t{memory_partition_choices_str}"
-        set_power_cap_help = "Set power capacity limit"
-        set_soc_pstate_help = "Set the GPU soc pstate policy using policy id\n"
-        set_xgmi_plpd_help = "Set the GPU XGMI per-link power down policy using policy id\n"
-        set_clk_limit_help = "Sets the sclk (aka gfxclk) or mclk minimum and maximum frequencies:\n\tamd-smi set -L (sclk | mclk) (min | max) value"
-        set_clock_freq_help = "Set the sclk (aka gfxclk), mclk, fclk, pcie, or socclk frequency performance level.\nCan take range of acceptable levels."
-        set_process_isolation_help = "Enable or disable the GPU process isolation on a per partition basis:\n\t0 for disable and 1 for enable.\n"
+        power_cap_min, power_cap_max = self.helpers.get_power_caps()
+        power_cap_max = self.helpers.convert_SI_unit(power_cap_max, AMDSMIHelpers.SI_Unit.MICRO)
+        power_cap_min = self.helpers.convert_SI_unit(power_cap_min, AMDSMIHelpers.SI_Unit.MICRO)
+        set_power_cap_help = f"Set power capacity limit:\n\tmin cap: {power_cap_min} W, max cap: {power_cap_max} W"
+        soc_pstate_help_info = ", ".join(self.helpers.get_soc_pstates())
+        set_soc_pstate_help = f"Set the GPU soc pstate policy using policy id, an integer. Valid id's include:\n\t{soc_pstate_help_info}"
+        xgmi_plpd_help_info = ", ".join(self.helpers.get_xgmi_plpd_policies())
+        set_xgmi_plpd_help = f"Set the GPU XGMI per-link power down policy using policy id, an integer. Valid id's include:\n\t{xgmi_plpd_help_info}"
+        set_clk_limit_help = "Sets the sclk (aka gfxclk) or mclk minimum and maximum frequencies. \n\tex: amd-smi set -L (sclk | mclk) (min | max) value"
+        set_clock_freq_help = "Set a number of sclk (aka gfxclk), mclk, fclk, pcie, or socclk frequency performance levels.\n\tUse `amd-smi static --clock` to find acceptable levels."
+        set_process_isolation_help = "Enable or disable the GPU process isolation on a per partition basis: 0 for disable and 1 for enable.\n"
 
         # Help text for CPU set options
         set_cpu_pwr_limit_help = "Set power limit for the given socket. Input parameter is power limit value."