From cb42e8e444c768ba972ebc6e29e3a4e566d30849 Mon Sep 17 00:00:00 2001 From: gabrpham Date: Wed, 4 Dec 2024 00:10:32 -0600 Subject: [PATCH] [SWDEV-484382] Added new command `amd-smi static --clock` Signed-off-by: gabrpham Change-Id: I49e1aa2e699734d81c40c76c62da1cecc5bd3c0e [ROCm/amdsmi commit: bc16e1a5da5fed0330d193c51fed0157595abfc4] --- projects/amdsmi/CHANGELOG.md | 41 ++++++++++ projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 76 +++++++++++++++++-- projects/amdsmi/amdsmi_cli/amdsmi_parser.py | 5 ++ .../amdsmi/docs/how-to/amdsmi-cli-tool.md | 32 ++++++++ 4 files changed, 146 insertions(+), 8 deletions(-) diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index a41098f0a9..d6749e6226 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -3,6 +3,47 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/projects/amdsmi](https://rocm.docs.amd.com/projects/amdsmi/en/latest/). ***All information listed below is for reference and subject to change.*** +## amd_smi_lib for ROCm 6.4.0 + +### Added + +- **Added new command `amd-smi static -C/--clock`**. + This new command displays the clock frequency performance levels for the selected GPUs and clocks. 
+ +```shell +amd-smi static --clock all -g 0 +GPU: 0 + CLOCK: + SYS: + CURRENT LEVEL: 2 + FREQUENCY_LEVELS: + 0: 300 MHz + 1: 904 MHz + 2: 1165 MHz + 3: 1360 MHz + 4: 1440 MHz + 5: 1544 MHz + 6: 1627 MHz + 7: 1720 MHz + 8: 1800 MHz + MEM: + CURRENT LEVEL: 0 + FREQUENCY_LEVELS: + 0: 167 MHz + DF: + CURRENT LEVEL: 0 + FREQUENCY_LEVELS: + 0: 1400 MHz + SOC: + CURRENT LEVEL: 0 + FREQUENCY_LEVELS: + 0: 302 MHz + DCEF: N/A + VCLK0: N/A + VCLK1: N/A + DCLK0: N/A + DCLK1: N/A +``` ## amd_smi_lib for ROCm 6.4.0 diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index fdb2d80408..9262e02ca7 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -31,7 +31,7 @@ import os from _version import __version__ from amdsmi_helpers import AMDSMIHelpers from amdsmi_logger import AMDSMILogger -from amdsmi_cli_exceptions import AmdSmiRequiredCommandException +from amdsmi_cli_exceptions import AmdSmiRequiredCommandException, AmdSmiInvalidParameterValueException from rocm_version import get_rocm_version from amdsmi import amdsmi_interface from amdsmi import amdsmi_exception @@ -271,7 +271,7 @@ class AMDSMICommands(): def static_gpu(self, args, multiple_devices=False, gpu=None, asic=None, bus=None, vbios=None, limit=None, driver=None, ras=None, board=None, numa=None, vram=None, cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None, - soc_pstate=None, xgmi_plpd=None, process_isolation=None): + soc_pstate=None, xgmi_plpd=None, process_isolation=None, clock=None): """Get Static information for target gpu Args: @@ -321,12 +321,19 @@ class AMDSMICommands(): args.cache = cache if process_isolation: args.process_isolation = process_isolation + if clock: + args.clock = clock + # args.clock defaults to False, so if it was overwritten with an empty list, that indicates it was given as an argument but with no values + if args.clock == []: + args.clock = True # Store
args that are applicable to the current platform current_platform_args = ["asic", "bus", "vbios", "driver", "ras", - "vram", "cache", "board", "process_isolation"] + "vram", "cache", "board", "process_isolation", + "clock"] current_platform_values = [args.asic, args.bus, args.vbios, args.driver, args.ras, - args.vram, args.cache, args.board, args.process_isolation] + args.vram, args.cache, args.board, args.process_isolation, + args.clock] if self.helpers.is_linux() and self.helpers.is_baremetal(): if partition: @@ -829,6 +836,58 @@ class AMDSMICommands(): logging.debug("Failed to get cache info for gpu %s | %s", gpu_id, e.get_error_info()) static_dict['cache_info'] = cache_info_list + if 'clock' in current_platform_args: + if isinstance(args.clock, bool) and args.clock == True: + args.clock = ['sys', 'mem', 'df', 'soc', 'dcef', 'vclk0', 'vclk1', 'dclk0', 'dclk1'] + if isinstance(args.clock, list): + # remove potential duplicates from list + args.clock = list(set(args.clock)) + # check that clock is valid option + if "all" in args.clock or len(args.clock) == 0: + args.clock = ['sys', 'mem', 'df', 'soc', 'dcef', 'vclk0', 'vclk1', 'dclk0', 'dclk1'] + clk_dict = {} + + for clk in args.clock: + clk_type = clk.lower() + if clk_type == "sys": + clk_type_conversion = amdsmi_interface.AmdSmiClkType.SYS + elif clk_type == "mem": + clk_type_conversion = amdsmi_interface.AmdSmiClkType.MEM + elif clk_type == "df": + clk_type_conversion = amdsmi_interface.AmdSmiClkType.DF + elif clk_type == "soc": + clk_type_conversion = amdsmi_interface.AmdSmiClkType.SOC + elif clk_type == "dcef": + clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCEF + # vclk and dclk currently do not support levels so average clk is given for frequency levels + elif clk_type == "vclk0": + clk_type_conversion = amdsmi_interface.AmdSmiClkType.VCLK0 + elif clk_type == "vclk1": + clk_type_conversion = amdsmi_interface.AmdSmiClkType.VCLK1 + elif clk_type == "dclk0": + clk_type_conversion = 
amdsmi_interface.AmdSmiClkType.DCLK0 + elif clk_type == "dclk1": + clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCLK1 + else: + clk_type_conversion = "N/A" + output_format = self.helpers.get_output_format() + raise AmdSmiInvalidParameterValueException(clk_type, output_format) # clk type given is bad + + try: + frequencies = amdsmi_interface.amdsmi_get_clk_freq(args.gpu, clk_type_conversion) + freq_dict = {} + freq_dict.update({'current level':frequencies['current']}) + freq_dict.update({'frequency_levels':{}}) + for level in range(len(frequencies['frequency'])): + freq = str(self.helpers.convert_SI_unit(frequencies['frequency'][level], AMDSMIHelpers.SI_Unit.MICRO)) + " MHz" + freq_dict['frequency_levels'].update({level:freq}) + except amdsmi_exception.AmdSmiLibraryException as e: + freq_dict = "N/A" + clk_dict.update({clk:freq_dict}) + + static_dict['clock'] = clk_dict + else: + raise amdsmi_exception.AmdSmiParameterException(args.clock, list[str]) # Convert and store output by pid for csv format multiple_devices_csv_override = False @@ -864,7 +923,8 @@ class AMDSMICommands(): bus=None, vbios=None, limit=None, driver=None, ras=None, board=None, numa=None, vram=None, cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None, cpu=None, - interface_ver=None, soc_pstate=None, xgmi_plpd = None, process_isolation=None): + interface_ver=None, soc_pstate=None, xgmi_plpd = None, process_isolation=None, + clock=None): """Get Static information for target gpu and cpu Args: @@ -916,7 +976,7 @@ class AMDSMICommands(): gpu_attributes = ["asic", "bus", "vbios", "limit", "driver", "ras", "board", "numa", "vram", "cache", "partition", "dfc_ucode", "fb_info", "num_vf", "soc_pstate", "xgmi_plpd", - "process_isolation"] + "process_isolation", "clock"] for attr in gpu_attributes: if hasattr(args, attr): if getattr(args, attr): @@ -947,7 +1007,7 @@ class AMDSMICommands(): bus, vbios, limit, driver, ras, board, numa, vram, cache, partition, dfc_ucode, fb_info, num_vf, 
soc_pstate, - process_isolation) + process_isolation, clock) elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized if args.cpu == None: args.cpu = self.cpu_handles @@ -962,7 +1022,7 @@ class AMDSMICommands(): bus, vbios, limit, driver, ras, board, numa, vram, cache, partition, dfc_ucode, fb_info, num_vf, soc_pstate, xgmi_plpd, - process_isolation) + process_isolation, clock) def firmware(self, args, multiple_devices=False, gpu=None, fw_list=True): diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index 27d0aeb601..21aec247cb 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -607,6 +607,10 @@ class AMDSMIParser(argparse.ArgumentParser): soc_pstate_help = "The available soc pstate policy" xgmi_plpd_help = "The available XGMI per-link power down policy" process_isolation_help = "The process isolation status" + clk_options = self.helpers.get_clock_types()[0] + clk_options.remove('PCIE') + clk_option_str = ", ".join(clk_options) + ", ALL" + clock_help = f"Show one or more valid clock frequency levels. 
Available options:\n\t{clk_option_str}" # Options arguments help text for Hypervisors and Baremetal ras_help = "Displays RAS features information" @@ -642,6 +646,7 @@ class AMDSMIParser(argparse.ArgumentParser): static_parser.add_argument('-B', '--board', action='store_true', required=False, help=board_help) static_parser.add_argument('-R', '--process-isolation', action='store_true', required=False, help=process_isolation_help) static_parser.add_argument('-r', '--ras', action='store_true', required=False, help=ras_help) + static_parser.add_argument('-C', '--clock', default=False, nargs='*', type=str, required=False, help=clock_help) # Options to display on Hypervisors and Baremetal if self.helpers.is_hypervisor() or self.helpers.is_baremetal(): diff --git a/projects/amdsmi/docs/how-to/amdsmi-cli-tool.md b/projects/amdsmi/docs/how-to/amdsmi-cli-tool.md index 687d164ede..daaf17056e 100644 --- a/projects/amdsmi/docs/how-to/amdsmi-cli-tool.md +++ b/projects/amdsmi/docs/how-to/amdsmi-cli-tool.md @@ -158,6 +158,8 @@ Static Arguments: -B, --board All board information -R, --process-isolation The process isolation status -r, --ras Displays RAS features information + -C, --clock [CLOCK ...] Show one or more valid clock frequency levels. Available options: + SYS, DF, DCEF, SOC, MEM, VCLK0, VCLK1, DCLK0, DCLK1, ALL -p, --partition Partition information -l, --limit All limit metric values (i.e. power and thermal limits) -P, --policy The available DPM policy @@ -855,5 +857,35 @@ GPU: 0 CACHE_LEVEL: 3 MAX_NUM_CU_SHARED: 228 NUM_CACHE_INSTANCE: 1 + CLOCK: + SYS: + CURRENT LEVEL: 2 + FREQUENCY_LEVELS: + 0: 300 MHz + 1: 904 MHz + 2: 1165 MHz + 3: 1360 MHz + 4: 1440 MHz + 5: 1544 MHz + 6: 1627 MHz + 7: 1720 MHz + 8: 1800 MHz + DF: + CURRENT LEVEL: 0 + FREQUENCY_LEVELS: + 0: 1400 MHz + DCEF: N/A + SOC: + CURRENT LEVEL: 0 + FREQUENCY_LEVELS: + 0: 302 MHz + MEM: + CURRENT LEVEL: 0 + FREQUENCY_LEVELS: + 0: 167 MHz + VCLK0: N/A + VCLK1: N/A + DCLK1: N/A + DCLK0: N/A ... ```