From d71dac97668271bc2827cf3e28329136a411bac0 Mon Sep 17 00:00:00 2001 From: gabrpham Date: Wed, 4 Dec 2024 13:54:20 -0600 Subject: [PATCH] [SWDEV-484382] Added fclk and socclk to `amd-smi metric -c` Signed-off-by: gabrpham Change-Id: Ie7e19c757b05455693c0d26eeb5e8b6c1e238375 [ROCm/amdsmi commit: fe290a20569bd4adeee3b2da88dd4a8fc61e45a2] --- projects/amdsmi/CHANGELOG.md | 22 +++++- projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 67 +++++++++++++++++++ projects/amdsmi/src/amd_smi/amd_smi.cc | 7 ++ projects/amdsmi/src/amd_smi/amd_smi_utils.cc | 6 ++ 4 files changed, 101 insertions(+), 1 deletion(-) diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index f450b505e5..d8c59176fe 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -7,8 +7,28 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Added +- **Added fclk and socclk info to `amd-smi metric -c/--clock`**. + fclk and socclk information such as min and max clock have been added to the metric command, in line with all the other clocks. + + ```shell + amd-smi metric -c -g 1 + ... + FCLK_0: + CLK: 2301 MHz + MIN_CLK: 601 MHz + MAX_CLK: 2301 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + SOCCLK_0: + CLK: 1500 MHz + MIN_CLK: 500 MHz + MAX_CLK: 1500 MHz + CLK_LOCKED: N/A + DEEP_SLEEP: DISABLED + ``` + - **Added new command `amd-smi set -c/--clock-level`**. - This new command sets the performance level of the selected clock on the desired GPUs. The command can accept a range of acceptable levels, but will not set the level when a level is beyond the number of frequency levels as show in `amd-smi static -C/--clock` + This new command sets the performance level of the selected clock on the desired GPUs. The command can accept a range of acceptable levels, but will not set the level when a level is beyond the number of frequency levels as shown in `amd-smi static -C/--clock`. 
```shell sudo amd-smi set -c sclk 5 6 diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 9073594e99..957b2c8031 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -1682,6 +1682,18 @@ class AMDSMICommands(): "clk_locked" : "N/A", "deep_sleep" : "N/A"} + clocks["fclk_0"] = {"clk" : "N/A", + "min_clk" : "N/A", + "max_clk" : "N/A", + "clk_locked" : "N/A", + "deep_sleep" : "N/A"} + + clocks["socclk_0"] = {"clk" : "N/A", + "min_clk" : "N/A", + "max_clk" : "N/A", + "clk_locked" : "N/A", + "deep_sleep" : "N/A"} + clock_unit = "MHz" # TODO make the deepsleep threshold correspond to the * in sysfs for current deep sleep status deep_sleep_threshold = 140 @@ -1759,6 +1771,31 @@ class AMDSMICommands(): clocks[dclk_index]["deep_sleep"] = "ENABLED" else: clocks[dclk_index]["deep_sleep"] = "DISABLED" + + # Populate FCLK clock value; fclk not present in gpu_metrics so use amdsmi_get_clk_freq + frequency_dict = amdsmi_interface.amdsmi_get_clk_freq(args.gpu, amdsmi_interface.AmdSmiClkType.DF) + current_fclk_clock = frequency_dict['frequency'][frequency_dict['current']] + current_fclk_clock = self.helpers.convert_SI_unit(current_fclk_clock, self.helpers.SI_Unit.MICRO) + clocks["fclk_0"]["clk"] = self.helpers.unit_format(self.logger, + current_fclk_clock, + clock_unit) + + if int(current_fclk_clock) <= deep_sleep_threshold: + clocks["fclk_0"]["deep_sleep"] = "ENABLED" + else: + clocks["fclk_0"]["deep_sleep"] = "DISABLED" + + # Populate SOCCLK clock value + current_socclk_clock = gpu_metric["current_socclk"] + clocks["socclk_0"]["clk"] = self.helpers.unit_format(self.logger, + current_socclk_clock, + clock_unit) + + if int(current_socclk_clock) <= deep_sleep_threshold: + clocks["socclk_0"]["deep_sleep"] = "ENABLED" + else: + clocks["socclk_0"]["deep_sleep"] = "DISABLED" + except Exception as e: logging.debug("Failed to get gpu_metrics_info for gpu %s | %s", 
gpu_id, e) @@ -1831,6 +1868,36 @@ class AMDSMICommands(): except amdsmi_exception.AmdSmiLibraryException as e: logging.debug("Failed to get vclk and/or dclk clock info for gpu %s | %s", gpu_id, e.get_error_info()) + # FCLK min and max clocks + try: + fclk_clk_info_dict = amdsmi_interface.amdsmi_get_clock_info(args.gpu, + amdsmi_interface.AmdSmiClkType.DF) + # if the current clock is N/A then we shouldn't populate the max and min values + if clocks["fclk_0"]["clk"] != "N/A": + clocks["fclk_0"]["min_clk"] = self.helpers.unit_format(self.logger, + fclk_clk_info_dict["min_clk"], + clock_unit) + clocks["fclk_0"]["max_clk"] = self.helpers.unit_format(self.logger, + fclk_clk_info_dict["max_clk"], + clock_unit) + except amdsmi_exception.AmdSmiLibraryException as e: + logging.debug("Failed to get fclk info for gpu %s | %s", gpu_id, e.get_error_info()) + + # SOCCLK min and max clocks + try: + socclk_clk_info_dict = amdsmi_interface.amdsmi_get_clock_info(args.gpu, + amdsmi_interface.AmdSmiClkType.SOC) + # if the current clock is N/A then we shouldn't populate the max and min values + if clocks["socclk_0"]["clk"] != "N/A": + clocks["socclk_0"]["min_clk"] = self.helpers.unit_format(self.logger, + socclk_clk_info_dict["min_clk"], + clock_unit) + clocks["socclk_0"]["max_clk"] = self.helpers.unit_format(self.logger, + socclk_clk_info_dict["max_clk"], + clock_unit) + except amdsmi_exception.AmdSmiLibraryException as e: + logging.debug("Failed to get socclk info for gpu %s | %s", gpu_id, e.get_error_info()) + values_dict['clock'] = clocks if "temperature" in current_platform_args: if args.temperature: diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index 2f5cfd3821..4bed5a380f 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -2217,6 +2217,13 @@ amdsmi_get_clock_info(amdsmi_processor_handle processor_handle, amdsmi_clk_type_ case AMDSMI_CLK_TYPE_DCLK1: info->clk = metrics.current_dclk1; 
break; + case AMDSMI_CLK_TYPE_SOC: + info->clk = metrics.current_socclk; + break; + // fclk/df not supported by gpu metrics so providing default value which cannot be construed to be valid + case AMDSMI_CLK_TYPE_DF: + info->clk = UINT32_MAX; + break; default: return AMDSMI_STATUS_INVAL; } diff --git a/projects/amdsmi/src/amd_smi/amd_smi_utils.cc b/projects/amdsmi/src/amd_smi/amd_smi_utils.cc index 7f79b24a09..4c6a8cc74e 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi_utils.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi_utils.cc @@ -275,6 +275,12 @@ amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_ case AMDSMI_CLK_TYPE_DCLK1: fullpath += "/pp_dpm_dclk1"; break; + case AMDSMI_CLK_TYPE_SOC: + fullpath += "/pp_dpm_socclk"; + break; + case AMDSMI_CLK_TYPE_DF: + fullpath += "/pp_dpm_fclk"; + break; default: return AMDSMI_STATUS_INVAL; }