From 7d109001ac2cede2748172328f53b68e5bb44d1f Mon Sep 17 00:00:00 2001 From: "Kanangot Balakrishnan, Bindhiya" Date: Wed, 26 Mar 2025 17:45:08 -0500 Subject: [PATCH] [SWDEV-513855] Add power cap to power monitor (#193) Display the power cap alongside power usage in the output of `amd-smi monitor -p`. Updated the help text and changelog to match. Signed-off-by: Kanangot Balakrishnan, Bindhiya --- CHANGELOG.md | 11 +++++++++++ amdsmi_cli/amdsmi_commands.py | 18 ++++++++++++++++++ amdsmi_cli/amdsmi_logger.py | 2 ++ amdsmi_cli/amdsmi_parser.py | 2 +- 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07fa9c2829..6006d2ca7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,17 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Changed +- **Added Power Cap to amd-smi monitor**. + - 'amd-smi monitor -p' will display the power cap along with power. + ```shell + $ amd-smi monitor -p + GPU POWER PWR_CAP + 0 148 W 750 W + 1 156 W 750 W + 2 153 W 750 W + ... + ``` + - **Modified VRAM display for `amd-smi monitor -v`**. - Added free VRAM and VRAM percentage. 
diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 525506317d..1612ed345b 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -5104,6 +5104,24 @@ class AMDSMICommands(): self.logger.table_header += 'POWER'.rjust(7) + if args.power_usage and not args.default_output: + # Get Max Power Cap + try: + power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu) + monitor_values['max_power'] = power_cap_info['max_power_cap'] + monitor_values['max_power'] = self.helpers.convert_SI_unit(monitor_values['max_power'], AMDSMIHelpers.SI_Unit.MICRO) + + if self.logger.is_human_readable_format() and monitor_values['max_power'] != "N/A": + monitor_values['max_power'] = f"{monitor_values['max_power']} {power_unit}" + if self.logger.is_json_format() and monitor_values['max_power'] != "N/A": + monitor_values['max_power'] = {"value" : monitor_values['max_power'], + "unit" : power_unit} + except amdsmi_exception.AmdSmiLibraryException as e: + monitor_values['max_power'] = "N/A" + logging.debug("Failed to get power cap info for gpu %s | %s", gpu_id, e.get_error_info()) + + self.logger.table_header += 'PWR_CAP'.rjust(9) + if args.temperature: try: temperature = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu)['temperature_hotspot'] diff --git a/amdsmi_cli/amdsmi_logger.py b/amdsmi_cli/amdsmi_logger.py index a4c95dff86..99b47e876d 100644 --- a/amdsmi_cli/amdsmi_logger.py +++ b/amdsmi_cli/amdsmi_logger.py @@ -128,6 +128,8 @@ class AMDSMILogger(): table_values += string_value.rjust(10) + ' ' elif key == 'power_usage': table_values += string_value.rjust(7) + elif key == 'max_power': + table_values += string_value.rjust(9) elif key in ('hotspot_temperature', 'memory_temperature'): table_values += string_value.rjust(8) elif key in ('gfx', 'mem'): diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index 019e97dd8f..46c19c06b7 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ 
-1252,7 +1252,7 @@ class AMDSMIParser(argparse.ArgumentParser): monitor_optionals_title = "Monitor Arguments" # Help text for Arguments only on Guest and BM platforms - power_usage_help = "Monitor power usage in Watts" + power_usage_help = "Monitor power usage and power cap in Watts" temperature_help = "Monitor temperature in Celsius" gfx_util_help = "Monitor graphics utilization (%%) and clock (MHz)" mem_util_help = "Monitor memory utilization (%%) and clock (MHz)"