[SWDEV-513855] Add power cap to power monitor (#193)

Added the power cap to the output of `amd-smi monitor -p`.
Updated the help text and the changelog accordingly.

Signed-off-by: Kanangot Balakrishnan, Bindhiya <Bindhiya.KanangotBalakrishnan@amd.com>
This commit is contained in:
Kanangot Balakrishnan, Bindhiya
2025-03-26 17:45:08 -05:00
zatwierdzone przez GitHub
rodzic 9b64dcb61a
commit 7d109001ac
4 zmienionych plików z 32 dodań i 1 usunięć
+11
Wyświetl plik
@@ -20,6 +20,17 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
### Changed
- **Added Power Cap to amd-smi monitor**.
- `amd-smi monitor -p` now displays the power cap alongside the power usage.
```shell
$ amd-smi monitor -p
GPU POWER PWR_CAP
0 148 W 750 W
1 156 W 750 W
2 153 W 750 W
...
```
- **Modified VRAM display for `amd-smi monitor -v`**.
- Added free VRAM and VRAM percentage.
+18
Wyświetl plik
@@ -5104,6 +5104,24 @@ class AMDSMICommands():
self.logger.table_header += 'POWER'.rjust(7)
if args.power_usage and not args.default_output:
# Get Max Power Cap
try:
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
monitor_values['max_power'] = power_cap_info['max_power_cap']
monitor_values['max_power'] = self.helpers.convert_SI_unit(monitor_values['max_power'], AMDSMIHelpers.SI_Unit.MICRO)
if self.logger.is_human_readable_format() and monitor_values['max_power'] != "N/A":
monitor_values['max_power'] = f"{monitor_values['max_power']} {power_unit}"
if self.logger.is_json_format() and monitor_values['max_power'] != "N/A":
monitor_values['max_power'] = {"value" : monitor_values['max_power'],
"unit" : power_unit}
except amdsmi_exception.AmdSmiLibraryException as e:
monitor_values['max_power'] = "N/A"
logging.debug("Failed to get power cap info for gpu %s | %s", gpu_id, e.get_error_info())
self.logger.table_header += 'PWR_CAP'.rjust(9)
if args.temperature:
try:
temperature = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu)['temperature_hotspot']
+2
Wyświetl plik
@@ -128,6 +128,8 @@ class AMDSMILogger():
table_values += string_value.rjust(10) + ' '
elif key == 'power_usage':
table_values += string_value.rjust(7)
elif key == 'max_power':
table_values += string_value.rjust(9)
elif key in ('hotspot_temperature', 'memory_temperature'):
table_values += string_value.rjust(8)
elif key in ('gfx', 'mem'):
+1 -1
Wyświetl plik
@@ -1252,7 +1252,7 @@ class AMDSMIParser(argparse.ArgumentParser):
monitor_optionals_title = "Monitor Arguments"
# Help text for Arguments only on Guest and BM platforms
power_usage_help = "Monitor power usage in Watts"
power_usage_help = "Monitor power usage and power cap in Watts"
temperature_help = "Monitor temperature in Celsius"
gfx_util_help = "Monitor graphics utilization (%%) and clock (MHz)"
mem_util_help = "Monitor memory utilization (%%) and clock (MHz)"