[SWDEV-513855] Add power cap to power monitor (#193)
Added the power cap to the output of `amd-smi monitor -p`.
Updated the help text and the changelog accordingly.
Signed-off-by: Kanangot Balakrishnan, Bindhiya <Bindhiya.KanangotBalakrishnan@amd.com>
[ROCm/amdsmi commit: 7d109001ac]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
b442530b74
Коммит
a5f5da8b90
@@ -20,6 +20,17 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
|
||||
### Changed
|
||||
|
||||
- **Added power cap to `amd-smi monitor -p`**.
|
||||
- `amd-smi monitor -p` now displays the power cap (`PWR_CAP`) alongside power usage.
|
||||
```shell
|
||||
$ amd-smi monitor -p
|
||||
GPU POWER PWR_CAP
|
||||
0 148 W 750 W
|
||||
1 156 W 750 W
|
||||
2 153 W 750 W
|
||||
...
|
||||
```
|
||||
|
||||
- **Modified VRAM display for `amd-smi monitor -v`**.
|
||||
- Added free VRAM and VRAM percentage.
|
||||
|
||||
|
||||
@@ -5104,6 +5104,24 @@ class AMDSMICommands():
|
||||
|
||||
self.logger.table_header += 'POWER'.rjust(7)
|
||||
|
||||
if args.power_usage and not args.default_output:
|
||||
# Get Max Power Cap
|
||||
try:
|
||||
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
monitor_values['max_power'] = power_cap_info['max_power_cap']
|
||||
monitor_values['max_power'] = self.helpers.convert_SI_unit(monitor_values['max_power'], AMDSMIHelpers.SI_Unit.MICRO)
|
||||
|
||||
if self.logger.is_human_readable_format() and monitor_values['max_power'] != "N/A":
|
||||
monitor_values['max_power'] = f"{monitor_values['max_power']} {power_unit}"
|
||||
if self.logger.is_json_format() and monitor_values['max_power'] != "N/A":
|
||||
monitor_values['max_power'] = {"value" : monitor_values['max_power'],
|
||||
"unit" : power_unit}
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
monitor_values['max_power'] = "N/A"
|
||||
logging.debug("Failed to get power cap info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
self.logger.table_header += 'PWR_CAP'.rjust(9)
|
||||
|
||||
if args.temperature:
|
||||
try:
|
||||
temperature = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu)['temperature_hotspot']
|
||||
|
||||
@@ -128,6 +128,8 @@ class AMDSMILogger():
|
||||
table_values += string_value.rjust(10) + ' '
|
||||
elif key == 'power_usage':
|
||||
table_values += string_value.rjust(7)
|
||||
elif key == 'max_power':
|
||||
table_values += string_value.rjust(9)
|
||||
elif key in ('hotspot_temperature', 'memory_temperature'):
|
||||
table_values += string_value.rjust(8)
|
||||
elif key in ('gfx', 'mem'):
|
||||
|
||||
@@ -1252,7 +1252,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
monitor_optionals_title = "Monitor Arguments"
|
||||
|
||||
# Help text for Arguments only on Guest and BM platforms
|
||||
power_usage_help = "Monitor power usage in Watts"
|
||||
power_usage_help = "Monitor power usage and power cap in Watts"
|
||||
temperature_help = "Monitor temperature in Celsius"
|
||||
gfx_util_help = "Monitor graphics utilization (%%) and clock (MHz)"
|
||||
mem_util_help = "Monitor memory utilization (%%) and clock (MHz)"
|
||||
|
||||
Ссылка в новой задаче
Block a user