[SWDEV-520148] Modify VRAM details in monitor output (#199)

Previously, amd-smi monitor showed VRAM usage only as used and total.
It now also displays free VRAM and the VRAM usage percentage. Updated
the changelog.

Signed-off-by: Kanangot Balakrishnan, Bindhiya <Bindhiya.KanangotBalakrishnan@amd.com>
This commit is contained in:
Kanangot Balakrishnan, Bindhiya
2025-03-26 13:12:41 -05:00
committed by GitHub
parent 3681f900ee
commit 3ddfbcc0a3
3 changed files with 30 additions and 1 deletions
+12
View File
@@ -18,6 +18,18 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
- Increasing available JPEG engines to 40.
Current ASICs may not support all 40. Unsupported engines are reported as UINT16_MAX or N/A in the CLI.
### Changed
- **Modified VRAM display for `amd-smi monitor -v`**.
- Added `VRAM_FREE` (total minus used) and `VRAM%` (used divided by total, rounded to two decimal places) columns.
```shell
$ amd-smi monitor -v
GPU VRAM_USED VRAM_FREE VRAM_TOTAL VRAM%
0 174 MB 16011 MB 16185 MB 0.01 %
1 78 MB 347 MB 425 MB 0.18 %
...
```
## amd_smi_lib for ROCm 6.4.1
+13
View File
@@ -5336,23 +5336,36 @@ class AMDSMICommands():
try:
vram_usage = amdsmi_interface.amdsmi_get_gpu_vram_usage(args.gpu)
monitor_values['vram_used'] = vram_usage['vram_used']
monitor_values['vram_free'] = vram_usage['vram_total'] - vram_usage['vram_used']
monitor_values['vram_total'] = vram_usage['vram_total']
monitor_values['vram_percent'] = round ((vram_usage['vram_used'] / vram_usage['vram_total']), 2)
vram_usage_unit = "MB"
vram_percent_unit = "%"
if self.logger.is_human_readable_format():
monitor_values['vram_used'] = f"{monitor_values['vram_used']} {vram_usage_unit}"
monitor_values['vram_free'] = f"{monitor_values['vram_free']} {vram_usage_unit}"
monitor_values['vram_total'] = f"{monitor_values['vram_total']} {vram_usage_unit}"
monitor_values['vram_percent'] = f"{monitor_values['vram_percent']} {vram_percent_unit}"
if self.logger.is_json_format():
monitor_values['vram_used'] = {"value" : monitor_values['vram_used'],
"unit" : vram_usage_unit}
monitor_values['vram_free'] = {"value" : monitor_values['vram_free'],
"unit" : vram_usage_unit}
monitor_values['vram_total'] = {"value" : monitor_values['vram_total'],
"unit" : vram_usage_unit}
monitor_values['vram_percent'] = {"value" : monitor_values['vram_percent'],
"unit" : vram_percent_unit}
except amdsmi_exception.AmdSmiLibraryException as e:
monitor_values['vram_used'] = "N/A"
monitor_values['vram_free'] = "N/A"
monitor_values['vram_total'] = "N/A"
monitor_values['vram_percent'] = "N/A"
logging.debug("Failed to get vram memory usage on gpu %s | %s", gpu_id, e.get_error_info())
self.logger.table_header += 'VRAM_USED'.rjust(11)
self.logger.table_header += 'VRAM_FREE'.rjust(12)
self.logger.table_header += 'VRAM_TOTAL'.rjust(12)
self.logger.table_header += 'VRAM%'.rjust(9)
if args.vram_usage and args.default_output:
try:
+5 -1
View File
@@ -136,11 +136,15 @@ class AMDSMILogger():
table_values += string_value.rjust(10)
elif key in ('mem_clock', 'vram_used'):
table_values += string_value.rjust(11)
elif key in ('vram_total', 'vram_free'):
table_values += string_value.rjust(12)
elif key == 'vram_percent':
table_values += string_value.rjust(9)
elif key in ('encoder', 'decoder'):
table_values += string_value.rjust(7)
elif key in ('vclock', 'dclock'):
table_values += string_value.rjust(10)
elif key in ('single_bit_ecc', 'double_bit_ecc', 'pcie_bw', 'vram_total'):
elif key in ('single_bit_ecc', 'double_bit_ecc', 'pcie_bw'):
table_values += string_value.rjust(12)
elif key in ('pcie_replay'):
table_values += string_value.rjust(13)