Added GTT Memory to default output process table (#480)
* Added GTT Memory to default command and adjusted table format
---------
Signed-off-by: gabrpham <Gabriel.Pham@amd.com>
[ROCm/amdsmi commit: 940ece6813]
Šī revīzija ir iekļauta:
revīziju iesūtīja
GitHub
vecāks
b1753ad3b3
revīzija
dfaf8386fa
@@ -13,57 +13,49 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
|
||||
```console
|
||||
$ amd-smi
|
||||
+------------------------------------------------------------------------------+
|
||||
| AMD-SMI 26.10.10+42441c78 amdgpu version: 6.15.5 ROCm version: 7.0.0 |
|
||||
|--------------------------------------+---------------------------------------|
|
||||
| BDF GPU-Name | Mem-Util Temp UECC Power-Usage |
|
||||
| GPU HIP-ID OAM-ID Partition-Mode | GFX-Util Fan Memory-Usage |
|
||||
|======================================+=======================================|
|
||||
| 0000:0c:00.0 AMD Instinct MI300X | 0 % 37 °C 0 141/750 W |
|
||||
| 0 0 2 SPX/NPS1 | 0 % N/A 283/196592 MB |
|
||||
|--------------------------------------+---------------------------------------|
|
||||
| 0000:22:00.0 AMD Instinct MI300X | 0 % 40 °C 0 155/750 W |
|
||||
| 1 1 1 SPX/NPS1 | 0 % N/A 284/196592 MB |
|
||||
|--------------------------------------+---------------------------------------|
|
||||
| 0000:38:00.0 AMD Instinct MI300X | 0 % 37 °C 0 141/750 W |
|
||||
| 2 2 0 SPX/NPS1 | 0 % N/A 283/196592 MB |
|
||||
|--------------------------------------+---------------------------------------|
|
||||
| 0000:5c:00.0 AMD Instinct MI300X | 0 % 37 °C 0 139/750 W |
|
||||
| 3 3 3 SPX/NPS1 | 0 % N/A 283/196592 MB |
|
||||
|--------------------------------------+---------------------------------------|
|
||||
| 0000:9f:00.0 AMD Instinct MI300X | 0 % 37 °C 0 140/750 W |
|
||||
| 4 4 7 SPX/NPS1 | 0 % N/A 283/196592 MB |
|
||||
|--------------------------------------+---------------------------------------|
|
||||
| 0000:af:00.0 AMD Instinct MI300X | 0 % 37 °C 0 142/750 W |
|
||||
| 5 5 5 SPX/NPS1 | 0 % N/A 283/196592 MB |
|
||||
|--------------------------------------+---------------------------------------|
|
||||
| 0000:bf:00.0 AMD Instinct MI300X | 0 % 36 °C 0 138/750 W |
|
||||
| 6 6 4 SPX/NPS1 | 0 % N/A 283/196592 MB |
|
||||
|--------------------------------------+---------------------------------------|
|
||||
| 0000:df:00.0 AMD Instinct MI300X | 0 % 40 °C 0 138/750 W |
|
||||
| 7 7 6 SPX/NPS1 | 0 % N/A 283/196592 MB |
|
||||
+--------------------------------------+---------------------------------------+
|
||||
+------------------------------------------------------------------------------+
|
||||
| Processes: |
|
||||
| GPU PID Process Name VRAM_MEM MEM_USAGE NUM_CU |
|
||||
|==============================================================================|
|
||||
| 0 269867 rvs 17.9 GB 19.2 GB 38 |
|
||||
| 0 269888 rvs 17.9 GB 19.2 GB 38 |
|
||||
| 1 269867 rvs 17.9 GB 19.2 GB 38 |
|
||||
| 1 269888 rvs 17.9 GB 19.2 GB 38 |
|
||||
| 2 269867 rvs 17.9 GB 19.2 GB 38 |
|
||||
| 2 269888 rvs 17.9 GB 19.2 GB 38 |
|
||||
| 3 269867 rvs 17.9 GB 19.2 GB 76 |
|
||||
| 3 269888 rvs 17.9 GB 19.2 GB 0 |
|
||||
| 4 269867 rvs 17.9 GB 19.0 GB 37 |
|
||||
| 4 269888 rvs 17.9 GB 19.2 GB 36 |
|
||||
| 5 269867 rvs 17.9 GB 19.0 GB 76 |
|
||||
| 5 269888 rvs 17.9 GB 19.2 GB 0 |
|
||||
| 6 269867 rvs 17.9 GB 19.0 GB 76 |
|
||||
| 6 269888 rvs 17.9 GB 19.2 GB 0 |
|
||||
| 7 269867 rvs 17.9 GB 19.2 GB 34 |
|
||||
| 7 269888 rvs 17.9 GB 19.2 GB 38 |
|
||||
+------------------------------------------------------------------------------+
|
||||
+------------------------------------------------------------------------------+
|
||||
| AMD-SMI 26.0.0+eaa54ecc amdgpu version: 6.12.12 ROCm version: 7.0.0 |
|
||||
|-------------------------------------+----------------------------------------|
|
||||
| BDF GPU-Name | Mem-Uti Temp UEC Power-Usage |
|
||||
| GPU HIP-ID OAM-ID Partition-Mode | GFX-Uti Fan Mem-Usage |
|
||||
|=====================================+========================================|
|
||||
| 0000:0c:00.0 AMD Instinct MI300X | 13 % 60 °C 0 734/750 W |
|
||||
| 0 0 2 SPX/NPS1 | 98 % N/A 4976/196592 MB |
|
||||
|-------------------------------------+----------------------------------------|
|
||||
| 0000:22:00.0 AMD Instinct MI300X | 10 % 60 °C 0 652/750 W |
|
||||
| 1 1 1 SPX/NPS1 | 83 % N/A 4976/196592 MB |
|
||||
|-------------------------------------+----------------------------------------|
|
||||
| 0000:38:00.0 AMD Instinct MI300X | 5 % 55 °C 0 376/750 W |
|
||||
| 2 2 0 SPX/NPS1 | 34 % N/A 4976/196592 MB |
|
||||
|-------------------------------------+----------------------------------------|
|
||||
| 0000:5c:00.0 AMD Instinct MI300X | 2 % 57 °C 0 234/750 W |
|
||||
| 3 3 3 SPX/NPS1 | 12 % N/A 4976/196592 MB |
|
||||
|-------------------------------------+----------------------------------------|
|
||||
| 0000:9f:00.0 AMD Instinct MI300X | 1 % 57 °C 0 219/750 W |
|
||||
| 4 4 7 SPX/NPS1 | 11 % N/A 4976/196592 MB |
|
||||
|-------------------------------------+----------------------------------------|
|
||||
| 0000:af:00.0 AMD Instinct MI300X | 3 % 61 °C 0 295/750 W |
|
||||
| 5 5 5 SPX/NPS1 | 23 % N/A 4976/196592 MB |
|
||||
|-------------------------------------+----------------------------------------|
|
||||
| 0000:bf:00.0 AMD Instinct MI300X | 5 % 58 °C 0 367/750 W |
|
||||
| 6 6 4 SPX/NPS1 | 36 % N/A 4976/196592 MB |
|
||||
|-------------------------------------+----------------------------------------|
|
||||
| 0000:df:00.0 AMD Instinct MI300X | 6 % 62 °C 0 434/750 W |
|
||||
| 7 7 6 SPX/NPS1 | 47 % N/A 4976/196592 MB |
|
||||
+-------------------------------------+----------------------------------------+
|
||||
+------------------------------------------------------------------------------+
|
||||
| Processes: |
|
||||
| GPU PID Process Name GTT_MEM VRAM_MEM MEM_USAGE NUM_CU |
|
||||
|==============================================================================|
|
||||
| 0 1253994 rvs 2.0 MB 2.4 GB 4.6 GB 0 |
|
||||
| 1 1253994 rvs 2.0 MB 2.4 GB 4.6 GB 0 |
|
||||
| 2 1253994 rvs 2.0 MB 2.5 GB 4.6 GB 0 |
|
||||
| 3 1253994 rvs 2.0 MB 2.5 GB 4.6 GB 0 |
|
||||
| 4 1253994 rvs 2.0 MB 2.4 GB 4.6 GB 114 |
|
||||
| 5 1253994 rvs 2.0 MB 2.4 GB 4.6 GB 114 |
|
||||
| 6 1253994 rvs 2.0 MB 2.4 GB 4.6 GB 114 |
|
||||
| 7 1253994 rvs 2.0 MB 2.4 GB 4.6 GB 0 |
|
||||
+------------------------------------------------------------------------------+
|
||||
```
|
||||
|
||||
- **Added support for GPU metrics 1.8**.
|
||||
|
||||
@@ -6614,7 +6614,7 @@ class AMDSMICommands():
|
||||
hip_id = "N/A"
|
||||
gpu_info_dict.update({"hip_id": hip_id})
|
||||
|
||||
# mem utilization, GPU utilization, power usage, and temperature
|
||||
# mem utilization, GPU utilization, power usage, and temperature from gpu_metrics
|
||||
if gpu_metrics != "N/A":
|
||||
mem_util = gpu_metrics['average_umc_activity']
|
||||
mem_util = round(mem_util)
|
||||
@@ -6683,10 +6683,11 @@ class AMDSMICommands():
|
||||
try:
|
||||
raw_process_list = amdsmi_interface.amdsmi_get_gpu_process_list(processor)
|
||||
for proc in raw_process_list:
|
||||
proc_info_dict = {"gpu": "N/A", "pid": "N/A", "name": "N/A", "vram": "N/A", "mem_usage": "N/A", "cu_occupancy": "N/A"}
|
||||
proc_info_dict = {"gpu": "N/A", "pid": "N/A", "name": "N/A","gtt": "N/A", "vram": "N/A", "mem_usage": "N/A", "cu_occupancy": "N/A"}
|
||||
proc_info_dict['gpu'] = gpu_id
|
||||
proc_info_dict['pid'] = proc['pid']
|
||||
proc_info_dict['name'] = proc['name']
|
||||
proc_info_dict['gtt'] = self.helpers.convert_bytes_to_readable(proc['memory_usage']['gtt_mem'])
|
||||
proc_info_dict['vram'] = self.helpers.convert_bytes_to_readable(proc['memory_usage']['vram_mem'])
|
||||
proc_info_dict['mem_usage'] = self.helpers.convert_bytes_to_readable(proc['mem'])
|
||||
proc_info_dict['cu_occupancy'] = str(proc['cu_occupancy'])
|
||||
|
||||
@@ -986,9 +986,9 @@ class AMDSMILogger():
|
||||
def print_default_output(self, output: Dict):
|
||||
# some template lines
|
||||
default_line_1 = "+------------------------------------------------------------------------------+"
|
||||
default_line_2 = "|--------------------------------------+---------------------------------------|"
|
||||
default_line_3 = "|======================================+=======================================|"
|
||||
default_line_4 = "+--------------------------------------+---------------------------------------+"
|
||||
default_line_2 = "|-------------------------------------+----------------------------------------|"
|
||||
default_line_3 = "|=====================================+========================================|"
|
||||
default_line_4 = "+-------------------------------------+----------------------------------------+"
|
||||
default_line_5 = "|==============================================================================|"
|
||||
|
||||
# print the version information first
|
||||
@@ -1008,8 +1008,8 @@ class AMDSMILogger():
|
||||
print(default_line_1)
|
||||
print("| AMD-SMI {0:20s} amdgpu version: {1:8s} ROCm version: {2:8s} |".format(amd_smi_version.ljust(20), amdgpu_version, rocm_version))
|
||||
print(default_line_2)
|
||||
print("| BDF GPU-Name | Mem-Util Temp UECC Power-Usage |")
|
||||
print("| GPU HIP-ID OAM-ID Partition-Mode | GFX-Util Fan Memory-Usage |")
|
||||
print("| BDF GPU-Name | Mem-Uti Temp UEC Power-Usage |")
|
||||
print("| GPU HIP-ID OAM-ID Partition-Mode | GFX-Uti Fan Mem-Usage |")
|
||||
print(default_line_3)
|
||||
|
||||
line_count = 0
|
||||
@@ -1026,41 +1026,42 @@ class AMDSMILogger():
|
||||
mem_util = gpu_info['mem_util']
|
||||
if mem_util != "N/A":
|
||||
mem_util = str(mem_util) + " %"
|
||||
mem_util = mem_util.rjust(8)
|
||||
mem_util = mem_util.ljust(5)
|
||||
|
||||
temp = gpu_info['temp']
|
||||
if temp != "N/A":
|
||||
temp = str(temp) + " \u00b0C"
|
||||
temp = temp.rjust(6)
|
||||
|
||||
u_ecc = str(gpu_info['uncorr_ecc']).rjust(5)
|
||||
u_ecc = str(gpu_info['uncorr_ecc']).ljust(5)
|
||||
|
||||
power_usage = gpu_info['power_usage']
|
||||
if power_usage != "N/A":
|
||||
power_usage = f"{gpu_info['power_usage']['current_power']}/{gpu_info['power_usage']['power_limit']} W"
|
||||
power_usage = str(power_usage).rjust(12)
|
||||
print("| {0:12.12s} {1:22.22s} | {2:8.8s} {3:6.6s} {4:5.5s} {5:12.12s} |".format(bdf, market_name, mem_util, temp, u_ecc, power_usage))
|
||||
|
||||
power_usage = str(power_usage).rjust(13)
|
||||
|
||||
gpu_id = str(gpu_info['gpu_id']).rjust(3)
|
||||
hip_id = str(gpu_info['hip_id']).rjust(6)
|
||||
oam_id = str(gpu_info['oam_id']).rjust(7)
|
||||
oam_id = str(gpu_info['oam_id']).rjust(6)
|
||||
partition_modes = str(gpu_info['partition_mode']).rjust(14)
|
||||
|
||||
gfx_util = gpu_info['gfx_util']
|
||||
if gfx_util != "N/A":
|
||||
gfx_util = str(gfx_util) + " %"
|
||||
gfx_util = gfx_util.rjust(8)
|
||||
gfx_util = gfx_util.ljust(5)
|
||||
|
||||
fan = gpu_info['fan']
|
||||
if fan != "N/A":
|
||||
fan = str(fan) + " %"
|
||||
fan = fan.rjust(7)
|
||||
fan = fan.rjust(6)
|
||||
|
||||
mem_usage = gpu_info['mem_usage']
|
||||
if mem_usage != "N/A":
|
||||
mem_usage = f"{gpu_info['mem_usage']['used_vram']}/{gpu_info['mem_usage']['total_vram']} MB"
|
||||
mem_usage = mem_usage.rjust(19)
|
||||
print("| {0:3.3s} {1:6.6s} {2:7.7s} {3:14.14s} | {4:8.8s} {5:7.7s} {6:19.19s} |".format(gpu_id, hip_id, oam_id, partition_modes, gfx_util, fan, mem_usage))
|
||||
mem_usage = mem_usage.rjust(21)
|
||||
|
||||
print("| {0:12.12s} {1:22.22s} | {2:5.5s} {3:6.6s} {4:5.5s} {5:13.13s} |".format(bdf, market_name, mem_util, temp, u_ecc, power_usage))
|
||||
print("| {0:3.3s} {1:6.6s} {2:6.6s} {3:14.14s} | {4:5.5s} {5:6.6s} {6:21.21s} |".format(gpu_id, hip_id, oam_id, partition_modes, gfx_util, fan, mem_usage))
|
||||
|
||||
if line_count < end:
|
||||
print(default_line_2)
|
||||
@@ -1071,18 +1072,19 @@ class AMDSMILogger():
|
||||
# print process list of all GPUs last
|
||||
print(default_line_1)
|
||||
print("| Processes: |")
|
||||
print("| GPU PID Process Name VRAM_MEM MEM_USAGE NUM_CU |")
|
||||
print("| GPU PID Process Name GTT_MEM VRAM_MEM MEM_USAGE NUM_CU |")
|
||||
print(default_line_5)
|
||||
if len(output['processes']) != 0:
|
||||
for process in output['processes']:
|
||||
gpu_id = str(process['gpu']).rjust(4)
|
||||
pid = str(process['pid']).rjust(9)
|
||||
process_name = str(process['name']).ljust(29)
|
||||
vram_mem = str(process['vram']).rjust(9)
|
||||
process_name = str(process['name']).ljust(20)
|
||||
gtt_mem = str(process['gtt']).rjust(8)
|
||||
vram_mem = str(process['vram']).rjust(8)
|
||||
mem_usage = str(process['mem_usage']).rjust(9)
|
||||
cu_occupancy = str(process['cu_occupancy']).rjust(6)
|
||||
print("| {0:4s} {1:9s} {2:29s} {3:9s} {4:9s} {5:6s} |".format(
|
||||
gpu_id, pid, process_name, vram_mem, mem_usage, cu_occupancy))
|
||||
print("| {0:4s} {1:9s} {2:20s} {3:8s} {4:8s} {5:9s} {6:6s} |".format(
|
||||
gpu_id, pid, process_name, gtt_mem, vram_mem, mem_usage, cu_occupancy))
|
||||
else:
|
||||
print("| No running processes found |")
|
||||
print(default_line_1)
|
||||
Atsaukties uz šo jaunā problēmā
Block a user