SWDEV-435197 - Add process table to CLI monitor subcommand

Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Change-Id: Ibe06f4a4be619ae9cba909c2474b0e482eeb87d5


[ROCm/amdsmi commit: 92f014059e]
Этот коммит содержится в:
Maisam Arif
2024-06-09 16:31:34 -05:00
родитель 5a62ef2b7c
Коммит 3ef31b3ed0
6 изменённых файлов: 528 добавлений и 96 удалений
+13
Просмотреть файл
@@ -8,6 +8,19 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
### Additions
- **Added optional process table under `amd-smi monitor -q`**.
The monitor subcommand within the CLI Tool now has the `-q` option to enable an optional process table underneath the original monitored output.
```shell
$ amd-smi monitor -q
GPU POWER GPU_TEMP MEM_TEMP GFX_UTIL GFX_CLOCK MEM_UTIL MEM_CLOCK ENC_UTIL ENC_CLOCK DEC_UTIL DEC_CLOCK SINGLE_ECC DOUBLE_ECC PCIE_REPLAY VRAM_USED VRAM_TOTAL PCIE_BW
0 199 W 103 °C 84 °C 99 % 1920 MHz 31 % 1000 MHz N/A 0 MHz N/A 0 MHz 0 0 0 1235 MB 16335 MB N/A Mb/s
PROCESS INFO:
GPU NAME PID GTT_MEM CPU_MEM VRAM_MEM MEM_USAGE GFX ENC
0 rvs 1564865 0.0 B 0.0 B 1.1 GB 0.0 B 0 ns 0 ns
```
- **Added Handling to detect VMs with passthrough configurations in CLI Tool**.
CLI Tool had only allowed a restricted set of options for Virtual Machines with passthrough GPUs. Now we offer an expanded set of functions available to passthrough configured GPUs.
+119 -32
Просмотреть файл
@@ -2646,23 +2646,23 @@ class AMDSMICommands():
if self.logger.is_human_readable_format():
process_info['mem_usage'] = self.helpers.convert_bytes_to_readable(process_info['mem_usage'])
for usage_metric in process_info['usage']:
process_info['usage'][usage_metric] = f"{process_info['usage'][usage_metric]} {engine_usage_unit}"
for usage_metric in process_info['memory_usage']:
process_info['memory_usage'][usage_metric] = self.helpers.convert_bytes_to_readable(process_info['memory_usage'][usage_metric])
elif self.logger.is_json_format():
process_info['mem_usage'] = {"value" : process_info['mem_usage'],
"unit" : memory_usage_unit}
process_info["memory_usage"][usage_metric] = self.helpers.convert_bytes_to_readable(process_info["memory_usage"][usage_metric])
memory_usage_unit = ""
for usage_metric in process_info['usage']:
process_info['usage'][usage_metric] = {"value" : process_info['usage'][usage_metric],
"unit" : engine_usage_unit}
process_info['mem_usage'] = self.helpers.unit_format(self.logger,
process_info['mem_usage'],
memory_usage_unit)
for usage_metric in process_info['memory_usage']:
process_info['memory_usage'][usage_metric] = {"value" : process_info['memory_usage'][usage_metric],
"unit" : memory_usage_unit}
for usage_metric in process_info['usage']:
process_info['usage'][usage_metric] = self.helpers.unit_format(self.logger,
process_info['usage'][usage_metric],
engine_usage_unit)
for usage_metric in process_info['memory_usage']:
process_info['memory_usage'][usage_metric] = self.helpers.unit_format(self.logger,
process_info['memory_usage'][usage_metric],
memory_usage_unit)
filtered_process_values.append({'process_info': process_info})
@@ -3967,7 +3967,7 @@ class AMDSMICommands():
def monitor(self, args, multiple_devices=False, watching_output=False, gpu=None,
watch=None, watch_time=None, iterations=None, power_usage=None,
temperature=None, gfx_util=None, mem_util=None, encoder=None, decoder=None,
ecc=None, vram_usage=None, pcie=None):
ecc=None, vram_usage=None, pcie=None, process=None):
""" Populate a table with each GPU as an index to rows of targeted data
Args:
@@ -3986,6 +3986,7 @@ class AMDSMICommands():
ecc (bool, optional): Value override for args.ecc. Defaults to None.
vram_usage (bool, optional): Value override for args.vram_usage. Defaults to None.
pcie (bool, optional): Value override for args.pcie. Defaults to None.
process (bool, optional): Value override for args.process. Defaults to None.
Raises:
ValueError: Value error if no gpu value is provided
@@ -4023,12 +4024,15 @@ class AMDSMICommands():
args.vram_usage = vram_usage
if pcie:
args.pcie = pcie
if process:
args.process = process
# Handle No GPU passed
if args.gpu == None:
args.gpu = self.device_handles
# If all arguments are False, then print all values
# Don't include process in this logic as it's an optional edge case
if not any([args.power_usage, args.temperature, args.gfx, args.mem,
args.encoder, args.decoder, args.ecc,
args.vram_usage, args.pcie]):
@@ -4049,23 +4053,28 @@ class AMDSMICommands():
for gpu in args.gpu:
stored_gpus.append(gpu)
# Store output from multiple devices
# Store output from multiple devices without printing to console
for device_handle in args.gpu:
self.monitor(args, multiple_devices=True, watching_output=watching_output, gpu=device_handle)
# Reload original gpus
args.gpu = stored_gpus
# Print multiple device output
self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output, tabular=True)
dual_csv_output = False
if args.process:
if self.logger.is_csv_format():
dual_csv_output = True
# Flush the output
self.logger.print_output(multiple_device_enabled=True,
watching_output=watching_output,
tabular=True,
dual_csv_output=dual_csv_output)
# Add output to total watch output and clear multiple device output
if watching_output:
self.logger.store_watch_output(multiple_device_enabled=True)
# Flush the watching output
self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output, tabular=True)
return
elif len(args.gpu) == 1:
args.gpu = args.gpu[0]
@@ -4077,15 +4086,11 @@ class AMDSMICommands():
# Get gpu_id for logging
gpu_id = self.helpers.get_gpu_id_from_device_handle(args.gpu)
# Clear the table header
self.logger.table_header = ''
# Store timestamp for watch output
# Reset the table header and store the timestamp if watch output is enabled
self.logger.table_header = 'GPU'
if watching_output:
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
self.logger.table_header += 'TIMESTAMP'.rjust(10) + ' '
self.logger.table_header += 'GPU'
self.logger.table_header = 'TIMESTAMP'.rjust(10) + ' ' + self.logger.table_header
if args.power_usage:
try:
@@ -4332,16 +4337,98 @@ class AMDSMICommands():
self.logger.store_output(args.gpu, 'values', monitor_values)
# Initialize dual_csv_output; applicable to process only
dual_csv_output = False
# Store process list separately
if args.process:
# Populate initial processes
try:
process_list = amdsmi_interface.amdsmi_get_gpu_process_list(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
logging.debug("Failed to get process list for gpu %s | %s", gpu_id, e.get_error_info())
raise e
# Clean processes dictionary
filtered_process_values = []
for process_info in process_list:
process_info['mem_usage'] = process_info.pop('mem')
process_info['usage'] = process_info.pop('engine_usage')
engine_usage_unit = "ns"
memory_usage_unit = "B"
if self.logger.is_human_readable_format():
process_info['mem_usage'] = self.helpers.convert_bytes_to_readable(process_info['mem_usage'])
for usage_metric in process_info['memory_usage']:
process_info["memory_usage"][usage_metric] = self.helpers.convert_bytes_to_readable(process_info["memory_usage"][usage_metric])
memory_usage_unit = ""
process_info['mem_usage'] = self.helpers.unit_format(self.logger,
process_info['mem_usage'],
memory_usage_unit)
for usage_metric in process_info['usage']:
process_info['usage'][usage_metric] = self.helpers.unit_format(self.logger,
process_info['usage'][usage_metric],
engine_usage_unit)
for usage_metric in process_info['memory_usage']:
process_info['memory_usage'][usage_metric] = self.helpers.unit_format(self.logger,
process_info['memory_usage'][usage_metric],
memory_usage_unit)
filtered_process_values.append({'process_info': process_info})
# If no processes are populated then we populate an N/A placeholder
if not filtered_process_values:
logging.debug("Monitor - Failed to detect any process on gpu %s", gpu_id)
filtered_process_values.append({'process_info': "N/A"})
for index, process in enumerate(filtered_process_values):
if process['process_info'] == "N/A":
filtered_process_values[index]['process_info'] = "No running processes detected"
# Build the process table's title and header
self.logger.secondary_table_title = "PROCESS INFO"
self.logger.secondary_table_header = 'GPU'.rjust(3) + "NAME".rjust(22) + "PID".rjust(9) + "GTT_MEM".rjust(10) + \
"CPU_MEM".rjust(10) + "VRAM_MEM".rjust(10) + "MEM_USAGE".rjust(11) + \
"GFX".rjust(8) + "ENC".rjust(8)
if watching_output:
self.logger.secondary_table_header = 'TIMESTAMP'.rjust(10) + ' ' + self.logger.secondary_table_header
logging.debug(f"Monitor - Process Info for GPU {gpu_id} | {filtered_process_values}")
if self.logger.is_json_format():
self.logger.store_output(args.gpu, 'process_list', filtered_process_values)
if self.logger.is_human_readable_format():
# Print out process in flattened format
# The logger detects if process list is present and pulls it out and prints
# that table with timestamp, gpu, and prints headers separately
self.logger.store_output(args.gpu, 'process_list', filtered_process_values)
if self.logger.is_csv_format():
dual_csv_output = True
# The logger detects if process list is present and pulls it out and prints
# that table with timestamp, gpu, and prints headers separately
self.logger.store_output(args.gpu, 'process_list', filtered_process_values)
# Now handling the single gpu case only
if multiple_devices:
self.logger.store_multiple_device_output()
return # Skip printing when there are multiple devices
return
self.logger.print_output(watching_output=watching_output, tabular=True)
if watching_output: # End of single gpu add to watch_output
if watching_output and not self.logger.destination == "stdout": # End of single gpu add to watch_output
self.logger.store_watch_output(multiple_device_enabled=False)
self.logger.print_output(multiple_device_enabled=False, watching_output=watching_output, tabular=True, dual_csv_output=dual_csv_output)
def rocm_smi(self, args):
print("Placeholder for rocm-smi legacy commands")
+1 -1
Просмотреть файл
@@ -738,7 +738,7 @@ class AMDSMIHelpers():
if logger.is_json_format():
return {"value": value, "unit": unit}
if logger.is_human_readable_format():
return f"{value} {unit}"
return f"{value} {unit}".rstrip()
return f"{value}"
class SI_Unit(float, Enum):
+387 -63
Просмотреть файл
@@ -38,8 +38,10 @@ class AMDSMILogger():
self.watch_output = []
self.format = format # csv, json, or human_readable
self.destination = destination # stdout, path to a file (append)
self.table_header = ""
self.table_title = ""
self.table_header = ""
self.secondary_table_title = ""
self.secondary_table_header = ""
self.helpers = AMDSMIHelpers()
@@ -102,47 +104,89 @@ class AMDSMILogger():
return output_dict
def _convert_json_to_human_readable(self, json_object: Dict[str, any], tabular=False):
def _convert_json_to_tabular(self, json_object: Dict[str, any]):
# TODO make dynamic
if tabular:
table_values = ''
for key, value in json_object.items():
value = str(value)
if key == 'gpu':
table_values += value.rjust(3)
elif key == 'timestamp':
table_values += value.rjust(10) + ' '
elif key == 'power_usage':
table_values += value.rjust(7)
elif key in ('gfx_clock', 'mem_clock', 'encoder_clock', 'decoder_clock', 'vram_used'):
table_values += value.rjust(11)
elif key == 'vram_total' or 'ecc' in key:
table_values += value.rjust(12)
elif key in ['pcie_replay']:
table_values += value.rjust(13)
# Only for handling topology tables
elif 'gpu_' in key:
table_values += value.ljust(13)
# Only for handling xgmi tables
elif key == "gpu#":
table_values += value.ljust(7)
elif key == "bdf":
table_values += value.ljust(13)
elif "bdf_" in key:
table_values += value.ljust(13)
elif key == "bit_rate":
table_values += value.ljust(9)
elif key == "max_bandwidth":
table_values += value.ljust(14)
elif key == "link_type":
table_values += value.ljust(10)
elif key == "RW":
table_values += " " + value.ljust(52)
# Default spacing
else:
table_values += value.rjust(10)
return table_values.rstrip()
table_values = ''
stored_gpu = ''
stored_timestamp = ''
for key, value in json_object.items():
string_value = str(value)
if key == 'gpu':
stored_gpu = string_value
table_values += string_value.rjust(3)
elif key == 'timestamp':
stored_timestamp = string_value
table_values += string_value.rjust(10) + ' '
elif key == 'power_usage':
table_values += string_value.rjust(7)
elif key in ('gfx_clock', 'mem_clock', 'encoder_clock', 'decoder_clock', 'vram_used'):
table_values += string_value.rjust(11)
elif key == 'vram_total' or 'ecc' in key:
table_values += string_value.rjust(12)
elif key in ['pcie_replay']:
table_values += string_value.rjust(13)
# Only for handling topology tables
elif 'gpu_' in key:
table_values += string_value.ljust(13)
# Only for handling xgmi tables
elif key == "gpu#":
table_values += string_value.ljust(7)
elif key == "bdf":
table_values += string_value.ljust(13)
elif "bdf_" in key:
table_values += string_value.ljust(13)
elif key == "bit_rate":
table_values += string_value.ljust(9)
elif key == "max_bandwidth":
table_values += string_value.ljust(14)
elif key == "link_type":
table_values += string_value.ljust(10)
elif key == "RW":
table_values += " " + string_value.ljust(52)
elif key == "process_list":
#Add an additional padding between the first instance of GPU and NAME
table_values += ' '
for process_dict in value:
if process_dict['process_info'] == "No running processes detected":
# Add N/A for empty process_info
table_values += "N/A".rjust(20) + "N/A".rjust(9) + "N/A".rjust(10) + \
"N/A".rjust(10) + "N/A".rjust(10) + "N/A".rjust(11) + \
"N/A".rjust(8) + "N/A".rjust(8) + '\n'
else:
for process_key, process_value in process_dict['process_info'].items():
string_process_value = str(process_value)
if process_key == "name":
# Truncate name if too long
process_name = string_process_value[:20]
if process_name == "":
process_name = "N/A"
table_values += process_name.rjust(20)
elif process_key == "pid":
table_values += string_process_value.rjust(9)
elif process_key == "memory_usage":
for memory_key, memory_value in process_value.items():
table_values += str(memory_value).rjust(10)
elif process_key == "mem_usage":
table_values += string_process_value.rjust(11)
elif process_key == "usage":
for usage_key, usage_value in process_value.items():
table_values += str(usage_value).rjust(8)
# Add the stored gpu and stored timestamp to the next line
table_values += '\n'
if stored_timestamp:
table_values += stored_timestamp.ljust(10) + ' '
table_values += stored_gpu.rjust(3) + ' '
# Remove excess two values after a new line in table_values
table_values = table_values[:table_values.rfind('\n')]
table_values += '\n'
# Default spacing
else:
table_values += string_value.rjust(10)
return table_values.rstrip()
def _convert_json_to_human_readable(self, json_object: Dict[str, any]):
# First Capitalize all keys in the json object
capitalized_json = self._capitalize_keys(json_object)
@@ -203,9 +247,7 @@ class AMDSMILogger():
Args:
target_dict (dict): Dictionary to flatten
parent_key (str):
"""
# print(target_dict)
output_dict = {}
# First flatten out values
@@ -398,7 +440,7 @@ class AMDSMILogger():
self.output = {}
def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False):
def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False, dual_csv_output=False):
""" Print current output acording to format and then destination
params:
multiple_device_enabled (bool) - True if printing output from
@@ -411,12 +453,19 @@ class AMDSMILogger():
self._print_json_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
elif self.is_csv_format():
self._print_csv_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
if dual_csv_output:
self._print_dual_csv_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
else:
self._print_csv_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
elif self.is_human_readable_format():
self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output,
tabular=tabular)
# If tabular output is enabled, redirect to _print_tabular_output
if tabular:
self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output)
else:
self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
def _print_json_output(self, multiple_device_enabled=False, watching_output=False):
@@ -493,19 +542,178 @@ class AMDSMILogger():
writer.writerows(stored_csv_output)
def _print_dual_csv_output(self, multiple_device_enabled=False, watching_output=False):
    """Emit stored output as two CSV tables: device metrics, then processes.

    Every stored record is split into a "primary" row (all keys except
    ``process_list``) and zero or more "secondary" rows (one per entry of
    ``process_list``, prefixed with ``gpu`` and, when watching, ``timestamp``).
    Rows inside each table are padded with ``"N/A"`` so they share one header.

    Args:
        multiple_device_enabled (bool): Read records from
            ``self.multiple_device_output`` instead of ``self.output``.
        watching_output (bool): Include the ``timestamp`` column in process
            rows. When the destination is a file, the file is rewritten from
            ``self.watch_output``; otherwise the current output is appended.
    """

    def split_records(records, with_timestamp):
        # Separate each record into its metric row and its per-process rows.
        metric_rows = []
        process_rows = []
        for record in records:
            if 'process_list' in record:
                for entry in record['process_list']:
                    row = {}
                    if with_timestamp:
                        row['timestamp'] = record['timestamp']
                    row['gpu'] = record['gpu']
                    info = entry["process_info"]
                    if isinstance(info, dict):
                        for field, field_value in info.items():
                            if isinstance(field_value, dict):
                                # Nested dicts are flattened one level into the row
                                row.update(field_value)
                            else:
                                row[field] = field_value
                    else:
                        # No process found: keep the placeholder value as-is
                        row["process_info"] = info
                    process_rows.append(row)
            metric_rows.append({key: value for key, value in record.items()
                                if key != 'process_list'})
        return metric_rows, process_rows

    def normalize(rows):
        # Pad every row to the same key set and return the header in
        # first-seen order, so the column order is deterministic.
        header = list(dict.fromkeys(key for row in rows for key in row))
        for row in rows:
            for key in header:
                row.setdefault(key, "N/A")
        return header

    def write_table(target, rows):
        # Write one header line followed by all rows to a file-like target.
        writer = csv.DictWriter(target, normalize(rows))
        writer.writeheader()
        writer.writerows(rows)

    if multiple_device_enabled:
        stored_csv_output = self.multiple_device_output
    elif isinstance(self.output, list):
        # Previously left unbound (UnboundLocalError) when output was a list
        stored_csv_output = self.output
    else:
        stored_csv_output = [self.output]

    if self.destination == 'stdout':
        primary_csv_output, secondary_csv_output = split_records(stored_csv_output,
                                                                 watching_output)
        for rows in (primary_csv_output, secondary_csv_output):
            if rows:
                csv_stdout_output = self.CsvStdoutBuilder()
                write_table(csv_stdout_output, rows)
                print(str(csv_stdout_output))
        if watching_output:
            print()
        return

    # File destination: a watch rewrites the whole file from the accumulated
    # watch output; a one-shot call appends the current output.
    if watching_output:
        records, mode = self.watch_output, 'w'
    else:
        records, mode = stored_csv_output, 'a'
    primary_csv_output, secondary_csv_output = split_records(records, watching_output)

    with self.destination.open(mode, newline='') as output_file:
        if primary_csv_output:
            write_table(output_file, primary_csv_output)
        if secondary_csv_output:
            # Blank line separates the metric table from the process table
            output_file.write("\n")
            write_table(output_file, secondary_csv_output)
def _print_human_readable_output(self, multiple_device_enabled=False, watching_output=False, tabular=False):
human_readable_output = ''
# If tabular output is enabled, redirect to _print_tabular_output
if tabular:
if self.table_title:
human_readable_output += self.table_title + ':\n'
human_readable_output += self.table_header + '\n'
self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output)
return
human_readable_output = ''
if multiple_device_enabled:
for output in self.multiple_device_output:
human_readable_output += self._convert_json_to_human_readable(output, tabular=tabular)
human_readable_output += '\n'
for device_output in self.multiple_device_output:
human_readable_output += self._convert_json_to_human_readable(device_output) + '\n'
else:
human_readable_output += self._convert_json_to_human_readable(self.output, tabular=tabular)
human_readable_output += self._convert_json_to_human_readable(self.output)
if self.destination == 'stdout':
try:
@@ -518,15 +726,131 @@ class AMDSMILogger():
if watching_output:
with self.destination.open('w') as output_file:
human_readable_output = ''
if tabular:
if self.table_title:
human_readable_output += self.table_title + '\n'
human_readable_output += self.table_header + '\n'
for output in self.watch_output:
human_readable_output += self._convert_json_to_human_readable(output, tabular=tabular)
if tabular:
human_readable_output += '\n'
human_readable_output += self._convert_json_to_human_readable(output)
output_file.write(human_readable_output + '\n')
else:
with self.destination.open('a') as output_file:
output_file.write(human_readable_output + '\n')
def _print_tabular_output(self, multiple_device_enabled=False, watching_output=False):
    """Render stored output as a primary table plus an optional process table.

    Each record is converted with ``self._convert_json_to_tabular``: once for
    every key except ``process_list`` (primary table) and, when the record has
    a ``process_list``, once for a ``timestamp``/``gpu``/``process_list``
    subset (secondary table). Titles and headers come from ``table_title``,
    ``table_header``, ``secondary_table_title`` and ``secondary_table_header``.

    Args:
        multiple_device_enabled (bool): Use ``self.multiple_device_output``
            (when non-empty) instead of the single ``self.output`` record.
        watching_output (bool): Include ``timestamp`` in process rows. For a
            file destination the file is rewritten from ``self.watch_output``;
            otherwise the current tables are appended.
    """

    def build_tables(records, with_timestamp):
        # Convert records into the (primary, secondary) table strings,
        # headings included; either string is empty when it has no rows.
        primary_rows = []
        process_rows = []
        for record in records:
            if 'process_list' in record:
                process_record = {}
                if with_timestamp:
                    process_record['timestamp'] = record['timestamp']
                process_record['gpu'] = record['gpu']
                process_record['process_list'] = record['process_list']
                process_rows.append(self._convert_json_to_tabular(process_record))
            metrics = {key: value for key, value in record.items()
                       if key != 'process_list'}
            primary_rows.append(self._convert_json_to_tabular(metrics))

        primary = '\n'.join(primary_rows).rstrip()
        secondary = '\n'.join(process_rows).rstrip()

        if primary:
            heading = self.table_title + ':\n' if self.table_title else ''
            primary = heading + self.table_header + '\n' + primary
        # Currently only the process table uses the secondary heading
        if secondary:
            heading = ''
            if self.secondary_table_title:
                heading = '\n' + self.secondary_table_title + ':\n'
            secondary = heading + self.secondary_table_header + '\n' + secondary
        return primary, secondary

    if multiple_device_enabled and self.multiple_device_output:
        current_records = self.multiple_device_output
    else:  # Single device output
        current_records = [self.output]

    if self.destination == 'stdout':
        primary_table, secondary_table = build_tables(current_records, watching_output)
        sections = [primary_table]
        if secondary_table:
            sections.append(secondary_table)
        if watching_output:
            sections.append("\n")
        # One print call, so the ascii fallback can never repeat a table
        # that was already written before the encoding error.
        console_text = "\n".join(sections)
        try:
            # printing as unicode may fail if locale is not set properly
            print(console_text)
        except UnicodeEncodeError:
            # print as ascii, ignore incompatible characters
            print(console_text.encode('ascii', 'ignore').decode('ascii'))
        return

    if watching_output:
        # Rewrite the file with everything gathered during the watch
        primary_table, secondary_table = build_tables(self.watch_output, True)
        with self.destination.open('w') as output_file:
            output_file.write(primary_table)
            if secondary_table:
                output_file.write("\n" + secondary_table)
    else:
        # Append this single output to the file
        primary_table, secondary_table = build_tables(current_records, False)
        with self.destination.open('a') as output_file:
            output_file.write(primary_table + '\n')
            if secondary_table:
                # End with a newline so the next append starts on its own line
                output_file.write(secondary_table + '\n')
+2
Просмотреть файл
@@ -1126,6 +1126,7 @@ class AMDSMIParser(argparse.ArgumentParser):
ecc_help = "Monitor ECC single bit, ECC double bit, and PCIe replay error counts"
mem_usage_help = "Monitor memory usage in MB"
pcie_bandwidth_help = "Monitor PCIe bandwidth in Mb/s"
process_help = "Enable Process information table below monitor output"
# Create monitor subparser
monitor_parser = subparsers.add_parser('monitor', help=monitor_help, description=monitor_subcommand_help)
@@ -1148,6 +1149,7 @@ class AMDSMIParser(argparse.ArgumentParser):
monitor_parser.add_argument('-e', '--ecc', action='store_true', required=False, help=ecc_help)
monitor_parser.add_argument('-v', '--vram-usage', action='store_true', required=False, help=mem_usage_help)
monitor_parser.add_argument('-r', '--pcie', action='store_true', required=False, help=pcie_bandwidth_help)
monitor_parser.add_argument('-q', '--process', action='store_true', required=False, help=process_help)
def _add_rocm_smi_parser(self, subparsers, func):
+6
Просмотреть файл
@@ -677,8 +677,11 @@ Output: Dictionary with fields
Field | Description
---|---
`current_socket_power` | current socket power
`average_socket_power` | average socket power
`gfx_voltage` | voltage gfx
`soc_voltage` | voltage soc
`mem_voltage` | voltage mem
`power_limit` | power limit
Exceptions that can be thrown by `amdsmi_get_power_info` function:
@@ -697,8 +700,11 @@ try:
else:
for device in devices:
power_measure = amdsmi_get_power_info(device)
print(power_measure['current_socket_power'])
print(power_measure['average_socket_power'])
print(power_measure['gfx_voltage'])
print(power_measure['soc_voltage'])
print(power_measure['mem_voltage'])
print(power_measure['power_limit'])
except AmdSmiException as e:
print(e)