SWDEV-435197 - Add process table to CLI monitor subcommand
Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Change-Id: Ibe06f4a4be619ae9cba909c2474b0e482eeb87d5
[ROCm/amdsmi commit: 92f014059e]
Этот коммит содержится в:
@@ -8,6 +8,19 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
|
||||
### Additions
|
||||
|
||||
- **Added optional process table under `amd-smi monitor -q`**.
|
||||
The monitor subcommand within the CLI Tool now has the `-q` option to enable an optional process table underneath the original monitored output.
|
||||
|
||||
```shell
|
||||
$ amd-smi monitor -q
|
||||
GPU POWER GPU_TEMP MEM_TEMP GFX_UTIL GFX_CLOCK MEM_UTIL MEM_CLOCK ENC_UTIL ENC_CLOCK DEC_UTIL DEC_CLOCK SINGLE_ECC DOUBLE_ECC PCIE_REPLAY VRAM_USED VRAM_TOTAL PCIE_BW
|
||||
0 199 W 103 °C 84 °C 99 % 1920 MHz 31 % 1000 MHz N/A 0 MHz N/A 0 MHz 0 0 0 1235 MB 16335 MB N/A Mb/s
|
||||
|
||||
PROCESS INFO:
|
||||
GPU NAME PID GTT_MEM CPU_MEM VRAM_MEM MEM_USAGE GFX ENC
|
||||
0 rvs 1564865 0.0 B 0.0 B 1.1 GB 0.0 B 0 ns 0 ns
|
||||
```
|
||||
|
||||
- **Added handling to detect VMs with passthrough configurations in the CLI Tool**.
|
||||
The CLI Tool previously allowed only a restricted set of options for Virtual Machines with passthrough GPUs. Now we offer an expanded set of functions available to passthrough-configured GPUs.
|
||||
|
||||
|
||||
@@ -2646,23 +2646,23 @@ class AMDSMICommands():
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
process_info['mem_usage'] = self.helpers.convert_bytes_to_readable(process_info['mem_usage'])
|
||||
|
||||
for usage_metric in process_info['usage']:
|
||||
process_info['usage'][usage_metric] = f"{process_info['usage'][usage_metric]} {engine_usage_unit}"
|
||||
|
||||
for usage_metric in process_info['memory_usage']:
|
||||
process_info['memory_usage'][usage_metric] = self.helpers.convert_bytes_to_readable(process_info['memory_usage'][usage_metric])
|
||||
elif self.logger.is_json_format():
|
||||
process_info['mem_usage'] = {"value" : process_info['mem_usage'],
|
||||
"unit" : memory_usage_unit}
|
||||
process_info["memory_usage"][usage_metric] = self.helpers.convert_bytes_to_readable(process_info["memory_usage"][usage_metric])
|
||||
memory_usage_unit = ""
|
||||
|
||||
for usage_metric in process_info['usage']:
|
||||
process_info['usage'][usage_metric] = {"value" : process_info['usage'][usage_metric],
|
||||
"unit" : engine_usage_unit}
|
||||
process_info['mem_usage'] = self.helpers.unit_format(self.logger,
|
||||
process_info['mem_usage'],
|
||||
memory_usage_unit)
|
||||
|
||||
for usage_metric in process_info['memory_usage']:
|
||||
process_info['memory_usage'][usage_metric] = {"value" : process_info['memory_usage'][usage_metric],
|
||||
"unit" : memory_usage_unit}
|
||||
for usage_metric in process_info['usage']:
|
||||
process_info['usage'][usage_metric] = self.helpers.unit_format(self.logger,
|
||||
process_info['usage'][usage_metric],
|
||||
engine_usage_unit)
|
||||
|
||||
for usage_metric in process_info['memory_usage']:
|
||||
process_info['memory_usage'][usage_metric] = self.helpers.unit_format(self.logger,
|
||||
process_info['memory_usage'][usage_metric],
|
||||
memory_usage_unit)
|
||||
|
||||
filtered_process_values.append({'process_info': process_info})
|
||||
|
||||
@@ -3967,7 +3967,7 @@ class AMDSMICommands():
|
||||
def monitor(self, args, multiple_devices=False, watching_output=False, gpu=None,
|
||||
watch=None, watch_time=None, iterations=None, power_usage=None,
|
||||
temperature=None, gfx_util=None, mem_util=None, encoder=None, decoder=None,
|
||||
ecc=None, vram_usage=None, pcie=None):
|
||||
ecc=None, vram_usage=None, pcie=None, process=None):
|
||||
""" Populate a table with each GPU as an index to rows of targeted data
|
||||
|
||||
Args:
|
||||
@@ -3986,6 +3986,7 @@ class AMDSMICommands():
|
||||
ecc (bool, optional): Value override for args.ecc. Defaults to None.
|
||||
vram_usage (bool, optional): Value override for args.vram_usage. Defaults to None.
|
||||
pcie (bool, optional): Value override for args.pcie. Defaults to None.
|
||||
process (bool, optional): Value override for args.process. Defaults to None.
|
||||
|
||||
Raises:
|
||||
ValueError: Value error if no gpu value is provided
|
||||
@@ -4023,12 +4024,15 @@ class AMDSMICommands():
|
||||
args.vram_usage = vram_usage
|
||||
if pcie:
|
||||
args.pcie = pcie
|
||||
if process:
|
||||
args.process = process
|
||||
|
||||
# Handle No GPU passed
|
||||
if args.gpu == None:
|
||||
args.gpu = self.device_handles
|
||||
|
||||
# If all arguments are False, then print all values
|
||||
# Don't include process in this logic as it's an optional edge case
|
||||
if not any([args.power_usage, args.temperature, args.gfx, args.mem,
|
||||
args.encoder, args.decoder, args.ecc,
|
||||
args.vram_usage, args.pcie]):
|
||||
@@ -4049,23 +4053,28 @@ class AMDSMICommands():
|
||||
for gpu in args.gpu:
|
||||
stored_gpus.append(gpu)
|
||||
|
||||
# Store output from multiple devices
|
||||
# Store output from multiple devices without printing to console
|
||||
for device_handle in args.gpu:
|
||||
self.monitor(args, multiple_devices=True, watching_output=watching_output, gpu=device_handle)
|
||||
|
||||
# Reload original gpus
|
||||
args.gpu = stored_gpus
|
||||
|
||||
# Print multiple device output
|
||||
self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output, tabular=True)
|
||||
dual_csv_output = False
|
||||
if args.process:
|
||||
if self.logger.is_csv_format():
|
||||
dual_csv_output = True
|
||||
|
||||
# Flush the output
|
||||
self.logger.print_output(multiple_device_enabled=True,
|
||||
watching_output=watching_output,
|
||||
tabular=True,
|
||||
dual_csv_output=dual_csv_output)
|
||||
|
||||
# Add output to total watch output and clear multiple device output
|
||||
if watching_output:
|
||||
self.logger.store_watch_output(multiple_device_enabled=True)
|
||||
|
||||
# Flush the watching output
|
||||
self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output, tabular=True)
|
||||
|
||||
return
|
||||
elif len(args.gpu) == 1:
|
||||
args.gpu = args.gpu[0]
|
||||
@@ -4077,15 +4086,11 @@ class AMDSMICommands():
|
||||
# Get gpu_id for logging
|
||||
gpu_id = self.helpers.get_gpu_id_from_device_handle(args.gpu)
|
||||
|
||||
# Clear the table header
|
||||
self.logger.table_header = ''
|
||||
|
||||
# Store timestamp for watch output
|
||||
# Reset the table header and store the timestamp if watch output is enabled
|
||||
self.logger.table_header = 'GPU'
|
||||
if watching_output:
|
||||
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
|
||||
self.logger.table_header += 'TIMESTAMP'.rjust(10) + ' '
|
||||
|
||||
self.logger.table_header += 'GPU'
|
||||
self.logger.table_header = 'TIMESTAMP'.rjust(10) + ' ' + self.logger.table_header
|
||||
|
||||
if args.power_usage:
|
||||
try:
|
||||
@@ -4332,16 +4337,98 @@ class AMDSMICommands():
|
||||
|
||||
self.logger.store_output(args.gpu, 'values', monitor_values)
|
||||
|
||||
# Initialize dual_csv_output; applicable to process only
|
||||
dual_csv_output = False
|
||||
|
||||
# Store process list separately
|
||||
if args.process:
|
||||
# Populate initial processes
|
||||
try:
|
||||
process_list = amdsmi_interface.amdsmi_get_gpu_process_list(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
logging.debug("Failed to get process list for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
raise e
|
||||
|
||||
# Clean processes dictionary
|
||||
filtered_process_values = []
|
||||
for process_info in process_list:
|
||||
process_info['mem_usage'] = process_info.pop('mem')
|
||||
process_info['usage'] = process_info.pop('engine_usage')
|
||||
|
||||
engine_usage_unit = "ns"
|
||||
memory_usage_unit = "B"
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
process_info['mem_usage'] = self.helpers.convert_bytes_to_readable(process_info['mem_usage'])
|
||||
for usage_metric in process_info['memory_usage']:
|
||||
process_info["memory_usage"][usage_metric] = self.helpers.convert_bytes_to_readable(process_info["memory_usage"][usage_metric])
|
||||
memory_usage_unit = ""
|
||||
|
||||
process_info['mem_usage'] = self.helpers.unit_format(self.logger,
|
||||
process_info['mem_usage'],
|
||||
memory_usage_unit)
|
||||
|
||||
for usage_metric in process_info['usage']:
|
||||
process_info['usage'][usage_metric] = self.helpers.unit_format(self.logger,
|
||||
process_info['usage'][usage_metric],
|
||||
engine_usage_unit)
|
||||
|
||||
for usage_metric in process_info['memory_usage']:
|
||||
process_info['memory_usage'][usage_metric] = self.helpers.unit_format(self.logger,
|
||||
process_info['memory_usage'][usage_metric],
|
||||
memory_usage_unit)
|
||||
|
||||
filtered_process_values.append({'process_info': process_info})
|
||||
|
||||
# If no processes are populated then we populate an N/A placeholder
|
||||
if not filtered_process_values:
|
||||
logging.debug("Monitor - Failed to detect any process on gpu %s", gpu_id)
|
||||
filtered_process_values.append({'process_info': "N/A"})
|
||||
|
||||
for index, process in enumerate(filtered_process_values):
|
||||
if process['process_info'] == "N/A":
|
||||
filtered_process_values[index]['process_info'] = "No running processes detected"
|
||||
|
||||
# Build the process table's title and header
|
||||
self.logger.secondary_table_title = "PROCESS INFO"
|
||||
self.logger.secondary_table_header = 'GPU'.rjust(3) + "NAME".rjust(22) + "PID".rjust(9) + "GTT_MEM".rjust(10) + \
|
||||
"CPU_MEM".rjust(10) + "VRAM_MEM".rjust(10) + "MEM_USAGE".rjust(11) + \
|
||||
"GFX".rjust(8) + "ENC".rjust(8)
|
||||
|
||||
if watching_output:
|
||||
self.logger.secondary_table_header = 'TIMESTAMP'.rjust(10) + ' ' + self.logger.secondary_table_header
|
||||
|
||||
logging.debug(f"Monitor - Process Info for GPU {gpu_id} | {filtered_process_values}")
|
||||
|
||||
if self.logger.is_json_format():
|
||||
self.logger.store_output(args.gpu, 'process_list', filtered_process_values)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
# Print out process in flattened format
|
||||
# The logger detects if process list is present and pulls it out and prints
|
||||
# that table with timestamp, gpu, and prints headers separately
|
||||
self.logger.store_output(args.gpu, 'process_list', filtered_process_values)
|
||||
|
||||
if self.logger.is_csv_format():
|
||||
dual_csv_output = True
|
||||
# The logger detects if process list is present and pulls it out and prints
|
||||
# that table with timestamp, gpu, and prints headers separately
|
||||
self.logger.store_output(args.gpu, 'process_list', filtered_process_values)
|
||||
|
||||
# Now handling the single gpu case only
|
||||
if multiple_devices:
|
||||
self.logger.store_multiple_device_output()
|
||||
return # Skip printing when there are multiple devices
|
||||
return
|
||||
|
||||
self.logger.print_output(watching_output=watching_output, tabular=True)
|
||||
|
||||
if watching_output: # End of single gpu add to watch_output
|
||||
if watching_output and not self.logger.destination == "stdout": # End of single gpu add to watch_output
|
||||
self.logger.store_watch_output(multiple_device_enabled=False)
|
||||
|
||||
|
||||
self.logger.print_output(multiple_device_enabled=False, watching_output=watching_output, tabular=True, dual_csv_output=dual_csv_output)
|
||||
|
||||
|
||||
def rocm_smi(self, args):
|
||||
print("Placeholder for rocm-smi legacy commands")
|
||||
|
||||
|
||||
@@ -738,7 +738,7 @@ class AMDSMIHelpers():
|
||||
if logger.is_json_format():
|
||||
return {"value": value, "unit": unit}
|
||||
if logger.is_human_readable_format():
|
||||
return f"{value} {unit}"
|
||||
return f"{value} {unit}".rstrip()
|
||||
return f"{value}"
|
||||
|
||||
class SI_Unit(float, Enum):
|
||||
|
||||
@@ -38,8 +38,10 @@ class AMDSMILogger():
|
||||
self.watch_output = []
|
||||
self.format = format # csv, json, or human_readable
|
||||
self.destination = destination # stdout, path to a file (append)
|
||||
self.table_header = ""
|
||||
self.table_title = ""
|
||||
self.table_header = ""
|
||||
self.secondary_table_title = ""
|
||||
self.secondary_table_header = ""
|
||||
self.helpers = AMDSMIHelpers()
|
||||
|
||||
|
||||
@@ -102,47 +104,89 @@ class AMDSMILogger():
|
||||
return output_dict
|
||||
|
||||
|
||||
def _convert_json_to_human_readable(self, json_object: Dict[str, any], tabular=False):
|
||||
def _convert_json_to_tabular(self, json_object: Dict[str, any]):
|
||||
# TODO make dynamic
|
||||
if tabular:
|
||||
table_values = ''
|
||||
for key, value in json_object.items():
|
||||
value = str(value)
|
||||
if key == 'gpu':
|
||||
table_values += value.rjust(3)
|
||||
elif key == 'timestamp':
|
||||
table_values += value.rjust(10) + ' '
|
||||
elif key == 'power_usage':
|
||||
table_values += value.rjust(7)
|
||||
elif key in ('gfx_clock', 'mem_clock', 'encoder_clock', 'decoder_clock', 'vram_used'):
|
||||
table_values += value.rjust(11)
|
||||
elif key == 'vram_total' or 'ecc' in key:
|
||||
table_values += value.rjust(12)
|
||||
elif key in ['pcie_replay']:
|
||||
table_values += value.rjust(13)
|
||||
# Only for handling topology tables
|
||||
elif 'gpu_' in key:
|
||||
table_values += value.ljust(13)
|
||||
# Only for handling xgmi tables
|
||||
elif key == "gpu#":
|
||||
table_values += value.ljust(7)
|
||||
elif key == "bdf":
|
||||
table_values += value.ljust(13)
|
||||
elif "bdf_" in key:
|
||||
table_values += value.ljust(13)
|
||||
elif key == "bit_rate":
|
||||
table_values += value.ljust(9)
|
||||
elif key == "max_bandwidth":
|
||||
table_values += value.ljust(14)
|
||||
elif key == "link_type":
|
||||
table_values += value.ljust(10)
|
||||
elif key == "RW":
|
||||
table_values += " " + value.ljust(52)
|
||||
# Default spacing
|
||||
else:
|
||||
table_values += value.rjust(10)
|
||||
return table_values.rstrip()
|
||||
table_values = ''
|
||||
stored_gpu = ''
|
||||
stored_timestamp = ''
|
||||
for key, value in json_object.items():
|
||||
string_value = str(value)
|
||||
if key == 'gpu':
|
||||
stored_gpu = string_value
|
||||
table_values += string_value.rjust(3)
|
||||
elif key == 'timestamp':
|
||||
stored_timestamp = string_value
|
||||
table_values += string_value.rjust(10) + ' '
|
||||
elif key == 'power_usage':
|
||||
table_values += string_value.rjust(7)
|
||||
elif key in ('gfx_clock', 'mem_clock', 'encoder_clock', 'decoder_clock', 'vram_used'):
|
||||
table_values += string_value.rjust(11)
|
||||
elif key == 'vram_total' or 'ecc' in key:
|
||||
table_values += string_value.rjust(12)
|
||||
elif key in ['pcie_replay']:
|
||||
table_values += string_value.rjust(13)
|
||||
# Only for handling topology tables
|
||||
elif 'gpu_' in key:
|
||||
table_values += string_value.ljust(13)
|
||||
# Only for handling xgmi tables
|
||||
elif key == "gpu#":
|
||||
table_values += string_value.ljust(7)
|
||||
elif key == "bdf":
|
||||
table_values += string_value.ljust(13)
|
||||
elif "bdf_" in key:
|
||||
table_values += string_value.ljust(13)
|
||||
elif key == "bit_rate":
|
||||
table_values += string_value.ljust(9)
|
||||
elif key == "max_bandwidth":
|
||||
table_values += string_value.ljust(14)
|
||||
elif key == "link_type":
|
||||
table_values += string_value.ljust(10)
|
||||
elif key == "RW":
|
||||
table_values += " " + string_value.ljust(52)
|
||||
elif key == "process_list":
|
||||
#Add an additional padding between the first instance of GPU and NAME
|
||||
table_values += ' '
|
||||
for process_dict in value:
|
||||
if process_dict['process_info'] == "No running processes detected":
|
||||
# Add N/A for empty process_info
|
||||
table_values += "N/A".rjust(20) + "N/A".rjust(9) + "N/A".rjust(10) + \
|
||||
"N/A".rjust(10) + "N/A".rjust(10) + "N/A".rjust(11) + \
|
||||
"N/A".rjust(8) + "N/A".rjust(8) + '\n'
|
||||
else:
|
||||
for process_key, process_value in process_dict['process_info'].items():
|
||||
string_process_value = str(process_value)
|
||||
if process_key == "name":
|
||||
# Truncate name if too long
|
||||
process_name = string_process_value[:20]
|
||||
if process_name == "":
|
||||
process_name = "N/A"
|
||||
table_values += process_name.rjust(20)
|
||||
elif process_key == "pid":
|
||||
table_values += string_process_value.rjust(9)
|
||||
elif process_key == "memory_usage":
|
||||
for memory_key, memory_value in process_value.items():
|
||||
table_values += str(memory_value).rjust(10)
|
||||
elif process_key == "mem_usage":
|
||||
table_values += string_process_value.rjust(11)
|
||||
elif process_key == "usage":
|
||||
for usage_key, usage_value in process_value.items():
|
||||
table_values += str(usage_value).rjust(8)
|
||||
# Add the stored gpu and stored timestamp to the next line
|
||||
table_values += '\n'
|
||||
if stored_timestamp:
|
||||
table_values += stored_timestamp.ljust(10) + ' '
|
||||
table_values += stored_gpu.rjust(3) + ' '
|
||||
|
||||
# Remove excess two values after a new line in table_values
|
||||
table_values = table_values[:table_values.rfind('\n')]
|
||||
table_values += '\n'
|
||||
# Default spacing
|
||||
else:
|
||||
table_values += string_value.rjust(10)
|
||||
return table_values.rstrip()
|
||||
|
||||
|
||||
def _convert_json_to_human_readable(self, json_object: Dict[str, any]):
|
||||
# First Capitalize all keys in the json object
|
||||
capitalized_json = self._capitalize_keys(json_object)
|
||||
|
||||
@@ -203,9 +247,7 @@ class AMDSMILogger():
|
||||
|
||||
Args:
|
||||
target_dict (dict): Dictionary to flatten
|
||||
parent_key (str):
|
||||
"""
|
||||
# print(target_dict)
|
||||
output_dict = {}
|
||||
# First flatten out values
|
||||
|
||||
@@ -398,7 +440,7 @@ class AMDSMILogger():
|
||||
self.output = {}
|
||||
|
||||
|
||||
def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False):
|
||||
def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False, dual_csv_output=False):
|
||||
""" Print current output according to format and then destination
|
||||
params:
|
||||
multiple_device_enabled (bool) - True if printing output from
|
||||
@@ -411,12 +453,19 @@ class AMDSMILogger():
|
||||
self._print_json_output(multiple_device_enabled=multiple_device_enabled,
|
||||
watching_output=watching_output)
|
||||
elif self.is_csv_format():
|
||||
self._print_csv_output(multiple_device_enabled=multiple_device_enabled,
|
||||
watching_output=watching_output)
|
||||
if dual_csv_output:
|
||||
self._print_dual_csv_output(multiple_device_enabled=multiple_device_enabled,
|
||||
watching_output=watching_output)
|
||||
else:
|
||||
self._print_csv_output(multiple_device_enabled=multiple_device_enabled,
|
||||
watching_output=watching_output)
|
||||
elif self.is_human_readable_format():
|
||||
self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled,
|
||||
watching_output=watching_output,
|
||||
tabular=tabular)
|
||||
# If tabular output is enabled, redirect to _print_tabular_output
|
||||
if tabular:
|
||||
self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output)
|
||||
else:
|
||||
self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled,
|
||||
watching_output=watching_output)
|
||||
|
||||
|
||||
def _print_json_output(self, multiple_device_enabled=False, watching_output=False):
|
||||
@@ -493,19 +542,178 @@ class AMDSMILogger():
|
||||
writer.writerows(stored_csv_output)
|
||||
|
||||
|
||||
def _print_dual_csv_output(self, multiple_device_enabled=False, watching_output=False):
|
||||
if multiple_device_enabled:
|
||||
stored_csv_output = self.multiple_device_output
|
||||
else:
|
||||
if not isinstance(self.output, list):
|
||||
stored_csv_output = [self.output]
|
||||
|
||||
primary_csv_output = []
|
||||
secondary_csv_output = []
|
||||
|
||||
if stored_csv_output:
|
||||
# Split stored_csv_output into primary_csv and secondary_csv
|
||||
for output_dict in stored_csv_output:
|
||||
if 'process_list' in output_dict:
|
||||
# Add a new entry for each process_info
|
||||
for process_info_dict in output_dict['process_list']:
|
||||
secondary_output_dict = {}
|
||||
if watching_output:
|
||||
secondary_output_dict['timestamp'] = output_dict['timestamp']
|
||||
secondary_output_dict['gpu'] = output_dict['gpu']
|
||||
if isinstance(process_info_dict["process_info"], dict):
|
||||
for process_field, process_value in process_info_dict["process_info"].items():
|
||||
if isinstance(process_value, dict):
|
||||
for key, value in process_value.items():
|
||||
secondary_output_dict[key] = value
|
||||
else:
|
||||
secondary_output_dict[process_field] = process_value
|
||||
else:
|
||||
# Handle no process found case
|
||||
secondary_output_dict["process_info"] = process_info_dict["process_info"]
|
||||
secondary_csv_output.append(secondary_output_dict)
|
||||
primary_output_dict = {}
|
||||
for key, value in output_dict.items():
|
||||
if key != 'process_list':
|
||||
primary_output_dict[key] = value
|
||||
primary_csv_output.append(primary_output_dict)
|
||||
|
||||
# Ensure uniform data within primary and secondary csv outputs
|
||||
if primary_csv_output:
|
||||
primary_keys = set()
|
||||
for output in primary_csv_output:
|
||||
for key in output:
|
||||
primary_keys.add(key)
|
||||
# insert empty data to align with keys that may not exist
|
||||
for index, output_dict in enumerate(primary_csv_output):
|
||||
remaining_keys = primary_keys - set(output_dict.keys())
|
||||
for key in remaining_keys:
|
||||
primary_csv_output[index][key] = "N/A"
|
||||
if secondary_csv_output:
|
||||
secondary_keys = set()
|
||||
for output in secondary_csv_output:
|
||||
for key in output:
|
||||
secondary_keys.add(key)
|
||||
# insert empty data to align with keys that may not exist
|
||||
for index, output_dict in enumerate(secondary_csv_output):
|
||||
remaining_keys = secondary_keys - set(output_dict.keys())
|
||||
for key in remaining_keys:
|
||||
secondary_csv_output[index][key] = "N/A"
|
||||
|
||||
if self.destination == 'stdout':
|
||||
if primary_csv_output:
|
||||
# Get the header as a list of the first element to maintain order
|
||||
csv_header = primary_csv_output[0].keys()
|
||||
csv_stdout_output = self.CsvStdoutBuilder()
|
||||
writer = csv.DictWriter(csv_stdout_output, csv_header)
|
||||
writer.writeheader()
|
||||
writer.writerows(primary_csv_output)
|
||||
print(str(csv_stdout_output))
|
||||
if secondary_csv_output:
|
||||
# Get the header as a list of the first element to maintain order
|
||||
csv_header = secondary_csv_output[0].keys()
|
||||
csv_stdout_output = self.CsvStdoutBuilder()
|
||||
writer = csv.DictWriter(csv_stdout_output, csv_header)
|
||||
writer.writeheader()
|
||||
writer.writerows(secondary_csv_output)
|
||||
print(str(csv_stdout_output))
|
||||
if watching_output:
|
||||
print()
|
||||
else:
|
||||
if watching_output:
|
||||
with self.destination.open('w', newline = '') as output_file:
|
||||
primary_csv_output = []
|
||||
secondary_csv_output = []
|
||||
if self.watch_output:
|
||||
# Split watch_output into primary_csv and secondary_csv
|
||||
for output_dict in self.watch_output:
|
||||
if 'process_list' in output_dict:
|
||||
# Add a new entry for each process_info
|
||||
for process_info_dict in output_dict['process_list']:
|
||||
secondary_output_dict = {}
|
||||
if watching_output:
|
||||
secondary_output_dict['timestamp'] = output_dict['timestamp']
|
||||
secondary_output_dict['gpu'] = output_dict['gpu']
|
||||
if isinstance(process_info_dict["process_info"], dict):
|
||||
for process_field, process_value in process_info_dict["process_info"].items():
|
||||
if isinstance(process_value, dict):
|
||||
for key, value in process_value.items():
|
||||
secondary_output_dict[key] = value
|
||||
else:
|
||||
secondary_output_dict[process_field] = process_value
|
||||
else:
|
||||
# Handle no process found case
|
||||
secondary_output_dict["process_info"] = process_info_dict["process_info"]
|
||||
secondary_csv_output.append(secondary_output_dict)
|
||||
primary_output_dict = {}
|
||||
for key, value in output_dict.items():
|
||||
if key != 'process_list':
|
||||
primary_output_dict[key] = value
|
||||
primary_csv_output.append(primary_output_dict)
|
||||
|
||||
# Ensure uniform data within primary and secondary csv outputs
|
||||
if primary_csv_output:
|
||||
primary_keys = set()
|
||||
for output in primary_csv_output:
|
||||
for key in output:
|
||||
primary_keys.add(key)
|
||||
# insert empty data to align with keys that may not exist
|
||||
for index, output_dict in enumerate(primary_csv_output):
|
||||
remaining_keys = primary_keys - set(output_dict.keys())
|
||||
for key in remaining_keys:
|
||||
primary_csv_output[index][key] = "N/A"
|
||||
if secondary_csv_output:
|
||||
secondary_keys = set()
|
||||
for output in secondary_csv_output:
|
||||
for key in output:
|
||||
secondary_keys.add(key)
|
||||
# insert empty data to align with keys that may not exist
|
||||
for index, output_dict in enumerate(secondary_csv_output):
|
||||
remaining_keys = secondary_keys - set(output_dict.keys())
|
||||
for key in remaining_keys:
|
||||
secondary_csv_output[index][key] = "N/A"
|
||||
|
||||
if primary_csv_output:
|
||||
# Get the header as a list of the first element to maintain order
|
||||
csv_header = primary_csv_output[0].keys()
|
||||
writer = csv.DictWriter(output_file, csv_header)
|
||||
writer.writeheader()
|
||||
writer.writerows(primary_csv_output)
|
||||
if secondary_csv_output:
|
||||
output_file.write("\n")
|
||||
csv_header = secondary_csv_output[0].keys()
|
||||
writer = csv.DictWriter(output_file, csv_header)
|
||||
writer.writeheader()
|
||||
writer.writerows(secondary_csv_output)
|
||||
else:
|
||||
with self.destination.open('a', newline = '') as output_file:
|
||||
if primary_csv_output:
|
||||
# Get the header as a list of the first element to maintain order
|
||||
csv_header = primary_csv_output[0].keys()
|
||||
writer = csv.DictWriter(output_file, csv_header)
|
||||
writer.writeheader()
|
||||
writer.writerows(primary_csv_output)
|
||||
if secondary_csv_output:
|
||||
output_file.write("\n")
|
||||
csv_header = secondary_csv_output[0].keys()
|
||||
writer = csv.DictWriter(output_file, csv_header)
|
||||
writer.writeheader()
|
||||
writer.writerows(secondary_csv_output)
|
||||
|
||||
def _print_human_readable_output(self, multiple_device_enabled=False, watching_output=False, tabular=False):
|
||||
human_readable_output = ''
|
||||
# If tabular output is enabled, redirect to _print_tabular_output
|
||||
if tabular:
|
||||
if self.table_title:
|
||||
human_readable_output += self.table_title + ':\n'
|
||||
human_readable_output += self.table_header + '\n'
|
||||
self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output)
|
||||
return
|
||||
|
||||
human_readable_output = ''
|
||||
|
||||
if multiple_device_enabled:
|
||||
for output in self.multiple_device_output:
|
||||
human_readable_output += self._convert_json_to_human_readable(output, tabular=tabular)
|
||||
human_readable_output += '\n'
|
||||
for device_output in self.multiple_device_output:
|
||||
human_readable_output += self._convert_json_to_human_readable(device_output) + '\n'
|
||||
else:
|
||||
human_readable_output += self._convert_json_to_human_readable(self.output, tabular=tabular)
|
||||
human_readable_output += self._convert_json_to_human_readable(self.output)
|
||||
|
||||
if self.destination == 'stdout':
|
||||
try:
|
||||
@@ -518,15 +726,131 @@ class AMDSMILogger():
|
||||
if watching_output:
|
||||
with self.destination.open('w') as output_file:
|
||||
human_readable_output = ''
|
||||
if tabular:
|
||||
if self.table_title:
|
||||
human_readable_output += self.table_title + '\n'
|
||||
human_readable_output += self.table_header + '\n'
|
||||
for output in self.watch_output:
|
||||
human_readable_output += self._convert_json_to_human_readable(output, tabular=tabular)
|
||||
if tabular:
|
||||
human_readable_output += '\n'
|
||||
human_readable_output += self._convert_json_to_human_readable(output)
|
||||
output_file.write(human_readable_output + '\n')
|
||||
else:
|
||||
with self.destination.open('a') as output_file:
|
||||
output_file.write(human_readable_output + '\n')
|
||||
|
||||
|
||||
def _print_tabular_output(self, multiple_device_enabled=False, watching_output=False):
|
||||
primary_table = ''
|
||||
secondary_table = ''
|
||||
|
||||
# Populate primary table without process_list
|
||||
# Populate secondary table with process_list if exists
|
||||
if multiple_device_enabled and self.multiple_device_output:
|
||||
for device_output in self.multiple_device_output:
|
||||
if 'process_list' in device_output:
|
||||
process_table_dict = {}
|
||||
if watching_output:
|
||||
process_table_dict['timestamp'] = device_output['timestamp']
|
||||
process_table_dict['gpu'] = device_output['gpu']
|
||||
process_table_dict['process_list'] = device_output['process_list']
|
||||
secondary_table += self._convert_json_to_tabular(process_table_dict) + '\n'
|
||||
# Add primary table keys without process_list
|
||||
primary_table_output = {}
|
||||
for key, value in device_output.items():
|
||||
if key != 'process_list':
|
||||
primary_table_output[key] = value
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
|
||||
else: # Single device output
|
||||
if 'process_list' in self.output:
|
||||
process_table_dict = {}
|
||||
if watching_output:
|
||||
process_table_dict['timestamp'] = self.output['timestamp']
|
||||
process_table_dict['gpu'] = self.output['gpu']
|
||||
process_table_dict['process_list'] = self.output['process_list']
|
||||
secondary_table += self._convert_json_to_tabular(process_table_dict) + '\n'
|
||||
# Add primary table keys without process_list
|
||||
primary_table_output = {}
|
||||
for key, value in self.output.items():
|
||||
if key != 'process_list':
|
||||
primary_table_output[key] = value
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
|
||||
primary_table = primary_table.rstrip()
|
||||
secondary_table = secondary_table.rstrip()
|
||||
|
||||
# Add primary table title and header to primary_table
|
||||
if primary_table:
|
||||
primary_table_heading = ''
|
||||
if self.table_title:
|
||||
primary_table_heading = self.table_title + ':\n'
|
||||
primary_table_heading += self.table_header + '\n'
|
||||
primary_table = primary_table_heading + primary_table
|
||||
|
||||
# Add secondary table title and header to secondary_table
|
||||
# Currently just process_info uses this logic
|
||||
if secondary_table:
|
||||
secondary_table_heading = ''
|
||||
if self.secondary_table_title:
|
||||
secondary_table_heading = '\n' + self.secondary_table_title + ':\n'
|
||||
secondary_table_heading += self.secondary_table_header + '\n'
|
||||
secondary_table = secondary_table_heading + secondary_table
|
||||
|
||||
if self.destination == 'stdout':
|
||||
try:
|
||||
# printing as unicode may fail if locale is not set properly
|
||||
print(primary_table)
|
||||
if secondary_table:
|
||||
print(secondary_table)
|
||||
if watching_output:
|
||||
print("\n")
|
||||
except UnicodeEncodeError:
|
||||
# print as ascii, ignore incompatible characters
|
||||
print(primary_table.encode('ascii', 'ignore').decode('ascii'))
|
||||
if secondary_table:
|
||||
print(secondary_table.encode('ascii', 'ignore').decode('ascii'))
|
||||
if watching_output:
|
||||
print("\n")
|
||||
else:
|
||||
if watching_output: # Write all stored watched output to a file
|
||||
with self.destination.open('w') as output_file:
|
||||
primary_table = ''
|
||||
secondary_table = ''
|
||||
# Add process_list to the secondary_table
|
||||
# Add remaining watch_output to the primary_table
|
||||
for device_output in self.watch_output:
|
||||
# if process_list is detected in device_output store in secondary_table
|
||||
if 'process_list' in device_output:
|
||||
process_table_dict = {
|
||||
'timestamp': device_output['timestamp'],
|
||||
'gpu': device_output['gpu'],
|
||||
'process_list': device_output['process_list']
|
||||
}
|
||||
secondary_table += self._convert_json_to_tabular(process_table_dict) + '\n'
|
||||
# Add primary table keys without process_list
|
||||
primary_table_output = {}
|
||||
for key, value in device_output.items():
|
||||
if key != 'process_list':
|
||||
primary_table_output[key] = value
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
|
||||
primary_table = primary_table.rstrip() # Remove trailing new line
|
||||
secondary_table = secondary_table.rstrip()
|
||||
|
||||
# Add primary table title and header to primary_table
|
||||
if primary_table:
|
||||
primary_table_heading = ''
|
||||
if self.table_title:
|
||||
primary_table_heading = self.table_title + ':\n'
|
||||
primary_table_heading += self.table_header + '\n'
|
||||
primary_table = primary_table_heading + primary_table
|
||||
|
||||
# Add secondary table title and header to secondary_table
|
||||
# Currently just process_info uses this logic
|
||||
if secondary_table:
|
||||
secondary_table_heading = ''
|
||||
if self.secondary_table_title:
|
||||
secondary_table_heading = '\n' + self.secondary_table_title + ':\n'
|
||||
secondary_table_heading += self.secondary_table_header + '\n'
|
||||
secondary_table = secondary_table_heading + secondary_table
|
||||
|
||||
# Write both full tables to the file
|
||||
output_file.write(primary_table)
|
||||
if secondary_table:
|
||||
output_file.write("\n" + secondary_table)
|
||||
else: # Write all singular output to a file
|
||||
with self.destination.open('a') as output_file:
|
||||
output_file.write(primary_table + '\n')
|
||||
output_file.write(secondary_table)
|
||||
|
||||
@@ -1126,6 +1126,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
ecc_help = "Monitor ECC single bit, ECC double bit, and PCIe replay error counts"
|
||||
mem_usage_help = "Monitor memory usage in MB"
|
||||
pcie_bandwidth_help = "Monitor PCIe bandwidth in Mb/s"
|
||||
process_help = "Enable Process information table below monitor output"
|
||||
|
||||
# Create monitor subparser
|
||||
monitor_parser = subparsers.add_parser('monitor', help=monitor_help, description=monitor_subcommand_help)
|
||||
@@ -1148,6 +1149,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
monitor_parser.add_argument('-e', '--ecc', action='store_true', required=False, help=ecc_help)
|
||||
monitor_parser.add_argument('-v', '--vram-usage', action='store_true', required=False, help=mem_usage_help)
|
||||
monitor_parser.add_argument('-r', '--pcie', action='store_true', required=False, help=pcie_bandwidth_help)
|
||||
monitor_parser.add_argument('-q', '--process', action='store_true', required=False, help=process_help)
|
||||
|
||||
|
||||
def _add_rocm_smi_parser(self, subparsers, func):
|
||||
|
||||
@@ -677,8 +677,11 @@ Output: Dictionary with fields
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`current_socket_power` | current socket power
|
||||
`average_socket_power` | average socket power
|
||||
`gfx_voltage` | gfx voltage
|
||||
`soc_voltage` | soc voltage
|
||||
`mem_voltage` | mem voltage
|
||||
`power_limit` | power limit
|
||||
|
||||
Exceptions that can be thrown by the `amdsmi_get_power_info` function:
|
||||
@@ -697,8 +700,11 @@ try:
|
||||
else:
|
||||
for device in devices:
|
||||
power_measure = amdsmi_get_power_info(device)
|
||||
print(power_measure['current_socket_power'])
|
||||
print(power_measure['average_socket_power'])
|
||||
print(power_measure['gfx_voltage'])
|
||||
print(power_measure['soc_voltage'])
|
||||
print(power_measure['mem_voltage'])
|
||||
print(power_measure['power_limit'])
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
|
||||
Ссылка в новой задаче
Block a user