diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index 75c386e353..cbd9fc2a62 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -8,6 +8,19 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Additions +- **Added optional process table under `amd-smi monitor -q`**. +The monitor subcommand within the CLI Tool now has the `-q` option to enable an optional process table underneath the original monitored output. + +```shell +$ amd-smi monitor -q +GPU POWER GPU_TEMP MEM_TEMP GFX_UTIL GFX_CLOCK MEM_UTIL MEM_CLOCK ENC_UTIL ENC_CLOCK DEC_UTIL DEC_CLOCK SINGLE_ECC DOUBLE_ECC PCIE_REPLAY VRAM_USED VRAM_TOTAL PCIE_BW + 0 199 W 103 °C 84 °C 99 % 1920 MHz 31 % 1000 MHz N/A 0 MHz N/A 0 MHz 0 0 0 1235 MB 16335 MB N/A Mb/s + +PROCESS INFO: +GPU NAME PID GTT_MEM CPU_MEM VRAM_MEM MEM_USAGE GFX ENC + 0 rvs 1564865 0.0 B 0.0 B 1.1 GB 0.0 B 0 ns 0 ns +``` + - **Added Handling to detect VMs with passthrough configurations in CLI Tool**. CLI Tool had only allowed a restricted set of options for Virtual Machines with passthrough GPUs. Now we offer an expanded set of functions availble to passthrough configured GPUs. 
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index e68bc5dddb..6a078ffbe6 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -2646,23 +2646,23 @@ class AMDSMICommands(): if self.logger.is_human_readable_format(): process_info['mem_usage'] = self.helpers.convert_bytes_to_readable(process_info['mem_usage']) - - for usage_metric in process_info['usage']: - process_info['usage'][usage_metric] = f"{process_info['usage'][usage_metric]} {engine_usage_unit}" - for usage_metric in process_info['memory_usage']: - process_info['memory_usage'][usage_metric] = self.helpers.convert_bytes_to_readable(process_info['memory_usage'][usage_metric]) - elif self.logger.is_json_format(): - process_info['mem_usage'] = {"value" : process_info['mem_usage'], - "unit" : memory_usage_unit} + process_info["memory_usage"][usage_metric] = self.helpers.convert_bytes_to_readable(process_info["memory_usage"][usage_metric]) + memory_usage_unit = "" - for usage_metric in process_info['usage']: - process_info['usage'][usage_metric] = {"value" : process_info['usage'][usage_metric], - "unit" : engine_usage_unit} + process_info['mem_usage'] = self.helpers.unit_format(self.logger, + process_info['mem_usage'], + memory_usage_unit) - for usage_metric in process_info['memory_usage']: - process_info['memory_usage'][usage_metric] = {"value" : process_info['memory_usage'][usage_metric], - "unit" : memory_usage_unit} + for usage_metric in process_info['usage']: + process_info['usage'][usage_metric] = self.helpers.unit_format(self.logger, + process_info['usage'][usage_metric], + engine_usage_unit) + + for usage_metric in process_info['memory_usage']: + process_info['memory_usage'][usage_metric] = self.helpers.unit_format(self.logger, + process_info['memory_usage'][usage_metric], + memory_usage_unit) filtered_process_values.append({'process_info': process_info}) @@ -3967,7 +3967,7 @@ class 
AMDSMICommands(): def monitor(self, args, multiple_devices=False, watching_output=False, gpu=None, watch=None, watch_time=None, iterations=None, power_usage=None, temperature=None, gfx_util=None, mem_util=None, encoder=None, decoder=None, - ecc=None, vram_usage=None, pcie=None): + ecc=None, vram_usage=None, pcie=None, process=None): """ Populate a table with each GPU as an index to rows of targeted data Args: @@ -3986,6 +3986,7 @@ class AMDSMICommands(): ecc (bool, optional): Value override for args.ecc. Defaults to None. vram_usage (bool, optional): Value override for args.vram_usage. Defaults to None. pcie (bool, optional): Value override for args.pcie. Defaults to None. + process (bool, optional): Value override for args.process. Defaults to None. Raises: ValueError: Value error if no gpu value is provided @@ -4023,12 +4024,15 @@ class AMDSMICommands(): args.vram_usage = vram_usage if pcie: args.pcie = pcie + if process: + args.process = process # Handle No GPU passed if args.gpu == None: args.gpu = self.device_handles # If all arguments are False, the print all values + # Don't include process in this logic as it's an optional edge case if not any([args.power_usage, args.temperature, args.gfx, args.mem, args.encoder, args.decoder, args.ecc, args.vram_usage, args.pcie]): @@ -4049,23 +4053,28 @@ class AMDSMICommands(): for gpu in args.gpu: stored_gpus.append(gpu) - # Store output from multiple devices + # Store output from multiple devices without printing to console for device_handle in args.gpu: self.monitor(args, multiple_devices=True, watching_output=watching_output, gpu=device_handle) # Reload original gpus args.gpu = stored_gpus - # Print multiple device output - self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output, tabular=True) + dual_csv_output = False + if args.process: + if self.logger.is_csv_format(): + dual_csv_output = True + + # Flush the output + self.logger.print_output(multiple_device_enabled=True, + 
watching_output=watching_output, + tabular=True, + dual_csv_output=dual_csv_output) # Add output to total watch output and clear multiple device output if watching_output: self.logger.store_watch_output(multiple_device_enabled=True) - # Flush the watching output - self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output, tabular=True) - return elif len(args.gpu) == 1: args.gpu = args.gpu[0] @@ -4077,15 +4086,11 @@ class AMDSMICommands(): # Get gpu_id for logging gpu_id = self.helpers.get_gpu_id_from_device_handle(args.gpu) - # Clear the table header - self.logger.table_header = '' - - # Store timestamp for watch output + # Reset the table header and store the timestamp if watch output is enabled + self.logger.table_header = 'GPU' if watching_output: self.logger.store_output(args.gpu, 'timestamp', int(time.time())) - self.logger.table_header += 'TIMESTAMP'.rjust(10) + ' ' - - self.logger.table_header += 'GPU' + self.logger.table_header = 'TIMESTAMP'.rjust(10) + ' ' + self.logger.table_header if args.power_usage: try: @@ -4332,16 +4337,98 @@ class AMDSMICommands(): self.logger.store_output(args.gpu, 'values', monitor_values) + # Initialize dual_csv_output; applicable to process only + dual_csv_output = False + + # Store process list separately + if args.process: + # Populate initial processes + try: + process_list = amdsmi_interface.amdsmi_get_gpu_process_list(args.gpu) + except amdsmi_exception.AmdSmiLibraryException as e: + if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM: + raise PermissionError('Command requires elevation') from e + logging.debug("Failed to get process list for gpu %s | %s", gpu_id, e.get_error_info()) + raise e + + # Clean processes dictionary + filtered_process_values = [] + for process_info in process_list: + process_info['mem_usage'] = process_info.pop('mem') + process_info['usage'] = process_info.pop('engine_usage') + + engine_usage_unit = "ns" + memory_usage_unit = "B" + + if 
self.logger.is_human_readable_format(): + process_info['mem_usage'] = self.helpers.convert_bytes_to_readable(process_info['mem_usage']) + for usage_metric in process_info['memory_usage']: + process_info["memory_usage"][usage_metric] = self.helpers.convert_bytes_to_readable(process_info["memory_usage"][usage_metric]) + memory_usage_unit = "" + + process_info['mem_usage'] = self.helpers.unit_format(self.logger, + process_info['mem_usage'], + memory_usage_unit) + + for usage_metric in process_info['usage']: + process_info['usage'][usage_metric] = self.helpers.unit_format(self.logger, + process_info['usage'][usage_metric], + engine_usage_unit) + + for usage_metric in process_info['memory_usage']: + process_info['memory_usage'][usage_metric] = self.helpers.unit_format(self.logger, + process_info['memory_usage'][usage_metric], + memory_usage_unit) + + filtered_process_values.append({'process_info': process_info}) + + # If no processes are populated then we populate an N/A placeholder + if not filtered_process_values: + logging.debug("Monitor - Failed to detect any process on gpu %s", gpu_id) + filtered_process_values.append({'process_info': "N/A"}) + + for index, process in enumerate(filtered_process_values): + if process['process_info'] == "N/A": + filtered_process_values[index]['process_info'] = "No running processes detected" + + # Build the process table's title and header + self.logger.secondary_table_title = "PROCESS INFO" + self.logger.secondary_table_header = 'GPU'.rjust(3) + "NAME".rjust(22) + "PID".rjust(9) + "GTT_MEM".rjust(10) + \ + "CPU_MEM".rjust(10) + "VRAM_MEM".rjust(10) + "MEM_USAGE".rjust(11) + \ + "GFX".rjust(8) + "ENC".rjust(8) + + if watching_output: + self.logger.secondary_table_header = 'TIMESTAMP'.rjust(10) + ' ' + self.logger.secondary_table_header + + logging.debug(f"Monitor - Process Info for GPU {gpu_id} | {filtered_process_values}") + + if self.logger.is_json_format(): + self.logger.store_output(args.gpu, 'process_list', 
filtered_process_values) + + if self.logger.is_human_readable_format(): + # Print out process in flattened format + # The logger detects if process list is present and pulls it out and prints + # that table with timestamp, gpu, and prints headers separately + self.logger.store_output(args.gpu, 'process_list', filtered_process_values) + + if self.logger.is_csv_format(): + dual_csv_output = True + # The logger detects if process list is present and pulls it out and prints + # that table with timestamp, gpu, and prints headers separately + self.logger.store_output(args.gpu, 'process_list', filtered_process_values) + + # Now handling the single gpu case only if multiple_devices: self.logger.store_multiple_device_output() - return # Skip printing when there are multiple devices + return - self.logger.print_output(watching_output=watching_output, tabular=True) - - if watching_output: # End of single gpu add to watch_output + if watching_output and not self.logger.destination == "stdout": # End of single gpu add to watch_output self.logger.store_watch_output(multiple_device_enabled=False) + self.logger.print_output(multiple_device_enabled=False, watching_output=watching_output, tabular=True, dual_csv_output=dual_csv_output) + + def rocm_smi(self, args): print("Placeholder for rocm-smi legacy commands") diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py index 7db9be1cb1..93beabcb9c 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py @@ -738,7 +738,7 @@ class AMDSMIHelpers(): if logger.is_json_format(): return {"value": value, "unit": unit} if logger.is_human_readable_format(): - return f"{value} {unit}" + return f"{value} {unit}".rstrip() return f"{value}" class SI_Unit(float, Enum): diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_logger.py b/projects/amdsmi/amdsmi_cli/amdsmi_logger.py index 5b6e35457b..c0ffe5a8fb 100644 --- 
a/projects/amdsmi/amdsmi_cli/amdsmi_logger.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_logger.py @@ -38,8 +38,10 @@ class AMDSMILogger(): self.watch_output = [] self.format = format # csv, json, or human_readable self.destination = destination # stdout, path to a file (append) - self.table_header = "" self.table_title = "" + self.table_header = "" + self.secondary_table_title = "" + self.secondary_table_header = "" self.helpers = AMDSMIHelpers() @@ -102,47 +104,89 @@ class AMDSMILogger(): return output_dict - def _convert_json_to_human_readable(self, json_object: Dict[str, any], tabular=False): + def _convert_json_to_tabular(self, json_object: Dict[str, any]): # TODO make dynamic - if tabular: - table_values = '' - for key, value in json_object.items(): - value = str(value) - if key == 'gpu': - table_values += value.rjust(3) - elif key == 'timestamp': - table_values += value.rjust(10) + ' ' - elif key == 'power_usage': - table_values += value.rjust(7) - elif key in ('gfx_clock', 'mem_clock', 'encoder_clock', 'decoder_clock', 'vram_used'): - table_values += value.rjust(11) - elif key == 'vram_total' or 'ecc' in key: - table_values += value.rjust(12) - elif key in ['pcie_replay']: - table_values += value.rjust(13) - # Only for handling topology tables - elif 'gpu_' in key: - table_values += value.ljust(13) - # Only for handling xgmi tables - elif key == "gpu#": - table_values += value.ljust(7) - elif key == "bdf": - table_values += value.ljust(13) - elif "bdf_" in key: - table_values += value.ljust(13) - elif key == "bit_rate": - table_values += value.ljust(9) - elif key == "max_bandwidth": - table_values += value.ljust(14) - elif key == "link_type": - table_values += value.ljust(10) - elif key == "RW": - table_values += " " + value.ljust(52) - # Default spacing - else: - table_values += value.rjust(10) - return table_values.rstrip() + table_values = '' + stored_gpu = '' + stored_timestamp = '' + for key, value in json_object.items(): + string_value = str(value) + if 
key == 'gpu': + stored_gpu = string_value + table_values += string_value.rjust(3) + elif key == 'timestamp': + stored_timestamp = string_value + table_values += string_value.rjust(10) + ' ' + elif key == 'power_usage': + table_values += string_value.rjust(7) + elif key in ('gfx_clock', 'mem_clock', 'encoder_clock', 'decoder_clock', 'vram_used'): + table_values += string_value.rjust(11) + elif key == 'vram_total' or 'ecc' in key: + table_values += string_value.rjust(12) + elif key in ['pcie_replay']: + table_values += string_value.rjust(13) + # Only for handling topology tables + elif 'gpu_' in key: + table_values += string_value.ljust(13) + # Only for handling xgmi tables + elif key == "gpu#": + table_values += string_value.ljust(7) + elif key == "bdf": + table_values += string_value.ljust(13) + elif "bdf_" in key: + table_values += string_value.ljust(13) + elif key == "bit_rate": + table_values += string_value.ljust(9) + elif key == "max_bandwidth": + table_values += string_value.ljust(14) + elif key == "link_type": + table_values += string_value.ljust(10) + elif key == "RW": + table_values += " " + string_value.ljust(52) + elif key == "process_list": + #Add an additional padding between the first instance of GPU and NAME + table_values += ' ' + for process_dict in value: + if process_dict['process_info'] == "No running processes detected": + # Add N/A for empty process_info + table_values += "N/A".rjust(20) + "N/A".rjust(9) + "N/A".rjust(10) + \ + "N/A".rjust(10) + "N/A".rjust(10) + "N/A".rjust(11) + \ + "N/A".rjust(8) + "N/A".rjust(8) + '\n' + else: + for process_key, process_value in process_dict['process_info'].items(): + string_process_value = str(process_value) + if process_key == "name": + # Truncate name if too long + process_name = string_process_value[:20] + if process_name == "": + process_name = "N/A" + table_values += process_name.rjust(20) + elif process_key == "pid": + table_values += string_process_value.rjust(9) + elif process_key == 
"memory_usage": + for memory_key, memory_value in process_value.items(): + table_values += str(memory_value).rjust(10) + elif process_key == "mem_usage": + table_values += string_process_value.rjust(11) + elif process_key == "usage": + for usage_key, usage_value in process_value.items(): + table_values += str(usage_value).rjust(8) + # Add the stored gpu and stored timestamp to the next line + table_values += '\n' + if stored_timestamp: + table_values += stored_timestamp.ljust(10) + ' ' + table_values += stored_gpu.rjust(3) + ' ' + # Remove excess two values after a new line in table_values + table_values = table_values[:table_values.rfind('\n')] + table_values += '\n' + # Default spacing + else: + table_values += string_value.rjust(10) + return table_values.rstrip() + + + def _convert_json_to_human_readable(self, json_object: Dict[str, any]): # First Capitalize all keys in the json object capitalized_json = self._capitalize_keys(json_object) @@ -203,9 +247,7 @@ class AMDSMILogger(): Args: target_dict (dict): Dictionary to flatten - parent_key (str): """ - # print(target_dict) output_dict = {} # First flatten out values @@ -398,7 +440,7 @@ class AMDSMILogger(): self.output = {} - def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False): + def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False, dual_csv_output=False): """ Print current output acording to format and then destination params: multiple_device_enabled (bool) - True if printing output from @@ -411,12 +453,19 @@ class AMDSMILogger(): self._print_json_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output) elif self.is_csv_format(): - self._print_csv_output(multiple_device_enabled=multiple_device_enabled, - watching_output=watching_output) + if dual_csv_output: + self._print_dual_csv_output(multiple_device_enabled=multiple_device_enabled, + watching_output=watching_output) + else: + 
self._print_csv_output(multiple_device_enabled=multiple_device_enabled, + watching_output=watching_output) elif self.is_human_readable_format(): - self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled, - watching_output=watching_output, - tabular=tabular) + # If tabular output is enabled, redirect to _print_tabular_output + if tabular: + self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output) + else: + self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled, + watching_output=watching_output) def _print_json_output(self, multiple_device_enabled=False, watching_output=False): @@ -493,19 +542,178 @@ class AMDSMILogger(): writer.writerows(stored_csv_output) + def _print_dual_csv_output(self, multiple_device_enabled=False, watching_output=False): + if multiple_device_enabled: + stored_csv_output = self.multiple_device_output + else: + if not isinstance(self.output, list): + stored_csv_output = [self.output] + + primary_csv_output = [] + secondary_csv_output = [] + + if stored_csv_output: + # Split stored_csv_output into primary_csv and secondary_csv + for output_dict in stored_csv_output: + if 'process_list' in output_dict: + # Add a new entry for each process_info + for process_info_dict in output_dict['process_list']: + secondary_output_dict = {} + if watching_output: + secondary_output_dict['timestamp'] = output_dict['timestamp'] + secondary_output_dict['gpu'] = output_dict['gpu'] + if isinstance(process_info_dict["process_info"], dict): + for process_field, process_value in process_info_dict["process_info"].items(): + if isinstance(process_value, dict): + for key, value in process_value.items(): + secondary_output_dict[key] = value + else: + secondary_output_dict[process_field] = process_value + else: + # Handle no process found case + secondary_output_dict["process_info"] = process_info_dict["process_info"] + secondary_csv_output.append(secondary_output_dict) 
+ primary_output_dict = {} + for key, value in output_dict.items(): + if key != 'process_list': + primary_output_dict[key] = value + primary_csv_output.append(primary_output_dict) + + # Ensure uniform data within primary and secondary csv outputs + if primary_csv_output: + primary_keys = set() + for output in primary_csv_output: + for key in output: + primary_keys.add(key) + # insert empty data to align with keys that may not exist + for index, output_dict in enumerate(primary_csv_output): + remaining_keys = primary_keys - set(output_dict.keys()) + for key in remaining_keys: + primary_csv_output[index][key] = "N/A" + if secondary_csv_output: + secondary_keys = set() + for output in secondary_csv_output: + for key in output: + secondary_keys.add(key) + # insert empty data to align with keys that may not exist + for index, output_dict in enumerate(secondary_csv_output): + remaining_keys = secondary_keys - set(output_dict.keys()) + for key in remaining_keys: + secondary_csv_output[index][key] = "N/A" + + if self.destination == 'stdout': + if primary_csv_output: + # Get the header as a list of the first element to maintain order + csv_header = primary_csv_output[0].keys() + csv_stdout_output = self.CsvStdoutBuilder() + writer = csv.DictWriter(csv_stdout_output, csv_header) + writer.writeheader() + writer.writerows(primary_csv_output) + print(str(csv_stdout_output)) + if secondary_csv_output: + # Get the header as a list of the first element to maintain order + csv_header = secondary_csv_output[0].keys() + csv_stdout_output = self.CsvStdoutBuilder() + writer = csv.DictWriter(csv_stdout_output, csv_header) + writer.writeheader() + writer.writerows(secondary_csv_output) + print(str(csv_stdout_output)) + if watching_output: + print() + else: + if watching_output: + with self.destination.open('w', newline = '') as output_file: + primary_csv_output = [] + secondary_csv_output = [] + if self.watch_output: + # Split watch_output into primary_csv and secondary_csv + for 
output_dict in self.watch_output: + if 'process_list' in output_dict: + # Add a new entry for each process_info + for process_info_dict in output_dict['process_list']: + secondary_output_dict = {} + if watching_output: + secondary_output_dict['timestamp'] = output_dict['timestamp'] + secondary_output_dict['gpu'] = output_dict['gpu'] + if isinstance(process_info_dict["process_info"], dict): + for process_field, process_value in process_info_dict["process_info"].items(): + if isinstance(process_value, dict): + for key, value in process_value.items(): + secondary_output_dict[key] = value + else: + secondary_output_dict[process_field] = process_value + else: + # Handle no process found case + secondary_output_dict["process_info"] = process_info_dict["process_info"] + secondary_csv_output.append(secondary_output_dict) + primary_output_dict = {} + for key, value in output_dict.items(): + if key != 'process_list': + primary_output_dict[key] = value + primary_csv_output.append(primary_output_dict) + + # Ensure uniform data within primary and secondary csv outputs + if primary_csv_output: + primary_keys = set() + for output in primary_csv_output: + for key in output: + primary_keys.add(key) + # insert empty data to align with keys that may not exist + for index, output_dict in enumerate(primary_csv_output): + remaining_keys = primary_keys - set(output_dict.keys()) + for key in remaining_keys: + primary_csv_output[index][key] = "N/A" + if secondary_csv_output: + secondary_keys = set() + for output in secondary_csv_output: + for key in output: + secondary_keys.add(key) + # insert empty data to align with keys that may not exist + for index, output_dict in enumerate(secondary_csv_output): + remaining_keys = secondary_keys - set(output_dict.keys()) + for key in remaining_keys: + secondary_csv_output[index][key] = "N/A" + + if primary_csv_output: + # Get the header as a list of the first element to maintain order + csv_header = primary_csv_output[0].keys() + writer = 
csv.DictWriter(output_file, csv_header) + writer.writeheader() + writer.writerows(primary_csv_output) + if secondary_csv_output: + output_file.write("\n") + csv_header = secondary_csv_output[0].keys() + writer = csv.DictWriter(output_file, csv_header) + writer.writeheader() + writer.writerows(secondary_csv_output) + else: + with self.destination.open('a', newline = '') as output_file: + if primary_csv_output: + # Get the header as a list of the first element to maintain order + csv_header = primary_csv_output[0].keys() + writer = csv.DictWriter(output_file, csv_header) + writer.writeheader() + writer.writerows(primary_csv_output) + if secondary_csv_output: + output_file.write("\n") + csv_header = secondary_csv_output[0].keys() + writer = csv.DictWriter(output_file, csv_header) + writer.writeheader() + writer.writerows(secondary_csv_output) + def _print_human_readable_output(self, multiple_device_enabled=False, watching_output=False, tabular=False): - human_readable_output = '' + # If tabular output is enabled, redirect to _print_tabular_output if tabular: - if self.table_title: - human_readable_output += self.table_title + ':\n' - human_readable_output += self.table_header + '\n' + self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output) + return + + human_readable_output = '' if multiple_device_enabled: - for output in self.multiple_device_output: - human_readable_output += self._convert_json_to_human_readable(output, tabular=tabular) - human_readable_output += '\n' + for device_output in self.multiple_device_output: + human_readable_output += self._convert_json_to_human_readable(device_output) + '\n' else: - human_readable_output += self._convert_json_to_human_readable(self.output, tabular=tabular) + human_readable_output += self._convert_json_to_human_readable(self.output) if self.destination == 'stdout': try: @@ -518,15 +726,131 @@ class AMDSMILogger(): if watching_output: with self.destination.open('w') as 
output_file: human_readable_output = '' - if tabular: - if self.table_title: - human_readable_output += self.table_title + '\n' - human_readable_output += self.table_header + '\n' for output in self.watch_output: - human_readable_output += self._convert_json_to_human_readable(output, tabular=tabular) - if tabular: - human_readable_output += '\n' + human_readable_output += self._convert_json_to_human_readable(output) output_file.write(human_readable_output + '\n') else: with self.destination.open('a') as output_file: output_file.write(human_readable_output + '\n') + + + def _print_tabular_output(self, multiple_device_enabled=False, watching_output=False): + primary_table = '' + secondary_table = '' + + # Populate primary table without process_list + # Populate secondary table with process_list if exists + if multiple_device_enabled and self.multiple_device_output: + for device_output in self.multiple_device_output: + if 'process_list' in device_output: + process_table_dict = {} + if watching_output: + process_table_dict['timestamp'] = device_output['timestamp'] + process_table_dict['gpu'] = device_output['gpu'] + process_table_dict['process_list'] = device_output['process_list'] + secondary_table += self._convert_json_to_tabular(process_table_dict) + '\n' + # Add primary table keys without process_list + primary_table_output = {} + for key, value in device_output.items(): + if key != 'process_list': + primary_table_output[key] = value + primary_table += self._convert_json_to_tabular(primary_table_output) + '\n' + else: # Single device output + if 'process_list' in self.output: + process_table_dict = {} + if watching_output: + process_table_dict['timestamp'] = self.output['timestamp'] + process_table_dict['gpu'] = self.output['gpu'] + process_table_dict['process_list'] = self.output['process_list'] + secondary_table += self._convert_json_to_tabular(process_table_dict) + '\n' + # Add primary table keys without process_list + primary_table_output = {} + for key, value 
in self.output.items(): + if key != 'process_list': + primary_table_output[key] = value + primary_table += self._convert_json_to_tabular(primary_table_output) + '\n' + primary_table = primary_table.rstrip() + secondary_table = secondary_table.rstrip() + + # Add primary table title and header to primary_table + if primary_table: + primary_table_heading = '' + if self.table_title: + primary_table_heading = self.table_title + ':\n' + primary_table_heading += self.table_header + '\n' + primary_table = primary_table_heading + primary_table + + # Add secondary table title and header to secondary_table + # Currently just process_info uses this logic + if secondary_table: + secondary_table_heading = '' + if self.secondary_table_title: + secondary_table_heading = '\n' + self.secondary_table_title + ':\n' + secondary_table_heading += self.secondary_table_header + '\n' + secondary_table = secondary_table_heading + secondary_table + + if self.destination == 'stdout': + try: + # printing as unicode may fail if locale is not set properly + print(primary_table) + if secondary_table: + print(secondary_table) + if watching_output: + print("\n") + except UnicodeEncodeError: + # print as ascii, ignore incompatible characters + print(primary_table.encode('ascii', 'ignore').decode('ascii')) + if secondary_table: + print(secondary_table.encode('ascii', 'ignore').decode('ascii')) + if watching_output: + print("\n") + else: + if watching_output: # Write all stored watched output to a file + with self.destination.open('w') as output_file: + primary_table = '' + secondary_table = '' + # Add process_list to the secondary_table + # Add remaining watch_output to the primary_table + for device_output in self.watch_output: + # if process_list is detected in device_output store in secondary_table + if 'process_list' in device_output: + process_table_dict = { + 'timestamp': device_output['timestamp'], + 'gpu': device_output['gpu'], + 'process_list': device_output['process_list'] + } + 
secondary_table += self._convert_json_to_tabular(process_table_dict) + '\n' + # Add primary table keys without process_list + primary_table_output = {} + for key, value in device_output.items(): + if key != 'process_list': + primary_table_output[key] = value + primary_table += self._convert_json_to_tabular(primary_table_output) + '\n' + primary_table = primary_table.rstrip() # Remove trailing new line + secondary_table = secondary_table.rstrip() + + # Add primary table title and header to primary_table + if primary_table: + primary_table_heading = '' + if self.table_title: + primary_table_heading = self.table_title + ':\n' + primary_table_heading += self.table_header + '\n' + primary_table = primary_table_heading + primary_table + + # Add secondary table title and header to secondary_table + # Currently just process_info uses this logic + if secondary_table: + secondary_table_heading = '' + if self.secondary_table_title: + secondary_table_heading = '\n' + self.secondary_table_title + ':\n' + secondary_table_heading += self.secondary_table_header + '\n' + secondary_table = secondary_table_heading + secondary_table + + # Write both full tables to the file + output_file.write(primary_table) + if secondary_table: + output_file.write("\n" + secondary_table) + else: # Write all singular output to a file + with self.destination.open('a') as output_file: + output_file.write(primary_table + '\n') + output_file.write(secondary_table) diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index 60fd32bd64..10cebf4b2b 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -1126,6 +1126,7 @@ class AMDSMIParser(argparse.ArgumentParser): ecc_help = "Monitor ECC single bit, ECC double bit, and PCIe replay error counts" mem_usage_help = "Monitor memory usage in MB" pcie_bandwidth_help = "Monitor PCIe bandwidth in Mb/s" + process_help = "Enable Process information table below 
monitor output" # Create monitor subparser monitor_parser = subparsers.add_parser('monitor', help=monitor_help, description=monitor_subcommand_help) @@ -1148,6 +1149,7 @@ class AMDSMIParser(argparse.ArgumentParser): monitor_parser.add_argument('-e', '--ecc', action='store_true', required=False, help=ecc_help) monitor_parser.add_argument('-v', '--vram-usage', action='store_true', required=False, help=mem_usage_help) monitor_parser.add_argument('-r', '--pcie', action='store_true', required=False, help=pcie_bandwidth_help) + monitor_parser.add_argument('-q', '--process', action='store_true', required=False, help=process_help) def _add_rocm_smi_parser(self, subparsers, func): diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md index 82a25939d0..fdfdbefe2c 100644 --- a/projects/amdsmi/py-interface/README.md +++ b/projects/amdsmi/py-interface/README.md @@ -677,8 +677,11 @@ Output: Dictionary with fields Field | Description ---|--- +`current_socket_power` | current socket power `average_socket_power` | average socket power `gfx_voltage` | voltage gfx +`soc_voltage` | voltage soc +`mem_voltage` | voltage mem `power_limit` | power limit Exceptions that can be thrown by `amdsmi_get_power_info` function: @@ -697,8 +700,11 @@ try: else: for device in devices: power_measure = amdsmi_get_power_info(device) + print(power_measure['current_socket_power']) print(power_measure['average_socket_power']) print(power_measure['gfx_voltage']) + print(power_measure['soc_voltage']) + print(power_measure['mem_voltage']) print(power_measure['power_limit']) except AmdSmiException as e: print(e)