SWDEV-466598 - Fixed CLI process outputs

Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Change-Id: I902e82b6e78311e99542b109435346889daa49fc


[ROCm/amdsmi commit: 9fb2c967de]
Этот коммит содержится в:
Maisam Arif
2024-06-08 05:36:01 -05:00
родитель 149cab5bc5
Коммит 807ca0ad89
5 изменённых файлов: 84 добавлений и 29 удалений
+34
Просмотреть файл
@@ -88,6 +88,40 @@ ASIC products. This requires users to update any ABIs using this structure.
### Fixes
- **Fixed multiple processes not being registered in `amd-smi process` with json and csv format**.
Multiple process outputs in the CLI tool were not being registered correctly. The json output did not handle multiple processes and is now in a new valid json format:
```shell
[
{
"gpu": 0,
"process_list": [
{
"process_info": {
"name": "TransferBench",
"pid": 420157,
"mem_usage": {
"value": 0,
"unit": "B"
}
}
},
{
"process_info": {
"name": "rvs",
"pid": 420315,
"mem_usage": {
"value": 0,
"unit": "B"
}
}
}
]
}
]
```
- **Removed `throttle-status` from `amd-smi monitor` as it is no longer reliably supported**.
Throttle status may work for older ASICs, but will be replaced with PVIOL and TVIOL metrics for future ASIC support. It remains a field in the gpu_metrics API and in `amd-smi metric --power`.
+28 -28
Просмотреть файл
@@ -2706,47 +2706,47 @@ class AMDSMICommands():
process_names.append(process_info)
filtered_process_values = process_names
# If the name or pid args filter processes out then insert an N/A placeholder
if not filtered_process_values:
filtered_process_values.append({'process_info': "N/A"})
logging.debug(f"Process Info for GPU {gpu_id} | {filtered_process_values}")
multiple_devices_csv_override = False
# Convert and store output by pid for csv format
if self.logger.is_csv_format():
# Check for empty list first
if not filtered_process_values:
self.logger.store_output(args.gpu, 'process_info', 'No running processes detected')
else:
for process_info in filtered_process_values:
if process_info['process_info'] == "N/A":
self.logger.store_output(args.gpu, 'process_info', 'No running processes detected')
else:
for key, value in process_info['process_info'].items():
multiple_devices_csv_override = True
if watching_output:
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
self.logger.store_output(args.gpu, key, value)
for index, process in enumerate(filtered_process_values):
if process['process_info'] == "N/A":
filtered_process_values[index]['process_info'] = "No running processes detected"
self.logger.store_multiple_device_output()
else:
if self.logger.is_json_format():
if watching_output:
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
self.logger.store_output(args.gpu, 'process_list', filtered_process_values)
# Store values in logger.output
if not filtered_process_values:
self.logger.store_output(args.gpu, 'process_info', 'No running processes detected')
else:
for process_info in filtered_process_values:
if process_info['process_info'] == "N/A":
process_info['process_info'] = 'No running processes detected'
self.logger.store_output(args.gpu, 'process_info', process_info['process_info'])
if self.logger.is_human_readable_format():
if watching_output:
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
# When we print out process_info we remove the index
# The removal is needed only for human readable process format to align with Host
for index, process in enumerate(filtered_process_values):
self.logger.store_output(args.gpu, f'process_info_{index}', process['process_info'])
multiple_devices_csv_override = False
if self.logger.is_csv_format():
multiple_devices_csv_override = True
for process in filtered_process_values:
if watching_output:
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
self.logger.store_output(args.gpu, 'process_info', process['process_info'])
self.logger.store_multiple_device_output()
if multiple_devices:
self.logger.store_multiple_device_output()
return # Skip printing when there are multiple devices
self.logger.print_output(multiple_device_enabled=multiple_devices_csv_override, watching_output=watching_output)
multiple_devices = multiple_devices or multiple_devices_csv_override
self.logger.print_output(multiple_device_enabled=multiple_devices, watching_output=watching_output)
if watching_output: # End of single gpu add to watch_output
self.logger.store_watch_output(multiple_device_enabled=multiple_devices_csv_override)
self.logger.store_watch_output(multiple_device_enabled=multiple_devices)
def profile(self, args):
+5
Просмотреть файл
@@ -72,9 +72,11 @@ class AMDSMILogger():
def is_human_readable_format(self):
return self.format == self.LoggerFormat.human_readable.value
def clear_multiple_devices_ouput(self):
self.multiple_device_output.clear()
def _capitalize_keys(self, input_dict):
output_dict = {}
for key in input_dict.keys():
@@ -163,6 +165,9 @@ class AMDSMILogger():
yaml_output = yaml_output.replace("AMDSMI_SPACING_REMOVAL:\n", "")
yaml_output = yaml_output.replace("'", "") # Remove ''
# Remove process_info indicies for Host parity:
yaml_output = re.sub(r'PROCESS_INFO_[0-9]+:', 'PROCESS_INFO:', yaml_output)
clean_yaml_output = ''
for line in yaml_output.splitlines():
line = line.split(':')
+11 -1
Просмотреть файл
@@ -148,6 +148,16 @@ class AMDSMIParser(argparse.ArgumentParser):
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(int_value, outputformat)
def _is_valid_string(self, string_value):
# Argument type validator
# This is for triggering a cli exception if an empty string is detected
if string_value:
return string_value
outputformat = self.helpers.get_output_format()
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(string_value, outputformat)
def _check_output_file_path(self):
""" Argument action validator:
Returns a path to a file from the output file path provided.
@@ -863,7 +873,7 @@ class AMDSMIParser(argparse.ArgumentParser):
process_parser.add_argument('-G', '--general', action='store_true', required=False, help=general_help)
process_parser.add_argument('-e', '--engine', action='store_true', required=False, help=engine_help)
process_parser.add_argument('-p', '--pid', action='store', type=self._not_negative_int, required=False, help=pid_help)
process_parser.add_argument('-n', '--name', action='store', required=False, help=name_help)
process_parser.add_argument('-n', '--name', action='store', type=self._is_valid_string, required=False, help=name_help)
def _add_profile_parser(self, subparsers, func):
+6
Просмотреть файл
@@ -158,6 +158,12 @@ amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_vbios(void *info) const {
int32_t AMDSmiGPUDevice::get_compute_process_list_impl(GPUComputeProcessList_t& compute_process_list,
ComputeProcessListType_t list_type)
{
/**
* Clear the compute_process_list before starting.
*/
compute_process_list.clear();
/**
* The first call to GetProcessInfo() helps to find the size it needs,
* so we can create a tailored size list.