SWDEV-466598 - Fixed CLI process outputs
Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Change-Id: I902e82b6e78311e99542b109435346889daa49fc
[ROCm/amdsmi commit: 9fb2c967de]
Этот коммит содержится в:
@@ -88,6 +88,40 @@ ASIC products. This requires users to update any ABIs using this structure.
|
||||
|
||||
### Fixes
|
||||
|
||||
- **Fixed multiple processes not being registered in `amd-smi process` with json and csv format**.
|
||||
Multiple process outputs in the CLI tool were not being registered correctly. The json output did not handle multiple processes and is now in a new valid json format:
|
||||
|
||||
```shell
|
||||
[
|
||||
{
|
||||
"gpu": 0,
|
||||
"process_list": [
|
||||
{
|
||||
"process_info": {
|
||||
"name": "TransferBench",
|
||||
"pid": 420157,
|
||||
"mem_usage": {
|
||||
"value": 0,
|
||||
"unit": "B"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"process_info": {
|
||||
"name": "rvs",
|
||||
"pid": 420315,
|
||||
"mem_usage": {
|
||||
"value": 0,
|
||||
"unit": "B"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
|
||||
- **Removed `throttle-status` from `amd-smi monitor` as it is no longer reliably supported**.
|
||||
Throttle status may work for older ASICs, but will be replaced with PVIOL and TVIOL metrics for future ASIC support. It remains a field in the gpu_metrics API and in `amd-smi metric --power`.
|
||||
|
||||
|
||||
@@ -2706,47 +2706,47 @@ class AMDSMICommands():
|
||||
process_names.append(process_info)
|
||||
filtered_process_values = process_names
|
||||
|
||||
# If the name or pid args filter processes out then insert an N/A placeholder
|
||||
if not filtered_process_values:
|
||||
filtered_process_values.append({'process_info': "N/A"})
|
||||
|
||||
logging.debug(f"Process Info for GPU {gpu_id} | {filtered_process_values}")
|
||||
|
||||
multiple_devices_csv_override = False
|
||||
# Convert and store output by pid for csv format
|
||||
if self.logger.is_csv_format():
|
||||
# Check for empty list first
|
||||
if not filtered_process_values:
|
||||
self.logger.store_output(args.gpu, 'process_info', 'No running processes detected')
|
||||
else:
|
||||
for process_info in filtered_process_values:
|
||||
if process_info['process_info'] == "N/A":
|
||||
self.logger.store_output(args.gpu, 'process_info', 'No running processes detected')
|
||||
else:
|
||||
for key, value in process_info['process_info'].items():
|
||||
multiple_devices_csv_override = True
|
||||
if watching_output:
|
||||
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
|
||||
self.logger.store_output(args.gpu, key, value)
|
||||
for index, process in enumerate(filtered_process_values):
|
||||
if process['process_info'] == "N/A":
|
||||
filtered_process_values[index]['process_info'] = "No running processes detected"
|
||||
|
||||
self.logger.store_multiple_device_output()
|
||||
else:
|
||||
if self.logger.is_json_format():
|
||||
if watching_output:
|
||||
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
|
||||
self.logger.store_output(args.gpu, 'process_list', filtered_process_values)
|
||||
|
||||
# Store values in logger.output
|
||||
if not filtered_process_values:
|
||||
self.logger.store_output(args.gpu, 'process_info', 'No running processes detected')
|
||||
else:
|
||||
for process_info in filtered_process_values:
|
||||
if process_info['process_info'] == "N/A":
|
||||
process_info['process_info'] = 'No running processes detected'
|
||||
self.logger.store_output(args.gpu, 'process_info', process_info['process_info'])
|
||||
if self.logger.is_human_readable_format():
|
||||
if watching_output:
|
||||
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
|
||||
# When we print out process_info we remove the index
|
||||
# The removal is needed only for human readable process format to align with Host
|
||||
for index, process in enumerate(filtered_process_values):
|
||||
self.logger.store_output(args.gpu, f'process_info_{index}', process['process_info'])
|
||||
|
||||
multiple_devices_csv_override = False
|
||||
if self.logger.is_csv_format():
|
||||
multiple_devices_csv_override = True
|
||||
for process in filtered_process_values:
|
||||
if watching_output:
|
||||
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
|
||||
self.logger.store_output(args.gpu, 'process_info', process['process_info'])
|
||||
self.logger.store_multiple_device_output()
|
||||
|
||||
if multiple_devices:
|
||||
self.logger.store_multiple_device_output()
|
||||
return # Skip printing when there are multiple devices
|
||||
|
||||
self.logger.print_output(multiple_device_enabled=multiple_devices_csv_override, watching_output=watching_output)
|
||||
multiple_devices = multiple_devices or multiple_devices_csv_override
|
||||
self.logger.print_output(multiple_device_enabled=multiple_devices, watching_output=watching_output)
|
||||
|
||||
if watching_output: # End of single gpu add to watch_output
|
||||
self.logger.store_watch_output(multiple_device_enabled=multiple_devices_csv_override)
|
||||
self.logger.store_watch_output(multiple_device_enabled=multiple_devices)
|
||||
|
||||
|
||||
def profile(self, args):
|
||||
|
||||
@@ -72,9 +72,11 @@ class AMDSMILogger():
|
||||
def is_human_readable_format(self):
|
||||
return self.format == self.LoggerFormat.human_readable.value
|
||||
|
||||
|
||||
def clear_multiple_devices_ouput(self):
|
||||
self.multiple_device_output.clear()
|
||||
|
||||
|
||||
def _capitalize_keys(self, input_dict):
|
||||
output_dict = {}
|
||||
for key in input_dict.keys():
|
||||
@@ -163,6 +165,9 @@ class AMDSMILogger():
|
||||
yaml_output = yaml_output.replace("AMDSMI_SPACING_REMOVAL:\n", "")
|
||||
yaml_output = yaml_output.replace("'", "") # Remove ''
|
||||
|
||||
# Remove process_info indicies for Host parity:
|
||||
yaml_output = re.sub(r'PROCESS_INFO_[0-9]+:', 'PROCESS_INFO:', yaml_output)
|
||||
|
||||
clean_yaml_output = ''
|
||||
for line in yaml_output.splitlines():
|
||||
line = line.split(':')
|
||||
|
||||
@@ -148,6 +148,16 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(int_value, outputformat)
|
||||
|
||||
|
||||
def _is_valid_string(self, string_value):
|
||||
# Argument type validator
|
||||
# This is for triggering a cli exception if an empty string is detected
|
||||
if string_value:
|
||||
return string_value
|
||||
|
||||
outputformat = self.helpers.get_output_format()
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(string_value, outputformat)
|
||||
|
||||
|
||||
def _check_output_file_path(self):
|
||||
""" Argument action validator:
|
||||
Returns a path to a file from the output file path provided.
|
||||
@@ -863,7 +873,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
process_parser.add_argument('-G', '--general', action='store_true', required=False, help=general_help)
|
||||
process_parser.add_argument('-e', '--engine', action='store_true', required=False, help=engine_help)
|
||||
process_parser.add_argument('-p', '--pid', action='store', type=self._not_negative_int, required=False, help=pid_help)
|
||||
process_parser.add_argument('-n', '--name', action='store', required=False, help=name_help)
|
||||
process_parser.add_argument('-n', '--name', action='store', type=self._is_valid_string, required=False, help=name_help)
|
||||
|
||||
|
||||
def _add_profile_parser(self, subparsers, func):
|
||||
|
||||
@@ -158,6 +158,12 @@ amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_vbios(void *info) const {
|
||||
int32_t AMDSmiGPUDevice::get_compute_process_list_impl(GPUComputeProcessList_t& compute_process_list,
|
||||
ComputeProcessListType_t list_type)
|
||||
{
|
||||
|
||||
/**
|
||||
* Clear the compute_process_list before starting.
|
||||
*/
|
||||
compute_process_list.clear();
|
||||
|
||||
/**
|
||||
* The first call to GetProcessInfo() helps to find the size it needs,
|
||||
* so we can create a tailored size list.
|
||||
|
||||
Ссылка в новой задаче
Block a user