Updated CSV & Watch output
Change-Id: If88b9375482dbb9afa4e24b1847397b65d73d050
Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>


[ROCm/amdsmi commit: 543c573cc7]
Этот коммит содержится в:
Maisam Arif
2023-04-21 08:02:53 -05:00
родитель 1de2329c18
Коммит 582cab11ff
5 изменённых файлов: 328 добавлений и 212 удалений
+1 -1
Просмотреть файл
@@ -1 +1 @@
__version__ = "0.0.3"
__version__ = "0.0.4"
+208 -133
Просмотреть файл
@@ -20,16 +20,8 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
"""AMDSMICommands
This class contains all the commands corresponding to AMDSMIParser
Each command function will interact with AMDSMILogger to handle
displaying the output to the specified compatibility, format, and
destination.
"""
import threading
import time
from _version import __version__
from amdsmi_helpers import AMDSMIHelpers
@@ -39,6 +31,11 @@ from amdsmi import amdsmi_exception
class AMDSMICommands():
"""This class contains all the commands corresponding to AMDSMIParser
Each command function will interact with AMDSMILogger to handle
displaying the output to the specified compatibility, format, and
destination.
"""
def __init__(self, compatibility='amdsmi',
format='human_readable',
destination='stdout') -> None:
@@ -143,7 +140,7 @@ class AMDSMICommands():
# compatibility with gpuvsmi needs a list for single gpu
if self.logger.is_gpuvsmi_compatibility() and not multiple_devices:
self.logger.store_multiple_device_output()
self.logger.print_output(multiple_device_output=True)
self.logger.print_output(multiple_device_enabled=True)
else:
self.logger.print_output()
@@ -206,7 +203,7 @@ class AMDSMICommands():
if not any([args.asic, args.bus, args.vbios, args.limit, args.driver, args.caps, args.ras, args.board]):
args.asic = args.bus = args.vbios = args.limit = args.driver = args.caps = args.ras = args.board = self.all_arguments = True
values_dict = {}
static_dict = {}
if args.asic:
try:
@@ -218,9 +215,9 @@ class AMDSMICommands():
if asic_info['asic_serial'] != '':
asic_info['asic_serial'] = '0x' + asic_info['asic_serial']
values_dict['asic'] = asic_info
static_dict['asic'] = asic_info
except amdsmi_exception.AmdSmiLibraryException as e:
values_dict['asic'] = e.get_error_info()
static_dict['asic'] = e.get_error_info()
if not self.all_arguments:
raise e
if args.bus:
@@ -245,7 +242,7 @@ class AMDSMICommands():
raise e
bus_output_info.update(bus_info)
values_dict['bus'] = bus_output_info
static_dict['bus'] = bus_output_info
if args.vbios:
try:
vbios_info = amdsmi_interface.amdsmi_get_vbios_info(args.gpu)
@@ -255,9 +252,9 @@ class AMDSMICommands():
vbios_info['part_number'] = vbios_info.pop('part_number')
vbios_info['vbios_version'] = vbios_info.pop('vbios_version')
values_dict['vbios'] = vbios_info
static_dict['vbios'] = vbios_info
except amdsmi_exception.AmdSmiLibraryException as e:
values_dict['vbios'] = e.get_error_info()
static_dict['vbios'] = e.get_error_info()
if not self.all_arguments:
raise e
if args.board:
@@ -270,9 +267,9 @@ class AMDSMICommands():
board_info['product_number'] = board_info.pop('product_serial')
board_info['product_name'] = board_info.pop('product_name')
values_dict['board'] = board_info
static_dict['board'] = board_info
except amdsmi_exception.AmdSmiLibraryException as e:
values_dict['board'] = e.get_error_info()
static_dict['board'] = e.get_error_info()
if not self.all_arguments:
raise e
if args.limit:
@@ -322,22 +319,26 @@ class AMDSMICommands():
limit_info['temperature_junction'] = temp_junction_limit
limit_info['temperature_vram'] = temp_vram_limit
values_dict['limit'] = limit_info
static_dict['limit'] = limit_info
if args.driver:
try:
driver_info = {}
driver_info['driver_version'] = amdsmi_interface.amdsmi_get_driver_version(args.gpu)
values_dict['driver'] = driver_info
static_dict['driver'] = driver_info
except amdsmi_exception.AmdSmiLibraryException as e:
values_dict['driver'] = e.get_error_info()
static_dict['driver'] = e.get_error_info()
if not self.all_arguments:
raise e
if args.ras:
try:
values_dict['ras'] = amdsmi_interface.amdsmi_get_ras_block_features_enabled(args.gpu)
if self.helpers.has_ras_support(args.gpu):
static_dict['ras'] = amdsmi_interface.amdsmi_get_ras_block_features_enabled(args.gpu)
else:
static_dict['ras'] = 'N/A'
except amdsmi_exception.AmdSmiLibraryException as e:
values_dict['ras'] = e.get_error_info()
static_dict['ras'] = e.get_error_info()
if not self.all_arguments:
raise e
if args.caps:
@@ -348,21 +349,44 @@ class AMDSMICommands():
for capability_name, capability_value in caps_info.items():
if isinstance(capability_value, list):
caps_info[capability_name] = f"{capability_value}"
if isinstance(capability_value, bool):
caps_info[capability_name] = f"{bool(capability_value)}"
values_dict['caps'] = caps_info
if self.logger.is_csv_format() and self.logger.is_gpuvsmi_compatibility():
if 'mm_ip_list' in caps_info:
if caps_info['mm_ip_list']: # Don't index if it's not populated
caps_info['mm_ip_list'] = caps_info['mm_ip_list'][0]
static_dict['caps'] = caps_info
except amdsmi_exception.AmdSmiLibraryException as e:
values_dict['caps'] = e.get_error_info()
static_dict['caps'] = e.get_error_info()
if not self.all_arguments:
raise e
# Store values in logger.output
self.logger.store_output(args.gpu, 'values', values_dict)
multiple_devices_csv_override = False
# Convert and store output by pid for csv format
if self.logger.is_csv_format() and args.ras:
# expand if ras blocks are populated
if isinstance(static_dict['ras'], list):
ras_dicts = static_dict.pop('ras')
multiple_devices_csv_override = True
for ras_dict in ras_dicts:
for key, value in ras_dict.items():
self.logger.store_output(args.gpu, key, value)
self.logger.store_output(args.gpu, 'values', static_dict)
self.logger.store_multiple_device_output()
else:
# Store values if ras has an error
self.logger.store_output(args.gpu, 'values', static_dict)
else:
# Store values in logger.output
self.logger.store_output(args.gpu, 'values', static_dict)
if multiple_devices:
self.logger.store_multiple_device_output()
return # Skip printing when there are multiple devices
self.logger.print_output()
self.logger.print_output(multiple_device_enabled=multiple_devices_csv_override)
def firmware(self, args, multiple_devices=False, gpu=None, fw_list=True):
@@ -445,7 +469,7 @@ class AMDSMICommands():
self.logger.store_multiple_device_output()
return # Skip printing when there are multiple devices
self.logger.print_output(multiple_device_output=multiple_devices_csv_override)
self.logger.print_output(multiple_device_enabled=multiple_devices_csv_override)
def bad_pages(self, args, multiple_devices=False, gpu=None, retired=None, pending=None, un_res=None):
@@ -456,7 +480,7 @@ class AMDSMICommands():
multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
gpu (device_handle, optional): device_handle for target device. Defaults to None.
retired (bool, optional) - Value override for args.retired
pending (bool, optional) - Value override for args.pending
pending (bool, optional) - Value override for args.pending
un_res (bool, optional) - Value override for args.un_res
Raises:
@@ -611,7 +635,6 @@ class AMDSMICommands():
Returns:
None: Print output via AMDSMILogger to destination
"""
# Set args.* to passed in arguments
if gpu:
args.gpu = gpu
@@ -662,20 +685,33 @@ class AMDSMICommands():
# Handle watch logic, will only enter this block once
if args.watch:
self.helpers.handle_watch(args=args, subcommand=self.metric)
self.logger.print_output(watch_output=True) # Print at the end of watch ( final flush )
self.helpers.handle_watch(args=args, subcommand=self.metric, logger=self.logger)
return
# Handle multiple GPUs
if isinstance(args.gpu, list):
if len(args.gpu) > 1:
for device_handle in args.gpu:
# Handle multiple_devices to print all output at once
self.metric(args, multiple_devices=True, watching_output=False, gpu=device_handle)
self.logger.print_output(multiple_device_output=True)
# Shallow-copy the gpu list, since the recursive calls below reassign args.gpu
stored_gpus = []
for gpu in args.gpu:
stored_gpus.append(gpu)
# End of multiple gpus add to watch_output
# Store output from multiple devices
for device_handle in args.gpu:
self.metric(args, multiple_devices=True, watching_output=watching_output, gpu=device_handle)
# Reload original gpus
args.gpu = stored_gpus
# Print multiple device output
self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output)
# Add output to total watch output and clear multiple device output
if watching_output:
self.logger.store_watch_output(multiple_devices=True)
self.logger.store_watch_output(multiple_device_enabled=True)
# Flush the watching output
self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output)
return
elif len(args.gpu) == 1:
@@ -822,14 +858,15 @@ class AMDSMICommands():
if args.ecc:
ecc_dict = {}
try:
ras_states = amdsmi_interface.amdsmi_get_ras_block_features_enabled(args.gpu)
for state in ras_states:
if state['status'] == amdsmi_interface.AmdSmiRasErrState.ENABLED:
gpu_block = amdsmi_interface.AmdSmiGpuBlock[state['block']]
ecc_count = amdsmi_interface.amdsmi_get_ecc_error_count(args.gpu, gpu_block)
ecc_dict[state['block']] = {'correctable' : ecc_count['correctable_count'],
'uncorrectable': ecc_count['uncorrectable_count']}
if ecc_dict == {}:
if self.helpers.has_ras_support(args.gpu):
ras_states = amdsmi_interface.amdsmi_get_ras_block_features_enabled(args.gpu)
for state in ras_states:
if state['status'] == amdsmi_interface.AmdSmiRasErrState.ENABLED:
gpu_block = amdsmi_interface.AmdSmiGpuBlock[state['block']]
ecc_count = amdsmi_interface.amdsmi_dev_get_ecc_count(args.gpu, gpu_block)
ecc_dict[state['block']] = {'correctable' : ecc_count['correctable_count'],
'uncorrectable': ecc_count['uncorrectable_count']}
if not ecc_dict:
ecc_dict['correctable'] = 'N/A'
ecc_dict['uncorrectable'] = 'N/A'
@@ -1021,17 +1058,19 @@ class AMDSMICommands():
values_dict['mem_usage'] = memory_total
# Store values in logger.output
# Store timestamp first if watching_output is enabled
if watching_output:
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
self.logger.store_output(args.gpu, 'values', values_dict)
if multiple_devices:
self.logger.store_multiple_device_output()
return # Skip printing when there are multiple devices
self.logger.print_output()
self.logger.print_output(watching_output=watching_output)
if watching_output: # End of single gpu add to watch_output
self.logger.store_watch_output(multiple_devices=False)
self.logger.store_watch_output(multiple_device_enabled=False)
def process(self, args, multiple_devices=False, watching_output=False,
@@ -1082,21 +1121,33 @@ class AMDSMICommands():
# Handle watch logic, will only enter this block once
if args.watch:
args = self.helpers.handle_watch(args=args, subcommand=self.process)
self.logger.print_output(watch_output=True) # Print at the end of watch ( final flush )
self.helpers.handle_watch(args=args, subcommand=self.process, logger=self.logger)
return
# Handle multiple GPUs
if isinstance(args.gpu, list):
if len(args.gpu) > 1:
for device_handle in args.gpu:
# Handle multiple_devices to print all output at once
self.process(args, multiple_devices=True, watching_output=False, gpu=device_handle)
self.logger.print_output(multiple_device_output=True)
# Shallow-copy the gpu list, since the recursive calls below reassign args.gpu
stored_gpus = []
for gpu in args.gpu:
stored_gpus.append(gpu)
# End of multiple gpus add to watch_output
# Store output from multiple devices
for device_handle in args.gpu:
self.process(args, multiple_devices=True, watching_output=watching_output, gpu=device_handle)
# Reload original gpus
args.gpu = stored_gpus
# Print multiple device output
self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output)
# Add output to total watch output and clear multiple device output
if watching_output:
self.logger.store_watch_output(multiple_devices=True)
self.logger.store_watch_output(multiple_device_enabled=True)
# Flush the watching output
self.logger.print_output(multiple_device_enabled=True, watching_output=watching_output)
return
elif len(args.gpu) == 1:
@@ -1126,7 +1177,7 @@ class AMDSMICommands():
mem_usage_mb = (process_info['mem_usage']//1024) // 1024
if mem_usage_mb < 0:
process_info['mem_usage'] = (process_info['mem_usage']//1024)
process_info['mem_usage'] = process_info['mem_usage']//1024
mem_usage_unit = 'B'
else:
process_info['mem_usage'] = mem_usage_mb
@@ -1180,13 +1231,20 @@ class AMDSMICommands():
for process_info in filtered_process_values:
for key, value in process_info['process_info'].items():
multiple_devices_csv_override = True
if watching_output:
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
self.logger.store_output(args.gpu, key, value)
self.logger.store_multiple_device_output()
else:
# Remove brackets if there is only one value
if len(filtered_process_values) == 1:
filtered_process_values = filtered_process_values[0]
if watching_output:
self.logger.store_output(args.gpu, 'timestamp', int(time.time()))
# Store values in logger.output
if filtered_process_values == []:
self.logger.store_output(args.gpu, 'values', {'process_info': 'Not Found'})
@@ -1197,10 +1255,10 @@ class AMDSMICommands():
self.logger.store_multiple_device_output()
return # Skip printing when there are multiple devices
self.logger.print_output(multiple_device_output=multiple_devices_csv_override)
self.logger.print_output(multiple_device_enabled=multiple_devices_csv_override, watching_output=watching_output)
if watching_output: # End of single gpu add to watch_output
self.logger.store_watch_output(multiple_devices=False)
self.logger.store_watch_output(multiple_device_enabled=multiple_devices_csv_override)
def profile(self, args):
@@ -1262,23 +1320,31 @@ class AMDSMICommands():
if args.gpu is None:
args.gpu = self.device_handles
# Handle multiple GPUs
handled_multiple_gpus, device_handle = self.helpers.handle_gpus(args, self.logger, self.topology)
if handled_multiple_gpus:
return # This function is recursive
# Handle all args being false
if not any([args.access, args.weight, args.hops, args.type, args.numa, args.numa_bw]):
args.access = args.weight = args.hops = args.type = args.numa = args.numa_bw = True
topo_json = {}
topo_table = []
topo_dict = {}
if args.access:
pass
topo_dict['access'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
if args.weight:
pass
topo_dict['weight'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
if args.hops:
pass
topo_dict['hops'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
if args.type:
pass
topo_dict['type'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
if args.numa:
pass
topo_dict['numa'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
# numa_numbers = c_uint32()
# for device in deviceList:
# ret = rocmsmi.rsmi_get_numa_node_number(device, byref(numa_numbers))
@@ -1293,7 +1359,17 @@ class AMDSMICommands():
# else:
# printErrLog(device, 'Cannot read Numa Affinity')
if args.numa_bw:
pass
topo_dict['numa_bw'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
# Store values in logger.output
self.logger.store_output(args.gpu, 'values', topo_dict)
if multiple_devices:
self.logger.store_multiple_device_output()
return # Skip printing when there are multiple devices
self.logger.print_output()
def set_value(self, args, multiple_devices=False, gpu=None, clock=None, sclk=None, mclk=None,
@@ -1306,22 +1382,22 @@ class AMDSMICommands():
args (Namespace): Namespace containing the parsed CLI args
multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
gpu (device_handle, optional): device_handle for target device. Defaults to None.
clock (bool, optional): Value over ride for args.clock. Defaults to None.
sclk (bool, optional): Value over ride for args.sclk. Defaults to None.
mclk (bool, optional): Value over ride for args.mclk. Defaults to None.
pcie (bool, optional): Value over ride for args.pcie. Defaults to None.
slevel (bool, optional): Value over ride for args.slevel. Defaults to None.
mlevel (bool, optional): Value over ride for args.mlevel. Defaults to None.
vc (bool, optional): Value over ride for args.vc. Defaults to None.
srange (bool, optional): Value over ride for args.srange. Defaults to None.
mrange (bool, optional): Value over ride for args.mrange. Defaults to None.
fan (bool, optional): Value over ride for args.fan. Defaults to None.
perflevel (bool, optional): Value over ride for args.perflevel. Defaults to None.
overdrive (bool, optional): Value over ride for args.overdrive. Defaults to None.
memoverdrive (bool, optional): Value over ride for args.memoverdrive. Defaults to None.
poweroverdrive (bool, optional): Value over ride for args.poweroverdrive. Defaults to None.
profile (bool, optional): Value over ride for args.profile. Defaults to None.
perfdeterminism (bool, optional): Value over ride for args.perfdeterminism. Defaults to None.
clock ((amdsmi_interface.AmdSmiClkType, int), optional): Value override for args.clock. Defaults to None.
sclk (int, optional): Value override for args.sclk. Defaults to None.
mclk (int, optional): Value override for args.mclk. Defaults to None.
pcie (int, optional): Value override for args.pcie. Defaults to None.
slevel ((amdsmi_interface.AmdSmiFreqInd, int), optional): Value override for args.slevel. Defaults to None.
mlevel ((amdsmi_interface.AmdSmiFreqInd), optional): Value override for args.mlevel. Defaults to None.
vc ((int, int, int), optional): Value override for args.vc. Defaults to None.
srange ((int, int), optional): Value override for args.srange. Defaults to None.
mrange ((int, int), optional): Value override for args.mrange. Defaults to None.
fan (int, optional): Value override for args.fan. Defaults to None.
perflevel (amdsmi_interface.AmdSmiDevPerfLevel, optional): Value override for args.perflevel. Defaults to None.
overdrive (int, optional): Value override for args.overdrive. Defaults to None.
memoverdrive (int, optional): Value override for args.memoverdrive. Defaults to None.
poweroverdrive (int, optional): Value override for args.poweroverdrive. Defaults to None.
profile (bool, optional): Value override for args.profile. Defaults to None.
perfdeterminism (int, optional): Value override for args.perfdeterminism. Defaults to None.
Raises:
ValueError: Value error if no gpu value is provided
@@ -1397,7 +1473,7 @@ class AMDSMICommands():
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
if 'manual' in perf_level.lower():
@@ -1405,7 +1481,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
if clock_type != amdsmi_interface.AmdSmiClkType.PCIE:
@@ -1413,18 +1489,17 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type, freq_bitmask)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
else:
try:
amdsmi_interface.amdsmi_dev_set_pci_bandwidth(args.gpu, freq_bitmask)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
self.logger.store_output(args.gpu, 'clock', f'Successfully set clock frequency bitmask for {clock_type}')
if isinstance(args.sclk, int):
freq_bitmask = args.sclk
clock_type = amdsmi_interface.AmdSmiClkType.SYS
@@ -1433,7 +1508,7 @@ class AMDSMICommands():
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
if 'manual' in perf_level.lower():
@@ -1441,14 +1516,14 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
try:
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type, freq_bitmask)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
self.logger.store_output(args.gpu, 'sclk', 'Successfully set clock frequency bitmask')
@@ -1460,7 +1535,7 @@ class AMDSMICommands():
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
if 'manual' in perf_level.lower():
@@ -1468,14 +1543,14 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
try:
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type, freq_bitmask)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
self.logger.store_output(args.gpu, 'mclk', 'Successfully set clock frequency bitmask')
@@ -1487,7 +1562,7 @@ class AMDSMICommands():
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
if 'manual' in perf_level.lower():
@@ -1495,17 +1570,18 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
try:
amdsmi_interface.amdsmi_dev_set_pci_bandwidth(args.gpu, freq_bitmask)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
self.logger.store_output(args.gpu, 'pcie', 'Successfully set clock frequency bitmask')
if isinstance(args.slevel, int):
level, value = args.slevel
level = amdsmi_interface.AmdSmiFreqInd(level)
clock_type = amdsmi_interface.AmdSmiClkType.SYS
@@ -1513,7 +1589,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_od_clk_info(args.gpu, level, value, clock_type)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {gpu_string}") from e
self.logger.store_output(args.gpu, 'slevel', 'Successfully changed clock frequency')
@@ -1525,7 +1601,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_od_clk_info(args.gpu, level, value, clock_type)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {gpu_string}") from e
self.logger.store_output(args.gpu, 'mlevel', 'Successfully changed clock frequency')
@@ -1535,7 +1611,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_od_volt_info(args.gpu, point, clk, volt)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the Voltage Curve point {point} to {clk}(MHz) {volt}(mV) on {gpu_string}") from e
self.logger.store_output(args.gpu, 'vc', f'Successfully set voltage point {point} to {clk}(MHz) {volt}(mV)')
@@ -1546,7 +1622,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_clk_range(args.gpu, min_value, max_value, clock_type)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {gpu_string}") from e
self.logger.store_output(args.gpu, 'srange', f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz)")
@@ -1557,7 +1633,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_clk_range(args.gpu, min_value, max_value, clock_type)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {gpu_string}") from e
self.logger.store_output(args.gpu, 'mrange', f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz)")
@@ -1566,7 +1642,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_fan_speed(args.gpu, 0, args.fan)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set fan speed {args.fan} on {gpu_string}") from e
self.logger.store_output(args.gpu, 'fan', f"Successfully set fan speed {args.fan}")
@@ -1576,7 +1652,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, perf_level)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set performance level {args.perflevel} on {gpu_string}") from e
self.logger.store_output(args.gpu, 'perflevel', f"Successfully set performance level {args.perflevel}")
@@ -1586,7 +1662,7 @@ class AMDSMICommands():
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
if 'manual' in perf_level.lower():
@@ -1594,14 +1670,14 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
try:
amdsmi_interface.amdsmi_dev_set_overdrive_level_v1(args.gpu, args.overdrive)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set overdrive {args.overdrive} to {gpu_string}") from e
self.logger.store_output(args.gpu, 'overdrive', f"Successfully to set overdrive level to {args.overdrive}")
@@ -1611,7 +1687,7 @@ class AMDSMICommands():
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
if 'manual' in perf_level.lower():
@@ -1619,7 +1695,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
self.logger.store_output(args.gpu, 'memoverdrive', f"Successfully to set memoverdrive level to {args.memoverdrive}")
@@ -1629,7 +1705,7 @@ class AMDSMICommands():
power_caps = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to get the power cap info for {gpu_string}") from e
if overdrive_power_cap == 0:
overdrive_power_cap = power_caps['power_cap_default']
@@ -1649,14 +1725,14 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_dev_set_power_cap(args.gpu, 0, overdrive_power_cap)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set power cap to {overdrive_power_cap} on {gpu_string}") from e
try:
power_caps = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to get the power cap info for {gpu_string} post set") from e
if power_caps['power_cap'] == overdrive_power_cap:
@@ -1670,7 +1746,7 @@ class AMDSMICommands():
amdsmi_interface.amdsmi_set_perf_determinism_mode(args.gpu, args.perfdeterminism)
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set performance determinism and clock frequency to {args.perfdeterminism} on {gpu_string}") from e
self.logger.store_output(args.gpu, 'perfdeterminism', f"Successfully enabled performance determinism and set GFX clock frequency to {args.perfdeterminism}")
@@ -1691,13 +1767,13 @@ class AMDSMICommands():
args (Namespace): Namespace containing the parsed CLI args
multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
gpu (device_handle, optional): device_handle for target device. Defaults to None.
gpureset (bool, optional): Value over ride for args.gpureset. Defaults to None.
clocks (bool, optional): Value over ride for args.clocks. Defaults to None.
fans (bool, optional): Value over ride for args.fans. Defaults to None.
profile (bool, optional): Value over ride for args.profile. Defaults to None.
poweroverdrive (bool, optional): Value over ride for args.poweroverdrive. Defaults to None.
xgmierr (bool, optional): Value over ride for args.xgmierr. Defaults to None.
perfdeterminism (bool, optional): Value over ride for args.perfdeterminism. Defaults to None.
gpureset (bool, optional): Value override for args.gpureset. Defaults to None.
clocks (bool, optional): Value override for args.clocks. Defaults to None.
fans (bool, optional): Value override for args.fans. Defaults to None.
profile (bool, optional): Value override for args.profile. Defaults to None.
poweroverdrive (bool, optional): Value override for args.poweroverdrive. Defaults to None.
xgmierr (bool, optional): Value override for args.xgmierr. Defaults to None.
perfdeterminism (bool, optional): Value override for args.perfdeterminism. Defaults to None.
Raises:
ValueError: Value error if no gpu value is provided
@@ -1742,14 +1818,13 @@ class AMDSMICommands():
result = 'Successfully reset GPU'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
result = e.get_error_info()
else:
result = 'Unable to reset non-amd GPU'
self.logger.store_output(args.gpu, 'gpu_reset', result)
if args.clocks:
# rsmi_string = ' Reset Clocks '
reset_clocks_results = {'overdrive' : '',
'clocks' : '',
'performance': ''}
@@ -1758,7 +1833,7 @@ class AMDSMICommands():
reset_clocks_results['overdrive'] = 'Overdrive set to 0'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
reset_clocks_results['overdrive'] = e.get_error_info()
try:
@@ -1767,7 +1842,7 @@ class AMDSMICommands():
reset_clocks_results['clocks'] = 'Successfully reset clocks'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
reset_clocks_results['clocks'] = e.get_error_info()
try:
@@ -1776,7 +1851,7 @@ class AMDSMICommands():
reset_clocks_results['performance'] = 'Performance level reset to auto'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
reset_clocks_results['performance'] = e.get_error_info()
self.logger.store_output(args.gpu, 'reset_clocks', reset_clocks_results)
@@ -1786,7 +1861,7 @@ class AMDSMICommands():
result = 'Successfully reset fan speed to driver control'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
result = e.get_error_info()
self.logger.store_output(args.gpu, 'reset_fans', result)
@@ -1799,7 +1874,7 @@ class AMDSMICommands():
reset_profile_results['power_profile'] = 'Successfully reset Power Profile'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
reset_profile_results['power_profile'] = e.get_error_info()
try:
@@ -1808,7 +1883,7 @@ class AMDSMICommands():
reset_profile_results['performance_level'] = 'Successfully reset Performance Level'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
reset_profile_results['performance_level'] = e.get_error_info()
self.logger.store_output(args.gpu, 'reset_profile', reset_profile_results)
@@ -1818,7 +1893,7 @@ class AMDSMICommands():
result = 'Successfully reset XGMI Error count'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
result = e.get_error_info()
self.logger.store_output(args.gpu, 'reset_xgmi_err', result)
if args.perfdeterminism:
@@ -1828,7 +1903,7 @@ class AMDSMICommands():
result = 'Successfully disabled performance determinism'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NO_PERM:
raise PermissionError('Command requires elevation')
raise PermissionError('Command requires elevation') from e
result = e.get_error_info()
self.logger.store_output(args.gpu, 'reset_perf_determinism', result)
+20 -5
Просмотреть файл
@@ -229,7 +229,7 @@ class AMDSMIHelpers():
for device_handle in args.gpu:
# Handle multiple_devices to print all output at once
subcommand(args, multiple_devices=True, gpu=device_handle)
logger.print_output(multiple_device_output=True)
# The keyword must match AMDSMILogger.print_output's parameter name
# (multiple_device_enabled); the misspelled 'multiple_devices_enabled'
# would raise TypeError as an unexpected keyword argument.
logger.print_output(multiple_device_enabled=True)
return True, args.gpu
elif len(args.gpu) == 1:
args.gpu = args.gpu[0]
@@ -240,13 +240,14 @@ class AMDSMIHelpers():
return False, args.gpu
def handle_watch(self, args, subcommand):
def handle_watch(self, args, subcommand, logger):
"""This function will run the subcommand multiple times based
on the passed watch, watch_time, and iterations passed in.
params:
args - argparser args to pass to subcommand
subcommand (AMDSMICommands) - Function that can handle
watching output (Currently: metric & process)
logger (AMDSMILogger) - Logger for accessing config values
return:
Nothing
"""
@@ -260,6 +261,8 @@ class AMDSMIHelpers():
args.watch_time = None
args.iterations = None
# Set the signal handler to flush a delimiter to file if the format is json
print("'CTRL' + 'C' to stop watching output:")
if watch_time: # Run for set amount of time
iterations_ran = 0
end_time = time.time() + watch_time
@@ -267,11 +270,11 @@ class AMDSMIHelpers():
subcommand(args, watching_output=True)
# Handle iterations limit
iterations_ran += 1
if iterations:
if iterations >= iterations_ran:
if iterations is not None:
if iterations <= iterations_ran:
break
time.sleep(watch)
elif iterations: # Run for a set amount of iterations
elif iterations is not None: # Run for a set amount of iterations
for iteration in range(iterations):
subcommand(args, watching_output=True)
if iteration == iterations - 1: # Break on iteration completion
@@ -386,3 +389,15 @@ class AMDSMIHelpers():
return True, profile_presets[profile]
else:
return False, profile_presets.values()
def has_ras_support(self, device_handle):
    """Report whether the device's capability info marks RAS as supported.

    params:
        device_handle - handle passed to amdsmi_get_caps_info
    return:
        True if the 'ras_supported' entry of the caps info is truthy,
        False if it is falsy or if the library call raises
        AmdSmiLibraryException
    """
    try:
        capabilities = amdsmi_interface.amdsmi_get_caps_info(device_handle)
        return bool(capabilities['ras_supported'])
    except amdsmi_exception.AmdSmiLibraryException:
        # A failed capability query is treated as "no RAS support"
        return False
+78 -55
Просмотреть файл
@@ -39,7 +39,7 @@ class AMDSMILogger():
self.compatibility = compatibility # amd-smi, gpuv-smi, or rocm-smi
self.format = format # csv, json, or human_readable
self.destination = destination # stdout, path to a file (append)
self.amd_smi_helpers = AMDSMIHelpers()
self.helpers = AMDSMIHelpers()
class LoggerFormat(Enum):
@@ -182,8 +182,12 @@ class AMDSMILogger():
value_with_parent_key = {}
for parent_key, child_dict in value.items():
if isinstance(child_dict, dict):
for child_key, value1 in child_dict.items():
value_with_parent_key[parent_key + '_' + child_key] = value1
# 'gfx' children keep their bare key names; every other parent's children
# are prefixed with '<parent_key>_'.
# NOTE: the previous test `parent_key in ('gfx')` compared against a plain
# string (the parentheses do not make a tuple), so it was a substring
# check that also matched keys such as 'g', 'fx' or ''. Use equality.
key_prefix = '' if parent_key == 'gfx' else parent_key + '_'
for child_key, value1 in child_dict.items():
    value_with_parent_key[key_prefix + child_key] = value1
else:
value_with_parent_key[parent_key] = child_dict
value = value_with_parent_key
@@ -212,7 +216,7 @@ class AMDSMILogger():
return:
Nothing
"""
gpu_id = self.amd_smi_helpers.get_gpu_id_from_device_handle(device_handle)
gpu_id = self.helpers.get_gpu_id_from_device_handle(device_handle)
if self.is_amdsmi_compatibility():
self._store_output_amdsmi(gpu_id=gpu_id, argument=argument, data=data)
elif self.is_rocmsmi_compatibility():
@@ -222,6 +226,9 @@ class AMDSMILogger():
def _store_output_amdsmi(self, gpu_id, argument, data):
if argument == 'timestamp': # Make sure timestamp is the first element in the output
self.output['timestamp'] = int(time.time())
if self.is_json_format() or self.is_human_readable_format():
self.output['gpu'] = int(gpu_id)
if argument == 'values' and isinstance(data, dict):
@@ -237,7 +244,6 @@ class AMDSMILogger():
self.output.update(flat_dict)
else:
self.output[argument] = data
else:
raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported")
@@ -257,6 +263,9 @@ class AMDSMILogger():
def _store_output_gpuvsmi(self, gpu_id, argument, data):
if argument == 'timestamp': # Make sure timestamp is the first element in the output
self.output['timestamp'] = int(time.time())
if self.is_json_format() or self.is_human_readable_format():
self.output['gpu'] = int(gpu_id)
self.output[argument] = data
@@ -299,60 +308,68 @@ class AMDSMILogger():
"""
if not self.output:
return
output = {}
for key, value in self.output.items():
output[key] = value
self.multiple_device_output.append(self.output)
self.multiple_device_output.append(output)
self.output = {}
def store_watch_output(self, multiple_devices=False):
def store_watch_output(self, multiple_device_enabled=False):
""" Add the current output or multiple_devices_output
params:
multiple_devices (bool) - True if watching multiple devices
multiple_device_enabled (bool) - True if watching multiple devices
return:
Nothing
"""
values = self.output
if multiple_devices:
values = self.multiple_device_output
if multiple_device_enabled:
for output in self.multiple_device_output:
self.watch_output.append(output)
self.watch_output.append({'timestamp': int(time.time()),
'values': values})
self.multiple_device_output = []
else:
output = {}
for key, value in self.output.items():
output[key] = value
self.watch_output.append(output)
self.output = {}
def print_output(self, multiple_device_output=False, watch_output=False):
def print_output(self, multiple_device_enabled=False, watching_output=False):
""" Print current output acording to format and then destination
params:
multiple_device_output (bool) - True if printing output from
multiple_device_enabled (bool) - True if printing output from
multiple devices
watch_output (bool) - True if printing watch output
watching_output (bool) - True if printing watch output
return:
Nothing
"""
if self.is_json_format():
self._print_json_output(multiple_device_output=multiple_device_output,
watch_output=watch_output)
self._print_json_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
elif self.is_csv_format():
self._print_csv_output(multiple_device_output=multiple_device_output,
watch_output=watch_output)
self._print_csv_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
elif self.is_human_readable_format():
self._print_human_readable_output(multiple_device_output=multiple_device_output,
watch_output=watch_output)
self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
def _print_json_output(self, multiple_device_output=False, watch_output=False):
if multiple_device_output:
def _print_json_output(self, multiple_device_enabled=False, watching_output=False):
if multiple_device_enabled:
json_output = self.multiple_device_output
else:
json_output = self.output
if self.destination == 'stdout':
if watch_output:
return # We don't need to print to stdout at the end of watch
else:
json_std_output = json.dumps(json_output, indent = 4)
if json_output:
json_std_output = json.dumps(json_output, indent=4)
print(json_std_output)
else: # Write output to file
if watch_output: # Flush the full JSON output to the file on watch command completion
if watching_output: # Flush the full JSON output to the file on watch command completion
with self.destination.open('w') as output_file:
json.dump(self.watch_output, output_file, indent=4)
else:
@@ -360,43 +377,42 @@ class AMDSMILogger():
json.dump(json_output, output_file, indent=4)
def _print_csv_output(self, multiple_device_output=False, watch_output=False):
if watch_output: # Don't print output if it's for watch
return
if multiple_device_output:
def _print_csv_output(self, multiple_device_enabled=False, watching_output=False):
if multiple_device_enabled:
stored_csv_output = self.multiple_device_output
else:
if not isinstance(self.output, list):
stored_csv_output = [self.output]
if self.destination == 'stdout':
csv_header = stored_csv_output[0].keys()
csv_stdout_output = self.CsvStdoutBuilder()
writer = csv.DictWriter(csv_stdout_output, csv_header)
writer.writeheader()
writer.writerows(stored_csv_output)
if self.is_gpuvsmi_compatibility():
print(str(csv_stdout_output).replace('"',''))
else:
print(str(csv_stdout_output))
else:
with self.destination.open('a', newline = '') as output_file:
if stored_csv_output:
csv_header = stored_csv_output[0].keys()
writer = csv.DictWriter(output_file, csv_header)
csv_stdout_output = self.CsvStdoutBuilder()
writer = csv.DictWriter(csv_stdout_output, csv_header)
writer.writeheader()
writer.writerows(stored_csv_output)
print(str(csv_stdout_output))
else:
if watching_output:
    # End of a watch run: rewrite the file with every accumulated row
    with self.destination.open('w', newline='') as output_file:
        if self.watch_output:
            csv_header = self.watch_output[0].keys()
            writer = csv.DictWriter(output_file, csv_header)
            writer.writeheader()
            writer.writerows(self.watch_output)
else:
    with self.destination.open('a', newline='') as output_file:
        # Guard against empty output like the stdout and watch branches do;
        # indexing [0] on an empty list would raise IndexError.
        if stored_csv_output:
            csv_header = stored_csv_output[0].keys()
            writer = csv.DictWriter(output_file, csv_header)
            writer.writeheader()
            writer.writerows(stored_csv_output)
def _print_human_readable_output(self, multiple_device_output=False, watch_output=False):
if watch_output: # Don't print output if it's for watch
return
if multiple_device_output:
def _print_human_readable_output(self, multiple_device_enabled=False, watching_output=False):
if multiple_device_enabled:
human_readable_output = ''
for output in self.multiple_device_output:
human_readable_output += (self._convert_json_to_human_readable(output))
human_readable_output += self._convert_json_to_human_readable(output)
else:
human_readable_output = self._convert_json_to_human_readable(self.output)
@@ -408,5 +424,12 @@ class AMDSMILogger():
# print as ascii, ignore incompatible characters
print(human_readable_output.encode('ascii', 'ignore').decode('ascii'))
else:
with self.destination.open('a') as output_file:
output_file.write(human_readable_output)
if watching_output:
with self.destination.open('w') as output_file:
human_readable_output = ''
for output in self.watch_output:
human_readable_output += self._convert_json_to_human_readable(output)
output_file.write(human_readable_output)
else:
with self.destination.open('a') as output_file:
output_file.write(human_readable_output)
+21 -18
Просмотреть файл
@@ -118,7 +118,7 @@ class AMDSMIParser(argparse.ArgumentParser):
elif args.csv:
file_name += ".csv"
else:
file_name += "txt"
file_name += ".txt"
path = path / file_name
path.touch()
setattr(args, self.dest, path)
@@ -169,6 +169,7 @@ class AMDSMIParser(argparse.ArgumentParser):
setattr(args, self.dest, values)
return WatchSelectedAction
def _gpu_select(self, gpu_choices):
""" Custom argparse action to return the device handle(s) for the gpu(s) selected
This will set the destination (args.gpu) to a list of 1 or more device handles
@@ -279,8 +280,8 @@ class AMDSMIParser(argparse.ArgumentParser):
def _add_static_parser(self, subparsers, func):
# Subparser help text
static_help = "Gets static information about the specified GPU"
static_subcommand_help = "If no argument is provided, return static information for all GPUs on the system.\
\nIf no static argument is specified all static information will be displayed."
static_subcommand_help = "If no GPU is specified, returns static information for all GPUs on the system.\
\nIf no static argument is provided, all static information will be displayed."
static_optionals_title = "Static Arguments"
# Optional arguments help text
@@ -334,7 +335,7 @@ class AMDSMIParser(argparse.ArgumentParser):
def _add_firmware_parser(self, subparsers, func):
# Subparser help text
firmware_help = "Gets firmware information about the specified GPU"
firmware_subcommand_help = "If no argument is provided, return firmware information for all GPUs on the system."
firmware_subcommand_help = "If no GPU is specified, return firmware information for all GPUs on the system."
firmware_optionals_title = "Firmware Arguments"
# Optional arguments help text
@@ -366,8 +367,8 @@ class AMDSMIParser(argparse.ArgumentParser):
# Subparser help text
bad_pages_help = "Gets bad page information about the specified GPU"
bad_pages_subcommand_help = "If no argument is provided, return bad page information for all GPUs on the system."
bad_pages_optionals_title = "Bad pages Arguments"
bad_pages_subcommand_help = "If no GPU is specified, return bad page information for all GPUs on the system."
bad_pages_optionals_title = "Bad Pages Arguments"
# Optional arguments help text
pending_help = "Displays all pending retired pages"
@@ -393,8 +394,8 @@ class AMDSMIParser(argparse.ArgumentParser):
def _add_metric_parser(self, subparsers, func):
# Subparser help text
metric_help = "Gets metric/performance information about the specified GPU"
metric_subcommand_help = "If no argument is provided, return metric information for all GPUs on the system.\
\nIf no metric argument is specified all metric information will be displayed."
metric_subcommand_help = "If no GPU is specified, returns metric information for all GPUs on the system.\
\nIf no metric argument is provided all metric information will be displayed."
metric_optionals_title = "Metric arguments"
# Optional arguments help text
@@ -483,8 +484,8 @@ class AMDSMIParser(argparse.ArgumentParser):
# Subparser help text
process_help = "Lists general process information running on the specified GPU"
process_subcommand_help = "If no argument is provided, returns information for all GPUs on the system.\
\nIf no argument is provided all process information will be displayed."
process_subcommand_help = "If no GPU is specified, returns information for all GPUs on the system.\
\nIf no process argument is provided all process information will be displayed."
process_optionals_title = "Process arguments"
# Optional Arguments help text
@@ -522,7 +523,7 @@ class AMDSMIParser(argparse.ArgumentParser):
# Subparser help text
profile_help = "Displays information about all profiles and current profile"
profile_subcommand_help = "If no argument is provided, returns information for all GPUs on the system."
profile_subcommand_help = "If no GPU is specified, returns information for all GPUs on the system."
profile_optionals_title = "Profile Arguments"
# Create profile subparser
@@ -543,7 +544,7 @@ class AMDSMIParser(argparse.ArgumentParser):
# Subparser help text
event_help = "Displays event information for the given GPU"
event_subcommand_help = "If no argument is provided, returns event information for all GPUs on the system."
event_subcommand_help = "If no GPU is specified, returns event information for all GPUs on the system."
event_optionals_title = "Event Arguments"
# Create event subparser
@@ -558,14 +559,14 @@ class AMDSMIParser(argparse.ArgumentParser):
def _add_topology_parser(self, subparsers, func):
return
if not(self.helpers.is_baremetal() and self.helpers.is_linux()):
# This subparser is only applicable to Baremetal Linux
return
# Subparser help text
topology_help = "Displays topology information of the devices."
topology_subcommand_help = "If no argument is provided, returns information for all GPUs on the system."
topology_subcommand_help = "If no GPU is specified, returns information for all GPUs on the system.\
\nIf no topology argument is provided all topology information will be displayed."
topology_optionals_title = "Topology arguments"
# Help text for Arguments only on Guest and BM platforms
@@ -602,7 +603,8 @@ class AMDSMIParser(argparse.ArgumentParser):
# Subparser help text
set_value_help = "Set options for devices."
set_value_subcommand_help = "The user must specify one of the options for the set configuration."
set_value_subcommand_help = "A GPU must be specified to set a configuration.\
\nA set argument must be provided; Multiple set arguments are accepted"
set_value_optionals_title = "Set Arguments"
# Help text for Arguments only on Guest and BM platforms
@@ -649,7 +651,7 @@ class AMDSMIParser(argparse.ArgumentParser):
set_value_parser.add_argument('-O', '--memoverdrive', action=self._validate_overdrive_percent(), required=False, help=set_mem_overdrive_help, metavar='%')
set_value_parser.add_argument('-w', '--poweroverdrive', action=self._prompt_spec_warning(), type=self._positive_int, required=False, help=set_power_overdrive_help, metavar="WATTS")
set_value_parser.add_argument('-P', '--profile', action='store', required=False, help=set_profile_help, metavar='SETPROFILE')
set_value_parser.add_argument('-d', '--perfdeterminism', action='store', type=self._positive_int, required=False, help=set_perf_det_help, metavar='SCLK')
set_value_parser.add_argument('-d', '--perfdeterminism', action='store', type=self._positive_int, required=False, help=set_perf_det_help, metavar='SCLKMAX')
def _validate_set_clock(self, validate_clock_type=True):
@@ -752,7 +754,8 @@ class AMDSMIParser(argparse.ArgumentParser):
# Subparser help text
reset_help = "Reset options for devices."
reset_subcommand_help = "The user must specify one of the options to reset devices."
reset_subcommand_help = "A GPU must be specified to reset a configuration.\
\nA reset argument must be provided; Multiple reset arguments are accepted"
reset_optionals_title = "Reset Arguments"
# Help text for Arguments only on Guest and BM platforms
@@ -788,7 +791,7 @@ class AMDSMIParser(argparse.ArgumentParser):
return
# Subparser help text
rocm_smi_help = "Legacy rocm_smi commands ported for backward compatibility"
rocm_smi_subcommand_help = "If no argument is provided, return showall and print the information for all\
rocm_smi_subcommand_help = "If no GPU is specified, returns showall and print the information for all\
GPUs on the system."
rocm_smi_optionals_title = "rocm_smi Arguments"