SWDEV-381302 - Added error handling
Signed-off-by: Dalibor Stanisavljevic <Dalibor.Stanisavljevic@amd.com>
Change-Id: Ia69c8aebdaa23e212c0ce2522201092bab54e732
[ROCm/amdsmi commit: 06f12c4700]
Bu işleme şunda yer alıyor:
@@ -19,6 +19,7 @@ add_custom_command(
|
||||
${PY_PACKAGE_DIR}/amdsmi_init.py
|
||||
${PY_PACKAGE_DIR}/amdsmi_logger.py
|
||||
${PY_PACKAGE_DIR}/amdsmi_parser.py
|
||||
${PY_PACKAGE_DIR}/amdsmi_cli_exceptions.py
|
||||
${PY_PACKAGE_DIR}/BDF.py
|
||||
DEPENDS amdsmi_cli
|
||||
COMMAND mkdir -p ${PY_PACKAGE_DIR}/
|
||||
@@ -30,6 +31,7 @@ add_custom_command(
|
||||
COMMAND ln -Pf ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_init.py ${PY_PACKAGE_DIR}/
|
||||
COMMAND ln -Pf ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_logger.py ${PY_PACKAGE_DIR}/
|
||||
COMMAND ln -Pf ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_parser.py ${PY_PACKAGE_DIR}/
|
||||
COMMAND ln -Pf ${CMAKE_CURRENT_SOURCE_DIR}/amdsmi_cli_exceptions.py ${PY_PACKAGE_DIR}/
|
||||
COMMAND ln -Pf ${CMAKE_CURRENT_SOURCE_DIR}/BDF.py ${PY_PACKAGE_DIR}/)
|
||||
|
||||
# The CLI requires the python amdsmi wrapper to be installed
|
||||
@@ -44,6 +46,7 @@ add_custom_target(
|
||||
${PY_PACKAGE_DIR}/amdsmi_init.py
|
||||
${PY_PACKAGE_DIR}/amdsmi_logger.py
|
||||
${PY_PACKAGE_DIR}/amdsmi_parser.py
|
||||
${PY_PACKAGE_DIR}/amdsmi_cli_exceptions.py
|
||||
${PY_PACKAGE_DIR}/BDF.py)
|
||||
|
||||
install(
|
||||
|
||||
@@ -26,6 +26,20 @@ import sys
|
||||
from amdsmi_commands import AMDSMICommands
|
||||
from amdsmi_parser import AMDSMIParser
|
||||
from amdsmi_logger import AMDSMILogger
|
||||
import amdsmi_cli_exceptions
|
||||
from amdsmi import amdsmi_interface
|
||||
|
||||
def _print_error(e, destination):
    """Report an error message on stdout or in a file.

    Args:
        e: The already-formatted error text (callers pass ``str(exc)``).
        destination: The literal string ``'stdout'`` to print the message,
            otherwise a path that is opened in write mode (truncating any
            existing content) and receives the message.
    """
    if destination == 'stdout':
        print(e)
        return

    # The context manager closes the handle even when write() raises,
    # which the previous manual open()/close() pair did not guarantee.
    with open(destination, "w") as f:
        f.write(e)
    print("Error occured. Result written to " +
          str(destination) + " file")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Set compatibility mode based on which cli mapping user selects
|
||||
@@ -50,23 +64,28 @@ if __name__ == "__main__":
|
||||
amd_smi_commands.set_value,
|
||||
amd_smi_commands.reset,
|
||||
amd_smi_commands.rocm_smi)
|
||||
try:
|
||||
args = amd_smi_parser.parse_args(args=None if sys.argv[1:] else ['--help'])
|
||||
|
||||
args = amd_smi_parser.parse_args(args=None if sys.argv[1:] else ['--help'])
|
||||
# Handle command modifiers before subcommand execution
|
||||
if args.json:
|
||||
amd_smi_commands.logger.format = amd_smi_commands.logger.LoggerFormat.json.value
|
||||
if args.csv:
|
||||
amd_smi_commands.logger.format = amd_smi_commands.logger.LoggerFormat.csv.value
|
||||
if args.file:
|
||||
amd_smi_commands.logger.destination = args.file
|
||||
if args.loglevel:
|
||||
logging_dict = {'DEBUG' : logging.DEBUG,
|
||||
'INFO' : logging.INFO,
|
||||
'WARNING': logging.WARNING,
|
||||
'ERROR': logging.ERROR,
|
||||
'CRITICAL': logging.CRITICAL}
|
||||
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_dict[args.loglevel])
|
||||
|
||||
# Handle command modifiers before subcommand execution
|
||||
if args.json:
|
||||
amd_smi_commands.logger.format = amd_smi_commands.logger.LoggerFormat.json.value
|
||||
if args.csv:
|
||||
amd_smi_commands.logger.format = amd_smi_commands.logger.LoggerFormat.csv.value
|
||||
if args.file:
|
||||
amd_smi_commands.logger.destination = args.file
|
||||
if args.loglevel:
|
||||
logging_dict = {'DEBUG' : logging.DEBUG,
|
||||
'INFO' : logging.INFO,
|
||||
'WARNING': logging.WARNING,
|
||||
'ERROR': logging.ERROR,
|
||||
'CRITICAL': logging.CRITICAL}
|
||||
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging_dict[args.loglevel])
|
||||
|
||||
# Execute subcommands
|
||||
args.func(args)
|
||||
# Execute subcommands
|
||||
args.func(args)
|
||||
except amdsmi_cli_exceptions.AmdSmiException as e:
|
||||
_print_error(str(e), amd_smi_commands.logger.destination)
|
||||
except amdsmi_interface.AmdSmiLibraryException as e:
|
||||
exc = amdsmi_cli_exceptions.AmdSmiAMDSMIErrorException(amd_smi_commands.logger.format, e.get_error_code())
|
||||
_print_error(str(exc), amd_smi_commands.logger.destination)
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
# Human-readable text for AMDSMI library status codes, keyed by the absolute
# value of the code (lookups go through abs(), see _get_error_message).
AMDSMI_ERROR_MESSAGES = {
    0: "Success",
    1: "Invalid parameters",
    2: "Command not supported",
    3: "Command not yet implemented",
    4: "Failed load module",
    5: "Failed load symbol",
    6: "Drm error",
    7: "API call failed",
    8: "Timeout in API call",
    9: "Retry operation",
    10: "Permission Denied",
    11: "Interrupt occurred during execution",
    12: "I/O Error",
    13: "Address fault",
    14: "Error opening file",
    15: "Not enough memory",
    16: "Internal error",
    17: "Out of bounds",
    18: "Initialization error",
    19: "Internal reference counter exceeded",

    30: "Device busy",
    31: "Device Not found",
    32: "Device not initialized",
    33: "No more free slot",

    40: "No data was found for given input",
    41: "Insufficient size for operation",
    42: "Unexpected size of data was read",
    43: "The data read or provided was unexpected",
}


def _get_error_message(error_code):
    """Return the text for a library error code, or "Generic error".

    The sign of ``error_code`` is ignored; unmapped codes yield the generic
    fallback instead of raising.
    """
    return AMDSMI_ERROR_MESSAGES.get(abs(error_code), "Generic error")


def _format_error(text, code, outputformat):
    """Render an error text and numeric code in the requested output format.

    Args:
        text: The fully formatted error description.
        code: The numeric CLI error code.
        outputformat: ``"json"`` or ``"csv"``; any other value selects the
            plain human-readable form.

    Returns:
        The message string stored on the exception.
    """
    if outputformat == "json":
        return json.dumps({"error": text, "code": code})
    if outputformat == "csv":
        # NOTE(review): text is not CSV-escaped, so a comma inside it (for
        # example in a file path) yields an extra column. Kept as-is to
        # preserve the existing output.
        return "error,code\n" + text + "," + str(code)
    return "{} Error code: {}".format(text, code)


class AmdSmiException(Exception):
    """Base class of all CLI errors; ``str()`` yields the formatted message."""

    def __str__(self):
        try:
            return self.message
        except AttributeError:
            # Instantiated directly without a message: behave like a plain
            # Exception instead of failing inside __str__.
            return super().__str__()


class _AmdSmiCommandException(AmdSmiException):
    """Shared constructor for errors described by one offending token.

    Subclasses provide ``_CODE`` (the numeric CLI error code) and
    ``_TEMPLATE`` (a ``str.format`` template that receives the token).
    """

    _CODE = None
    _TEMPLATE = ""

    def __init__(self, command, outputformat):
        """Build the message for ``command`` in ``outputformat``.

        Args:
            command: The offending command, parameter, value, index or path.
            outputformat: ``"json"``, ``"csv"`` or anything else for the
                human-readable form.
        """
        self.value = self._CODE
        self.command = command
        text = self._TEMPLATE.format(self.command)
        self.message = _format_error(text, self.value, outputformat)
        super().__init__(self.message)


class AmdSmiInvalidCommandException(_AmdSmiCommandException):
    """An unknown command was given (code -1)."""

    _CODE = -1
    _TEMPLATE = "Command '{}' is invalid. Run '--help' for more info."


class AmdSmiInvalidParameterException(_AmdSmiCommandException):
    """An unknown parameter was given (code -2)."""

    _CODE = -2
    _TEMPLATE = "Parameter '{}' is invalid. Run '--help' for more info."


class AmdSmiDeviceNotFoundException(_AmdSmiCommandException):
    """The requested GPU index does not exist on the system (code -3)."""

    _CODE = -3
    _TEMPLATE = "GPU Device with GPU_INDEX '{}' cannot be found on the system."


class AmdSmiInvalidFilePathException(_AmdSmiCommandException):
    """A given path could not be found (code -4)."""

    _CODE = -4
    _TEMPLATE = "Path '{}' cannot be found."


class AmdSmiInvalidParameterValueException(_AmdSmiCommandException):
    """A parameter value has the wrong type or format (code -5)."""

    _CODE = -5
    _TEMPLATE = "Value '{}' is not of valid type or format. Run '--help' for more info."


class AmdSmiMissingParameterValueException(_AmdSmiCommandException):
    """A parameter that needs a value was given without one (code -6)."""

    _CODE = -6
    _TEMPLATE = "Parameter '{}' requires a value. Run '--help' for more info."


class AmdSmiParameterNotSupportedException(_AmdSmiCommandException):
    """A parameter is not supported on this system (code -8)."""

    # NOTE(review): -7 is not used by any exception in this module.
    _CODE = -8
    _TEMPLATE = "Parameter '{}' is not supported on the system. Run '--help' for more info."


class AmdSmiUnknownErrorException(_AmdSmiCommandException):
    """An unclassified failure (code -100); the message omits the command."""

    _CODE = -100
    # No placeholder: str.format() leaves the text unchanged and ignores
    # the command argument.
    _TEMPLATE = "An unknown error has occurred. Run 'help' for more info."


class AmdSmiAMDSMIErrorException(AmdSmiException):
    """Wraps an error code returned by the AMDSMI library.

    The CLI code is ``-1000 - abs(error_code)``; the original library code
    is kept in ``smilibcode``.
    """

    def __init__(self, outputformat, error_code):
        """Build the message for a library error.

        Args:
            outputformat: ``"json"``, ``"csv"`` or anything else for the
                human-readable form.
            error_code: The numeric status code reported by the library.
        """
        self.value = -1000 - abs(error_code)
        self.smilibcode = error_code

        # Every format goes through _get_error_message so that an unmapped
        # library code produces "Generic error" rather than a KeyError while
        # the error itself is being reported.
        text = "AMDSMI has returned error '{}' - '{}'".format(
            self.value, _get_error_message(self.smilibcode))
        self.message = _format_error(text, self.value, outputformat)
        super().__init__(self.message)
|
||||
@@ -51,6 +51,8 @@ class AMDSMICommands():
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
self.stop = ''
|
||||
self.all_arguments = False
|
||||
|
||||
|
||||
def version(self, args):
|
||||
"""Print Version String
|
||||
@@ -198,99 +200,111 @@ class AMDSMICommands():
|
||||
handled_multiple_gpus, device_handle = self.helpers.handle_gpus(args, self.logger, self.static)
|
||||
if handled_multiple_gpus:
|
||||
return # This function is recursive
|
||||
|
||||
args.gpu = device_handle
|
||||
|
||||
# If all arguments are False, it means that no argument was passed and the entire static should be printed
|
||||
if not any([args.asic, args.bus, args.vbios, args.limit, args.driver, args.caps, args.ras, args.board]):
|
||||
args.asic = args.bus = args.vbios = args.limit = args.driver = args.caps = args.ras = args.board = True
|
||||
args.asic = args.bus = args.vbios = args.limit = args.driver = args.caps = args.ras = args.board = self.all_arguments = True
|
||||
|
||||
values_dict = {}
|
||||
|
||||
if args.asic:
|
||||
try:
|
||||
asic_info = amdsmi_interface.amdsmi_get_asic_info(args.gpu)
|
||||
asic_info['family'] = hex(asic_info['family'])
|
||||
asic_info['vendor_id'] = hex(asic_info['vendor_id'])
|
||||
asic_info['device_id'] = hex(asic_info['device_id'])
|
||||
asic_info['rev_id'] = hex(asic_info['rev_id'])
|
||||
if asic_info['asic_serial'] != '':
|
||||
asic_info['asic_serial'] = '0x' + asic_info['asic_serial']
|
||||
|
||||
values_dict['asic'] = asic_info
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
asic_info = e.get_error_info()
|
||||
|
||||
asic_info['family'] = hex(asic_info['family'])
|
||||
asic_info['vendor_id'] = hex(asic_info['vendor_id'])
|
||||
asic_info['device_id'] = hex(asic_info['device_id'])
|
||||
asic_info['rev_id'] = hex(asic_info['rev_id'])
|
||||
if asic_info['asic_serial'] != '':
|
||||
asic_info['asic_serial'] = '0x' + asic_info['asic_serial']
|
||||
|
||||
values_dict['asic'] = asic_info
|
||||
values_dict['asic'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.bus:
|
||||
bus_output_info = {}
|
||||
|
||||
try:
|
||||
bus_info = amdsmi_interface.amdsmi_get_pcie_link_caps(args.gpu)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit ='MT/s'
|
||||
bus_info['pcie_speed'] = f"{bus_info['pcie_speed']} {unit}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
bus_info = e.get_error_info()
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit ='MT/s'
|
||||
bus_info['pcie_speed'] = f"{bus_info['pcie_speed']} {unit}"
|
||||
|
||||
bus_output_info = {}
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
try:
|
||||
bus_output_info['bdf'] = amdsmi_interface.amdsmi_get_device_bdf(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
bus_output_info['bdf'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
bus_output_info.update(bus_info)
|
||||
|
||||
values_dict['bus'] = bus_output_info
|
||||
|
||||
if args.vbios:
|
||||
try:
|
||||
vbios_info = amdsmi_interface.amdsmi_get_vbios_info(args.gpu)
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
vbios_info['version'] = vbios_info.pop('vbios_version_string')
|
||||
vbios_info['build_date'] = vbios_info.pop('build_date')
|
||||
vbios_info['part_number'] = vbios_info.pop('part_number')
|
||||
vbios_info['vbios_version'] = vbios_info.pop('vbios_version')
|
||||
|
||||
values_dict['vbios'] = vbios_info
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
vbios_info = e.get_error_info()
|
||||
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
vbios_info['version'] = vbios_info.pop('vbios_version_string')
|
||||
vbios_info['build_date'] = vbios_info.pop('build_date')
|
||||
vbios_info['part_number'] = vbios_info.pop('part_number')
|
||||
vbios_info['vbios_version'] = vbios_info.pop('vbios_version')
|
||||
|
||||
values_dict['vbios'] = vbios_info
|
||||
values_dict['vbios'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.board:
|
||||
try:
|
||||
board_info = amdsmi_interface.amdsmi_get_board_info(args.gpu)
|
||||
board_info['serial_number'] = hex(board_info['serial_number'])
|
||||
board_info['product_serial'] = '0x' + board_info['product_serial']
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
board_info['product_number'] = board_info.pop('product_serial')
|
||||
board_info['product_name'] = board_info.pop('product_name')
|
||||
|
||||
values_dict['board'] = board_info
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
board_info = e.get_error_info()
|
||||
|
||||
board_info['serial_number'] = hex(board_info['serial_number'])
|
||||
board_info['product_serial'] = '0x' + board_info['product_serial']
|
||||
board_info['product_name'] = board_info['product_name'].strip()
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
board_info['product_number'] = board_info.pop('product_serial')
|
||||
|
||||
values_dict['board'] = board_info
|
||||
values_dict['board'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.limit:
|
||||
try:
|
||||
power_limit = amdsmi_interface.amdsmi_get_power_measure(args.gpu)['power_limit']
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
power_limit = e.get_error_info()
|
||||
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
try:
|
||||
temp_edge_limit = amdsmi_interface.amdsmi_dev_get_temp_metric(args.gpu,
|
||||
amdsmi_interface.AmdSmiTemperatureType.EDGE, amdsmi_interface.AmdSmiTemperatureMetric.CRITICAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
temp_edge_limit = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
try:
|
||||
temp_junction_limit = amdsmi_interface.amdsmi_dev_get_temp_metric(args.gpu,
|
||||
amdsmi_interface.AmdSmiTemperatureType.JUNCTION, amdsmi_interface.AmdSmiTemperatureMetric.CRITICAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
temp_junction_limit = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
try:
|
||||
temp_vram_limit = amdsmi_interface.amdsmi_dev_get_temp_metric(args.gpu,
|
||||
amdsmi_interface.AmdSmiTemperatureType.VRAM, amdsmi_interface.AmdSmiTemperatureMetric.CRITICAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
temp_vram_limit = e.get_error_info()
|
||||
temp_junction_limit = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'W'
|
||||
@@ -309,35 +323,40 @@ class AMDSMICommands():
|
||||
|
||||
values_dict['limit'] = limit_info
|
||||
if args.driver:
|
||||
driver_info = {}
|
||||
try:
|
||||
driver_info = {}
|
||||
driver_info['driver_version'] = amdsmi_interface.amdsmi_get_driver_version(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
driver_info['driver_version'] = e.get_error_info()
|
||||
|
||||
values_dict['driver'] = driver_info
|
||||
values_dict['driver'] = driver_info
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['driver'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.ras:
|
||||
try:
|
||||
values_dict['ras'] = amdsmi_interface.amdsmi_get_ras_block_features_enabled(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['ras'] = e.get_error_info()
|
||||
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.caps:
|
||||
try:
|
||||
caps_info = amdsmi_interface.amdsmi_get_caps_info(args.gpu)
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
del caps_info['ras_supported']
|
||||
caps_info['gfx'] = caps_info.pop('gfx')
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
for capability_name, capability_value in caps_info.items():
|
||||
if isinstance(capability_value, list):
|
||||
caps_info[capability_name] = f"{capability_value}"
|
||||
|
||||
values_dict['caps'] = caps_info
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
caps_info = e.get_error_info()
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
del caps_info['ras_supported']
|
||||
caps_info['gfx'] = caps_info.pop('gfx')
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
for capability_name, capability_value in caps_info.items():
|
||||
if isinstance(capability_value, list):
|
||||
caps_info[capability_name] = f"{capability_value}"
|
||||
|
||||
values_dict['caps'] = caps_info
|
||||
values_dict['caps'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
# Store values in logger.output
|
||||
self.logger.store_output(args.gpu, 'values', values_dict)
|
||||
@@ -384,30 +403,30 @@ class AMDSMICommands():
|
||||
if args.fw_list:
|
||||
try:
|
||||
fw_info = amdsmi_interface.amdsmi_get_fw_info(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
fw_info = e.get_error_info()
|
||||
|
||||
for fw_index, fw_entry in enumerate(fw_info['fw_list']):
|
||||
# Change fw_name to fw_id
|
||||
fw_entry['fw_id'] = fw_entry.pop('fw_name').name.strip('FW_ID_')
|
||||
fw_entry['fw_version'] = fw_entry.pop('fw_version')
|
||||
firmware_identifier = 'FW'
|
||||
for fw_index, fw_entry in enumerate(fw_info['fw_list']):
|
||||
# Change fw_name to fw_id
|
||||
fw_entry['fw_id'] = fw_entry.pop('fw_name').name.strip('FW_ID_')
|
||||
fw_entry['fw_version'] = fw_entry.pop('fw_version')
|
||||
firmware_identifier = 'FW'
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
firmware_identifier = 'UCODE'
|
||||
fw_entry['name'] = fw_entry.pop('fw_id')
|
||||
fw_entry['version'] = fw_entry.pop('fw_version')
|
||||
|
||||
# Add custom human readable formatting
|
||||
if self.logger.is_human_readable_format():
|
||||
fw_info['fw_list'][fw_index] = {f'{firmware_identifier} {fw_index}': fw_entry}
|
||||
else:
|
||||
fw_info['fw_list'][fw_index] = fw_entry
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
firmware_identifier = 'UCODE'
|
||||
fw_entry['name'] = fw_entry.pop('fw_id')
|
||||
fw_entry['version'] = fw_entry.pop('fw_version')
|
||||
fw_info['ucode_list'] = fw_info.pop('fw_list')
|
||||
|
||||
# Add custom human readable formatting
|
||||
if self.logger.is_human_readable_format():
|
||||
fw_info['fw_list'][fw_index] = {f'{firmware_identifier} {fw_index}': fw_entry}
|
||||
else:
|
||||
fw_info['fw_list'][fw_index] = fw_entry
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
fw_info['ucode_list'] = fw_info.pop('fw_list')
|
||||
|
||||
values_dict.update(fw_info)
|
||||
values_dict.update(fw_info)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
|
||||
# Store values in logger.output
|
||||
self.logger.store_output(args.gpu, 'values', values_dict)
|
||||
@@ -471,6 +490,7 @@ class AMDSMICommands():
|
||||
bad_page_info = ""
|
||||
bad_page_err_output = e.get_error_info()
|
||||
bad_page_error = True
|
||||
raise e
|
||||
|
||||
if isinstance(bad_page_info, str):
|
||||
pass
|
||||
@@ -582,6 +602,7 @@ class AMDSMICommands():
|
||||
Returns:
|
||||
None: Print output via AMDSMILogger to destination
|
||||
"""
|
||||
|
||||
# Set args.* to passed in arguments
|
||||
if gpu:
|
||||
args.gpu = gpu
|
||||
@@ -655,158 +676,144 @@ class AMDSMICommands():
|
||||
else:
|
||||
raise IndexError("args.gpu should not be an empty list")
|
||||
|
||||
|
||||
# Check if any of the options have been set, if not then set them all to true
|
||||
if not any([args.usage, args.fb_usage, args.power, args.clock, args.temperature, args.ecc, args.pcie, args.voltage, args.fan,
|
||||
args.pcie_usage, args.voltage_curve, args.overdrive, args.mem_overdrive, args.perf_level,
|
||||
args.replay_count, args.xgmi_err, args.energy, args.mem_usage]):
|
||||
args.usage = args.fb_usage = args.power = args.clock = args.temperature = args.ecc = args.pcie = args.voltage = args.fan = \
|
||||
args.pcie_usage = args.voltage_curve = args.overdrive = args.mem_overdrive = args.perf_level = \
|
||||
args.replay_count = args.xgmi_err = args.energy = args.mem_usage = True
|
||||
args.replay_count = args.xgmi_err = args.energy = args.mem_usage = self.all_arguments = True
|
||||
|
||||
|
||||
# Add timestamp and store values for specified arguments
|
||||
values_dict = {}
|
||||
if args.usage:
|
||||
try:
|
||||
engine_usage = amdsmi_interface.amdsmi_get_gpu_activity(args.gpu)
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
engine_usage['gfx_usage'] = engine_usage.pop('gfx_activity')
|
||||
engine_usage['mem_usage'] = engine_usage.pop('umc_activity')
|
||||
engine_usage['mm_usage_list'] = engine_usage.pop('mm_activity')
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = '%'
|
||||
for usage_name, usage_value in engine_usage.items():
|
||||
engine_usage[usage_name] = f"{usage_value} {unit}"
|
||||
|
||||
values_dict['usage'] = engine_usage
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
engine_usage = e.get_error_info()
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
engine_usage['gfx_usage'] = engine_usage.pop('gfx_activity')
|
||||
engine_usage['mem_usage'] = engine_usage.pop('umc_activity')
|
||||
engine_usage['mm_usage_list'] = engine_usage.pop('mm_activity')
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = '%'
|
||||
for usage_name, usage_value in engine_usage.items():
|
||||
engine_usage[usage_name] = f"{usage_value} {unit}"
|
||||
|
||||
values_dict['usage'] = engine_usage
|
||||
raise e
|
||||
if args.fb_usage:
|
||||
try:
|
||||
vram_usage = amdsmi_interface.amdsmi_get_vram_usage(args.gpu)
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
vram_usage['fb_total'] = vram_usage.pop('vram_total')
|
||||
vram_usage['fb_used'] = vram_usage.pop('vram_used')
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'MB'
|
||||
for vram_name, vram_value in vram_usage.items():
|
||||
vram_usage[vram_name] = f"{vram_value} {unit}"
|
||||
|
||||
values_dict['fb_usage'] = vram_usage
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
vram_usage = e.get_error_info()
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
vram_usage['fb_total'] = vram_usage.pop('vram_total')
|
||||
vram_usage['fb_used'] = vram_usage.pop('vram_used')
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'MB'
|
||||
for vram_name, vram_value in vram_usage.items():
|
||||
vram_usage[vram_name] = f"{vram_value} {unit}"
|
||||
|
||||
values_dict['fb_usage'] = vram_usage
|
||||
raise e
|
||||
if args.power:
|
||||
try:
|
||||
average_socket_power = amdsmi_interface.amdsmi_get_power_measure(args.gpu)['average_socket_power']
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
pass
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'W'
|
||||
average_socket_power = f"{average_socket_power} {unit}"
|
||||
|
||||
values_dict['power'] = average_socket_power
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
average_socket_power = e.get_error_info()
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
pass
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'W'
|
||||
average_socket_power = f"{average_socket_power} {unit}"
|
||||
|
||||
values_dict['power'] = average_socket_power
|
||||
raise e
|
||||
if args.clock:
|
||||
try:
|
||||
clock_gfx = amdsmi_interface.amdsmi_get_clock_measure(args.gpu, amdsmi_interface.AmdSmiClkType.GFX)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
clock_gfx = e.get_error_info()
|
||||
|
||||
try:
|
||||
clock_mem = amdsmi_interface.amdsmi_get_clock_measure(args.gpu, amdsmi_interface.AmdSmiClkType.MEM)
|
||||
|
||||
clocks = {'gfx': clock_gfx,
|
||||
'mem': clock_mem}
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'MHz'
|
||||
for clock_target, clock_metric_values in clocks.items():
|
||||
for clock_type, clock_value in clock_metric_values.items():
|
||||
clocks[clock_target][clock_type] = f"{clock_value} {unit}"
|
||||
|
||||
values_dict['clock'] = clocks
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
clock_mem = e.get_error_info()
|
||||
|
||||
clocks = {'gfx': clock_gfx,
|
||||
'mem': clock_mem}
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'MHz'
|
||||
for clock_target, clock_metric_values in clocks.items():
|
||||
for clock_type, clock_value in clock_metric_values.items():
|
||||
clocks[clock_target][clock_type] = f"{clock_value} {unit}"
|
||||
|
||||
values_dict['clock'] = clocks
|
||||
raise e
|
||||
if args.temperature:
|
||||
try:
|
||||
temperature_edge_current = amdsmi_interface.amdsmi_dev_get_temp_metric(
|
||||
args.gpu, amdsmi_interface.AmdSmiTemperatureType.EDGE, amdsmi_interface.AmdSmiTemperatureMetric.CURRENT)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
temperature_edge_current = e.get_error_info()
|
||||
|
||||
try:
|
||||
temperature_junction_current = amdsmi_interface.amdsmi_dev_get_temp_metric(
|
||||
args.gpu, amdsmi_interface.AmdSmiTemperatureType.JUNCTION, amdsmi_interface.AmdSmiTemperatureMetric.CURRENT)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
temperature_junction_current = e.get_error_info()
|
||||
|
||||
try:
|
||||
temperature_vram_current = amdsmi_interface.amdsmi_dev_get_temp_metric(
|
||||
args.gpu, amdsmi_interface.AmdSmiTemperatureType.VRAM, amdsmi_interface.AmdSmiTemperatureMetric.CURRENT)
|
||||
|
||||
temperatures = { 'edge': temperature_edge_current,
|
||||
'hotspot': temperature_junction_current,
|
||||
'mem': temperature_vram_current}
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
temperatures = { 'edge_temperature': temperature_edge_current,
|
||||
'hotspot_temperature': temperature_junction_current,
|
||||
'mem_temperature': temperature_vram_current}
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = '\N{DEGREE SIGN}C'
|
||||
for temperature_value in temperatures:
|
||||
temperatures[temperature_value] = f"{temperatures[temperature_value]} {unit}"
|
||||
|
||||
values_dict['temperature'] = temperatures
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
temperature_vram_current = e.get_error_info()
|
||||
|
||||
temperatures = { 'edge': temperature_edge_current,
|
||||
'hotspot': temperature_junction_current,
|
||||
'mem': temperature_vram_current}
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
temperatures = { 'edge_temperature': temperature_edge_current,
|
||||
'hotspot_temperature': temperature_junction_current,
|
||||
'mem_temperature': temperature_vram_current}
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = '\N{DEGREE SIGN}C'
|
||||
for temperature_value in temperatures:
|
||||
temperatures[temperature_value] = f"{temperatures[temperature_value]} {unit}"
|
||||
|
||||
values_dict['temperature'] = temperatures
|
||||
raise e
|
||||
if args.ecc:
|
||||
try:
|
||||
values_dict['ecc'] = amdsmi_interface.amdsmi_get_ecc_error_count(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['ecc'] = e.get_error_info()
|
||||
|
||||
raise e
|
||||
if args.pcie:
|
||||
try:
|
||||
pcie_link_status = amdsmi_interface.amdsmi_get_pcie_link_caps(args.gpu)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit ='MT/s'
|
||||
pcie_link_status['pcie_speed'] = f"{pcie_link_status['pcie_speed']} {unit}"
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
pcie_link_status['current_width'] = pcie_link_status.pop('pcie_lanes')
|
||||
pcie_link_status['current_speed'] = pcie_link_status.pop('pcie_speed')
|
||||
|
||||
values_dict['pcie'] = pcie_link_status
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
pcie_link_status = e.get_error_info()
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit ='MT/s'
|
||||
pcie_link_status['pcie_speed'] = f"{pcie_link_status['pcie_speed']} {unit}"
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
pcie_link_status['current_width'] = pcie_link_status.pop('pcie_lanes')
|
||||
pcie_link_status['current_speed'] = pcie_link_status.pop('pcie_speed')
|
||||
|
||||
values_dict['pcie'] = pcie_link_status
|
||||
raise e
|
||||
if args.voltage:
|
||||
try:
|
||||
volt_metric = amdsmi_interface.amdsmi_dev_get_volt_metric(
|
||||
args.gpu, amdsmi_interface.AmdSmiVoltageType.VDDGFX, amdsmi_interface.AmdSmiVoltageMetric.CURRENT)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'mV'
|
||||
volt_metric = f"{volt_metric} {unit}"
|
||||
|
||||
values_dict['voltage'] = volt_metric
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
volt_metric = e.get_error_info()
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'mV'
|
||||
volt_metric = f"{volt_metric} {unit}"
|
||||
|
||||
values_dict['voltage'] = volt_metric
|
||||
raise e
|
||||
if args.fan:
|
||||
try:
|
||||
fan_speed = amdsmi_interface.amdsmi_dev_get_fan_speed(args.gpu, 0)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
fan_speed = e.get_error_info()
|
||||
|
||||
try:
|
||||
fan_max = amdsmi_interface.amdsmi_dev_get_fan_speed_max(args.gpu, 0)
|
||||
if isinstance(fan_speed, int) and fan_max > 0:
|
||||
fan_percent = round((float(fan_speed) / float(fan_max)) * 100, 2)
|
||||
@@ -815,42 +822,30 @@ class AMDSMICommands():
|
||||
fan_percent = f"{fan_percent} {unit}"
|
||||
else:
|
||||
fan_percent = 'Unable to detect fan speed'
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
fan_max = e.get_error_info()
|
||||
fan_percent = 'Unable to detect fan speed'
|
||||
|
||||
try:
|
||||
fan_rpm = amdsmi_interface.amdsmi_dev_get_fan_rpms(args.gpu, 0)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
fan_rpm = e.get_error_info()
|
||||
|
||||
values_dict['fan'] = {'speed': fan_speed,
|
||||
'max' : fan_max,
|
||||
'rpm' : fan_rpm,
|
||||
'usage' : fan_percent}
|
||||
values_dict['fan'] = {'speed': fan_speed,
|
||||
'max' : fan_max,
|
||||
'rpm' : fan_rpm,
|
||||
'usage' : fan_percent}
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
if args.pcie_usage:
|
||||
try:
|
||||
pcie_link_status = amdsmi_interface.amdsmi_get_pcie_link_status(args.gpu)
|
||||
pcie_link_status_call = True
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit ='MT/s'
|
||||
pcie_link_status['pcie_speed'] = f"{pcie_link_status['pcie_speed']} {unit}"
|
||||
|
||||
values_dict['pcie_usage'] = pcie_link_status
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
pcie_link_status = e.get_error_info()
|
||||
pcie_link_status_call = False
|
||||
|
||||
if self.logger.is_human_readable_format() and pcie_link_status_call:
|
||||
unit ='MT/s'
|
||||
pcie_link_status['pcie_speed'] = f"{pcie_link_status['pcie_speed']} {unit}"
|
||||
|
||||
values_dict['pcie_usage'] = pcie_link_status
|
||||
raise e
|
||||
if args.voltage_curve:
|
||||
try:
|
||||
od_volt = amdsmi_interface.amdsmi_dev_get_od_volt_info(args.gpu)
|
||||
voltage_curve_error = False
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
od_volt = None
|
||||
values_dict["voltage_curve"] = e.get_error_info()
|
||||
voltage_curve_error = True
|
||||
|
||||
if not voltage_curve_error:
|
||||
voltage_point_dict = {}
|
||||
|
||||
for point in range(3):
|
||||
@@ -863,100 +858,79 @@ class AMDSMICommands():
|
||||
voltage_point_dict[f'voltage_point_{point}'] = f"{frequency}Mhz {voltage}mV"
|
||||
|
||||
values_dict['voltage_curve'] = voltage_point_dict
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['voltage_curve'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
if args.overdrive:
|
||||
try:
|
||||
overdrive_level = amdsmi_interface.amdsmi_dev_get_overdrive_level(args.gpu)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = '%'
|
||||
overdrive_level = f"{overdrive_level} {unit}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
overdrive_level = e.get_error_info()
|
||||
|
||||
values_dict['overdrive'] = overdrive_level
|
||||
values_dict['overdrive'] = overdrive_level
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
if args.mem_overdrive:
|
||||
values_dict['mem_overdrive'] = amdsmi_interface.AmdSmiRetCode.NOT_IMPLEMENTED
|
||||
|
||||
if args.perf_level:
|
||||
try:
|
||||
values_dict['perf_level'] = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
values_dict['perf_level'] = perf_level
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['perf_level'] = e.get_error_info()
|
||||
|
||||
raise e
|
||||
if args.replay_count:
|
||||
try:
|
||||
values_dict['replay_count'] = amdsmi_interface.amdsmi_dev_get_pci_replay_counter(args.gpu)
|
||||
pci_replay_counter = amdsmi_interface.amdsmi_dev_get_pci_replay_counter(args.gpu)
|
||||
values_dict['replay_count'] = pci_replay_counter
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['replay_count'] = e.get_error_info()
|
||||
|
||||
raise e
|
||||
if args.xgmi_err:
|
||||
try:
|
||||
values_dict['xgmi_err'] = amdsmi_interface.amdsmi_dev_xgmi_error_status(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['xgmi_err'] = e.get_error_info()
|
||||
|
||||
except amdsmi_interface.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.AmdSmiRetCode.ERR_NOT_SUPPORTED:
|
||||
values_dict['xgmi_err'] = 'N/A'
|
||||
else:
|
||||
raise e
|
||||
if args.energy:
|
||||
try:
|
||||
energy = amdsmi_interface.amdsmi_get_power_measure(args.gpu)['energy_accumulator']
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'J'
|
||||
energy = f"{energy} {unit}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
energy = e.get_error_info()
|
||||
|
||||
values_dict['energy'] = energy
|
||||
values_dict['energy'] = energy
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
if args.mem_usage:
|
||||
unit = 'MB'
|
||||
memory_total = {}
|
||||
|
||||
try:
|
||||
total_vram = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.VRAM)
|
||||
memory_total['vram'] = total_vram // (1024*1024)
|
||||
memory_total_vram = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.VRAM)
|
||||
memory_total_vis_vram = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.VIS_VRAM)
|
||||
memory_total_gtt = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.GTT)
|
||||
|
||||
memory_total = {}
|
||||
# Convert mem_usage to megabytes
|
||||
memory_total['vram'] = memory_total_vram // (1024*1024)
|
||||
memory_total['vis_vram'] = memory_total_vis_vram // (1024*1024)
|
||||
memory_total['gtt'] = memory_total_gtt // (1024*1024)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'MB'
|
||||
energy = f"{energy} {unit}"
|
||||
memory_total['vram'] = f"{memory_total['vram']} {unit}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_total['vram'] = e.get_error_info()
|
||||
|
||||
try:
|
||||
total_vis_vram = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.VIS_VRAM)
|
||||
memory_total['vis_vram'] = total_vis_vram // (1024*1024)
|
||||
if self.logger.is_human_readable_format():
|
||||
memory_total['vis_vram'] = f"{memory_total['vis_vram']} {unit}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_total['vis_vram'] = e.get_error_info()
|
||||
|
||||
try:
|
||||
total_gtt = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.GTT)
|
||||
memory_total['gtt'] = total_gtt // (1024*1024)
|
||||
if self.logger.is_human_readable_format():
|
||||
memory_total['gtt'] = f"{memory_total['gtt']} {unit}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_total['gtt'] = e.get_error_info()
|
||||
|
||||
try:
|
||||
total_used_vram = amdsmi_interface.amdsmi_dev_get_memory_usage(args.gpu, amdsmi_interface.AmdSmiMemoryType.VRAM)
|
||||
memory_total['used_vram'] = total_used_vram // (1024*1024)
|
||||
if self.logger.is_human_readable_format():
|
||||
memory_total['used_vram'] = f"{memory_total['used_vram']} {unit}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_total['used_vram'] = e.get_error_info()
|
||||
|
||||
try:
|
||||
total_used_vis_vram = amdsmi_interface.amdsmi_dev_get_memory_usage(args.gpu, amdsmi_interface.AmdSmiMemoryType.VIS_VRAM)
|
||||
memory_total['used_vis_vram'] = total_used_vis_vram // (1024*1024)
|
||||
if self.logger.is_human_readable_format():
|
||||
memory_total['used_vis_vram'] = f"{memory_total['used_vis_vram']} {unit}"
|
||||
values_dict['mem_usage'] = memory_total
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_total['used_vis_vram'] = e.get_error_info()
|
||||
|
||||
try:
|
||||
total_used_gtt = amdsmi_interface.amdsmi_dev_get_memory_usage(args.gpu, amdsmi_interface.AmdSmiMemoryType.GTT)
|
||||
memory_total['used_gtt'] = total_used_gtt // (1024*1024)
|
||||
if self.logger.is_human_readable_format():
|
||||
memory_total['used_gtt'] = f"{memory_total['used_gtt']} {unit}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_total['used_gtt'] = e.get_error_info()
|
||||
|
||||
values_dict['mem_usage'] = memory_total
|
||||
raise e
|
||||
|
||||
# Store values in logger.output
|
||||
self.logger.store_output(args.gpu, 'values', values_dict)
|
||||
@@ -1190,11 +1164,6 @@ class AMDSMICommands():
|
||||
args.gpu = self.device_handles
|
||||
|
||||
# Handle all args being false
|
||||
|
||||
# If all arguments are False, it means that no argument was passed and the entire topology should be printed
|
||||
# if not any([args.asic, args.bus, args.vbios, args.limit, args.driver, args.caps, args.ras, args.board]):
|
||||
# args.asic = args.bus = args.vbios = args.limit = args.driver = args.caps = args.ras = args.board = True
|
||||
|
||||
if not any([args.access, args.weight, args.hops, args.type, args.numa, args.numa_bw]):
|
||||
args.access = args.weight = args.hops = args.type = args.numa = args.numa_bw = True
|
||||
|
||||
|
||||
@@ -26,9 +26,12 @@ import errno
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from _version import __version__
|
||||
from amdsmi_helpers import AMDSMIHelpers
|
||||
import amdsmi_cli_exceptions
|
||||
from BDF import BDF
|
||||
|
||||
|
||||
class AMDSMIParser(argparse.ArgumentParser):
|
||||
@@ -80,8 +83,13 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
if int_value.isdigit(): # Is digit works only on positive numbers
|
||||
return int(int_value)
|
||||
else:
|
||||
raise argparse.ArgumentTypeError(
|
||||
f"invalid input:{int_value} integer provided must be positive")
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(int_value, outputformat)
|
||||
|
||||
|
||||
def _check_output_file_path(self):
|
||||
@@ -99,8 +107,13 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
if path.parent.is_dir():
|
||||
path.touch()
|
||||
else:
|
||||
raise argparse.ArgumentTypeError(
|
||||
f"Invalid path:{path} Could not find parent directory of given path")
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidFilePathException(path, outputformat)
|
||||
|
||||
if path.is_dir():
|
||||
path = path / f"{int(time.time())}-amdsmi-output.txt"
|
||||
@@ -109,8 +122,13 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
elif path.is_file():
|
||||
setattr(args, self.dest, path)
|
||||
else:
|
||||
raise argparse.ArgumentTypeError(
|
||||
f"Invalid path:{path} Could not determine if value given is a valid path")
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidFilePathException(path, outputformat)
|
||||
return CheckOutputFilePath
|
||||
|
||||
|
||||
@@ -142,26 +160,24 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
|
||||
def _check_watch_selected(self):
|
||||
""" Argument action validator:
|
||||
Validate that the -w/--watch argument was selected
|
||||
""" Validate that the -w/--watch argument was selected
|
||||
This is because -W/--watch_time and -i/--iterations are dependent on watch
|
||||
"""
|
||||
class _WatchSelectedAction(argparse.Action):
|
||||
class WatchSelectedAction(argparse.Action):
|
||||
# Checks the values
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
if args.watch is None:
|
||||
raise argparse.ArgumentError(self,
|
||||
f"Invalid argument: '{self.dest}' needs to be paired with -w/--watch")
|
||||
setattr(args, self.dest, values)
|
||||
return _WatchSelectedAction
|
||||
|
||||
raise argparse.ArgumentError(self, f"invalid argument: '{self.dest}' needs to be paired with -w/--watch")
|
||||
else:
|
||||
setattr(args, self.dest, values)
|
||||
return WatchSelectedAction
|
||||
|
||||
def _gpu_select(self, gpu_choices):
|
||||
""" Argument action validator:
|
||||
Custom argparse action to return the device handle(s) for the gpu(s) selected
|
||||
""" Custom argparse action to return the device handle(s) for the gpu(s) selected
|
||||
This will set the destination (args.gpu) to a list of 1 or more device handles
|
||||
If 1 or more device handles are not found then raise an ArgumentError for the first invalid gpu seen
|
||||
"""
|
||||
|
||||
amdsmi_helpers = self.amdsmi_helpers
|
||||
class _GPUSelectAction(argparse.Action):
|
||||
# Checks the values
|
||||
@@ -172,7 +188,17 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
setattr(args, self.dest, selected_device_handles)
|
||||
else:
|
||||
invalid_selection = selected_device_handles
|
||||
raise argparse.ArgumentError(self, f"invalid choice: '{invalid_selection}' (see available choices with -h)")
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
if invalid_selection == '':
|
||||
raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--gpu", outputformat)
|
||||
else:
|
||||
raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(invalid_selection, outputformat)
|
||||
|
||||
return _GPUSelectAction
|
||||
|
||||
|
||||
@@ -679,6 +705,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
class _ValidateFanSpeed(argparse.Action):
|
||||
# Checks the values
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
|
||||
# Convert percentage to fan level
|
||||
if isinstance(values, str):
|
||||
try:
|
||||
@@ -804,3 +831,22 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
rocm_smi_parser.add_argument('-p', '--showproductname', action='store_true', required=False, help=showproductname_help)
|
||||
rocm_smi_parser.add_argument('-v', '--showclkvolt', action='store_true', required=False, help=showclkvolt_help)
|
||||
rocm_smi_parser.add_argument('-f', '--showclkfrq', action='store_true', required=False, help=showclkfrq_help)
|
||||
|
||||
def error(self, message):
    """Translate argparse error messages into amdsmi CLI exceptions.

    Overrides ``argparse.ArgumentParser.error``. The output format used for
    reporting is derived directly from ``sys.argv`` (the parsed namespace is
    not used here): ``"json"`` when ``--json``/``--j`` is present,
    ``"csv"`` when ``--csv``/``-c`` is present, otherwise ``"human"``.

    Args:
        message: Error text handed over by argparse.

    Raises:
        amdsmi_cli_exceptions.AmdSmiInvalidCommandException: ``message``
            contains ``"argument : invalid choice: "``; raised with the
            rejected value and the output format.
        amdsmi_cli_exceptions.AmdSmiInvalidParameterException: ``message``
            contains ``"unrecognized arguments: "``; raised with the text
            following that marker and the output format.
    """
    cli_args = sys.argv[1:]
    outputformat = "human"
    if "--json" in cli_args or "--j" in cli_args:
        outputformat = "json"
    elif "--csv" in cli_args or "-c" in cli_args:
        outputformat = "csv"

    invalid_choice_marker = "argument : invalid choice: "
    unrecognized_marker = "unrecognized arguments: "

    if invalid_choice_marker in message:
        # Take the text that follows the marker wherever it occurs, instead
        # of assuming the marker sits at the very start of the message.
        _, _, remainder = message.partition(invalid_choice_marker)
        # Drop one leading character (presumably the opening quote around
        # the rejected value) and keep everything up to the next quote.
        command = remainder[1:].split("'")[0]
        raise amdsmi_cli_exceptions.AmdSmiInvalidCommandException(command, outputformat)
    elif unrecognized_marker in message:
        _, _, parameters = message.partition(unrecognized_marker)
        raise amdsmi_cli_exceptions.AmdSmiInvalidParameterException(parameters, outputformat)
    else:
        # NOTE(review): any other argparse error is only printed and this
        # method then returns, so the caller keeps running; the stock
        # ArgumentParser.error terminates the program. Confirm that
        # continuing is intended.
        print(message)
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle