SWDEV-381302 - Added Error handling for Set & Metric
Bug fixes for Set and Fan. Updated lib_amdsmi.so directory access. Backwards compatibility fixes for gpuv-smi. Change-Id: I3b7977859c750c1c3d6f41eaa761c81d8b9e5184 Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
Этот коммит содержится в:
@@ -1 +1 @@
|
||||
__version__ = "0.0.2"
|
||||
__version__ = "0.0.3"
|
||||
|
||||
@@ -1,6 +1,26 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright (C) 2023 Advanced Micro Devices. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
# this software and associated documentation files (the "Software"), to deal in
|
||||
# the Software without restriction, including without limitation the rights to
|
||||
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
# the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
# subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
AMDSMI_ERROR_MESSAGES = {
|
||||
0: "Sucess",
|
||||
@@ -23,12 +43,12 @@ AMDSMI_ERROR_MESSAGES = {
|
||||
17: "Out of bounds",
|
||||
18: "Initialization error",
|
||||
19: "Internal reference counter exceeded",
|
||||
|
||||
# Reserved for future error messages
|
||||
30: "Device busy",
|
||||
31: "Device Not found",
|
||||
32: "Device not initialized",
|
||||
33: "No more free slot",
|
||||
|
||||
# Reserved for future error messages
|
||||
40: "No data was found for given input",
|
||||
41: "Insufficient size for operation",
|
||||
42: "Unexpected size of data was read",
|
||||
@@ -41,142 +61,152 @@ def _get_error_message(error_code):
|
||||
return "Generic error"
|
||||
|
||||
class AmdSmiException(Exception):
    """Base CLI error that renders itself according to an output format.

    Subclasses populate ``json_message`` (a dict), ``csv_message`` and
    ``stdout_message`` (strings) and set ``output_format``. ``str()`` on the
    exception then returns the rendering that matches ``output_format``.
    """

    def __init__(self):
        # One pre-built rendering per supported output format.
        self.json_message = {}
        self.csv_message = ''
        self.stdout_message = ''
        # Last rendering produced by __str__; empty until str() is called.
        self.message = ''
        self.output_format = ''

    def __str__(self):
        """Return the message for the current ``output_format``.

        'json' serialises ``json_message``; 'csv' returns ``csv_message``;
        any other value falls back to ``stdout_message``. The chosen
        rendering is also cached in ``self.message``.
        """
        if self.output_format == 'json':
            self.message = json.dumps(self.json_message)
        elif self.output_format == 'csv':
            self.message = self.csv_message
        else:
            self.message = self.stdout_message

        return self.message
|
||||
|
||||
|
||||
class AmdSmiInvalidCommandException(AmdSmiException):
    """Error for an invalid command (error code -1).

    Args:
        command: The command text echoed back in the error message.
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
    """

    def __init__(self, command, outputformat):
        super().__init__()
        self.value = -1
        self.command = command
        self.output_format = outputformat

        common_message = f"Command '{self.command}' is invalid. Run '--help' for more info."

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiInvalidParameterException(AmdSmiException):
    """Error for an invalid parameter (error code -2).

    Args:
        command: The parameter text echoed back in the error message.
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
    """

    def __init__(self, command, outputformat):
        super().__init__()
        self.value = -2
        self.command = command
        self.output_format = outputformat

        common_message = f"Parameter '{self.command}' is invalid. Run '--help' for more info."

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiDeviceNotFoundException(AmdSmiException):
    """Error for a GPU index that cannot be found on the system (error code -3).

    Args:
        command: The GPU_INDEX value echoed back in the error message.
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
    """

    def __init__(self, command, outputformat):
        super().__init__()
        self.value = -3
        self.command = command
        self.output_format = outputformat

        common_message = f"GPU Device with GPU_INDEX '{self.command}' cannot be found on the system."

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
class AmdSmiInvalidFilePathException(AmdSmiException):
    """Error for a path that cannot be found (error code -4).

    Args:
        command: The path echoed back in the error message.
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
    """

    def __init__(self, command, outputformat):
        super().__init__()
        self.value = -4
        self.command = command
        self.output_format = outputformat

        common_message = f"Path '{self.command}' cannot be found."

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiInvalidParameterValueException(AmdSmiException):
    """Error for a value of invalid type or format (error code -5).

    Args:
        command: The offending value echoed back in the error message.
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
    """

    def __init__(self, command, outputformat):
        super().__init__()
        self.value = -5
        self.command = command
        self.output_format = outputformat

        common_message = f"Value '{self.command}' is not of valid type or format. Run '--help' for more info."

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiMissingParameterValueException(AmdSmiException):
    """Error for a parameter that was given without its required value (error code -6).

    Args:
        command: The parameter name echoed back in the error message.
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
    """

    def __init__(self, command, outputformat):
        super().__init__()
        self.value = -6
        self.command = command
        self.output_format = outputformat

        common_message = f"Parameter '{self.command}' requires a value. Run '--help' for more info."

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
|
||||
class AmdSmiParameterNotSupportedException(AmdSmiException):
    """Error for a parameter that is not supported on the system (error code -8).

    Args:
        command: The parameter name echoed back in the error message.
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
    """

    def __init__(self, command, outputformat):
        super().__init__()
        self.value = -8
        self.command = command
        self.output_format = outputformat

        common_message = f"Parameter '{self.command}' is not supported on the system. Run '--help' for more info."

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
class AmdSmiUnknownErrorException(AmdSmiException):
    """Catch-all error for an unknown failure (error code -100).

    Args:
        command: Stored on the instance as ``command``; it is not included
            in the error message.
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
    """

    def __init__(self, command, outputformat):
        super().__init__()
        self.value = -100
        self.command = command
        self.output_format = outputformat

        common_message = "An unknown error has occurred. Run 'help' for more info."

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
class AmdSmiAMDSMIErrorException(AmdSmiException):
    """Error wrapping a return code reported by the AMDSMI library.

    The CLI-level code stored in ``value`` is ``-1000 - abs(error_code)``;
    the raw library code is kept in ``smilibcode``.

    Args:
        outputformat: Output format selector stored in ``output_format``;
            it decides which rendering ``str()`` returns.
        error_code: Numeric return code from the library; only its
            magnitude is used.
    """

    def __init__(self, outputformat, error_code):
        super().__init__()
        self.value = -1000 - abs(error_code)
        self.smilibcode = error_code
        self.output_format = outputformat

        # Resolve the description through the module's lookup helper instead of
        # indexing AMDSMI_ERROR_MESSAGES directly, so a code without a table
        # entry does not raise KeyError while the error itself is being built.
        # NOTE(review): only the helper's "Generic error" fallback is visible
        # from here - confirm it looks codes up in AMDSMI_ERROR_MESSAGES.
        description = _get_error_message(abs(self.smilibcode))
        common_message = f"AMDSMI has returned error '{self.value}' - '{description}'"

        # Pre-build every rendering; the base class picks one in __str__.
        self.json_message["error"] = common_message
        self.json_message["code"] = self.value
        self.csv_message = f"error,code\n{common_message}, {self.value}"
        self.stdout_message = f"{common_message} Error code: {self.value}"
|
||||
|
||||
@@ -236,6 +236,7 @@ class AMDSMICommands():
|
||||
bus_info = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
try:
|
||||
bus_output_info['bdf'] = amdsmi_interface.amdsmi_get_device_bdf(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
@@ -245,7 +246,6 @@ class AMDSMICommands():
|
||||
|
||||
bus_output_info.update(bus_info)
|
||||
values_dict['bus'] = bus_output_info
|
||||
|
||||
if args.vbios:
|
||||
try:
|
||||
vbios_info = amdsmi_interface.amdsmi_get_vbios_info(args.gpu)
|
||||
@@ -282,6 +282,7 @@ class AMDSMICommands():
|
||||
power_limit = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
try:
|
||||
temp_edge_limit = amdsmi_interface.amdsmi_dev_get_temp_metric(args.gpu,
|
||||
amdsmi_interface.AmdSmiTemperatureType.EDGE, amdsmi_interface.AmdSmiTemperatureMetric.CRITICAL)
|
||||
@@ -343,10 +344,6 @@ class AMDSMICommands():
|
||||
try:
|
||||
caps_info = amdsmi_interface.amdsmi_get_caps_info(args.gpu)
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
del caps_info['ras_supported']
|
||||
caps_info['gfx'] = caps_info.pop('gfx')
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
for capability_name, capability_value in caps_info.items():
|
||||
if isinstance(capability_value, list):
|
||||
@@ -565,8 +562,8 @@ class AMDSMICommands():
|
||||
def metric(self, args, multiple_devices=False, watching_output=False, gpu=None,
|
||||
usage=None, watch=None, watch_time=None, iterations=None, fb_usage=None, power=None,
|
||||
clock=None, temperature=None, ecc=None, pcie=None, voltage=None, fan=None,
|
||||
pcie_usage=None, voltage_curve=None, overdrive=None, mem_overdrive=None,
|
||||
perf_level=None, replay_count=None, xgmi_err=None, energy=None, mem_usage=None):
|
||||
voltage_curve=None, overdrive=None, mem_overdrive=None, perf_level=None,
|
||||
replay_count=None, xgmi_err=None, energy=None, mem_usage=None):
|
||||
"""Get Metric information for target gpu
|
||||
|
||||
Args:
|
||||
@@ -586,7 +583,6 @@ class AMDSMICommands():
|
||||
pcie (bool, optional): Value override for args.pcie. Defaults to None.
|
||||
voltage (bool, optional): Value override for args.voltage. Defaults to None.
|
||||
fan (bool, optional): Value override for args.fan. Defaults to None.
|
||||
pcie_usage (bool, optional): Value override for args.pcie_usage. Defaults to None.
|
||||
voltage_curve (bool, optional): Value override for args.voltage_curve. Defaults to None.
|
||||
overdrive (bool, optional): Value override for args.overdrive. Defaults to None.
|
||||
mem_overdrive (bool, optional): Value override for args.mem_overdrive. Defaults to None.
|
||||
@@ -630,8 +626,6 @@ class AMDSMICommands():
|
||||
args.voltage = voltage
|
||||
if fan:
|
||||
args.fan = fan
|
||||
if pcie_usage:
|
||||
args.pcie_usage = pcie_usage
|
||||
if voltage_curve:
|
||||
args.voltage_curve = voltage_curve
|
||||
if overdrive:
|
||||
@@ -676,15 +670,13 @@ class AMDSMICommands():
|
||||
else:
|
||||
raise IndexError("args.gpu should not be an empty list")
|
||||
|
||||
|
||||
# Check if any of the options have been set, if not then set them all to true
|
||||
if not any([args.usage, args.fb_usage, args.power, args.clock, args.temperature, args.ecc, args.pcie, args.voltage, args.fan,
|
||||
args.pcie_usage, args.voltage_curve, args.overdrive, args.mem_overdrive, args.perf_level,
|
||||
args.replay_count, args.xgmi_err, args.energy, args.mem_usage]):
|
||||
if not any([args.usage, args.fb_usage, args.power, args.clock, args.temperature, args.ecc, args.pcie, args.voltage,
|
||||
args.fan, args.voltage_curve, args.overdrive, args.mem_overdrive, args.perf_level, args.replay_count,
|
||||
args.xgmi_err, args.energy, args.mem_usage]):
|
||||
args.usage = args.fb_usage = args.power = args.clock = args.temperature = args.ecc = args.pcie = args.voltage = args.fan = \
|
||||
args.pcie_usage = args.voltage_curve = args.overdrive = args.mem_overdrive = args.perf_level = \
|
||||
args.replay_count = args.xgmi_err = args.energy = args.mem_usage = self.all_arguments = True
|
||||
|
||||
args.voltage_curve = args.overdrive = args.mem_overdrive = args.perf_level = args.replay_count = args.xgmi_err = \
|
||||
args.energy = args.mem_usage = self.all_arguments = True
|
||||
|
||||
# Add timestamp and store values for specified arguments
|
||||
values_dict = {}
|
||||
@@ -704,7 +696,9 @@ class AMDSMICommands():
|
||||
|
||||
values_dict['usage'] = engine_usage
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['usage'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.fb_usage:
|
||||
try:
|
||||
vram_usage = amdsmi_interface.amdsmi_get_vram_usage(args.gpu)
|
||||
@@ -720,21 +714,49 @@ class AMDSMICommands():
|
||||
|
||||
values_dict['fb_usage'] = vram_usage
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['fb_usage'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.power:
|
||||
power_dict = {}
|
||||
try:
|
||||
average_socket_power = amdsmi_interface.amdsmi_get_power_measure(args.gpu)['average_socket_power']
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
pass
|
||||
power_measure = amdsmi_interface.amdsmi_get_power_measure(args.gpu)
|
||||
power_dict = {'average_socket_power': power_measure['average_socket_power'],
|
||||
'voltage_gfx': power_measure['voltage_gfx'],
|
||||
'voltage_soc': amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info,
|
||||
'voltage_mem': amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info}
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'W'
|
||||
average_socket_power = f"{average_socket_power} {unit}"
|
||||
power_dict['average_socket_power'] = f"{power_dict['average_socket_power']} W"
|
||||
power_dict['voltage_gfx'] = f"{power_dict['voltage_gfx']} mV"
|
||||
power_dict['voltage_soc'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
|
||||
power_dict['voltage_mem'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
|
||||
|
||||
values_dict['power'] = average_socket_power
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
power_dict = {'average_socket_power': e.get_error_info(),
|
||||
'voltage_gfx': e.get_error_info(),
|
||||
'voltage_soc': e.get_error_info(),
|
||||
'voltage_mem': e.get_error_info()}
|
||||
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
power_dict['current_power'] = power_dict.pop('average_socket_power')
|
||||
power_dict['current_voltage'] = power_dict.pop('voltage_gfx')
|
||||
power_dict['current_voltage_soc'] = power_dict.pop('voltage_soc')
|
||||
power_dict['current_voltage_mem'] = power_dict.pop('voltage_mem')
|
||||
|
||||
try:
|
||||
power_dict['current_fan_rpm'] = amdsmi_interface.amdsmi_dev_get_fan_rpms(args.gpu, 0)
|
||||
if self.logger.is_human_readable_format():
|
||||
power_dict['current_fan_rpm'] = f"{power_dict['current_fan_rpm']} RPM"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
power_dict['current_fan_rpm'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
values_dict['power'] = power_dict
|
||||
if args.clock:
|
||||
try:
|
||||
clock_gfx = amdsmi_interface.amdsmi_get_clock_measure(args.gpu, amdsmi_interface.AmdSmiClkType.GFX)
|
||||
@@ -751,7 +773,9 @@ class AMDSMICommands():
|
||||
|
||||
values_dict['clock'] = clocks
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['clock'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.temperature:
|
||||
try:
|
||||
temperature_edge_current = amdsmi_interface.amdsmi_dev_get_temp_metric(
|
||||
@@ -761,29 +785,44 @@ class AMDSMICommands():
|
||||
temperature_vram_current = amdsmi_interface.amdsmi_dev_get_temp_metric(
|
||||
args.gpu, amdsmi_interface.AmdSmiTemperatureType.VRAM, amdsmi_interface.AmdSmiTemperatureMetric.CURRENT)
|
||||
|
||||
temperatures = { 'edge': temperature_edge_current,
|
||||
temperatures = {'edge': temperature_edge_current,
|
||||
'hotspot': temperature_junction_current,
|
||||
'mem': temperature_vram_current}
|
||||
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
temperatures = { 'edge_temperature': temperature_edge_current,
|
||||
temperatures = {'edge_temperature': temperature_edge_current,
|
||||
'hotspot_temperature': temperature_junction_current,
|
||||
'mem_temperature': temperature_vram_current}
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = '\N{DEGREE SIGN}C'
|
||||
if self.logger.is_gpuvsmi_compatibility():
|
||||
unit = 'C'
|
||||
for temperature_value in temperatures:
|
||||
temperatures[temperature_value] = f"{temperatures[temperature_value]} {unit}"
|
||||
|
||||
values_dict['temperature'] = temperatures
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['temperature'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.ecc:
|
||||
ecc_dict = {}
|
||||
try:
|
||||
values_dict['ecc'] = amdsmi_interface.amdsmi_get_ecc_error_count(args.gpu)
|
||||
ras_states = amdsmi_interface.amdsmi_get_ras_block_features_enabled(args.gpu)
|
||||
for state in ras_states:
|
||||
if state['status'] == amdsmi_interface.AmdSmiRasErrState.ENABLED:
|
||||
gpu_block = amdsmi_interface.AmdSmiGpuBlock[state['block']]
|
||||
ecc_count = amdsmi_interface.amdsmi_get_ecc_error_count(args.gpu, gpu_block)
|
||||
ecc_dict[state['block']] = {'correctable' : ecc_count['correctable_count'],
|
||||
'uncorrectable': ecc_count['uncorrectable_count']}
|
||||
if ecc_dict == {}:
|
||||
ecc_dict = 'No RAS Blocks Enabled'
|
||||
values_dict['ecc'] = ecc_dict
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['ecc'] = e.get_error_info()
|
||||
raise e
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.pcie:
|
||||
try:
|
||||
pcie_link_status = amdsmi_interface.amdsmi_get_pcie_link_caps(args.gpu)
|
||||
@@ -798,7 +837,9 @@ class AMDSMICommands():
|
||||
|
||||
values_dict['pcie'] = pcie_link_status
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['pcie'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.voltage:
|
||||
try:
|
||||
volt_metric = amdsmi_interface.amdsmi_dev_get_volt_metric(
|
||||
@@ -810,38 +851,39 @@ class AMDSMICommands():
|
||||
|
||||
values_dict['voltage'] = volt_metric
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['voltage'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.fan:
|
||||
try:
|
||||
fan_speed = amdsmi_interface.amdsmi_dev_get_fan_speed(args.gpu, 0)
|
||||
fan_speed_error = False
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
fan_speed = e.get_error_info()
|
||||
fan_speed_error = True
|
||||
|
||||
try:
|
||||
fan_max = amdsmi_interface.amdsmi_dev_get_fan_speed_max(args.gpu, 0)
|
||||
if isinstance(fan_speed, int) and fan_max > 0:
|
||||
if not fan_speed_error and fan_max > 0:
|
||||
fan_percent = round((float(fan_speed) / float(fan_max)) * 100, 2)
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = '%'
|
||||
fan_percent = f"{fan_percent} {unit}"
|
||||
else:
|
||||
fan_percent = 'Unable to detect fan speed'
|
||||
|
||||
fan_rpm = amdsmi_interface.amdsmi_dev_get_fan_rpms(args.gpu, 0)
|
||||
|
||||
values_dict['fan'] = {'speed': fan_speed,
|
||||
'max' : fan_max,
|
||||
'rpm' : fan_rpm,
|
||||
'usage' : fan_percent}
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
if args.pcie_usage:
|
||||
fan_max = e.get_error_info()
|
||||
fan_percent = 'Unable to detect fan speed'
|
||||
|
||||
try:
|
||||
pcie_link_status = amdsmi_interface.amdsmi_get_pcie_link_status(args.gpu)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit ='MT/s'
|
||||
pcie_link_status['pcie_speed'] = f"{pcie_link_status['pcie_speed']} {unit}"
|
||||
|
||||
values_dict['pcie_usage'] = pcie_link_status
|
||||
fan_rpm = amdsmi_interface.amdsmi_dev_get_fan_rpms(args.gpu, 0)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
fan_rpm = e.get_error_info()
|
||||
|
||||
values_dict['fan'] = {'speed': fan_speed,
|
||||
'max' : fan_max,
|
||||
'rpm' : fan_rpm,
|
||||
'usage' : fan_percent}
|
||||
if args.voltage_curve:
|
||||
try:
|
||||
od_volt = amdsmi_interface.amdsmi_dev_get_od_volt_info(args.gpu)
|
||||
@@ -862,7 +904,6 @@ class AMDSMICommands():
|
||||
values_dict['voltage_curve'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
if args.overdrive:
|
||||
try:
|
||||
overdrive_level = amdsmi_interface.amdsmi_dev_get_overdrive_level(args.gpu)
|
||||
@@ -873,29 +914,34 @@ class AMDSMICommands():
|
||||
|
||||
values_dict['overdrive'] = overdrive_level
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['overdrive'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.mem_overdrive:
|
||||
values_dict['mem_overdrive'] = amdsmi_interface.AmdSmiRetCode.NOT_IMPLEMENTED
|
||||
|
||||
values_dict['mem_overdrive'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.NOT_IMPLEMENTED).err_info
|
||||
if args.perf_level:
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
values_dict['perf_level'] = perf_level
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['perf_level'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.replay_count:
|
||||
try:
|
||||
pci_replay_counter = amdsmi_interface.amdsmi_dev_get_pci_replay_counter(args.gpu)
|
||||
values_dict['replay_count'] = pci_replay_counter
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['replay_count'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.xgmi_err:
|
||||
try:
|
||||
values_dict['xgmi_err'] = amdsmi_interface.amdsmi_dev_xgmi_error_status(args.gpu)
|
||||
except amdsmi_interface.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.AmdSmiRetCode.ERR_NOT_SUPPORTED:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.ERR_NOT_SUPPORTED:
|
||||
values_dict['xgmi_err'] = 'N/A'
|
||||
else:
|
||||
elif not self.all_arguments:
|
||||
raise e
|
||||
if args.energy:
|
||||
try:
|
||||
@@ -907,14 +953,16 @@ class AMDSMICommands():
|
||||
|
||||
values_dict['energy'] = energy
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
values_dict['energy'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.mem_usage:
|
||||
memory_total = {}
|
||||
try:
|
||||
memory_total_vram = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.VRAM)
|
||||
memory_total_vis_vram = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.VIS_VRAM)
|
||||
memory_total_gtt = amdsmi_interface.amdsmi_dev_get_memory_total(args.gpu, amdsmi_interface.AmdSmiMemoryType.GTT)
|
||||
|
||||
memory_total = {}
|
||||
# Convert mem_usage to megabytes
|
||||
memory_total['vram'] = memory_total_vram // (1024*1024)
|
||||
memory_total['vis_vram'] = memory_total_vis_vram // (1024*1024)
|
||||
@@ -927,10 +975,36 @@ class AMDSMICommands():
|
||||
memory_total['vis_vram'] = f"{memory_total['vis_vram']} {unit}"
|
||||
memory_total['gtt'] = f"{memory_total['gtt']} {unit}"
|
||||
|
||||
|
||||
values_dict['mem_usage'] = memory_total
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise e
|
||||
memory_total['vram'] = e.get_error_info()
|
||||
memory_total['vis_vram'] = e.get_error_info()
|
||||
memory_total['gtt'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
try:
|
||||
total_used_vram = amdsmi_interface.amdsmi_dev_get_memory_usage(args.gpu, amdsmi_interface.AmdSmiMemoryType.VRAM)
|
||||
total_used_vis_vram = amdsmi_interface.amdsmi_dev_get_memory_usage(args.gpu, amdsmi_interface.AmdSmiMemoryType.VIS_VRAM)
|
||||
total_used_gtt = amdsmi_interface.amdsmi_dev_get_memory_usage(args.gpu, amdsmi_interface.AmdSmiMemoryType.GTT)
|
||||
|
||||
# Convert mem_usage to megabytes
|
||||
memory_total['used_vram'] = total_used_vram // (1024*1024)
|
||||
memory_total['used_vis_vram'] = total_used_vis_vram // (1024*1024)
|
||||
memory_total['used_gtt'] = total_used_gtt // (1024*1024)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
memory_total['used_vram'] = f"{memory_total['used_vram']} {unit}"
|
||||
memory_total['used_vis_vram'] = f"{memory_total['used_vis_vram']} {unit}"
|
||||
memory_total['used_gtt'] = f"{memory_total['used_gtt']} {unit}"
|
||||
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_total['used_vram'] = e.get_error_info()
|
||||
memory_total['used_vis_vram'] = e.get_error_info()
|
||||
memory_total['used_gtt'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
values_dict['mem_usage'] = memory_total
|
||||
|
||||
# Store values in logger.output
|
||||
self.logger.store_output(args.gpu, 'values', values_dict)
|
||||
@@ -1126,6 +1200,7 @@ class AMDSMICommands():
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
|
||||
|
||||
def topology(self, args, multiple_devices=False, gpu=None, access=None,
|
||||
weight=None, hops=None, type=None, numa=None, numa_bw=None):
|
||||
""" Get topology information for target gpus
|
||||
@@ -1196,6 +1271,7 @@ class AMDSMICommands():
|
||||
if args.numa_bw:
|
||||
pass
|
||||
|
||||
|
||||
def set_value(self, args, multiple_devices=False, gpu=None, clock=None, sclk=None, mclk=None,
|
||||
pcie=None, slevel=None, mlevel=None, vc=None, srange=None, mrange=None,
|
||||
fan=None, perflevel=None, overdrive=None, memoverdrive=None,
|
||||
@@ -1277,6 +1353,18 @@ class AMDSMICommands():
|
||||
|
||||
args.gpu = device_handle
|
||||
|
||||
# Build GPU string for errors
|
||||
try:
|
||||
gpu_bdf = amdsmi_interface.amdsmi_get_device_bdf(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException:
|
||||
gpu_bdf = f'BDF Unavailable for {args.gpu}'
|
||||
try:
|
||||
gpu_id = self.helpers.get_gpu_id_from_device_handle(args.gpu)
|
||||
except IndexError:
|
||||
gpu_id = f'ID Unavailable for {args.gpu}'
|
||||
gpu_string = f"GPU ID: {gpu_id} BDF:{gpu_bdf}"
|
||||
|
||||
# Handle args
|
||||
if args.clock:
|
||||
clock_type, freq_bitmask = args.clock
|
||||
|
||||
@@ -1284,231 +1372,232 @@ class AMDSMICommands():
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to get performance level of {args.gpu}")
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL.value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the performance level of {args.gpu} to manual")
|
||||
|
||||
if clock_type != amdsmi_interface.AmdSmiClkType.PCIE.value:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
clock_type = amdsmi_interface.AmdSmiClkType(clock_type)
|
||||
raise ValueError(self, f"Unable to set the {clock_type} clock frequency on {args.gpu}")
|
||||
print(f'Successfully set frequency bitmask on {args.gpu}')
|
||||
else:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_pci_bandwidth(args.gpu, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
clock_type = amdsmi_interface.AmdSmiClkType(clock_type)
|
||||
raise ValueError(self, f"Unable to set the {clock_type} clock frequency on {args.gpu}")
|
||||
print(f'Successfully set frequency bitmask on {args.gpu}')
|
||||
|
||||
if args.sclk:
|
||||
freq_bitmask = args.sclk
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to get performance level of {args.gpu}")
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL.value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the performance level of {args.gpu} to manual")
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type.value, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the {clock_type} clock frequency on {args.gpu}")
|
||||
print(f'Successfully set frequency bitmask on {args.gpu}')
|
||||
|
||||
if args.mclk:
|
||||
freq_bitmask = args.sclk
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to get performance level of {args.gpu}")
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL.value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the performance level of {args.gpu} to manual")
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type.value, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the {clock_type} clock frequency on {args.gpu}")
|
||||
print(f'Successfully set frequency bitmask on {args.gpu}')
|
||||
|
||||
if args.pcie:
|
||||
freq_bitmask = args.sclk
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.PCIE
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to get performance level of {args.gpu}")
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL.value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the performance level of {args.gpu} to manual")
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_pci_bandwidth(args.gpu, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the {clock_type} clock frequency on {args.gpu}")
|
||||
print(f'Successfully set frequency bitmask on {args.gpu}')
|
||||
|
||||
if args.slevel:
|
||||
level, value = args.slevel
|
||||
level = amdsmi_interface.AmdSmiFreqInd(level).value
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_od_clk_info(args.gpu, level, value, clock_type.value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {args.gpu}")
|
||||
print(f'Successfully changed clock frequency on {args.gpu}')
|
||||
|
||||
if args.mlevel:
|
||||
level, value = args.mlevel
|
||||
level = amdsmi_interface.AmdSmiFreqInd(level).value
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_od_clk_info(args.gpu, level, value, clock_type.value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {args.gpu}")
|
||||
print(f'Successfully changed clock frequency on {args.gpu}')
|
||||
|
||||
if args.vc:
|
||||
point, clk, volt = args.vc
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_od_volt_info(args.gpu, point, clk, volt)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the Voltage Curve point {point} to {clk}(MHz) {volt}(mV) on {args.gpu}")
|
||||
print(f'Successfully set voltage point {point} to {clk}(MHz) {volt}(mV) on {args.gpu}')
|
||||
|
||||
if args.srange:
|
||||
min_value, max_value = args.srange
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_range(args.gpu, min_value, max_value, clock_type.value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {args.gpu}")
|
||||
print(f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {args.gpu}")
|
||||
|
||||
if args.mrange:
|
||||
min_value, max_value = args.srange
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_range(args.gpu, min_value, max_value, clock_type.value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {args.gpu}")
|
||||
print(f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {args.gpu}")
|
||||
|
||||
if args.fan:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_fan_speed(args.gpu, 0, args.fan)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set fan speed {args.fan} on {args.gpu}")
|
||||
print(f"Successfully set fan speed {args.fan} on {args.gpu}")
|
||||
|
||||
if args.perflevel:
|
||||
perf_levels = amdsmi_interface.amdsmi_wrapper.amdsmi_dev_perf_level_t__enumvalues
|
||||
for value in perf_levels:
|
||||
if args.perflevel.lower() in perf_levels[value]:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, value)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set performance level {args.perflevel} on {args.gpu}")
|
||||
print(f"Successfully set performance level {args.perflevel} on {args.gpu}")
|
||||
break
|
||||
|
||||
if args.overdrive or args.overdrive == 0:
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to get performance level of {args.gpu}")
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the performance level of {args.gpu} to manual")
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_overdrive_level_v1(args.gpu, args.overdrive)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set overdrive {args.overdrive} to {args.gpu}")
|
||||
print(f"Successfully to set overdrive {args.overdrive} to {args.gpu}")
|
||||
if clock_type != amdsmi_interface.AmdSmiClkType.PCIE:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
else:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_pci_bandwidth(args.gpu, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
|
||||
if args.memoverdrive or args.memoverdrive == 0:
|
||||
self.logger.store_output(args.gpu, 'clock', f'Successfully set clock frequency bitmask for {clock_type}')
|
||||
|
||||
if isinstance(args.sclk, int):
|
||||
freq_bitmask = args.sclk
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to get performance level of {args.gpu}")
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL.value)
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set the performance level of {args.gpu} to manual")
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
if args.poweroverdrive:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'sclk', 'Successfully set clock frequency bitmask')
|
||||
if isinstance(args.mclk, int):
|
||||
freq_bitmask = args.mclk
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_freq(args.gpu, clock_type, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'mclk', 'Successfully set clock frequency bitmask')
|
||||
if isinstance(args.pcie, int):
|
||||
freq_bitmask = args.pcie
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.PCIE
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_pci_bandwidth(args.gpu, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'pcie', 'Successfully set clock frequency bitmask')
|
||||
if isinstance(args.slevel, int):
|
||||
level, value = args.slevel
|
||||
level = amdsmi_interface.AmdSmiFreqInd(level)
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_od_clk_info(args.gpu, level, value, clock_type)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'slevel', 'Successfully changed clock frequency')
|
||||
if isinstance(args.mlevel, int):
|
||||
level, value = args.mlevel
|
||||
level = amdsmi_interface.AmdSmiFreqInd(level)
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_od_clk_info(args.gpu, level, value, clock_type)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'mlevel', 'Successfully changed clock frequency')
|
||||
if isinstance(args.vc, int):
|
||||
point, clk, volt = args.vc
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_od_volt_info(args.gpu, point, clk, volt)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the Voltage Curve point {point} to {clk}(MHz) {volt}(mV) on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'vc', f'Successfully set voltage point {point} to {clk}(MHz) {volt}(mV)')
|
||||
if isinstance(args.srange, int):
|
||||
min_value, max_value = args.srange
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_range(args.gpu, min_value, max_value, clock_type)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'srange', f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz)")
|
||||
if isinstance(args.mrange, int):
|
||||
min_value, max_value = args.srange
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_clk_range(args.gpu, min_value, max_value, clock_type)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'mrange', f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz)")
|
||||
if isinstance(args.fan, int):
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_fan_speed(args.gpu, 0, args.fan)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set fan speed {args.fan} on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'fan', f"Successfully set fan speed {args.fan}")
|
||||
if args.perflevel:
|
||||
perf_level = amdsmi_interface.AmdSmiDevPerfLevel[args.perflevel]
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, perf_level)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set performance level {args.perflevel} on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'perflevel', f"Successfully set performance level {args.perflevel}")
|
||||
if isinstance(args.overdrive, int):
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_overdrive_level_v1(args.gpu, args.overdrive)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set overdrive {args.overdrive} to {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'overdrive', f"Successfully to set overdrive level to {args.overdrive}")
|
||||
if isinstance(args.memoverdrive, int):
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_dev_get_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_perf_level_v1(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'memoverdrive', f"Successfully to set memoverdrive level to {args.memoverdrive}")
|
||||
if isinstance(args.poweroverdrive, int):
|
||||
overdrive_power_cap = args.poweroverdrive
|
||||
try:
|
||||
power_caps = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to get the power cap info for {args.gpu}")
|
||||
raise ValueError(f"Unable to get the power cap info for {gpu_string}") from e
|
||||
if overdrive_power_cap == 0:
|
||||
overdrive_power_cap = power_caps['power_cap_default']
|
||||
else:
|
||||
overdrive_power_cap *= 1000000
|
||||
|
||||
if overdrive_power_cap < power_caps['min_power_cap']:
|
||||
raise ValueError(self, f"Requested power cap: {overdrive_power_cap} is lower than the min power cap: {power_caps['min_power_cap']}")
|
||||
raise ValueError(f"Requested power cap: {overdrive_power_cap} is lower than the min power cap: {power_caps['min_power_cap']}")
|
||||
|
||||
if overdrive_power_cap > power_caps['max_power_cap']:
|
||||
raise ValueError(self, f"Requested power cap: {overdrive_power_cap} is greater than the max power cap: {power_caps['max_power_cap']}")
|
||||
raise ValueError(f"Requested power cap: {overdrive_power_cap} is greater than the max power cap: {power_caps['max_power_cap']}")
|
||||
|
||||
if overdrive_power_cap == power_caps['power_cap']:
|
||||
raise ValueError(self, f"Requested power cap: {overdrive_power_cap} is the same as the current power cap: {power_caps['power_cap']}")
|
||||
raise ValueError(f"Requested power cap: {overdrive_power_cap} is the same as the current power cap: {power_caps['power_cap']}")
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_dev_set_power_cap(args.gpu, 0, overdrive_power_cap)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set power cap to {overdrive_power_cap} on {args.gpu}")
|
||||
raise ValueError(f"Unable to set power cap to {overdrive_power_cap} on {gpu_string}") from e
|
||||
|
||||
try:
|
||||
power_caps = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to get the power cap info for {args.gpu} post set")
|
||||
raise ValueError(f"Unable to get the power cap info for {gpu_string} post set") from e
|
||||
|
||||
if power_caps['power_cap'] == overdrive_power_cap:
|
||||
print(f"Successfully set the power cap {overdrive_power_cap} on {args.gpu}")
|
||||
self.logger.store_output(args.gpu, 'power_cap', f"Successfully set the power cap {overdrive_power_cap}")
|
||||
else:
|
||||
raise ValueError(self, f"Power cap: {overdrive_power_cap} set failed on {args.gpu}")
|
||||
|
||||
raise ValueError(f"Power cap: {overdrive_power_cap} set failed on {gpu_string}")
|
||||
if args.profile:
|
||||
print(amdsmi_interface.AmdSmiRetCode.NOT_IMPLEMENTED)
|
||||
|
||||
if args.perfdeterminism:
|
||||
self.logger.store_output(args.gpu, 'profile', "Not Yet Implemented")
|
||||
if isinstance(args.perfdeterminism, int):
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_perf_determinism_mode(args.gpu, args.perfdeterminism)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
raise ValueError(self, f"Unable to set performance determinism and clock frequency to {args.perfdeterminism} on {args.gpu}")
|
||||
print(f"Successfully enabled performance determinism and set GFX clock frequency to {args.perfdeterminism} on {args.gpu}")
|
||||
raise ValueError(f"Unable to set performance determinism and clock frequency to {args.perfdeterminism} on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'perfdeterminism', f"Successfully enabled performance determinism and set GFX clock frequency to {args.perfdeterminism}")
|
||||
|
||||
if multiple_devices:
|
||||
self.logger.store_multiple_device_output()
|
||||
return # Skip printing when there are multiple devices
|
||||
|
||||
self.logger.print_output()
|
||||
|
||||
|
||||
def reset(self, args, multiple_devices=False, gpu=None, gpureset=None,
|
||||
@@ -1652,7 +1741,7 @@ class AMDSMICommands():
|
||||
|
||||
|
||||
def rocm_smi(self, args):
    """Placeholder entry point for the legacy rocm-smi commands.

    Args:
        args: Parsed command-line arguments; not used by this placeholder.
    """
    # Emit the placeholder message exactly once (the misspelled duplicate
    # "commandss" line is dropped).
    print("Placeholder for rocm-smi legacy commands")
|
||||
|
||||
|
||||
def _event_thread(self, commands, i):
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
import logging
|
||||
import platform
|
||||
import sys
|
||||
import time
|
||||
|
||||
from pathlib import Path
|
||||
@@ -118,6 +119,20 @@ class AMDSMIHelpers():
|
||||
return self._is_windows
|
||||
|
||||
|
||||
def get_output_format(self):
    """Return the output format requested on the command line.

    Scans ``sys.argv[1:]`` for exact-match format flags. The JSON flags are
    checked first, so JSON wins when both JSON and CSV flags are present.

    Returns:
        str: "json" if "--json" or "--j" is present, otherwise "csv" if
            "--csv" or "--c" is present, otherwise "human".
    """
    cli_args = sys.argv[1:]
    if "--json" in cli_args or "--j" in cli_args:
        return "json"
    if "--csv" in cli_args or "--c" in cli_args:
        return "csv"
    return "human"
|
||||
|
||||
|
||||
def get_gpu_choices(self):
|
||||
"""Return dictionary of possible GPU choices and string of the output:
|
||||
Dictionary will be in format: gpus[ID] : (BDF, UUID, Device Handle)
|
||||
@@ -307,11 +322,34 @@ class AMDSMIHelpers():
|
||||
return asic_info['vendor_id'] == AMD_VENDOR_ID
|
||||
|
||||
|
||||
def is_valid_clock_type(self, clock_type):
    """Check whether ``clock_type`` is a key of the wrapper's clock-type table.

    Args:
        clock_type: Candidate tested with ``in`` against
            ``amdsmi_wrapper.amdsmi_clk_type_t__enumvalues``.

    Returns:
        tuple: ``(is_valid, valid_keys)`` where ``is_valid`` is a bool and
            ``valid_keys`` is the ``keys()`` view of the table. The keys are
            returned whether or not the lookup succeeds.
    """
    enum_values = amdsmi_interface.amdsmi_wrapper.amdsmi_clk_type_t__enumvalues
    return clock_type in enum_values, enum_values.keys()
|
||||
def get_perf_levels(self):
    """Return the names and distinct values of ``AmdSmiDevPerfLevel``.

    Returns:
        tuple: ``(names, values)``. ``names`` lists every member name in
            enum iteration order. ``values`` lists the unique member values;
            it is built from a set, so its order is the set's iteration
            order rather than definition order.
    """
    members = list(amdsmi_interface.AmdSmiDevPerfLevel)
    perf_levels_str = [level.name for level in members]
    # De-duplicate through a set, as aliases may share a value.
    perf_levels_int = list({level.value for level in members})
    return perf_levels_str, perf_levels_int
|
||||
|
||||
|
||||
def get_clock_types(self):
    """Return the names and distinct values of ``AmdSmiClkType``.

    Returns:
        tuple: ``(names, values)``. ``names`` lists every member name in
            enum iteration order. ``values`` lists the unique member values;
            it is built from a set, so its order is the set's iteration
            order rather than definition order.
    """
    members = list(amdsmi_interface.AmdSmiClkType)
    clock_types_str = [clock.name for clock in members]
    # De-duplicate through a set, as aliases may share a value.
    clock_types_int = list({clock.value for clock in members})
    return clock_types_str, clock_types_int
|
||||
|
||||
|
||||
def validate_clock_type(self, input_clock_type):
    """Validate a clock type given either by name or by integer value.

    Args:
        input_clock_type: A string compared case-insensitively against the
            names from ``self.get_clock_types()``, or an int checked against
            the values from the same call. Any other type is rejected.

    Returns:
        tuple: ``(is_valid, clock_type)``.
            * str match: ``(True, <matching name as spelled in the enum>)``
            * int match: ``(True, AmdSmiClkType(input_clock_type))``
            * otherwise: ``(False, input_clock_type)`` with the input unchanged
    """
    valid_names, valid_values = self.get_clock_types()

    if isinstance(input_clock_type, str):
        wanted = input_clock_type.lower()  # lower-case once, not per candidate
        for name in valid_names:
            if name.lower() == wanted:
                # Hand back the member *name* (a str), not the enum member.
                return True, name
        return False, input_clock_type

    if isinstance(input_clock_type, int) and input_clock_type in valid_values:
        return True, amdsmi_interface.AmdSmiClkType(input_clock_type)

    return False, input_clock_type
|
||||
|
||||
|
||||
def confirm_out_of_spec_warning(self, auto_respond=False):
|
||||
@@ -348,15 +386,3 @@ class AMDSMIHelpers():
|
||||
return True, profile_presets[profile]
|
||||
else:
|
||||
return False, profile_presets.values()
|
||||
|
||||
|
||||
def get_perf_level(self, device_handle):
    """Return the current performance level of a given device.

    Args:
        device_handle: Device handle forwarded unchanged to
            ``amdsmi_interface.amdsmi_dev_get_perf_level``.

    Returns:
        The value returned by ``amdsmi_dev_get_perf_level`` for the device.

    Raises:
        ValueError: If the library call raises ``AmdSmiLibraryException``.
            The library exception is chained as ``__cause__``.
    """
    try:
        # Return the looked-up level; previously it was stored and discarded.
        return amdsmi_interface.amdsmi_dev_get_perf_level(device_handle)
    except amdsmi_exception.AmdSmiLibraryException as e:
        # Message-only ValueError (no stray ``self`` argument), chained to
        # the library error so the root cause stays visible.
        raise ValueError(f"Unable to get performance level of {device_handle}") from e
|
||||
|
||||
+101
-115
@@ -31,20 +31,26 @@ import sys
|
||||
from _version import __version__
|
||||
from amdsmi_helpers import AMDSMIHelpers
|
||||
import amdsmi_cli_exceptions
|
||||
from BDF import BDF
|
||||
|
||||
|
||||
class AMDSMIParser(argparse.ArgumentParser):
|
||||
"""Unified Parser for AMDSMI CLI.
|
||||
This parser doesn't access amdsmi's lib directly,but via AMDSMIHelpers,
|
||||
this allows for us to use this parser with future OS & Platform integration.
|
||||
|
||||
Args:
|
||||
argparse (ArgumentParser): argparse.ArgumentParser
|
||||
"""
|
||||
def __init__(self, version, discovery, static, firmware, bad_pages, metric,
|
||||
process, profile, event, topology, set_value, reset, rocmsmi):
|
||||
|
||||
# Helper variables
|
||||
self.amdsmi_helpers = AMDSMIHelpers()
|
||||
self.gpu_choices, self.gpu_choices_str = self.amdsmi_helpers.get_gpu_choices()
|
||||
self.helpers = AMDSMIHelpers()
|
||||
self.gpu_choices, self.gpu_choices_str = self.helpers.get_gpu_choices()
|
||||
self.vf_choices = ['3', '2', '1']
|
||||
|
||||
version_string = f"Version: {__version__}"
|
||||
platform_string = f"Platform: {self.amdsmi_helpers.os_info()}"
|
||||
platform_string = f"Platform: {self.helpers.os_info()}"
|
||||
|
||||
# Adjust argument parser options
|
||||
super().__init__(
|
||||
@@ -83,12 +89,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
if int_value.isdigit(): # Is digit works only on positive numbers
|
||||
return int(int_value)
|
||||
else:
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
outputformat = self.helpers.get_output_format()
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(int_value, outputformat)
|
||||
|
||||
|
||||
@@ -100,6 +101,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
If the path is a file and it doesn't exist create and return the file path
|
||||
"""
|
||||
class CheckOutputFilePath(argparse.Action):
|
||||
outputformat = self.helpers.get_output_format()
|
||||
# Checks the values
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
path = Path(values)
|
||||
@@ -107,13 +109,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
if path.parent.is_dir():
|
||||
path.touch()
|
||||
else:
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidFilePathException(path, outputformat)
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidFilePathException(path, CheckOutputFilePath.outputformat)
|
||||
|
||||
if path.is_dir():
|
||||
path = path / f"{int(time.time())}-amdsmi-output.txt"
|
||||
@@ -122,13 +118,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
elif path.is_file():
|
||||
setattr(args, self.dest, path)
|
||||
else:
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidFilePathException(path, outputformat)
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidFilePathException(path, CheckOutputFilePath.outputformat)
|
||||
return CheckOutputFilePath
|
||||
|
||||
|
||||
@@ -178,8 +168,9 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
If 1 or more device handles are not found then raise an ArgumentError for the first invalid gpu seen
|
||||
"""
|
||||
|
||||
amdsmi_helpers = self.amdsmi_helpers
|
||||
amdsmi_helpers = self.helpers
|
||||
class _GPUSelectAction(argparse.Action):
|
||||
ouputformat=self.helpers.get_output_format()
|
||||
# Checks the values
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
status, selected_device_handles = amdsmi_helpers.get_device_handles_from_gpu_selections(gpu_selections=values,
|
||||
@@ -187,17 +178,10 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
if status:
|
||||
setattr(args, self.dest, selected_device_handles)
|
||||
else:
|
||||
invalid_selection = selected_device_handles
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
if invalid_selection == '':
|
||||
raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--gpu", outputformat)
|
||||
if selected_device_handles == '':
|
||||
raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--gpu", _GPUSelectAction.ouputformat)
|
||||
else:
|
||||
raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(invalid_selection, outputformat)
|
||||
raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, _GPUSelectAction.ouputformat)
|
||||
|
||||
return _GPUSelectAction
|
||||
|
||||
@@ -221,6 +205,21 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
|
||||
|
||||
|
||||
def _add_watch_arguments(self, subcommand_parser):
|
||||
# Device arguments help text
|
||||
watch_help = "Reprint the command in a loop of Interval seconds"
|
||||
watch_time_help = "The total time to watch the given command"
|
||||
iterations_help = "Total number of iterations to loop on the given command"
|
||||
|
||||
# Mutually Exclusive Args within the subparser
|
||||
subcommand_parser.add_argument('-w', '--watch', action='store', metavar='loop_time',
|
||||
type=self._positive_int, required=False, help=watch_help)
|
||||
subcommand_parser.add_argument('-W', '--watch_time', action=self._check_watch_selected(), metavar='total_loop_time',
|
||||
type=self._positive_int, required=False, help=watch_time_help)
|
||||
subcommand_parser.add_argument('-i', '--iterations', action=self._check_watch_selected(), metavar='number_of_iterations',
|
||||
type=self._positive_int, required=False, help=iterations_help)
|
||||
|
||||
|
||||
def _add_device_arguments(self, subcommand_parser, required=False):
|
||||
# Device arguments help text
|
||||
gpu_help = f"Select a GPU ID, BDF, or UUID from the possible choices:\n{self.gpu_choices_str}"
|
||||
@@ -232,7 +231,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
device_args.add_argument('-g', '--gpu', action=self._gpu_select(self.gpu_choices),
|
||||
nargs='+', help=gpu_help)
|
||||
|
||||
if self.amdsmi_helpers.is_hypervisor():
|
||||
if self.helpers.is_hypervisor():
|
||||
device_args.add_argument('-v', '--vf', action='store', nargs='+',
|
||||
help=vf_help, choices=self.vf_choices)
|
||||
|
||||
@@ -313,13 +312,13 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
static_parser.add_argument('-c', '--caps', action='store_true', required=False, help=caps_help)
|
||||
|
||||
# Options to display on Hypervisors and Baremetal
|
||||
if self.amdsmi_helpers.is_hypervisor() or self.amdsmi_helpers.is_baremetal():
|
||||
if self.helpers.is_hypervisor() or self.helpers.is_baremetal():
|
||||
static_parser.add_argument('-r', '--ras', action='store_true', required=False, help=ras_help)
|
||||
if self.amdsmi_helpers.is_linux():
|
||||
if self.helpers.is_linux():
|
||||
static_parser.add_argument('-B', '--board', action='store_true', required=False, help=board_help)
|
||||
|
||||
# Options to only display on a Hypervisor
|
||||
if self.amdsmi_helpers.is_hypervisor():
|
||||
if self.helpers.is_hypervisor():
|
||||
static_parser.add_argument('-d', '--dfc-ucode', action='store_true', required=False, help=dfc_help)
|
||||
static_parser.add_argument('-f', '--fb-info', action='store_true', required=False, help=fb_help)
|
||||
static_parser.add_argument('-n', '--num-vf', action='store_true', required=False, help=num_vf_help)
|
||||
@@ -349,12 +348,12 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
firmware_parser.add_argument('-f', '--ucode-list', '--fw-list', dest='fw_list', action='store_true', required=False, help=fw_list_help, default=True)
|
||||
|
||||
# Options to only display on a Hypervisor
|
||||
if self.amdsmi_helpers.is_hypervisor():
|
||||
if self.helpers.is_hypervisor():
|
||||
firmware_parser.add_argument('-e', '--error-records', action='store_true', required=False, help=err_records_help)
|
||||
|
||||
|
||||
def _add_bad_pages_parser(self, subparsers, func):
|
||||
if not (self.amdsmi_helpers.is_baremetal() and self.amdsmi_helpers.is_linux()):
|
||||
if not (self.helpers.is_baremetal() and self.helpers.is_linux()):
|
||||
# The bad_pages subcommand is only applicable to Linux Baremetal systems
|
||||
return
|
||||
|
||||
@@ -369,7 +368,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
un_res_help = "Displays unreservable pages"
|
||||
|
||||
# Create bad_pages subparser
|
||||
bad_pages_parser = subparsers.add_parser('bad-pages', help=bad_pages_help, description=bad_pages_subcommand_help, aliases=['bad_pages'])
|
||||
bad_pages_parser = subparsers.add_parser('bad-pages', help=bad_pages_help, description=bad_pages_subcommand_help)
|
||||
bad_pages_parser._optionals.title = bad_pages_optionals_title
|
||||
bad_pages_parser.formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, max_help_position=80, width=90)
|
||||
bad_pages_parser.set_defaults(func=func)
|
||||
@@ -393,9 +392,6 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
# Optional arguments help text
|
||||
usage_help = "Displays engine usage information"
|
||||
watch_help = "Reprint the command in a loop of Interval seconds"
|
||||
watch_time_help = "The total time to watch the given command"
|
||||
iterations_help = "Total number of iterations to loop on the given command"
|
||||
|
||||
# Help text for Arguments only Available on Virtual OS and Baremetal platforms
|
||||
fb_usage_help = "Total and used framebuffer"
|
||||
@@ -410,7 +406,6 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
# Help text for Arguments only on Linux Baremetal platforms
|
||||
fan_help = "Current fan speed"
|
||||
pcie_usage_help = "Estimated PCIe link usage"
|
||||
vc_help = "Display voltage curve"
|
||||
overdrive_help = "Current GPU clock overdrive level"
|
||||
mo_help = "Current memory clock overdrive level"
|
||||
@@ -435,21 +430,18 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
# Add Device args
|
||||
self._add_device_arguments(metric_parser, required=False)
|
||||
|
||||
# Add Watch args
|
||||
self._add_watch_arguments(metric_parser)
|
||||
|
||||
# Optional Args
|
||||
metric_parser.add_argument('-u', '--usage', action='store_true', required=False, help=usage_help)
|
||||
metric_parser.add_argument('-w', '--watch', action='store', metavar='Interval',
|
||||
type=self._positive_int, required=False, help=watch_help)
|
||||
metric_parser.add_argument('-W', '--watch_time', action=self._check_watch_selected(), metavar='Duration',
|
||||
type=self._positive_int, required=False, help=watch_time_help)
|
||||
metric_parser.add_argument('-i', '--iterations', action=self._check_watch_selected(), metavar='Iterations',
|
||||
type=self._positive_int, required=False, help=iterations_help)
|
||||
|
||||
# Optional Args for Virtual OS and Baremetal systems
|
||||
if self.amdsmi_helpers.is_virtual_os() or self.amdsmi_helpers.is_baremetal():
|
||||
if self.helpers.is_virtual_os() or self.helpers.is_baremetal():
|
||||
metric_parser.add_argument('-b', '--fb-usage', action='store_true', required=False, help=fb_usage_help)
|
||||
|
||||
# Optional Args for Hypervisors and Baremetal systems
|
||||
if self.amdsmi_helpers.is_hypervisor() or self.amdsmi_helpers.is_baremetal():
|
||||
if self.helpers.is_hypervisor() or self.helpers.is_baremetal():
|
||||
metric_parser.add_argument('-p', '--power', action='store_true', required=False, help=power_help)
|
||||
metric_parser.add_argument('-c', '--clock', action='store_true', required=False, help=clock_help)
|
||||
metric_parser.add_argument('-t', '--temperature', action='store_true', required=False, help=temperature_help)
|
||||
@@ -458,9 +450,8 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
metric_parser.add_argument('-V', '--voltage', action='store_true', required=False, help=voltage_help)
|
||||
|
||||
# Optional Args for Linux Baremetal Systems
|
||||
if self.amdsmi_helpers.is_baremetal() and self.amdsmi_helpers.is_linux():
|
||||
if self.helpers.is_baremetal() and self.helpers.is_linux():
|
||||
metric_parser.add_argument('-f', '--fan', action='store_true', required=False, help=fan_help)
|
||||
metric_parser.add_argument('-s', '--pcie-usage', action='store_true', required=False, help=pcie_usage_help)
|
||||
metric_parser.add_argument('-C', '--voltage-curve', action='store_true', required=False, help=vc_help)
|
||||
metric_parser.add_argument('-o', '--overdrive', action='store_true', required=False, help=overdrive_help)
|
||||
metric_parser.add_argument('-M', '--mem-overdrive', action='store_true', required=False, help=mo_help)
|
||||
@@ -471,14 +462,14 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
metric_parser.add_argument('-m', '--mem-usage', action='store_true', required=False, help=mem_usage_help)
|
||||
|
||||
# Options to only display to Hypervisors
|
||||
if self.amdsmi_helpers.is_hypervisor():
|
||||
if self.helpers.is_hypervisor():
|
||||
metric_parser.add_argument('-s', '--schedule', action='store_true', required=False, help=schedule_help)
|
||||
metric_parser.add_argument('-G', '--guard', action='store_true', required=False, help=guard_help)
|
||||
metric_parser.add_argument('-u', '--guest', action='store_true', required=False, help=guest_help)
|
||||
|
||||
|
||||
def _add_process_parser(self, subparsers, func):
|
||||
if self.amdsmi_helpers.is_hypervisor():
|
||||
if self.helpers.is_hypervisor():
|
||||
# Don't add this subparser on Hypervisors
|
||||
# This subparser is only available to Guest and Baremetal systems
|
||||
return
|
||||
@@ -495,9 +486,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
pid_help = "Gets all process information about the specified process based on Process ID"
|
||||
name_help = "Gets all process information about the specified process based on Process Name.\
|
||||
\nIf multiple processes have the same name information is returned for all of them."
|
||||
watch_help = "Reprint the command in a loop of Interval seconds"
|
||||
watch_time_help = "The total time to watch the given command"
|
||||
iterations_help = "Total number of iterations to loop on the given command"
|
||||
|
||||
|
||||
# Create process subparser
|
||||
process_parser = subparsers.add_parser('process', help=process_help, description=process_subcommand_help)
|
||||
@@ -509,21 +498,18 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
# Add Device args
|
||||
self._add_device_arguments(process_parser, required=False)
|
||||
|
||||
# Add Watch args
|
||||
self._add_watch_arguments(process_parser)
|
||||
|
||||
# Optional Args
|
||||
process_parser.add_argument('-G', '--general', action='store_true', required=False, help=general_help)
|
||||
process_parser.add_argument('-e', '--engine', action='store_true', required=False, help=engine_help)
|
||||
process_parser.add_argument('-p', '--pid', action='store', type=self._positive_int, required=False, help=pid_help)
|
||||
process_parser.add_argument('-n', '--name', action='store', required=False, help=name_help)
|
||||
process_parser.add_argument('-w', '--watch', action='store', metavar='Interval',
|
||||
type=self._positive_int, required=False, help=watch_help)
|
||||
process_parser.add_argument('-W', '--watch_time', action=self._check_watch_selected(), metavar='Duration',
|
||||
type=self._positive_int, required=False, help=watch_time_help)
|
||||
process_parser.add_argument('-i', '--iterations', action=self._check_watch_selected(), metavar='Iterations',
|
||||
type=self._positive_int, required=False, help=iterations_help)
|
||||
|
||||
|
||||
def _add_profile_parser(self, subparsers, func):
|
||||
if not (self.amdsmi_helpers.is_windows() and self.amdsmi_helpers.is_hypervisor()):
|
||||
if not (self.helpers.is_windows() and self.helpers.is_hypervisor()):
|
||||
# This subparser only applies to Hypervisors
|
||||
return
|
||||
|
||||
@@ -544,7 +530,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
|
||||
def _add_event_parser(self, subparsers, func):
|
||||
if self.amdsmi_helpers.is_linux() and not self.amdsmi_helpers.is_virtual_os():
|
||||
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
|
||||
# This subparser only applies to Linux BareMetal & Linux Hypervisors, NOT Linux Guest
|
||||
return
|
||||
|
||||
@@ -566,7 +552,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
def _add_topology_parser(self, subparsers, func):
|
||||
return
|
||||
if not(self.amdsmi_helpers.is_baremetal() and self.amdsmi_helpers.is_linux()):
|
||||
if not(self.helpers.is_baremetal() and self.helpers.is_linux()):
|
||||
# This subparser is only applicable to Baremetal Linux
|
||||
return
|
||||
|
||||
@@ -599,11 +585,11 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
topology_parser.add_argument('-o', '--hops', action='store_true', required=False, help=hops_help)
|
||||
topology_parser.add_argument('-t', '--type', action='store_true', required=False, help=type_help)
|
||||
topology_parser.add_argument('-n', '--numa', action='store_true', required=False, help=numa_help)
|
||||
topology_parser.add_argument('-b', '--numa_bw', action='store_true', required=False, help=numa_bw_help)
|
||||
topology_parser.add_argument('-b', '--numa-bw', action='store_true', required=False, help=numa_bw_help)
|
||||
|
||||
|
||||
def _add_set_value_parser(self, subparsers, func):
|
||||
if not(self.amdsmi_helpers.is_baremetal() and self.amdsmi_helpers.is_linux()):
|
||||
if not(self.helpers.is_baremetal() and self.helpers.is_linux()):
|
||||
# This subparser is only applicable to Baremetal Linux
|
||||
return
|
||||
|
||||
@@ -628,7 +614,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_mem_overdrive_help = "Set memory overclock overdrive level ***DEPRECATED IN NEWER KERNEL VERSIONS (use --mlevel instead)***"
|
||||
set_power_overdrive_help = "Set the maximum GPU power using power overdrive in Watts"
|
||||
set_profile_help = "Set power profile level (#) or a quoted string of custom profile attributes"
|
||||
set_perf_det_help = "Set GPU clock frequency limit to get minimal performance variation"
|
||||
set_perf_det_help = "Sets GPU clock frequency limit and performance level to determinism to get minimal performance variation"
|
||||
|
||||
# Create set_value subparser
|
||||
set_value_parser = subparsers.add_parser('set', help=set_value_help, description=set_value_subcommand_help)
|
||||
@@ -641,7 +627,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
self._add_device_arguments(set_value_parser, required=True)
|
||||
|
||||
# Optional Args
|
||||
set_value_parser.add_argument('-c', '--clock', action=self._validate_set_clock(True), nargs='+', type=self._positive_int, required=False, help=set_clock_help, metavar=('CLK_TYPE', 'CLK_LEVELS'))
|
||||
set_value_parser.add_argument('-c', '--clock', action=self._validate_set_clock(True), nargs='+', required=False, help=set_clock_help, metavar=('CLK_TYPE', 'CLK_LEVELS'))
|
||||
set_value_parser.add_argument('-s', '--sclk', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_sclk_help, metavar='CLK_LEVELS')
|
||||
set_value_parser.add_argument('-m', '--mclk', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_mclk_help, metavar='CLK_LEVELS')
|
||||
set_value_parser.add_argument('-p', '--pcie', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_pcie_help, metavar='CLK_LEVELS')
|
||||
@@ -651,7 +637,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_value_parser.add_argument('-r', '--srange', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_srange_help, metavar=('SCLKMIN', 'SCLKMAX'))
|
||||
set_value_parser.add_argument('-R', '--mrange', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_mrange_help, metavar=('MCLKMIN', 'MCLKMAX'))
|
||||
set_value_parser.add_argument('-f', '--fan', action=self._validate_fan_speed(), required=False, help=set_fan_help, metavar='%')
|
||||
set_value_parser.add_argument('-l', '--perflevel', action='store', choices=['auto', 'low', 'high', 'manual'], required=False, help=set_perf_level_help, metavar='LEVEL')
|
||||
set_value_parser.add_argument('-l', '--perflevel', action='store', choices=self.helpers.get_perf_levels()[0], type=str.upper, required=False, help=set_perf_level_help, metavar='LEVEL')
|
||||
set_value_parser.add_argument('-o', '--overdrive', action=self._validate_overdrive_percent(), required=False, help=set_overdrive_help, metavar='%')
|
||||
set_value_parser.add_argument('-O', '--memoverdrive', action=self._validate_overdrive_percent(), required=False, help=set_mem_overdrive_help, metavar='%')
|
||||
set_value_parser.add_argument('-w', '--poweroverdrive', action=self._prompt_spec_warning(), type=self._positive_int, required=False, help=set_power_overdrive_help, metavar="WATTS")
|
||||
@@ -661,13 +647,14 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
def _validate_set_clock(self, validate_clock_type=True):
|
||||
""" Validate Clock input"""
|
||||
amdsmi_helpers = self.amdsmi_helpers
|
||||
amdsmi_helpers = self.helpers
|
||||
class _ValidateClockType(argparse.Action):
|
||||
# Checks the values
|
||||
# Checks the clock type and clock values
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
if validate_clock_type:
|
||||
clock_type = values[0]
|
||||
valid_clock_type, clock_types = amdsmi_helpers.is_valid_clock_type(clock_type=clock_type)
|
||||
clock_types = amdsmi_helpers.get_clock_types()[0]
|
||||
valid_clock_type, amdsmi_clock_type = amdsmi_helpers.validate_clock_type(input_clock_type=clock_type)
|
||||
if not valid_clock_type:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{clock_type}' needs to be a valid clock type:{clock_types}")
|
||||
|
||||
@@ -682,7 +669,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
freq_bitmask |= (1 << level)
|
||||
|
||||
if validate_clock_type:
|
||||
setattr(args, self.dest, (clock_type, freq_bitmask))
|
||||
setattr(args, self.dest, (amdsmi_clock_type, freq_bitmask))
|
||||
else:
|
||||
setattr(args, self.dest, freq_bitmask)
|
||||
return _ValidateClockType
|
||||
@@ -690,7 +677,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
def _prompt_spec_warning(self):
|
||||
""" Prompt out of spec warning"""
|
||||
amdsmi_helpers = self.amdsmi_helpers
|
||||
amdsmi_helpers = self.helpers
|
||||
class _PromptSpecWarning(argparse.Action):
|
||||
# Checks the values
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
@@ -701,57 +688,58 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
def _validate_fan_speed(self):
|
||||
""" Validate fan speed input"""
|
||||
amdsmi_helpers = self.amdsmi_helpers
|
||||
amdsmi_helpers = self.helpers
|
||||
class _ValidateFanSpeed(argparse.Action):
|
||||
# Checks the values
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
|
||||
# Convert percentage to fan level
|
||||
if isinstance(values, str):
|
||||
try:
|
||||
values = int(values[:-1]) // 100 * 255
|
||||
except ValueError as e:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be 0-255 or 0-100%")
|
||||
|
||||
# Store the fan level as fan_speed
|
||||
if isinstance(values, int):
|
||||
if 0 <= values <= 255:
|
||||
amdsmi_helpers.confirm_out_of_spec_warning()
|
||||
setattr(args, self.dest, values)
|
||||
else:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be 0-255 or 0-100%")
|
||||
|
||||
# Convert percentage to fan level
|
||||
if '%' in values:
|
||||
try:
|
||||
amdsmi_helpers.confirm_out_of_spec_warning()
|
||||
values = int(int(values[:-1]) / 100 * 255)
|
||||
setattr(args, self.dest, values)
|
||||
except ValueError as e:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be 0-100%")
|
||||
else: # Store the fan level as fan_speed
|
||||
values = int(values)
|
||||
if 0 <= values <= 255:
|
||||
amdsmi_helpers.confirm_out_of_spec_warning()
|
||||
setattr(args, self.dest, values)
|
||||
else:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be 0-255")
|
||||
else:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be 0-255 or 0-100%")
|
||||
return _ValidateFanSpeed
|
||||
|
||||
|
||||
def _validate_overdrive_percent(self):
|
||||
""" Validate overdrive percentage input"""
|
||||
amdsmi_helpers = self.amdsmi_helpers
|
||||
amdsmi_helpers = self.helpers
|
||||
class _ValidateOverdrivePercent(argparse.Action):
|
||||
# Checks the values
|
||||
def __call__(self, parser, args, values, option_string=None):
|
||||
if isinstance(values, str):
|
||||
try:
|
||||
if values[-1] == '%':
|
||||
values = int(values[:-1])
|
||||
over_drive_percent = int(values[:-1])
|
||||
else:
|
||||
values = int(values)
|
||||
except ValueError as e:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be 0-20 or 0-20%")
|
||||
over_drive_percent = int(values)
|
||||
|
||||
if isinstance(values, int):
|
||||
if 0 <= values <= 20:
|
||||
over_drive_percent = values
|
||||
else:
|
||||
if 0 <= over_drive_percent <= 20:
|
||||
amdsmi_helpers.confirm_out_of_spec_warning()
|
||||
setattr(args, self.dest, over_drive_percent)
|
||||
else:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be within range 0-20 or 0-20%")
|
||||
except ValueError:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be 0-20 or 0-20%")
|
||||
|
||||
amdsmi_helpers.confirm_out_of_spec_warning()
|
||||
setattr(args, self.dest, over_drive_percent)
|
||||
else:
|
||||
raise argparse.ArgumentError(self, f"Invalid argument: '{values}' needs to be 0-20 or 0-20%")
|
||||
return _ValidateOverdrivePercent
|
||||
|
||||
|
||||
def _add_reset_parser(self, subparsers, func):
|
||||
if not(self.amdsmi_helpers.is_baremetal() and self.amdsmi_helpers.is_linux()):
|
||||
if not(self.helpers.is_baremetal() and self.helpers.is_linux()):
|
||||
# This subparser is only applicable to Baremetal Linux
|
||||
return
|
||||
|
||||
@@ -823,6 +811,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
rocm_smi_parser.add_argument('-l', '--load', action=self._check_input_file_path(), type=str, required=False, help=load_help)
|
||||
rocm_smi_parser.add_argument('-s', '--save', action=self._check_output_file_path(), type=str, required=False, help=save_help)
|
||||
|
||||
rocm_smi_parser.add_argument('-b', '--showbw', action='store_true', required=False, help=showbw_help)
|
||||
rocm_smi_parser.add_argument('-t', '--showtempgraph', action='store_true', required=False, help=showtempgraph_help)
|
||||
rocm_smi_parser.add_argument('-m', '--showmclkrange', action='store_true', required=False, help=showmclkrange_help)
|
||||
rocm_smi_parser.add_argument('-c', '--showsclkrange', action='store_true', required=False, help=showsclkrange_help)
|
||||
@@ -832,13 +821,10 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
rocm_smi_parser.add_argument('-v', '--showclkvolt', action='store_true', required=False, help=showclkvolt_help)
|
||||
rocm_smi_parser.add_argument('-f', '--showclkfrq', action='store_true', required=False, help=showclkfrq_help)
|
||||
|
||||
|
||||
def error(self, message):
|
||||
args = sys.argv[1:]
|
||||
outputformat = "human"
|
||||
if "--json" in args or "--j" in args:
|
||||
outputformat = "json"
|
||||
elif "--csv" in args or "-c" in args:
|
||||
outputformat = "csv"
|
||||
outputformat = self.helpers.get_output_format()
|
||||
|
||||
if "argument : invalid choice: " in message:
|
||||
l = len("argument : invalid choice: ") + 1
|
||||
message = message[l:]
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "0.0.2"
|
||||
__version__ = "0.0.3"
|
||||
@@ -2437,7 +2437,7 @@ def amdsmi_dev_get_ecc_count(
|
||||
|
||||
ec = amdsmi_wrapper.amdsmi_error_count_t()
|
||||
_check_res(
|
||||
amdsmi_wrapper. amdsmi_dev_get_ecc_count(
|
||||
amdsmi_wrapper.amdsmi_dev_get_ecc_count(
|
||||
device_handle, block, ctypes.byref(ec))
|
||||
)
|
||||
|
||||
|
||||
@@ -168,17 +168,17 @@ def char_pointer_cast(string, encoding='utf-8'):
|
||||
|
||||
_libraries = {}
|
||||
from pathlib import Path
|
||||
libamd_smi_optrocm = Path(__file__).parents[3] / "/lib/libamd_smi.so"
|
||||
libamd_smi_cpack = Path("@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/libamd_smi.so")
|
||||
libamd_smi_optrocm = Path("/opt/rocm/lib/libamd_smi.so")
|
||||
libamd_smi_parent_dir = Path(__file__).resolve().parent / "libamd_smi.so"
|
||||
libamd_smi_cwd = Path.cwd()
|
||||
libamd_smi_cwd = Path.cwd() / "libamd_smi.so"
|
||||
|
||||
if libamd_smi_cpack.is_file():
|
||||
# try to find library in install directory provided by CMake
|
||||
_libraries['libamd_smi.so'] = ctypes.CDLL(libamd_smi_cpack)
|
||||
elif libamd_smi_optrocm.is_file():
|
||||
if libamd_smi_optrocm.is_file():
|
||||
# try /opt/rocm/lib as a fallback
|
||||
_libraries['libamd_smi.so'] = ctypes.CDLL(libamd_smi_optrocm)
|
||||
elif libamd_smi_cpack.is_file():
|
||||
# try to find library in install directory provided by CMake
|
||||
_libraries['libamd_smi.so'] = ctypes.CDLL(libamd_smi_cpack)
|
||||
elif libamd_smi_parent_dir.is_file():
|
||||
# try to fall back to parent directory
|
||||
_libraries['libamd_smi.so'] = ctypes.CDLL(libamd_smi_parent_dir)
|
||||
@@ -187,7 +187,6 @@ else:
|
||||
_libraries['libamd_smi.so'] = ctypes.CDLL(libamd_smi_cwd)
|
||||
|
||||
|
||||
|
||||
# values for enumeration 'c__EA_amdsmi_init_flags_t'
|
||||
c__EA_amdsmi_init_flags_t__enumvalues = {
|
||||
0: 'AMDSMI_INIT_ALL_DEVICES',
|
||||
|
||||
@@ -10,7 +10,7 @@ name = "amdsmi"
|
||||
authors = [
|
||||
{name = "AMD", email = "amd-smi.support@amd.com"},
|
||||
]
|
||||
version = '0.1'
|
||||
version = '0.3'
|
||||
license = {file = "amdsmi/LICENSE"}
|
||||
readme = {file = "amdsmi/README.md", content-type = "text/markdown"}
|
||||
description = "SMI LIB - AMD GPU Monitoring Library"
|
||||
|
||||
@@ -106,17 +106,17 @@ def main():
|
||||
library_path = os.path.join(os.path.dirname(__file__), library)
|
||||
line_to_replace = "_libraries['{}'] = ctypes.CDLL('{}')".format(library_name, library_path)
|
||||
new_line = f"""from pathlib import Path
|
||||
libamd_smi_optrocm = Path(__file__).parents[3] / "/lib/{library_name}"
|
||||
libamd_smi_cpack = Path("@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@/{library_name}")
|
||||
libamd_smi_optrocm = Path("/opt/rocm/lib/{library_name}")
|
||||
libamd_smi_parent_dir = Path(__file__).resolve().parent / "{library_name}"
|
||||
libamd_smi_cwd = Path.cwd()
|
||||
libamd_smi_cwd = Path.cwd() / "{library_name}"
|
||||
|
||||
if libamd_smi_cpack.is_file():
|
||||
# try to find library in install directory provided by CMake
|
||||
_libraries['{library_name}'] = ctypes.CDLL(libamd_smi_cpack)
|
||||
elif libamd_smi_optrocm.is_file():
|
||||
if libamd_smi_optrocm.is_file():
|
||||
# try /opt/rocm/lib as a fallback
|
||||
_libraries['{library_name}'] = ctypes.CDLL(libamd_smi_optrocm)
|
||||
elif libamd_smi_cpack.is_file():
|
||||
# try to find library in install directory provided by CMake
|
||||
_libraries['{library_name}'] = ctypes.CDLL(libamd_smi_cpack)
|
||||
elif libamd_smi_parent_dir.is_file():
|
||||
# try to fall back to parent directory
|
||||
_libraries['{library_name}'] = ctypes.CDLL(libamd_smi_parent_dir)
|
||||
|
||||
Ссылка в новой задаче
Block a user