Removed cmdline options
Signed-off-by: Maisam Arif <maisarif@amd.com>
Change-Id: I3f98829e988468d657f280db6765f2f5e28ff5f1
[ROCm/amdsmi commit: d5ad387252]
This commit is contained in:
committed by
Maisam Arif
parent
907fcc53a1
commit
9f4faaabd8
@@ -185,8 +185,8 @@ amd-smi metric --help
|
||||
usage: amd-smi metric [-h] [--json | --csv] [--file FILE]
|
||||
[--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [-g GPU [GPU ...]]
|
||||
[-w loop_time] [-W total_loop_time] [-i number_of_iterations] [-u]
|
||||
[-b] [-p] [-c] [-t] [-e] [-P] [-V] [-f] [-C] [-o] [-M] [-l] [-r]
|
||||
[-x] [-E] [-m]
|
||||
[-b] [-p] [-c] [-t] [-e] [-P] [-f] [-C] [-o] [-l] [-r] [-x]
|
||||
[-E] [-m]
|
||||
|
||||
If no GPU is specified, returns metric information for all GPUs on the system.
|
||||
If no metric argument is provided all metric information will be displayed.
|
||||
@@ -205,11 +205,9 @@ Metric arguments:
|
||||
-t, --temperature Current temperatures
|
||||
-e, --ecc Number of ECC errors
|
||||
-P, --pcie Current PCIe speed and width
|
||||
-V, --voltage Current GPU voltages
|
||||
-f, --fan Current fan speed
|
||||
-C, --voltage-curve Display voltage curve
|
||||
-o, --overdrive Current GPU clock overdrive level
|
||||
-M, --mem-overdrive Current memory clock overdrive level
|
||||
-l, --perf-level Current DPM performance level
|
||||
-r, --replay-count PCIe replay count
|
||||
-x, --xgmi-err XGMI error information since last read
|
||||
@@ -283,11 +281,7 @@ Command Modifiers:
|
||||
amd-smi set --help
|
||||
usage: amd-smi set [-h] [--json | --csv] [--file FILE]
|
||||
[--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}] -g GPU [GPU ...]
|
||||
[-c CLK_TYPE [CLK_LEVELS ...]] [-s CLK_LEVELS [CLK_LEVELS ...]]
|
||||
[-m CLK_LEVELS [CLK_LEVELS ...]] [-p CLK_LEVELS [CLK_LEVELS ...]]
|
||||
[-S SCLKLEVEL SCLK] [-M MCLKLEVEL MCLK] [-V POINT SCLK SVOLT]
|
||||
[-r SCLKMIN SCLKMAX] [-R MCLKMIN MCLKMAX] [-f %] [-l LEVEL] [-o %]
|
||||
[-O %] [-w WATTS] [-P SETPROFILE] [-d SCLKMAX]
|
||||
[-f %] [-l LEVEL] [-P SETPROFILE] [-d SCLKMAX]
|
||||
|
||||
A GPU must be specified to set a configuration.
|
||||
A set argument must be provided; Multiple set arguments are accepted
|
||||
@@ -296,20 +290,8 @@ Set Arguments:
|
||||
-h, --help show this help message and exit
|
||||
-g GPU [GPU ...], --gpu GPU [GPU ...] Select a GPU ID, BDF, or UUID from the possible choices:
|
||||
ID:0 | BDF:0000:23:00.0 | UUID:ffffffff-ffff-ffff-ffff-ffffffffffff
|
||||
-c CLK_TYPE [CLK_LEVELS ...], --clock CLK_TYPE [CLK_LEVELS ...] Sets clock frequency levels for specified clocks
|
||||
-s CLK_LEVELS [CLK_LEVELS ...], --sclk CLK_LEVELS [CLK_LEVELS ...] Sets GPU clock frequency levels
|
||||
-m CLK_LEVELS [CLK_LEVELS ...], --mclk CLK_LEVELS [CLK_LEVELS ...] Sets memory clock frequency levels
|
||||
-p CLK_LEVELS [CLK_LEVELS ...], --pcie CLK_LEVELS [CLK_LEVELS ...] Sets PCIe Bandwidth
|
||||
-S SCLKLEVEL SCLK, --slevel SCLKLEVEL SCLK Change GPU clock frequency and voltage for a specific level
|
||||
-M MCLKLEVEL MCLK, --mlevel MCLKLEVEL MCLK Change GPU memory frequency and voltage for a specific level
|
||||
-V POINT SCLK SVOLT, --vc POINT SCLK SVOLT Change SCLK voltage curve for a specified point
|
||||
-r SCLKMIN SCLKMAX, --srange SCLKMIN SCLKMAX Sets min and max SCLK speed
|
||||
-R MCLKMIN MCLKMAX, --mrange MCLKMIN MCLKMAX Sets min and max MCLK speed
|
||||
-f %, --fan % Sets GPU fan speed (0-255 or 0-100%)
|
||||
-l LEVEL, --perflevel LEVEL Sets performance level
|
||||
-o %, --overdrive % Set GPU overdrive (0-20%) ***DEPRECATED IN NEWER KERNEL VERSIONS (use --slevel instead)***
|
||||
-O %, --memoverdrive % Set memory overclock overdrive level ***DEPRECATED IN NEWER KERNEL VERSIONS (use --mlevel instead)***
|
||||
-w WATTS, --poweroverdrive WATTS Set the maximum GPU power using power overdrive in Watts
|
||||
-P SETPROFILE, --profile SETPROFILE Set power profile level (#) or a quoted string of custom profile attributes
|
||||
-d SCLKMAX, --perfdeterminism SCLKMAX Sets GPU clock frequency limit and performance level to determinism to get minimal performance variation
|
||||
|
||||
@@ -324,7 +306,7 @@ Command Modifiers:
|
||||
amd-smi reset --help
|
||||
usage: amd-smi reset [-h] [--json | --csv] [--file FILE]
|
||||
[--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}] -g GPU [GPU ...]
|
||||
[-G] [-c] [-f] [-p] [-o] [-x] [-d]
|
||||
[-G] [-c] [-f] [-p] [-x] [-d]
|
||||
|
||||
A GPU must be specified to reset a configuration.
|
||||
A reset argument must be provided; Multiple reset arguments are accepted
|
||||
@@ -337,7 +319,6 @@ Reset Arguments:
|
||||
-c, --clocks Reset clocks and overdrive to default
|
||||
-f, --fans Reset fans to automatic (driver) control
|
||||
-p, --profile Reset power profile back to default
|
||||
-o, --poweroverdrive Set the maximum GPU power back to the device default state
|
||||
-x, --xgmierr Reset XGMI error counts
|
||||
-d, --perfdeterminism Disable performance determinism
|
||||
|
||||
|
||||
@@ -302,7 +302,7 @@ class AMDSMICommands():
|
||||
unit = 'W'
|
||||
power_limit = f"{power_limit} {unit}"
|
||||
|
||||
unit = 'C'
|
||||
unit = '\N{DEGREE SIGN}C'
|
||||
temp_edge_limit = f"{temp_edge_limit} {unit}"
|
||||
temp_junction_limit = f"{temp_junction_limit} {unit}"
|
||||
temp_vram_limit = f"{temp_vram_limit} {unit}"
|
||||
@@ -594,8 +594,8 @@ class AMDSMICommands():
|
||||
|
||||
def metric(self, args, multiple_devices=False, watching_output=False, gpu=None,
|
||||
usage=None, watch=None, watch_time=None, iterations=None, fb_usage=None, power=None,
|
||||
clock=None, temperature=None, ecc=None, ecc_block=None, pcie=None, voltage=None,
|
||||
fan=None, voltage_curve=None, overdrive=None, mem_overdrive=None, perf_level=None,
|
||||
clock=None, temperature=None, ecc=None, ecc_block=None, pcie=None,
|
||||
fan=None, voltage_curve=None, overdrive=None, perf_level=None,
|
||||
replay_count=None, xgmi_err=None, energy=None, mem_usage=None):
|
||||
"""Get Metric information for target gpu
|
||||
|
||||
@@ -615,11 +615,9 @@ class AMDSMICommands():
|
||||
ecc (bool, optional): Value override for args.ecc. Defaults to None.
|
||||
ecc_block (bool, optional): Value override for args.ecc_block. Defaults to None.
|
||||
pcie (bool, optional): Value override for args.pcie. Defaults to None.
|
||||
voltage (bool, optional): Value override for args.voltage. Defaults to None.
|
||||
fan (bool, optional): Value override for args.fan. Defaults to None.
|
||||
voltage_curve (bool, optional): Value override for args.voltage_curve. Defaults to None.
|
||||
overdrive (bool, optional): Value override for args.overdrive. Defaults to None.
|
||||
mem_overdrive (bool, optional): Value override for args.mem_overdrive. Defaults to None.
|
||||
perf_level (bool, optional): Value override for args.perf_level. Defaults to None.
|
||||
replay_count (bool, optional): Value override for args.replay_count. Defaults to None.
|
||||
xgmi_err (bool, optional): Value override for args.xgmi_err. Defaults to None.
|
||||
@@ -663,16 +661,12 @@ class AMDSMICommands():
|
||||
args.ecc_block = ecc_block
|
||||
if pcie:
|
||||
args.pcie = pcie
|
||||
if voltage:
|
||||
args.voltage = voltage
|
||||
if fan:
|
||||
args.fan = fan
|
||||
if voltage_curve:
|
||||
args.voltage_curve = voltage_curve
|
||||
if overdrive:
|
||||
args.overdrive = overdrive
|
||||
if mem_overdrive:
|
||||
args.mem_overdrive = mem_overdrive
|
||||
if perf_level:
|
||||
args.perf_level = perf_level
|
||||
if xgmi_err:
|
||||
@@ -726,12 +720,14 @@ class AMDSMICommands():
|
||||
args.fb_usage = args.replay_count = args.mem_usage = self.all_arguments = True
|
||||
|
||||
if self.helpers.is_linux() and self.helpers.is_baremetal():
|
||||
if not any([args.usage, args.fb_usage, args.power, args.clock, args.temperature, args.ecc, args.ecc_block, args.pcie, args.voltage, args.fan,
|
||||
args.voltage_curve, args.overdrive, args.mem_overdrive, args.perf_level,
|
||||
args.replay_count, args.xgmi_err, args.energy, args.mem_usage]):
|
||||
args.usage = args.fb_usage = args.power = args.clock = args.temperature = args.ecc = args.ecc_block = args.pcie = args.voltage = args.fan = \
|
||||
args.voltage_curve = args.overdrive = args.mem_overdrive = args.perf_level = \
|
||||
args.replay_count = args.xgmi_err = args.energy = args.mem_usage = self.all_arguments = True
|
||||
if not any([args.usage, args.fb_usage, args.power, args.clock, args.temperature,
|
||||
args.ecc, args.ecc_block, args.pcie, args.fan, args.voltage_curve,
|
||||
args.overdrive, args.perf_level, args.replay_count, args.xgmi_err,
|
||||
args.energy, args.mem_usage]):
|
||||
args.usage = args.fb_usage = args.power = args.clock = args.temperature = \
|
||||
args.ecc = args.ecc_block = args.pcie = args.fan = args.voltage_curve = \
|
||||
args.overdrive = args.perf_level = args.replay_count = args.xgmi_err = \
|
||||
args.energy = args.mem_usage = self.all_arguments = True
|
||||
|
||||
# Add timestamp and store values for specified arguments
|
||||
values_dict = {}
|
||||
@@ -905,7 +901,6 @@ class AMDSMICommands():
|
||||
values_dict['ecc_block'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
if args.pcie:
|
||||
try:
|
||||
pcie_link_status = amdsmi_interface.amdsmi_get_pcie_link_caps(args.gpu)
|
||||
@@ -922,21 +917,6 @@ class AMDSMICommands():
|
||||
values_dict['pcie'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
|
||||
if args.voltage:
|
||||
try:
|
||||
volt_metric = amdsmi_interface.amdsmi_get_gpu_volt_metric(
|
||||
args.gpu, amdsmi_interface.AmdSmiVoltageType.VDDGFX, amdsmi_interface.AmdSmiVoltageMetric.CURRENT)
|
||||
|
||||
if self.logger.is_human_readable_format():
|
||||
unit = 'mV'
|
||||
volt_metric = f"{volt_metric} {unit}"
|
||||
|
||||
values_dict['voltage'] = volt_metric
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
values_dict['voltage'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.fan:
|
||||
try:
|
||||
fan_speed = amdsmi_interface.amdsmi_get_gpu_fan_speed(args.gpu, 0)
|
||||
@@ -1000,8 +980,6 @@ class AMDSMICommands():
|
||||
values_dict['overdrive'] = e.get_error_info()
|
||||
if not self.all_arguments:
|
||||
raise e
|
||||
if args.mem_overdrive:
|
||||
values_dict['mem_overdrive'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.STATUS_NOT_YET_IMPLEMENTED).err_info
|
||||
if args.perf_level:
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu)
|
||||
@@ -1462,30 +1440,16 @@ class AMDSMICommands():
|
||||
self.logger.print_output(multiple_device_enabled=True)
|
||||
|
||||
|
||||
def set_value(self, args, multiple_devices=False, gpu=None, clock=None, sclk=None, mclk=None,
|
||||
pcie=None, slevel=None, mlevel=None, vc=None, srange=None, mrange=None,
|
||||
fan=None, perflevel=None, overdrive=None, memoverdrive=None,
|
||||
poweroverdrive=None, profile=None, perfdeterminism=None):
|
||||
def set_value(self, args, multiple_devices=False, gpu=None, fan=None, perflevel=None,
|
||||
profile=None, perfdeterminism=None):
|
||||
"""Issue set commands to target gpu(s)
|
||||
|
||||
Args:
|
||||
args (Namespace): Namespace containing the parsed CLI args
|
||||
multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
|
||||
gpu (device_handle, optional): device_handle for target device. Defaults to None.
|
||||
clock ((amdsmi_interface.AmdSmiClkType, int), optional): Value override for args.clock. Defaults to None.
|
||||
sclk (int, optional): Value override for args.sclk. Defaults to None.
|
||||
mclk (int, optional): Value override for args.mclk. Defaults to None.
|
||||
pcie (int, optional): Value override for args.pcie. Defaults to None.
|
||||
slevel ((amdsmi_interface.AmdSmiFreqInd, int), optional): Value override for args.slevel. Defaults to None.
|
||||
mlevel ((amdsmi_interface.AmdSmiFreqInd, int), optional): Value override for args.mlevel. Defaults to None.
|
||||
vc ((int, int, int), optional): Value override for args.vc. Defaults to None.
|
||||
srange ((int, int), optional): Value override for args.srange. Defaults to None.
|
||||
mrange ((int, int), optional): Value override for args.mrange. Defaults to None.
|
||||
fan (int, optional): Value override for args.fan. Defaults to None.
|
||||
perflevel (amdsmi_interface.AmdSmiDevPerfLevel, optional): Value override for args.perflevel. Defaults to None.
|
||||
overdrive (int, optional): Value override for args.overdrive. Defaults to None.
|
||||
memoverdrive (int, optional): Value override for args.memoverdrive. Defaults to None.
|
||||
poweroverdrive (int, optional): Value override for args.poweroverdrive. Defaults to None.
|
||||
profile (bool, optional): Value override for args.profile. Defaults to None.
|
||||
perfdeterminism (int, optional): Value override for args.perfdeterminism. Defaults to None.
|
||||
|
||||
@@ -1499,34 +1463,10 @@ class AMDSMICommands():
|
||||
# Set args.* to passed in arguments
|
||||
if gpu:
|
||||
args.gpu = gpu
|
||||
if clock:
|
||||
args.clock = clock
|
||||
if sclk:
|
||||
args.sclk = sclk
|
||||
if mclk:
|
||||
args.mclk = mclk
|
||||
if pcie:
|
||||
args.pcie = pcie
|
||||
if slevel:
|
||||
args.slevel = slevel
|
||||
if mlevel:
|
||||
args.mlevel = mlevel
|
||||
if vc:
|
||||
args.vc = vc
|
||||
if srange:
|
||||
args.srange = srange
|
||||
if mrange:
|
||||
args.mrange = mrange
|
||||
if fan:
|
||||
args.fan = fan
|
||||
if perflevel:
|
||||
args.perflevel = perflevel
|
||||
if overdrive:
|
||||
args.overdrive = overdrive
|
||||
if memoverdrive:
|
||||
args.memoverdrive = memoverdrive
|
||||
if poweroverdrive:
|
||||
args.poweroverdrive = poweroverdrive
|
||||
if profile:
|
||||
args.profile = profile
|
||||
if perfdeterminism:
|
||||
@@ -1555,178 +1495,6 @@ class AMDSMICommands():
|
||||
gpu_string = f"GPU ID: {gpu_id} BDF:{gpu_bdf}"
|
||||
|
||||
# Handle args
|
||||
if args.clock:
|
||||
clock_type, freq_bitmask = args.clock
|
||||
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
if clock_type != amdsmi_interface.AmdSmiClkType.PCIE:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clock_type, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
else:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_pci_bandwidth(args.gpu, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'clock', f'Successfully set clock frequency bitmask for {clock_type}')
|
||||
if isinstance(args.sclk, int):
|
||||
freq_bitmask = args.sclk
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clock_type, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'sclk', 'Successfully set clock frequency bitmask')
|
||||
if isinstance(args.mclk, int):
|
||||
freq_bitmask = args.mclk
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clock_type, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'mclk', 'Successfully set clock frequency bitmask')
|
||||
if isinstance(args.pcie, int):
|
||||
freq_bitmask = args.pcie
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.PCIE
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_pci_bandwidth(args.gpu, freq_bitmask)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'pcie', 'Successfully set clock frequency bitmask')
|
||||
if isinstance(args.slevel, int):
|
||||
|
||||
level, value = args.slevel
|
||||
level = amdsmi_interface.AmdSmiFreqInd(level)
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_od_clk_info(args.gpu, level, value, clock_type)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'slevel', 'Successfully changed clock frequency')
|
||||
if isinstance(args.mlevel, int):
|
||||
level, value = args.mlevel
|
||||
level = amdsmi_interface.AmdSmiFreqInd(level)
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_od_clk_info(args.gpu, level, value, clock_type)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'mlevel', 'Successfully changed clock frequency')
|
||||
if isinstance(args.vc, int):
|
||||
point, clk, volt = args.vc
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_od_volt_info(args.gpu, point, clk, volt)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the Voltage Curve point {point} to {clk}(MHz) {volt}(mV) on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'vc', f'Successfully set voltage point {point} to {clk}(MHz) {volt}(mV)')
|
||||
if isinstance(args.srange, int):
|
||||
min_value, max_value = args.srange
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.SYS
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_clk_range(args.gpu, min_value, max_value, clock_type)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'srange', f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz)")
|
||||
if isinstance(args.mrange, int):
|
||||
min_value, max_value = args.srange
|
||||
clock_type = amdsmi_interface.AmdSmiClkType.MEM
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_clk_range(args.gpu, min_value, max_value, clock_type)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'mrange', f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz)")
|
||||
if isinstance(args.fan, int):
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_fan_speed(args.gpu, 0, args.fan)
|
||||
@@ -1746,89 +1514,6 @@ class AMDSMICommands():
|
||||
raise ValueError(f"Unable to set performance level {args.perflevel} on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'perflevel', f"Successfully set performance level {args.perflevel}")
|
||||
if isinstance(args.overdrive, int):
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_overdrive_level(args.gpu, args.overdrive)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set overdrive {args.overdrive} to {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'overdrive', f"Successfully to set overdrive level to {args.overdrive}")
|
||||
if isinstance(args.memoverdrive, int):
|
||||
# Check if the performance level is manual, if not then set it to manual
|
||||
try:
|
||||
perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to get performance level of {gpu_string}") from e
|
||||
|
||||
if 'manual' in perf_level.lower():
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'memoverdrive', f"Successfully to set memoverdrive level to {args.memoverdrive}")
|
||||
if isinstance(args.poweroverdrive, int):
|
||||
overdrive_power_cap = args.poweroverdrive
|
||||
try:
|
||||
power_caps = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to get the power cap info for {gpu_string}") from e
|
||||
if overdrive_power_cap == 0:
|
||||
overdrive_power_cap = power_caps['default_power_cap']
|
||||
else:
|
||||
overdrive_power_cap *= 1000000
|
||||
|
||||
if overdrive_power_cap < power_caps['min_power_cap']:
|
||||
raise ValueError(f"Requested power cap: {overdrive_power_cap} is lower than the min power cap: {power_caps['min_power_cap']}")
|
||||
|
||||
if overdrive_power_cap > power_caps['max_power_cap']:
|
||||
raise ValueError(f"Requested power cap: {overdrive_power_cap} is greater than the max power cap: {power_caps['max_power_cap']}")
|
||||
|
||||
if overdrive_power_cap == power_caps['power_cap']:
|
||||
raise ValueError(f"Requested power cap: {overdrive_power_cap} is the same as the current power cap: {power_caps['power_cap']}")
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_power_cap(args.gpu, 0, overdrive_power_cap)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set power cap to {overdrive_power_cap} on {gpu_string}") from e
|
||||
|
||||
try:
|
||||
power_caps = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to get the power cap info for {gpu_string} post set") from e
|
||||
|
||||
if power_caps['power_cap'] == overdrive_power_cap:
|
||||
self.logger.store_output(args.gpu, 'power_cap', f"Successfully set the power cap {overdrive_power_cap}")
|
||||
else:
|
||||
raise ValueError(f"Power cap: {overdrive_power_cap} set failed on {gpu_string}")
|
||||
if args.profile:
|
||||
self.logger.store_output(args.gpu, 'profile', "Not Yet Implemented")
|
||||
if isinstance(args.perfdeterminism, int):
|
||||
@@ -1849,8 +1534,7 @@ class AMDSMICommands():
|
||||
|
||||
|
||||
def reset(self, args, multiple_devices=False, gpu=None, gpureset=None,
|
||||
clocks=None, fans=None, profile=None,
|
||||
poweroverdrive=None, xgmierr=None, perfdeterminism=None):
|
||||
clocks=None, fans=None, profile=None, xgmierr=None, perfdeterminism=None):
|
||||
"""Issue reset commands to target gpu(s)
|
||||
|
||||
Args:
|
||||
@@ -1861,7 +1545,6 @@ class AMDSMICommands():
|
||||
clocks (bool, optional): Value override for args.clocks. Defaults to None.
|
||||
fans (bool, optional): Value override for args.fans. Defaults to None.
|
||||
profile (bool, optional): Value override for args.profile. Defaults to None.
|
||||
poweroverdrive (bool, optional): Value override for args.poweroverdrive. Defaults to None.
|
||||
xgmierr (bool, optional): Value override for args.xgmierr. Defaults to None.
|
||||
perfdeterminism (bool, optional): Value override for args.perfdeterminism. Defaults to None.
|
||||
|
||||
@@ -1883,8 +1566,6 @@ class AMDSMICommands():
|
||||
args.fans = fans
|
||||
if profile:
|
||||
args.profile = profile
|
||||
if poweroverdrive:
|
||||
args.poweroverdrive = poweroverdrive
|
||||
if xgmierr:
|
||||
args.xgmierr = xgmierr
|
||||
if perfdeterminism:
|
||||
|
||||
@@ -25,7 +25,6 @@ import platform
|
||||
import sys
|
||||
import time
|
||||
|
||||
from pathlib import Path
|
||||
from subprocess import run
|
||||
from subprocess import PIPE, STDOUT
|
||||
|
||||
|
||||
@@ -127,6 +127,15 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
path.touch()
|
||||
setattr(args, self.dest, path)
|
||||
elif path.is_file():
|
||||
file_name = str(path)
|
||||
if args.json and str(path).split('.')[-1].lower() != 'json':
|
||||
file_name += ".json"
|
||||
elif args.csv and str(path).split('.')[-1].lower() != 'csv':
|
||||
file_name += ".csv"
|
||||
elif str(path).split('.')[-1].lower() != 'txt':
|
||||
file_name += ".txt"
|
||||
path = Path(file_name)
|
||||
path.touch()
|
||||
setattr(args, self.dest, path)
|
||||
else:
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidFilePathException(path, CheckOutputFilePath.outputformat)
|
||||
@@ -415,13 +424,11 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
ecc_help = "Number of ECC errors"
|
||||
ecc_block_help = "Number of ECC errors per block"
|
||||
pcie_help = "Current PCIe speed and width"
|
||||
voltage_help = "Current GPU voltages"
|
||||
|
||||
# Help text for Arguments only on Linux Baremetal platforms
|
||||
fan_help = "Current fan speed"
|
||||
vc_help = "Display voltage curve"
|
||||
overdrive_help = "Current GPU clock overdrive level"
|
||||
mo_help = "Current memory clock overdrive level"
|
||||
perf_level_help = "Current DPM performance level"
|
||||
replay_count_help = "PCIe replay count"
|
||||
xgmi_err_help = "XGMI error information since last read"
|
||||
@@ -450,7 +457,6 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
if self.helpers.is_virtual_os() or self.helpers.is_baremetal():
|
||||
metric_parser.add_argument('-b', '--fb-usage', action='store_true', required=False, help=fb_usage_help)
|
||||
metric_parser.add_argument('-m', '--mem-usage', action='store_true', required=False, help=mem_usage_help)
|
||||
metric_parser.add_argument('-r', '--replay-count', action='store_true', required=False, help=replay_count_help)
|
||||
|
||||
# Optional Args for Hypervisors and Baremetal systems
|
||||
if self.helpers.is_hypervisor() or self.helpers.is_baremetal():
|
||||
@@ -459,9 +465,8 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
metric_parser.add_argument('-t', '--temperature', action='store_true', required=False, help=temperature_help)
|
||||
metric_parser.add_argument('-e', '--ecc', action='store_true', required=False, help=ecc_help)
|
||||
metric_parser.add_argument('-k', '--ecc-block', action='store_true', required=False, help=ecc_block_help)
|
||||
|
||||
metric_parser.add_argument('-r', '--replay-count', action='store_true', required=False, help=replay_count_help)
|
||||
metric_parser.add_argument('-P', '--pcie', action='store_true', required=False, help=pcie_help)
|
||||
metric_parser.add_argument('-V', '--voltage', action='store_true', required=False, help=voltage_help)
|
||||
metric_parser.add_argument('-u', '--usage', action='store_true', required=False, help=usage_help)
|
||||
|
||||
# Optional Args for Linux Baremetal Systems
|
||||
@@ -469,7 +474,6 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
metric_parser.add_argument('-f', '--fan', action='store_true', required=False, help=fan_help)
|
||||
metric_parser.add_argument('-C', '--voltage-curve', action='store_true', required=False, help=vc_help)
|
||||
metric_parser.add_argument('-o', '--overdrive', action='store_true', required=False, help=overdrive_help)
|
||||
metric_parser.add_argument('-M', '--mem-overdrive', action='store_true', required=False, help=mo_help)
|
||||
metric_parser.add_argument('-l', '--perf-level', action='store_true', required=False, help=perf_level_help)
|
||||
metric_parser.add_argument('-x', '--xgmi-err', action='store_true', required=False, help=xgmi_err_help)
|
||||
metric_parser.add_argument('-E', '--energy', action='store_true', required=False, help=energy_help)
|
||||
@@ -544,7 +548,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
|
||||
def _add_event_parser(self, subparsers, func):
|
||||
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
|
||||
# This subparser only applies to Linux BareMetal & Linux Hypervisors, NOT Linux Guest
|
||||
# This subparser only applies to Linux Hypervisors, NOT Linux Guest
|
||||
return
|
||||
|
||||
# Subparser help text
|
||||
@@ -611,20 +615,8 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_value_optionals_title = "Set Arguments"
|
||||
|
||||
# Help text for Arguments only on Guest and BM platforms
|
||||
set_clock_help = "Sets clock frequency levels for specified clocks"
|
||||
set_sclk_help = "Sets GPU clock frequency levels"
|
||||
set_mclk_help = "Sets memory clock frequency levels"
|
||||
set_pcie_help = "Sets PCIe Bandwith"
|
||||
set_slevel_help = "Change GPU clock frequency and voltage for a specific level"
|
||||
set_mlevel_help = "Change GPU memory frequency and voltage for a specific level"
|
||||
set_vc_help = "Change SCLK voltage curve for a specified point"
|
||||
set_srange_help = "Sets min and max SCLK speed"
|
||||
set_mrange_help = "Sets min and max MCLK speed"
|
||||
set_fan_help = "Sets GPU fan speed (0-255 or 0-100%%)"
|
||||
set_perf_level_help = "Sets performance level"
|
||||
set_overdrive_help = "Set GPU overdrive (0-20%%) ***DEPRECATED IN NEWER KERNEL VERSIONS (use --slevel instead)***"
|
||||
set_mem_overdrive_help = "Set memory overclock overdrive level ***DEPRECATED IN NEWER KERNEL VERSIONS (use --mlevel instead)***"
|
||||
set_power_overdrive_help = "Set the maximum GPU power using power overdrive in Watts"
|
||||
set_profile_help = "Set power profile level (#) or a quoted string of custom profile attributes"
|
||||
set_perf_det_help = "Sets GPU clock frequency limit and performance level to determinism to get minimal performance variation"
|
||||
|
||||
@@ -639,20 +631,8 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
self._add_device_arguments(set_value_parser, required=True)
|
||||
|
||||
# Optional Args
|
||||
set_value_parser.add_argument('-c', '--clock', action=self._validate_set_clock(True), nargs='+', required=False, help=set_clock_help, metavar=('CLK_TYPE', 'CLK_LEVELS'))
|
||||
set_value_parser.add_argument('-s', '--sclk', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_sclk_help, metavar='CLK_LEVELS')
|
||||
set_value_parser.add_argument('-m', '--mclk', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_mclk_help, metavar='CLK_LEVELS')
|
||||
set_value_parser.add_argument('-p', '--pcie', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_pcie_help, metavar='CLK_LEVELS')
|
||||
set_value_parser.add_argument('-S', '--slevel', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_slevel_help, metavar=('SCLKLEVEL', 'SCLK'))
|
||||
set_value_parser.add_argument('-M', '--mlevel', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_mlevel_help, metavar=('MCLKLEVEL', 'MCLK'))
|
||||
set_value_parser.add_argument('-V', '--vc', action=self._prompt_spec_warning(), nargs=3, type=self._positive_int, required=False, help=set_vc_help, metavar=('POINT', 'SCLK', 'SVOLT'))
|
||||
set_value_parser.add_argument('-r', '--srange', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_srange_help, metavar=('SCLKMIN', 'SCLKMAX'))
|
||||
set_value_parser.add_argument('-R', '--mrange', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_mrange_help, metavar=('MCLKMIN', 'MCLKMAX'))
|
||||
set_value_parser.add_argument('-f', '--fan', action=self._validate_fan_speed(), required=False, help=set_fan_help, metavar='%')
|
||||
set_value_parser.add_argument('-l', '--perflevel', action='store', choices=self.helpers.get_perf_levels()[0], type=str.upper, required=False, help=set_perf_level_help, metavar='LEVEL')
|
||||
set_value_parser.add_argument('-o', '--overdrive', action=self._validate_overdrive_percent(), required=False, help=set_overdrive_help, metavar='%')
|
||||
set_value_parser.add_argument('-O', '--memoverdrive', action=self._validate_overdrive_percent(), required=False, help=set_mem_overdrive_help, metavar='%')
|
||||
set_value_parser.add_argument('-w', '--poweroverdrive', action=self._prompt_spec_warning(), type=self._positive_int, required=False, help=set_power_overdrive_help, metavar="WATTS")
|
||||
set_value_parser.add_argument('-P', '--profile', action='store', required=False, help=set_profile_help, metavar='SETPROFILE')
|
||||
set_value_parser.add_argument('-d', '--perfdeterminism', action='store', type=self._positive_int, required=False, help=set_perf_det_help, metavar='SCLKMAX')
|
||||
|
||||
@@ -766,7 +746,6 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
resetclocks_help = "Reset clocks and overdrive to default"
|
||||
resetfans_help = "Reset fans to automatic (driver) control"
|
||||
resetprofile_help = "Reset power profile back to default"
|
||||
resetpoweroverdrive_help = "Set the maximum GPU power back to the device default state"
|
||||
resetxgmierr_help = "Reset XGMI error counts"
|
||||
resetperfdet_help = "Disable performance determinism"
|
||||
|
||||
@@ -785,7 +764,6 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
reset_parser.add_argument('-c', '--clocks', action='store_true', required=False, help=resetclocks_help)
|
||||
reset_parser.add_argument('-f', '--fans', action='store_true', required=False, help=resetfans_help)
|
||||
reset_parser.add_argument('-p', '--profile', action='store_true', required=False, help=resetprofile_help)
|
||||
reset_parser.add_argument('-o', '--poweroverdrive', action='store_true', required=False, help=resetpoweroverdrive_help)
|
||||
reset_parser.add_argument('-x', '--xgmierr', action='store_true', required=False, help=resetxgmierr_help)
|
||||
reset_parser.add_argument('-d', '--perfdeterminism', action='store_true', required=False, help=resetperfdet_help)
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user