From 9f4faaabd8957875d63f31a60ced62a28427b68d Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Mon, 31 Jul 2023 09:31:40 -0500 Subject: [PATCH] Removed cmdline options Signed-off-by: Maisam Arif Change-Id: I3f98829e988468d657f280db6765f2f5e28ff5f1 [ROCm/amdsmi commit: d5ad387252aa9e20cfce0b0b5daed699ab5c4d4d] --- projects/amdsmi/amdsmi_cli/README.md | 27 +- projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 347 +----------------- projects/amdsmi/amdsmi_cli/amdsmi_helpers.py | 1 - projects/amdsmi/amdsmi_cli/amdsmi_parser.py | 44 +-- 4 files changed, 29 insertions(+), 390 deletions(-) diff --git a/projects/amdsmi/amdsmi_cli/README.md b/projects/amdsmi/amdsmi_cli/README.md index 21d9629818..4b805fded2 100644 --- a/projects/amdsmi/amdsmi_cli/README.md +++ b/projects/amdsmi/amdsmi_cli/README.md @@ -185,8 +185,8 @@ amd-smi metric --help usage: amd-smi metric [-h] [--json | --csv] [--file FILE] [--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [-g GPU [GPU ...]] [-w loop_time] [-W total_loop_time] [-i number_of_iterations] [-u] - [-b] [-p] [-c] [-t] [-e] [-P] [-V] [-f] [-C] [-o] [-M] [-l] [-r] - [-x] [-E] [-m] + [-b] [-p] [-c] [-t] [-e] [-P] [-f] [-C] [-o] [-l] [-r] [-x] + [-E] [-m] If no GPU is specified, returns metric information for all GPUs on the system. If no metric argument is provided all metric information will be displayed. @@ -205,11 +205,9 @@ Metric arguments: -t, --temperature Current temperatures -e, --ecc Number of ECC errors -P, --pcie Current PCIe speed and width - -V, --voltage Current GPU voltages -f, --fan Current fan speed -C, --voltage-curve Display voltage curve -o, --overdrive Current GPU clock overdrive level - -M, --mem-overdrive Current memory clock overdrive level -l, --perf-level Current DPM performance level -r, --replay-count PCIe replay count -x, --xgmi-err XGMI error information since last read @@ -283,11 +281,7 @@ Command Modifiers: amd-smi set --help usage: amd-smi set [-h] [--json | --csv] [--file FILE] [--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}] -g GPU [GPU ...] - [-c CLK_TYPE [CLK_LEVELS ...]] [-s CLK_LEVELS [CLK_LEVELS ...]] - [-m CLK_LEVELS [CLK_LEVELS ...]] [-p CLK_LEVELS [CLK_LEVELS ...]] - [-S SCLKLEVEL SCLK] [-M MCLKLEVEL MCLK] [-V POINT SCLK SVOLT] - [-r SCLKMIN SCLKMAX] [-R MCLKMIN MCLKMAX] [-f %] [-l LEVEL] [-o %] - [-O %] [-w WATTS] [-P SETPROFILE] [-d SCLKMAX] + [-f %] [-l LEVEL] [-P SETPROFILE] [-d SCLKMAX] A GPU must be specified to set a configuration. A set argument must be provided; Multiple set arguments are accepted @@ -296,20 +290,8 @@ Set Arguments: -h, --help show this help message and exit -g GPU [GPU ...], --gpu GPU [GPU ...] Select a GPU ID, BDF, or UUID from the possible choices: ID:0 | BDF:0000:23:00.0 | UUID:ffffffff-ffff-ffff-ffff-ffffffffffff - -c CLK_TYPE [CLK_LEVELS ...], --clock CLK_TYPE [CLK_LEVELS ...] Sets clock frequency levels for specified clocks - -s CLK_LEVELS [CLK_LEVELS ...], --sclk CLK_LEVELS [CLK_LEVELS ...] Sets GPU clock frequency levels - -m CLK_LEVELS [CLK_LEVELS ...], --mclk CLK_LEVELS [CLK_LEVELS ...] Sets memory clock frequency levels - -p CLK_LEVELS [CLK_LEVELS ...], --pcie CLK_LEVELS [CLK_LEVELS ...] Sets PCIe Bandwith - -S SCLKLEVEL SCLK, --slevel SCLKLEVEL SCLK Change GPU clock frequency and voltage for a specific level - -M MCLKLEVEL MCLK, --mlevel MCLKLEVEL MCLK Change GPU memory frequency and voltage for a specific level - -V POINT SCLK SVOLT, --vc POINT SCLK SVOLT Change SCLK voltage curve for a specified point - -r SCLKMIN SCLKMAX, --srange SCLKMIN SCLKMAX Sets min and max SCLK speed - -R MCLKMIN MCLKMAX, --mrange MCLKMIN MCLKMAX Sets min and max MCLK speed -f %, --fan % Sets GPU fan speed (0-255 or 0-100%) -l LEVEL, --perflevel LEVEL Sets performance level - -o %, --overdrive % Set GPU overdrive (0-20%) ***DEPRECATED IN NEWER KERNEL VERSIONS (use --slevel instead)*** - -O %, --memoverdrive % Set memory overclock overdrive level ***DEPRECATED IN NEWER KERNEL VERSIONS (use --mlevel instead)*** - -w WATTS, --poweroverdrive WATTS Set the maximum GPU power using power overdrive in Watts -P SETPROFILE, --profile SETPROFILE Set power profile level (#) or a quoted string of custom profile attributes -d SCLKMAX, --perfdeterminism SCLKMAX Sets GPU clock frequency limit and performance level to determinism to get minimal performance variation @@ -324,7 +306,7 @@ Command Modifiers: amd-smi reset --help usage: amd-smi reset [-h] [--json | --csv] [--file FILE] [--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}] -g GPU [GPU ...] - [-G] [-c] [-f] [-p] [-o] [-x] [-d] + [-G] [-c] [-f] [-p] [-x] [-d] A GPU must be specified to reset a configuration. A reset argument must be provided; Multiple reset arguments are accepted @@ -337,7 +319,6 @@ Reset Arguments: -c, --clocks Reset clocks and overdrive to default -f, --fans Reset fans to automatic (driver) control -p, --profile Reset power profile back to default - -o, --poweroverdrive Set the maximum GPU power back to the device default state -x, --xgmierr Reset XGMI error counts -d, --perfdeterminism Disable performance determinism diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index b63f1cd588..178efe1365 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -302,7 +302,7 @@ class AMDSMICommands(): unit = 'W' power_limit = f"{power_limit} {unit}" - unit = 'C' + unit = '\N{DEGREE SIGN}C' temp_edge_limit = f"{temp_edge_limit} {unit}" temp_junction_limit = f"{temp_junction_limit} {unit}" temp_vram_limit = f"{temp_vram_limit} {unit}" @@ -594,8 +594,8 @@ class AMDSMICommands(): def metric(self, args, multiple_devices=False, watching_output=False, gpu=None, usage=None, watch=None, watch_time=None, iterations=None, fb_usage=None, power=None, - clock=None, temperature=None, ecc=None, ecc_block=None, pcie=None, voltage=None, - fan=None, voltage_curve=None, overdrive=None, mem_overdrive=None, perf_level=None, + clock=None, temperature=None, ecc=None, ecc_block=None, pcie=None, + fan=None, voltage_curve=None, overdrive=None, perf_level=None, replay_count=None, xgmi_err=None, energy=None, mem_usage=None): """Get Metric information for target gpu @@ -615,11 +615,9 @@ class AMDSMICommands(): ecc (bool, optional): Value override for args.ecc. Defaults to None. ecc_block (bool, optional): Value override for args.ecc. Defaults to None. pcie (bool, optional): Value override for args.pcie. Defaults to None. - voltage (bool, optional): Value override for args.voltage. Defaults to None. fan (bool, optional): Value override for args.fan. Defaults to None. voltage_curve (bool, optional): Value override for args.voltage_curve. Defaults to None. overdrive (bool, optional): Value override for args.overdrive. Defaults to None. - mem_overdrive (bool, optional): Value override for args.mem_overdrive. Defaults to None. perf_level (bool, optional): Value override for args.perf_level. Defaults to None. replay_count (bool, optional): Value override for args.replay_count. Defaults to None. xgmi_err (bool, optional): Value override for args.xgmi_err. Defaults to None. @@ -663,16 +661,12 @@ class AMDSMICommands(): args.ecc_block = ecc_block if pcie: args.pcie = pcie - if voltage: - args.voltage = voltage if fan: args.fan = fan if voltage_curve: args.voltage_curve = voltage_curve if overdrive: args.overdrive = overdrive - if mem_overdrive: - args.mem_overdrive = mem_overdrive if perf_level: args.perf_level = perf_level if xgmi_err: @@ -726,12 +720,14 @@ class AMDSMICommands(): args.fb_usage = args.replay_count = args.mem_usage = self.all_arguments = True if self.helpers.is_linux() and self.helpers.is_baremetal(): - if not any([args.usage, args.fb_usage, args.power, args.clock, args.temperature, args.ecc, args.ecc_block, args.pcie, args.voltage, args.fan, - args.voltage_curve, args.overdrive, args.mem_overdrive, args.perf_level, - args.replay_count, args.xgmi_err, args.energy, args.mem_usage]): - args.usage = args.fb_usage = args.power = args.clock = args.temperature = args.ecc = args.ecc_block = args.pcie = args.voltage = args.fan = \ - args.voltage_curve = args.overdrive = args.mem_overdrive = args.perf_level = \ - args.replay_count = args.xgmi_err = args.energy = args.mem_usage = self.all_arguments = True + if not any([args.usage, args.fb_usage, args.power, args.clock, args.temperature, + args.ecc, args.ecc_block, args.pcie, args.fan, args.voltage_curve, + args.overdrive, args.perf_level, args.replay_count, args.xgmi_err, + args.energy, args.mem_usage]): + args.usage = args.fb_usage = args.power = args.clock = args.temperature = \ + args.ecc = args.ecc_block = args.pcie = args.fan = args.voltage_curve = \ + args.overdrive = args.perf_level = args.replay_count = args.xgmi_err = \ + args.energy = args.mem_usage = self.all_arguments = True # Add timestamp and store values for specified arguments values_dict = {} @@ -905,7 +901,6 @@ class AMDSMICommands(): values_dict['ecc_block'] = e.get_error_info() if not self.all_arguments: raise e - if args.pcie: try: pcie_link_status = amdsmi_interface.amdsmi_get_pcie_link_caps(args.gpu) @@ -922,21 +917,6 @@ class AMDSMICommands(): values_dict['pcie'] = e.get_error_info() if not self.all_arguments: raise e - - if args.voltage: - try: - volt_metric = amdsmi_interface.amdsmi_get_gpu_volt_metric( - args.gpu, amdsmi_interface.AmdSmiVoltageType.VDDGFX, amdsmi_interface.AmdSmiVoltageMetric.CURRENT) - - if self.logger.is_human_readable_format(): - unit = 'mV' - volt_metric = f"{volt_metric} {unit}" - - values_dict['voltage'] = volt_metric - except amdsmi_exception.AmdSmiLibraryException as e: - values_dict['voltage'] = e.get_error_info() - if not self.all_arguments: - raise e if args.fan: try: fan_speed = amdsmi_interface.amdsmi_get_gpu_fan_speed(args.gpu, 0) @@ -1000,8 +980,6 @@ class AMDSMICommands(): values_dict['overdrive'] = e.get_error_info() if not self.all_arguments: raise e - if args.mem_overdrive: - values_dict['mem_overdrive'] = amdsmi_exception.AmdSmiLibraryException(amdsmi_exception.AmdSmiRetCode.STATUS_NOT_YET_IMPLEMENTED).err_info if args.perf_level: try: perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu) @@ -1462,30 +1440,16 @@ class AMDSMICommands(): self.logger.print_output(multiple_device_enabled=True) - def set_value(self, args, multiple_devices=False, gpu=None, clock=None, sclk=None, mclk=None, - pcie=None, slevel=None, mlevel=None, vc=None, srange=None, mrange=None, - fan=None, perflevel=None, overdrive=None, memoverdrive=None, - poweroverdrive=None, profile=None, perfdeterminism=None): + def set_value(self, args, multiple_devices=False, gpu=None, fan=None, perflevel=None, + profile=None, perfdeterminism=None): """Issue reset commands to target gpu(s) Args: args (Namespace): Namespace containing the parsed CLI args multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False. gpu (device_handle, optional): device_handle for target device. Defaults to None. - clock ((amdsmi_interface.AmdSmiClkType, int), optional): Value override for args.clock. Defaults to None. - sclk (int, optional): Value override for args.sclk. Defaults to None. - mclk (int, optional): Value override for args.mclk. Defaults to None. - pcie (int, optional): Value override for args.pcie. Defaults to None. - slevel ((amdsmi_interface.AmdSmiFreqInd), int), optional): Value override for args.slevel. Defaults to None. - mlevel ((amdsmi_interface.AmdSmiFreqInd), optional): Value override for args.mlevel. Defaults to None. - vc ((int, int, int), optional): Value override for args.vc. Defaults to None. - srange ((int, int), optional): Value override for args.srange. Defaults to None. - mrange ((int, int), optional): Value override for args.mrange. Defaults to None. fan (int, optional): Value override for args.fan. Defaults to None. perflevel (amdsmi_interface.AmdSmiDevPerfLevel, optional): Value override for args.perflevel. Defaults to None. - overdrive (int, optional): Value override for args.overdrive. Defaults to None. - memoverdrive (int, optional): Value override for args.memoverdrive. Defaults to None. - poweroverdrive (int, optional): Value override for args.poweroverdrive. Defaults to None. profile (bool, optional): Value override for args.profile. Defaults to None. perfdeterminism (int, optional): Value override for args.perfdeterminism. Defaults to None. @@ -1499,34 +1463,10 @@ class AMDSMICommands(): # Set args.* to passed in arguments if gpu: args.gpu = gpu - if clock: - args.clock = clock - if sclk: - args.sclk = sclk - if mclk: - args.mclk = mclk - if pcie: - args.pcie = pcie - if slevel: - args.slevel = slevel - if mlevel: - args.mlevel = mlevel - if vc: - args.vc = vc - if srange: - args.srange = srange - if mrange: - args.mrange = mrange if fan: args.fan = fan if perflevel: args.perflevel = perflevel - if overdrive: - args.overdrive = overdrive - if memoverdrive: - args.memoverdrive = memoverdrive - if poweroverdrive: - args.poweroverdrive = poweroverdrive if profile: args.profile = profile if perfdeterminism: @@ -1555,178 +1495,6 @@ class AMDSMICommands(): gpu_string = f"GPU ID: {gpu_id} BDF:{gpu_bdf}" # Handle args - if args.clock: - clock_type, freq_bitmask = args.clock - - # Check if the performance level is manual, if not then set it to manual - try: - perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to get performance level of {gpu_string}") from e - - if 'manual' in perf_level.lower(): - try: - amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e - - if clock_type != amdsmi_interface.AmdSmiClkType.PCIE: - try: - amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clock_type, freq_bitmask) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e - else: - try: - amdsmi_interface.amdsmi_set_gpu_pci_bandwidth(args.gpu, freq_bitmask) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'clock', f'Successfully set clock frequency bitmask for {clock_type}') - if isinstance(args.sclk, int): - freq_bitmask = args.sclk - clock_type = amdsmi_interface.AmdSmiClkType.SYS - # Check if the performance level is manual, if not then set it to manual - try: - perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to get performance level of {gpu_string}") from e - - if 'manual' in perf_level.lower(): - try: - amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e - - try: - amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clock_type, freq_bitmask) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'sclk', 'Successfully set clock frequency bitmask') - if isinstance(args.mclk, int): - freq_bitmask = args.mclk - clock_type = amdsmi_interface.AmdSmiClkType.MEM - # Check if the performance level is manual, if not then set it to manual - try: - perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to get performance level of {gpu_string}") from e - - if 'manual' in perf_level.lower(): - try: - amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e - - try: - amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clock_type, freq_bitmask) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'mclk', 'Successfully set clock frequency bitmask') - if isinstance(args.pcie, int): - freq_bitmask = args.pcie - clock_type = amdsmi_interface.AmdSmiClkType.PCIE - # Check if the performance level is manual, if not then set it to manual - try: - perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to get performance level of {gpu_string}") from e - - if 'manual' in perf_level.lower(): - try: - amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e - try: - amdsmi_interface.amdsmi_set_gpu_pci_bandwidth(args.gpu, freq_bitmask) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the {clock_type} clock frequency on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'pcie', 'Successfully set clock frequency bitmask') - if isinstance(args.slevel, int): - - level, value = args.slevel - level = amdsmi_interface.AmdSmiFreqInd(level) - clock_type = amdsmi_interface.AmdSmiClkType.SYS - try: - amdsmi_interface.amdsmi_set_gpu_od_clk_info(args.gpu, level, value, clock_type) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'slevel', 'Successfully changed clock frequency') - if isinstance(args.mlevel, int): - level, value = args.mlevel - level = amdsmi_interface.AmdSmiFreqInd(level) - clock_type = amdsmi_interface.AmdSmiClkType.MEM - try: - amdsmi_interface.amdsmi_set_gpu_od_clk_info(args.gpu, level, value, clock_type) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to change the {clock_type} clock frequency in the PowerPlay table on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'mlevel', 'Successfully changed clock frequency') - if isinstance(args.vc, int): - point, clk, volt = args.vc - try: - amdsmi_interface.amdsmi_set_gpu_od_volt_info(args.gpu, point, clk, volt) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the Voltage Curve point {point} to {clk}(MHz) {volt}(mV) on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'vc', f'Successfully set voltage point {point} to {clk}(MHz) {volt}(mV)') - if isinstance(args.srange, int): - min_value, max_value = args.srange - clock_type = amdsmi_interface.AmdSmiClkType.SYS - try: - amdsmi_interface.amdsmi_set_gpu_clk_range(args.gpu, min_value, max_value, clock_type) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'srange', f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz)") - if isinstance(args.mrange, int): - min_value, max_value = args.srange - clock_type = amdsmi_interface.AmdSmiClkType.MEM - try: - amdsmi_interface.amdsmi_set_gpu_clk_range(args.gpu, min_value, max_value, clock_type) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set {clock_type} from {min_value}(MHz) to {max_value}(MHz) on {gpu_string}") from e - - self.logger.store_output(args.gpu, 'mrange', f"Successfully set {clock_type} from {min_value}(MHz) to {max_value}(MHz)") if isinstance(args.fan, int): try: amdsmi_interface.amdsmi_set_gpu_fan_speed(args.gpu, 0, args.fan) @@ -1746,89 +1514,6 @@ class AMDSMICommands(): raise ValueError(f"Unable to set performance level {args.perflevel} on {gpu_string}") from e self.logger.store_output(args.gpu, 'perflevel', f"Successfully set performance level {args.perflevel}") - if isinstance(args.overdrive, int): - # Check if the performance level is manual, if not then set it to manual - try: - perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to get performance level of {gpu_string}") from e - - if 'manual' in perf_level.lower(): - try: - amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e - - try: - amdsmi_interface.amdsmi_set_gpu_overdrive_level(args.gpu, args.overdrive) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set overdrive {args.overdrive} to {gpu_string}") from e - - self.logger.store_output(args.gpu, 'overdrive', f"Successfully to set overdrive level to {args.overdrive}") - if isinstance(args.memoverdrive, int): - # Check if the performance level is manual, if not then set it to manual - try: - perf_level = amdsmi_interface.amdsmi_get_gpu_perf_level(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to get performance level of {gpu_string}") from e - - if 'manual' in perf_level.lower(): - try: - amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, amdsmi_interface.AmdSmiDevPerfLevel.MANUAL) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set the performance level of {gpu_string} to manual") from e - - self.logger.store_output(args.gpu, 'memoverdrive', f"Successfully to set memoverdrive level to {args.memoverdrive}") - if isinstance(args.poweroverdrive, int): - overdrive_power_cap = args.poweroverdrive - try: - power_caps = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to get the power cap info for {gpu_string}") from e - if overdrive_power_cap == 0: - overdrive_power_cap = power_caps['default_power_cap'] - else: - overdrive_power_cap *= 1000000 - - if overdrive_power_cap < power_caps['min_power_cap']: - raise ValueError(f"Requested power cap: {overdrive_power_cap} is lower than the min power cap: {power_caps['min_power_cap']}") - - if overdrive_power_cap > power_caps['max_power_cap']: - raise ValueError(f"Requested power cap: {overdrive_power_cap} is greater than the max power cap: {power_caps['max_power_cap']}") - - if overdrive_power_cap == power_caps['power_cap']: - raise ValueError(f"Requested power cap: {overdrive_power_cap} is the same as the current power cap: {power_caps['power_cap']}") - - try: - amdsmi_interface.amdsmi_set_power_cap(args.gpu, 0, overdrive_power_cap) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to set power cap to {overdrive_power_cap} on {gpu_string}") from e - - try: - power_caps = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_exception.AmdSmiRetCode.STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e - raise ValueError(f"Unable to get the power cap info for {gpu_string} post set") from e - - if power_caps['power_cap'] == overdrive_power_cap: - self.logger.store_output(args.gpu, 'power_cap', f"Successfully set the power cap {overdrive_power_cap}") - else: - raise ValueError(f"Power cap: {overdrive_power_cap} set failed on {gpu_string}") if args.profile: self.logger.store_output(args.gpu, 'profile', "Not Yet Implemented") if isinstance(args.perfdeterminism, int): @@ -1849,8 +1534,7 @@ class AMDSMICommands(): def reset(self, args, multiple_devices=False, gpu=None, gpureset=None, - clocks=None, fans=None, profile=None, - poweroverdrive=None, xgmierr=None, perfdeterminism=None): + clocks=None, fans=None, profile=None, xgmierr=None, perfdeterminism=None): """Issue reset commands to target gpu(s) Args: @@ -1861,7 +1545,6 @@ class AMDSMICommands(): clocks (bool, optional): Value override for args.clocks. Defaults to None. fans (bool, optional): Value override for args.fans. Defaults to None. profile (bool, optional): Value override for args.profile. Defaults to None. - poweroverdrive (bool, optional): Value override for args.poweroverdrive. Defaults to None. xgmierr (bool, optional): Value override for args.xgmierr. Defaults to None. perfdeterminism (bool, optional): Value override for args.perfdeterminism. Defaults to None. @@ -1883,8 +1566,6 @@ class AMDSMICommands(): args.fans = fans if profile: args.profile = profile - if poweroverdrive: - args.poweroverdrive = poweroverdrive if xgmierr: args.xgmierr = xgmierr if perfdeterminism: diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py index 43fd42aef4..fc25e42eb5 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py @@ -25,7 +25,6 @@ import platform import sys import time -from pathlib import Path from subprocess import run from subprocess import PIPE, STDOUT diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index bdbe10f5d4..d0fd284681 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -127,6 +127,15 @@ class AMDSMIParser(argparse.ArgumentParser): path.touch() setattr(args, self.dest, path) elif path.is_file(): + file_name = str(path) + if args.json and str(path).split('.')[-1].lower() != 'json': + file_name += ".json" + elif args.csv and str(path).split('.')[-1].lower() != 'csv': + file_name += ".csv" + elif str(path).split('.')[-1].lower() != 'txt': + file_name += ".txt" + path = Path(file_name) + path.touch() setattr(args, self.dest, path) else: raise amdsmi_cli_exceptions.AmdSmiInvalidFilePathException(path, CheckOutputFilePath.outputformat) @@ -415,13 +424,11 @@ class AMDSMIParser(argparse.ArgumentParser): ecc_help = "Number of ECC errors" ecc_block_help = "Number of ECC errors per block" pcie_help = "Current PCIe speed and width" - voltage_help = "Current GPU voltages" # Help text for Arguments only on Linux Baremetal platforms fan_help = "Current fan speed" vc_help = "Display voltage curve" overdrive_help = "Current GPU clock overdrive level" - mo_help = "Current memory clock overdrive level" perf_level_help = "Current DPM performance level" replay_count_help = "PCIe replay count" xgmi_err_help = "XGMI error information since last read" @@ -450,7 +457,6 @@ class AMDSMIParser(argparse.ArgumentParser): if self.helpers.is_virtual_os() or self.helpers.is_baremetal(): metric_parser.add_argument('-b', '--fb-usage', action='store_true', required=False, help=fb_usage_help) metric_parser.add_argument('-m', '--mem-usage', action='store_true', required=False, help=mem_usage_help) - metric_parser.add_argument('-r', '--replay-count', action='store_true', required=False, help=replay_count_help) # Optional Args for Hypervisors and Baremetal systems if self.helpers.is_hypervisor() or self.helpers.is_baremetal(): @@ -459,9 +465,8 @@ class AMDSMIParser(argparse.ArgumentParser): metric_parser.add_argument('-t', '--temperature', action='store_true', required=False, help=temperature_help) metric_parser.add_argument('-e', '--ecc', action='store_true', required=False, help=ecc_help) metric_parser.add_argument('-k', '--ecc-block', action='store_true', required=False, help=ecc_block_help) - + metric_parser.add_argument('-r', '--replay-count', action='store_true', required=False, help=replay_count_help) metric_parser.add_argument('-P', '--pcie', action='store_true', required=False, help=pcie_help) - metric_parser.add_argument('-V', '--voltage', action='store_true', required=False, help=voltage_help) metric_parser.add_argument('-u', '--usage', action='store_true', required=False, help=usage_help) # Optional Args for Linux Baremetal Systems @@ -469,7 +474,6 @@ class AMDSMIParser(argparse.ArgumentParser): metric_parser.add_argument('-f', '--fan', action='store_true', required=False, help=fan_help) metric_parser.add_argument('-C', '--voltage-curve', action='store_true', required=False, help=vc_help) metric_parser.add_argument('-o', '--overdrive', action='store_true', required=False, help=overdrive_help) - metric_parser.add_argument('-M', '--mem-overdrive', action='store_true', required=False, help=mo_help) metric_parser.add_argument('-l', '--perf-level', action='store_true', required=False, help=perf_level_help) metric_parser.add_argument('-x', '--xgmi-err', action='store_true', required=False, help=xgmi_err_help) metric_parser.add_argument('-E', '--energy', action='store_true', required=False, help=energy_help) @@ -544,7 +548,7 @@ class AMDSMIParser(argparse.ArgumentParser): def _add_event_parser(self, subparsers, func): if self.helpers.is_linux() and not self.helpers.is_virtual_os(): - # This subparser only applies to Linux BareMetal & Linux Hypervisors, NOT Linux Guest + # This subparser only applies to Linux Hypervisors, NOT Linux Guest return # Subparser help text @@ -611,20 +615,8 @@ class AMDSMIParser(argparse.ArgumentParser): set_value_optionals_title = "Set Arguments" # Help text for Arguments only on Guest and BM platforms - set_clock_help = "Sets clock frequency levels for specified clocks" - set_sclk_help = "Sets GPU clock frequency levels" - set_mclk_help = "Sets memory clock frequency levels" - set_pcie_help = "Sets PCIe Bandwith" - set_slevel_help = "Change GPU clock frequency and voltage for a specific level" - set_mlevel_help = "Change GPU memory frequency and voltage for a specific level" - set_vc_help = "Change SCLK voltage curve for a specified point" - set_srange_help = "Sets min and max SCLK speed" - set_mrange_help = "Sets min and max MCLK speed" set_fan_help = "Sets GPU fan speed (0-255 or 0-100%%)" set_perf_level_help = "Sets performance level" - set_overdrive_help = "Set GPU overdrive (0-20%%) ***DEPRECATED IN NEWER KERNEL VERSIONS (use --slevel instead)***" - set_mem_overdrive_help = "Set memory overclock overdrive level ***DEPRECATED IN NEWER KERNEL VERSIONS (use --mlevel instead)***" - set_power_overdrive_help = "Set the maximum GPU power using power overdrive in Watts" set_profile_help = "Set power profile level (#) or a quoted string of custom profile attributes" set_perf_det_help = "Sets GPU clock frequency limit and performance level to determinism to get minimal performance variation" @@ -639,20 +631,8 @@ class AMDSMIParser(argparse.ArgumentParser): self._add_device_arguments(set_value_parser, required=True) # Optional Args - set_value_parser.add_argument('-c', '--clock', action=self._validate_set_clock(True), nargs='+', required=False, help=set_clock_help, metavar=('CLK_TYPE', 'CLK_LEVELS')) - set_value_parser.add_argument('-s', '--sclk', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_sclk_help, metavar='CLK_LEVELS') - set_value_parser.add_argument('-m', '--mclk', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_mclk_help, metavar='CLK_LEVELS') - set_value_parser.add_argument('-p', '--pcie', action=self._validate_set_clock(False), nargs='+', type=self._positive_int, required=False, help=set_pcie_help, metavar='CLK_LEVELS') - set_value_parser.add_argument('-S', '--slevel', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_slevel_help, metavar=('SCLKLEVEL', 'SCLK')) - set_value_parser.add_argument('-M', '--mlevel', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_mlevel_help, metavar=('MCLKLEVEL', 'MCLK')) - set_value_parser.add_argument('-V', '--vc', action=self._prompt_spec_warning(), nargs=3, type=self._positive_int, required=False, help=set_vc_help, metavar=('POINT', 'SCLK', 'SVOLT')) - set_value_parser.add_argument('-r', '--srange', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_srange_help, metavar=('SCLKMIN', 'SCLKMAX')) - set_value_parser.add_argument('-R', '--mrange', action=self._prompt_spec_warning(), nargs=2, type=self._positive_int, required=False, help=set_mrange_help, metavar=('MCLKMIN', 'MCLKMAX')) set_value_parser.add_argument('-f', '--fan', action=self._validate_fan_speed(), required=False, help=set_fan_help, metavar='%') set_value_parser.add_argument('-l', '--perflevel', action='store', choices=self.helpers.get_perf_levels()[0], type=str.upper, required=False, help=set_perf_level_help, metavar='LEVEL') - set_value_parser.add_argument('-o', '--overdrive', action=self._validate_overdrive_percent(), required=False, help=set_overdrive_help, metavar='%') - set_value_parser.add_argument('-O', '--memoverdrive', action=self._validate_overdrive_percent(), required=False, help=set_mem_overdrive_help, metavar='%') - set_value_parser.add_argument('-w', '--poweroverdrive', action=self._prompt_spec_warning(), type=self._positive_int, required=False, help=set_power_overdrive_help, metavar="WATTS") set_value_parser.add_argument('-P', '--profile', action='store', required=False, help=set_profile_help, metavar='SETPROFILE') set_value_parser.add_argument('-d', '--perfdeterminism', action='store', type=self._positive_int, required=False, help=set_perf_det_help, metavar='SCLKMAX') @@ -766,7 +746,6 @@ class AMDSMIParser(argparse.ArgumentParser): resetclocks_help = "Reset clocks and overdrive to default" resetfans_help = "Reset fans to automatic (driver) control" resetprofile_help = "Reset power profile back to default" - resetpoweroverdrive_help = "Set the maximum GPU power back to the device default state" resetxgmierr_help = "Reset XGMI error counts" resetperfdet_help = "Disable performance determinism" @@ -785,7 +764,6 @@ class AMDSMIParser(argparse.ArgumentParser): reset_parser.add_argument('-c', '--clocks', action='store_true', required=False, help=resetclocks_help) reset_parser.add_argument('-f', '--fans', action='store_true', required=False, help=resetfans_help) reset_parser.add_argument('-p', '--profile', action='store_true', required=False, help=resetprofile_help) - reset_parser.add_argument('-o', '--poweroverdrive', action='store_true', required=False, help=resetpoweroverdrive_help) reset_parser.add_argument('-x', '--xgmierr', action='store_true', required=False, help=resetxgmierr_help) reset_parser.add_argument('-d', '--perfdeterminism', action='store_true', required=False, help=resetperfdet_help)