From 194c33852f06254e995989d9a4942e9336924c7a Mon Sep 17 00:00:00 2001 From: gabrpham Date: Mon, 11 Nov 2024 16:57:15 -0600 Subject: [PATCH] [SWDEV-492739] Partial fix for sclk min/max out of bounds Signed-off-by: gabrpham Change-Id: I1f0230955c890c11a735c8cb352c8a9ee4cebe27 [ROCm/amdsmi commit: 2273d95a6c6ba912fe358975f10a50ad2ca08c07] --- projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 41 ++++++++++++++++--- projects/amdsmi/amdsmi_cli/amdsmi_parser.py | 12 +++++- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 045865a854..9a814b1cef 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -4030,16 +4030,47 @@ class AMDSMICommands(): raise ValueError(f"Unable to set XGMI policy to {args.xgmi_plpd} on {gpu_string}") from e self.logger.store_output(args.gpu, 'xgmiplpd', f"Successfully set per-link power down policy to id {args.xgmi_plpd}") if isinstance(args.clk_limit, tuple): + clk_type = args.clk_limit.clk_type + lim_type = args.clk_limit.lim_type + val = args.clk_limit.val + val_changed = True # Assume Clock limit value is changed + + # Validate the value against the extremum try: - clk_type = args.clk_limit.clk_type - lim_type = args.clk_limit.lim_type - val = args.clk_limit.val - amdsmi_interface.amdsmi_set_gpu_clk_limit(args.gpu, clk_type, lim_type, val) + # Parser only allows two options sclk or mclk + if clk_type == "sclk": + amdsmi_clk_type = amdsmi_interface.AmdSmiClkType.GFX + elif clk_type == "mclk": + amdsmi_clk_type = amdsmi_interface.AmdSmiClkType.MEM + clk_tuple = amdsmi_interface.amdsmi_get_clock_info(args.gpu, amdsmi_clk_type) + + if lim_type == "min": + if val > clk_tuple['max_clk']: + raise IndexError("cannot set min value greater than max") + if val == clk_tuple['min_clk']: + val_changed = False # Clock limit value did not change + + if lim_type == "max": + if val < 
clk_tuple['min_clk']: + raise IndexError("cannot set max value less than min") + if val == clk_tuple['max_clk']: + val_changed = False # Clock limit value did not change + except amdsmi_exception.AmdSmiLibraryException as e: + logging.debug("Failed to get clock extremum info for gpu %s | %s", gpu_id, e.get_error_info()) + + # Set the value + try: + if val_changed: + amdsmi_interface.amdsmi_set_gpu_clk_limit(args.gpu, clk_type, lim_type, val) except amdsmi_exception.AmdSmiLibraryException as e: if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM: raise PermissionError('Command requires elevation') from e raise ValueError(f"Unable to set {args.clk_limit.lim_type} of {args.clk_limit.clk_type} to {args.clk_limit.val} on {gpu_string}") from e - self.logger.store_output(args.gpu, 'clk_limit', f"Successfully changed {args.clk_limit.lim_type} of {args.clk_limit.clk_type} to {args.clk_limit.val}") + + if val_changed: + self.logger.store_output(args.gpu, 'clk_limit', f"Successfully changed {args.clk_limit.lim_type} of {args.clk_limit.clk_type} to {args.clk_limit.val}") + else: + self.logger.store_output(args.gpu, 'clk_limit', f"Clock limit is already set to {args.clk_limit.val}") if isinstance(args.process_isolation, int): status_string = "Enabled" if args.process_isolation else "Disabled" diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index 339468befd..35ce943e1d 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -187,11 +187,19 @@ class AMDSMIParser(argparse.ArgumentParser): valid_clk_types = ('sclk', 'mclk') valid_lim_types = ('min', 'max') clk_type, lim_type, val = values + + # Check if the sclk and mclk parameters are valid if clk_type not in valid_clk_types: - raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(clk_type, output_format) + raise amdsmi_cli_exceptions.AmdSmiInvalidParameterException(clk_type, 
output_format) if lim_type not in valid_lim_types: - raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(lim_type, output_format) + raise amdsmi_cli_exceptions.AmdSmiInvalidParameterException(lim_type, output_format) + + # Check if the val is a valid integer value + if not val.isdigit(): + raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(val, output_format) val = int(val) + if val < 0: + raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(val, output_format) clk_limit_args = collections.namedtuple('clk_limit_args', ['clk_type', 'lim_type', 'val']) setattr(namespace, self.dest, clk_limit_args(clk_type, lim_type, val)) return AMDSMILimitArgs