[SWDEV-448738] Added rocmsmi extremum command as 'set -L'
Change-Id: I997c630bd20cc61673813a2301eb5e3002619a32 Signed-off-by: gabrpham <Gabriel.Pham@amd.com> Change-Id: Ifa884303f9a0fa058af093a23f5be449bba54f29
This commit is contained in:
+11
-8
@@ -7,25 +7,28 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
## amd_smi_lib for ROCm 6.3.0
|
||||
|
||||
### Changes
|
||||
- **Moved python tests directory path install location.**
|
||||
- `/opt/<rocm-path>/share/amd_smi/pytest/.. to /opt/<rocm-path>/share/amd_smi/tests/python_unittest/..`
|
||||
- **On amd-smi-lib-tests uninstall, the amd_smi tests folder is removed.**
|
||||
- **Removed pytest dependency, our python testing depends only on unittest framework.**
|
||||
- **Moved python tests directory path install location**.
|
||||
- `/opt/<rocm-path>/share/amd_smi/pytest/..` to `/opt/<rocm-path>/share/amd_smi/tests/python_unittest/..`
|
||||
- On amd-smi-lib-tests uninstall, the amd_smi tests folder is removed.
|
||||
- Removed pytest dependency, our python testing now only depends on the unittest framework.
|
||||
|
||||
- **Added more supported utilization count types to `amdsmi_get_utilization_count()`**.
|
||||
|
||||
- **Added `amd-smi set -L/--clk-limit ...` command**.
|
||||
Equivalent to rocm-smi's `--extremum` command, which sets the soft minimum or soft maximum clock frequency of sclk or mclk.
|
||||
|
||||
- **Added Pytest functionality to test amdsmi API calls in Python**.
|
||||
|
||||
- **Changed the `power` parameter in `amdsmi_get_energy_count()` to `energy_accumulator`**.
|
||||
Changes propagate forwards into the python interface as well; however, we are maintaining backwards compatibility and keeping the `power` field in the python API until ROCm 6.4.
|
||||
|
||||
- **Added GPU memory overdrive percentage to `amd-smi metric -o`**.
|
||||
- **Added GPU memory overdrive percentage to `amd-smi metric -o`**.
|
||||
Added `amdsmi_get_gpu_mem_overdrive_level()` function to amd-smi C and Python Libraries.
|
||||
|
||||
- **Added Subsystem Device ID to `amd-smi static --asic`**.
|
||||
- **Added Subsystem Device ID to `amd-smi static --asic`**.
|
||||
No underlying changes to `amdsmi_get_gpu_asic_info`.
|
||||
|
||||
- **Added retrieving connection type and P2P capabilities between two GPUs**.
|
||||
- **Added retrieving connection type and P2P capabilities between two GPUs**.
|
||||
- Added `amdsmi_topo_get_p2p_status` function to amd-smi C and Python Libraries.
|
||||
- Added retrieving P2P link capabilities to CLI `amd-smi topology`.
|
||||
|
||||
@@ -276,7 +279,7 @@ GPU: 1
|
||||
|
||||
### Removals
|
||||
|
||||
- **Removed usage of _validate_positive in Parser and replaced with _positive_int and _not_negative_int as appropriate**
|
||||
- **Removed usage of _validate_positive in Parser and replaced with _positive_int and _not_negative_int as appropriate**.
|
||||
This will allow 0 to be a valid input for several options in setting CPUs where appropriate (for example, as a mode or NBIOID).
|
||||
|
||||
### Optimizations
|
||||
|
||||
@@ -3665,7 +3665,7 @@ class AMDSMICommands():
|
||||
def set_gpu(self, args, multiple_devices=False, gpu=None, fan=None, perf_level=None,
|
||||
profile=None, perf_determinism=None, compute_partition=None,
|
||||
memory_partition=None, power_cap=None, soc_pstate=None, xgmi_plpd = None,
|
||||
process_isolation=None):
|
||||
process_isolation=None, clk_limit=None):
|
||||
"""Issue reset commands to target gpu(s)
|
||||
|
||||
Args:
|
||||
@@ -3712,6 +3712,8 @@ class AMDSMICommands():
|
||||
args.xgmi_plpd = xgmi_plpd
|
||||
if process_isolation:
|
||||
args.process_isolation = process_isolation
|
||||
if clk_limit:
|
||||
args.clk_limit = clk_limit
|
||||
|
||||
# Handle No GPU passed
|
||||
if args.gpu == None:
|
||||
@@ -3734,7 +3736,8 @@ class AMDSMICommands():
|
||||
args.power_cap is not None,
|
||||
args.soc_pstate is not None,
|
||||
args.xgmi_plpd is not None,
|
||||
args.process_isolation is not None]):
|
||||
args.process_isolation is not None,
|
||||
args.clk_limit is not None]):
|
||||
command = " ".join(sys.argv[1:])
|
||||
raise AmdSmiRequiredCommandException(command, self.logger.format)
|
||||
|
||||
@@ -3860,6 +3863,17 @@ class AMDSMICommands():
|
||||
raise ValueError(f"Unable to set process isolation to {status_string} on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'process_isolation', result)
|
||||
if isinstance(args.clk_limit, tuple):
|
||||
try:
|
||||
clk_type = args.clk_limit.clk_type
|
||||
lim_type = args.clk_limit.lim_type
|
||||
val = args.clk_limit.val
|
||||
amdsmi_interface.amdsmi_set_gpu_clk_limit(args.gpu, clk_type, lim_type, val)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set {args.clk_limit.lim_type} of {args.clk_limit.clk_type} to {args.clk_limit.val} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'clk_limit', f"Successfully changed {args.clk_limit.lim_type} of {args.clk_limit.clk_type} to {args.clk_limit.val}")
|
||||
|
||||
if multiple_devices:
|
||||
self.logger.store_multiple_device_output()
|
||||
@@ -3875,7 +3889,7 @@ class AMDSMICommands():
|
||||
cpu_pwr_eff_mode=None, cpu_gmi3_link_width=None, cpu_pcie_link_rate=None,
|
||||
cpu_df_pstate_range=None, cpu_enable_apb=None, cpu_disable_apb=None,
|
||||
soc_boost_limit=None, core=None, core_boost_limit=None, soc_pstate=None, xgmi_plpd=None,
|
||||
process_isolation=None):
|
||||
process_isolation=None, clk_limit=None):
|
||||
"""Issue reset commands to target gpu(s)
|
||||
|
||||
Args:
|
||||
@@ -3926,8 +3940,8 @@ class AMDSMICommands():
|
||||
# Check if a GPU argument has been set
|
||||
gpu_args_enabled = False
|
||||
gpu_attributes = ["fan", "perf_level", "profile", "perf_determinism", "compute_partition",
|
||||
"memory_partition", "power_cap", "soc_pstate", "xgmi_plpd", "process_isolation",
|
||||
]
|
||||
"memory_partition", "power_cap", "soc_pstate", "xgmi_plpd",
|
||||
"process_isolation", "clk_limit"]
|
||||
for attr in gpu_attributes:
|
||||
if hasattr(args, attr):
|
||||
if getattr(args, attr) is not None:
|
||||
@@ -3983,7 +3997,7 @@ class AMDSMICommands():
|
||||
self.set_gpu(args, multiple_devices, gpu, fan, perf_level,
|
||||
profile, perf_determinism, compute_partition,
|
||||
memory_partition, power_cap, soc_pstate, xgmi_plpd,
|
||||
process_isolation)
|
||||
process_isolation, clk_limit)
|
||||
elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized
|
||||
if args.cpu == None and args.core == None:
|
||||
raise ValueError('No CPU or CORE provided, specific target(s) are needed')
|
||||
@@ -4003,7 +4017,7 @@ class AMDSMICommands():
|
||||
self.set_gpu(args, multiple_devices, gpu, fan, perf_level,
|
||||
profile, perf_determinism, compute_partition,
|
||||
memory_partition, power_cap, soc_pstate, xgmi_plpd,
|
||||
process_isolation)
|
||||
process_isolation, clk_limit)
|
||||
|
||||
|
||||
def reset(self, args, multiple_devices=False, gpu=None, gpureset=None,
|
||||
|
||||
@@ -26,6 +26,7 @@ import errno
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import collections
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
@@ -172,6 +173,27 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(string_value, outputformat)
|
||||
|
||||
|
||||
def _limit_select(self):
    """Build the argparse action used for setting clock limits.

    The returned action unpacks exactly three values, in this order:
    a clock type, a limit type and a numeric value.

    Returns:
        An ``argparse.Action`` subclass. When invoked it stores a
        ``clk_limit_args(clk_type, lim_type, val)`` named tuple on the
        namespace under the option's ``dest``; ``val`` is converted to int.

    Raises (from the action, while parsing):
        AmdSmiInvalidParameterValueException: if the clock type is not
            'sclk' or 'mclk', the limit type is not 'min' or 'max', or the
            value is not a non-negative integer.
    """
    output_format = self.helpers.get_output_format()

    # Built once per action class rather than on every parse call.
    valid_clk_types = ('sclk', 'mclk')
    valid_lim_types = ('min', 'max')
    clk_limit_args = collections.namedtuple('clk_limit_args', ['clk_type', 'lim_type', 'val'])

    class AMDSMILimitArgs(argparse.Action):
        # Annotations are quoted so they are never evaluated at definition
        # time: the ``X | Y`` union syntax raises TypeError before Python 3.10.
        def __call__(self, parser: "AMDSMIParser", namespace: argparse.Namespace,
                     values: "str | list | None", option_string: "str | None" = None) -> None:
            clk_type, lim_type, val = values
            if clk_type not in valid_clk_types:
                raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(clk_type, output_format)
            if lim_type not in valid_lim_types:
                raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(lim_type, output_format)

            # Report a non-numeric value the same way as the other two
            # fields instead of leaking a bare ValueError traceback.
            try:
                limit = int(val)
            except (TypeError, ValueError) as e:
                raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(val, output_format) from e
            # A clock frequency limit cannot be negative.
            if limit < 0:
                raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(val, output_format)

            setattr(namespace, self.dest, clk_limit_args(clk_type, lim_type, limit))

    return AMDSMILimitArgs
|
||||
|
||||
|
||||
def _check_output_file_path(self):
|
||||
""" Argument action validator:
|
||||
Returns a path to a file from the output file path provided.
|
||||
@@ -1014,6 +1036,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_soc_pstate_help = "Set the GPU soc pstate policy using policy id\n"
|
||||
set_xgmi_plpd_help = "Set the GPU XGMI per-link power down policy using policy id\n"
|
||||
set_process_isolation_help = "Enable or disable the GPU process isolation: 0 for disable and 1 for enable.\n"
|
||||
set_clk_limit_help = "Sets the sclk (aka gfxclk) or mclk minimum and maximum frequencies. \nOf form: amd-smi set -L (sclk | mclk) (min | max) value"
|
||||
|
||||
# Help text for CPU set options
|
||||
set_cpu_pwr_limit_help = "Set power limit for the given socket. Input parameter is power limit value."
|
||||
@@ -1052,6 +1075,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_value_parser.add_argument('-o', '--power-cap', action='store', type=self._positive_int, required=False, help=set_power_cap_help, metavar='WATTS')
|
||||
set_value_parser.add_argument('-p', '--soc-pstate', action='store', required=False, type=self._not_negative_int, help=set_soc_pstate_help, metavar='POLICY_ID')
|
||||
set_value_parser.add_argument('-x', '--xgmi-plpd', action='store', required=False, type=self._not_negative_int, help=set_xgmi_plpd_help, metavar='POLICY_ID')
|
||||
set_value_parser.add_argument('-L', '--clk-limit', action=self._limit_select(), nargs=3, required=False, help=set_clk_limit_help, metavar=('CLK_TYPE', 'LIM_TYPE', 'VALUE'))
|
||||
|
||||
set_value_parser.add_argument('-R', '--process-isolation', action='store', choices=[0,1], type=self._not_negative_int, required=False, help=set_process_isolation_help, metavar='STATUS')
|
||||
|
||||
|
||||
@@ -124,6 +124,7 @@ from .amdsmi_interface import amdsmi_set_gpu_pci_bandwidth
|
||||
from .amdsmi_interface import amdsmi_set_power_cap
|
||||
from .amdsmi_interface import amdsmi_set_gpu_power_profile
|
||||
from .amdsmi_interface import amdsmi_set_gpu_clk_range
|
||||
from .amdsmi_interface import amdsmi_set_gpu_clk_limit
|
||||
from .amdsmi_interface import amdsmi_set_gpu_od_clk_info
|
||||
from .amdsmi_interface import amdsmi_set_gpu_od_volt_info
|
||||
from .amdsmi_interface import amdsmi_set_gpu_perf_level
|
||||
|
||||
@@ -3190,6 +3190,36 @@ def amdsmi_set_gpu_clk_range(
|
||||
)
|
||||
|
||||
|
||||
def amdsmi_set_gpu_clk_limit(
    processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
    clk_type: str,
    limit_type: str,
    value: int
) -> None:
    """Set the minimum or maximum limit of a GPU clock.

    Args:
        processor_handle: handle of the target processor.
        clk_type: "sclk" or "mclk" (case-insensitive).
        limit_type: "min" or "max" (case-insensitive).
        value: the new limit; must be a non-negative int.

    Raises:
        AmdSmiParameterException: if an argument has the wrong type.
        ValueError: if ``clk_type`` or ``limit_type`` is not one of the
            accepted names, or ``value`` is negative.
        Whatever ``_check_res`` raises when the library call reports a
        failure status.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(value, int):
        raise AmdSmiParameterException(value, int)
    # Check the string arguments up front so a wrong type is reported as a
    # parameter error rather than an AttributeError from .lower().
    if not isinstance(clk_type, str):
        raise AmdSmiParameterException(clk_type, str)
    if not isinstance(limit_type, str):
        raise AmdSmiParameterException(limit_type, str)
    # ctypes integer types do no overflow checking, so a negative value would
    # silently wrap to a huge unsigned number when converted to c_uint64.
    if value < 0:
        raise ValueError(f"value must be non-negative, got {value}")

    clk_type_map = {
        "sclk": amdsmi_wrapper.AMDSMI_CLK_TYPE_SYS,
        "mclk": amdsmi_wrapper.AMDSMI_CLK_TYPE_MEM,
    }
    limit_type_map = {
        "min": amdsmi_wrapper.CLK_LIMIT_MIN,
        "max": amdsmi_wrapper.CLK_LIMIT_MAX,
    }

    # Unknown names previously fell through the if/elif chain and surfaced
    # as an UnboundLocalError; reject them explicitly instead.
    clk_key = clk_type.lower()
    if clk_key not in clk_type_map:
        raise ValueError(f"clk_type must be 'sclk' or 'mclk', got {clk_type!r}")
    limit_key = limit_type.lower()
    if limit_key not in limit_type_map:
        raise ValueError(f"limit_type must be 'min' or 'max', got {limit_type!r}")

    _check_res(
        amdsmi_wrapper.amdsmi_set_gpu_clk_limit(
            processor_handle,
            amdsmi_wrapper.amdsmi_clk_type_t(clk_type_map[clk_key]),
            amdsmi_wrapper.amdsmi_clk_limit_type_t(limit_type_map[limit_key]),
            ctypes.c_uint64(value),
        )
    )
|
||||
|
||||
|
||||
def amdsmi_get_gpu_memory_total(processor_handle: amdsmi_wrapper.amdsmi_processor_handle, mem_type: AmdSmiMemoryType):
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
raise AmdSmiParameterException(
|
||||
|
||||
Reference in New Issue
Block a user