Process isolation and clean shader
A few APIs and command line options are added to support process isolation and clean shader. Change-Id: I98ad3fc9fc7429799a21798b7fca1c307de7f403
This commit is contained in:
committed by
Shuzhou Liu
parent
1ae3a5b6cb
commit
7d2ab7970d
@@ -148,9 +148,9 @@ Command Modifiers:
|
||||
|
||||
```bash
|
||||
~$ amd-smi static --help
|
||||
usage: amd-smi static [-h] [-g GPU [GPU ...] | -U CPU [CPU ...]] [-a] [-b] [-V] [-d] [-v]
|
||||
[-c] [-B] [-r] [-p] [-l] [-u] [-s] [-i] [--json | --csv]
|
||||
[--file FILE] [--loglevel LEVEL]
|
||||
usage: amd-smi static [-h] [-g GPU [GPU ...]] [-a] [-b] [-V] [-d] [-v] [-c] [-B] [-r] [-p]
|
||||
[-l] [-P] [-x] [-R] [-u] [--json | --csv] [--file FILE]
|
||||
[--loglevel LEVEL]
|
||||
|
||||
If no GPU is specified, returns static information for all GPUs on the system.
|
||||
If no static argument is provided, all static information will be displayed.
|
||||
@@ -179,6 +179,7 @@ Static Arguments:
|
||||
-r, --ras Displays RAS features information
|
||||
-p, --partition Partition information
|
||||
-l, --limit All limit metric values (i.e. power and thermal limits)
|
||||
-R, --process-isolation The process isolation status
|
||||
-u, --numa All numa node information
|
||||
|
||||
CPU Arguments:
|
||||
@@ -474,13 +475,13 @@ Command Modifiers:
|
||||
```bash
|
||||
usage: amd-smi set [-h] (-g GPU [GPU ...] | -U CPU [CPU ...] | -O CORE [CORE ...]) [-f %]
|
||||
[-l LEVEL] [-P SETPROFILE] [-d SCLKMAX] [-C PARTITION] [-M PARTITION]
|
||||
[-o WATTS] [-p POLICY] [--cpu-pwr-limit PWR_LIMIT]
|
||||
[-o WATTS] [-p POLICY] [-R STATUS] [--cpu-pwr-limit PWR_LIMIT]
|
||||
[--cpu-xgmi-link-width MIN_WIDTH MAX_WIDTH]
|
||||
[--cpu-lclk-dpm-level NBIOID MIN_DPM MAX_DPM] [--cpu-pwr-eff-mode MODE]
|
||||
[--cpu-gmi3-link-width MIN_LW MAX_LW] [--cpu-pcie-link-rate LINK_RATE]
|
||||
[--cpu-df-pstate-range MAX_PSTATE MIN_PSTATE] [--cpu-enable-apb]
|
||||
[--cpu-disable-apb DF_PSTATE] [--soc-boost-limit BOOST_LIMIT]
|
||||
[--core-boost-limit BOOST_LIMIT] [--json | --csv] [--file FILE]
|
||||
[--core-boost-limit BOOST_LIMIT] [-c] [--json | --csv] [--file FILE]
|
||||
[--loglevel LEVEL]
|
||||
|
||||
A GPU must be specified to set a configuration.
|
||||
@@ -514,6 +515,8 @@ Set Arguments:
|
||||
-o, --power-cap WATTS Set power capacity limit
|
||||
-p, --dpm-policy POLICY_ID Set the GPU DPM policy using policy id
|
||||
-x, --xgmi-plpd POLICY_ID Set the GPU XGMI per-link power down policy using policy id
|
||||
-R, --process-isolation STATUS Enable or disable the GPU process isolation: 0 for disable and 1 for enable.
|
||||
-c, --clear-sram-data Clear the GPU SRAM data
|
||||
|
||||
CPU Arguments:
|
||||
--cpu-pwr-limit PWR_LIMIT Set power limit for the given socket. Input parameter is power limit value.
|
||||
|
||||
@@ -245,7 +245,7 @@ class AMDSMICommands():
|
||||
def static_gpu(self, args, multiple_devices=False, gpu=None, asic=None, bus=None, vbios=None,
|
||||
limit=None, driver=None, ras=None, board=None, numa=None, vram=None,
|
||||
cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None,
|
||||
policy=None, xgmi_plpd=None):
|
||||
policy=None, xgmi_plpd=None, process_isolation=None):
|
||||
"""Get Static information for target gpu
|
||||
|
||||
Args:
|
||||
@@ -270,6 +270,7 @@ class AMDSMICommands():
|
||||
num_vf (bool, optional): Value override for args.num_vf. Defaults to None.
|
||||
policy (bool, optional): Value override for args.policy. Defaults to None.
|
||||
xgmi_plpd (bool, optional): Value override for args.xgmi_plpd. Defaults to None.
|
||||
process_isolation (bool, optional): Value override for args.process_isolation. Defaults to None.
|
||||
Returns:
|
||||
None: Print output via AMDSMILogger to destination
|
||||
"""
|
||||
@@ -306,8 +307,10 @@ class AMDSMICommands():
|
||||
args.policy = policy
|
||||
if xgmi_plpd:
|
||||
args.xgmi_plpd = xgmi_plpd
|
||||
current_platform_args += ["ras", "limit", "partition", "policy", "xgmi_plpd"]
|
||||
current_platform_values += [args.ras, args.limit, args.partition, args.policy, args.xgmi_plpd]
|
||||
if process_isolation:
|
||||
args.process_isolation = process_isolation
|
||||
current_platform_args += ["ras", "limit", "partition", "policy", "xgmi_plpd", "process_isolation"]
|
||||
current_platform_values += [args.ras, args.limit, args.partition, args.policy, args.xgmi_plpd, args.process_isolation]
|
||||
|
||||
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
|
||||
if numa:
|
||||
@@ -643,6 +646,16 @@ class AMDSMICommands():
|
||||
logging.debug("Failed to get xgmi_plpd info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
static_dict['xgmi_plpd'] = policy_info
|
||||
if 'process_isolation' in current_platform_args:
|
||||
if args.process_isolation:
|
||||
try:
|
||||
status = amdsmi_interface.amdsmi_get_gpu_process_isolation(args.gpu)
|
||||
status = "Enabled" if status else "Disabled"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
status = "N/A"
|
||||
logging.debug("Failed to process isolation for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
static_dict['process_isolation'] = status
|
||||
if 'numa' in current_platform_args:
|
||||
if args.numa:
|
||||
try:
|
||||
@@ -779,7 +792,7 @@ class AMDSMICommands():
|
||||
bus=None, vbios=None, limit=None, driver=None, ras=None,
|
||||
board=None, numa=None, vram=None, cache=None, partition=None,
|
||||
dfc_ucode=None, fb_info=None, num_vf=None, cpu=None,
|
||||
interface_ver=None, policy=None, xgmi_plpd = None):
|
||||
interface_ver=None, policy=None, xgmi_plpd = None, process_isolation=None):
|
||||
"""Get Static information for target gpu and cpu
|
||||
|
||||
Args:
|
||||
@@ -804,6 +817,7 @@ class AMDSMICommands():
|
||||
interface_ver (bool, optional): Value override for args.interface_ver. Defaults to None
|
||||
policy (bool, optional): Value override for args.policy. Defaults to None.
|
||||
xgmi_plpd (bool, optional): Value override for args.xgmi_plpd. Defaults to None.
|
||||
process_isolation (bool, optional): Value override for args.process_isolation. Defaults to None.
|
||||
Raises:
|
||||
IndexError: Index error if gpu list is empty
|
||||
|
||||
@@ -829,7 +843,8 @@ class AMDSMICommands():
|
||||
gpu_args_enabled = False
|
||||
gpu_attributes = ["asic", "bus", "vbios", "limit", "driver", "ras",
|
||||
"board", "numa", "vram", "cache", "partition",
|
||||
"dfc_ucode", "fb_info", "num_vf", "policy", "xgmi_plpd"]
|
||||
"dfc_ucode", "fb_info", "num_vf", "policy", "xgmi_plpd",
|
||||
"process_isolation"]
|
||||
for attr in gpu_attributes:
|
||||
if hasattr(args, attr):
|
||||
if getattr(args, attr):
|
||||
@@ -859,7 +874,8 @@ class AMDSMICommands():
|
||||
self.static_gpu(args, multiple_devices, gpu, asic,
|
||||
bus, vbios, limit, driver, ras,
|
||||
board, numa, vram, cache, partition,
|
||||
dfc_ucode, fb_info, num_vf, policy)
|
||||
dfc_ucode, fb_info, num_vf, policy,
|
||||
process_isolation)
|
||||
elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized
|
||||
if args.cpu == None:
|
||||
args.cpu = self.cpu_handles
|
||||
@@ -873,7 +889,8 @@ class AMDSMICommands():
|
||||
self.static_gpu(args, multiple_devices, gpu, asic,
|
||||
bus, vbios, limit, driver, ras,
|
||||
board, numa, vram, cache, partition,
|
||||
dfc_ucode, fb_info, num_vf, policy, xgmi_plpd)
|
||||
dfc_ucode, fb_info, num_vf, policy, xgmi_plpd,
|
||||
process_isolation)
|
||||
|
||||
|
||||
def firmware(self, args, multiple_devices=False, gpu=None, fw_list=True):
|
||||
@@ -3326,7 +3343,8 @@ class AMDSMICommands():
|
||||
|
||||
def set_gpu(self, args, multiple_devices=False, gpu=None, fan=None, perf_level=None,
|
||||
profile=None, perf_determinism=None, compute_partition=None,
|
||||
memory_partition=None, power_cap=None, dpm_policy=None, xgmi_plpd = None):
|
||||
memory_partition=None, power_cap=None, dpm_policy=None, xgmi_plpd = None,
|
||||
process_isolation=None, clear_sram_data = None):
|
||||
"""Issue reset commands to target gpu(s)
|
||||
|
||||
Args:
|
||||
@@ -3342,7 +3360,8 @@ class AMDSMICommands():
|
||||
power_cap (int, optional): Value override for args.power_cap. Defaults to None.
|
||||
dpm_policy (int, optional): Value override for args.dpm_policy. Defaults to None.
|
||||
xgmi_plpd (int, optional): Value override for args.xgmi_plpd. Defaults to None.
|
||||
|
||||
process_isolation (int, optional): Value override for args.process_isolation. Defaults to None.
|
||||
clear_sram_data (int, optional): Value override for args.clear_sram_data. Defaults to None.
|
||||
Raises:
|
||||
ValueError: Value error if no gpu value is provided
|
||||
IndexError: Index error if gpu list is empty
|
||||
@@ -3371,6 +3390,10 @@ class AMDSMICommands():
|
||||
args.dpm_policy = dpm_policy
|
||||
if xgmi_plpd:
|
||||
args.xgmi_plpd = xgmi_plpd
|
||||
if process_isolation:
|
||||
args.process_isolation = process_isolation
|
||||
if clear_sram_data:
|
||||
args.clear_sram_data = clear_sram_data
|
||||
# Handle No GPU passed
|
||||
if args.gpu == None:
|
||||
raise ValueError('No GPU provided, specific GPU target(s) are needed')
|
||||
@@ -3389,9 +3412,11 @@ class AMDSMICommands():
|
||||
args.compute_partition,
|
||||
args.memory_partition,
|
||||
args.perf_determinism is not None,
|
||||
args.power_cap,
|
||||
args.dpm_policy,
|
||||
args.xgmi_plpd]):
|
||||
args.power_cap is not None,
|
||||
args.dpm_policy is not None,
|
||||
args.xgmi_plpd is not None,
|
||||
args.process_isolation is not None,
|
||||
args.clear_sram_data]):
|
||||
command = " ".join(sys.argv[1:])
|
||||
raise AmdSmiRequiredCommandException(command, self.logger.format)
|
||||
|
||||
@@ -3455,25 +3480,6 @@ class AMDSMICommands():
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set memory partition to {args.memory_partition} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'memorypartition', f"Successfully set memory partition to {args.memory_partition}")
|
||||
|
||||
if args.dpm_policy:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_dpm_policy(args.gpu, args.dpm_policy)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set dpm policy to {args.dpm_policy} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'dpmpolicy', f"Successfully set dpm policy to id {args.dpm_policy}")
|
||||
|
||||
if args.xgmi_plpd:
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_xgmi_plpd(args.gpu, args.xgmi_plpd)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set XGMI policy to {args.xgmi_plpd} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'xgmiplpd', f"Successfully set per-link power down policy to id {args.dpm_policy}")
|
||||
|
||||
if isinstance(args.power_cap, int):
|
||||
try:
|
||||
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
|
||||
@@ -3499,6 +3505,48 @@ class AMDSMICommands():
|
||||
if min_power_cap == 0:
|
||||
min_power_cap = 1
|
||||
self.logger.store_output(args.gpu, 'powercap', f"Power cap must be between {min_power_cap} and {max_power_cap}")
|
||||
if isinstance(args.dpm_policy, int):
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_dpm_policy(args.gpu, args.dpm_policy)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set dpm policy to {args.dpm_policy} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'dpmpolicy', f"Successfully set dpm policy to id {args.dpm_policy}")
|
||||
if isinstance(args.xgmi_plpd, int):
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_xgmi_plpd(args.gpu, args.xgmi_plpd)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set XGMI policy to {args.xgmi_plpd} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'xgmiplpd', f"Successfully set per-link power down policy to id {args.dpm_policy}")
|
||||
if isinstance(args.process_isolation, int):
|
||||
status_string = "Enabled" if args.process_isolation else "Disabled"
|
||||
result = f"Requested process isolation to {status_string}" # This should not print out
|
||||
try:
|
||||
current_status = amdsmi_interface.amdsmi_get_gpu_process_isolation(args.gpu)
|
||||
if current_status == args.process_isolation:
|
||||
result = f"Process isolation is already {status_string}"
|
||||
else:
|
||||
amdsmi_interface.amdsmi_set_gpu_process_isolation(args.gpu, args.process_isolation)
|
||||
result = f"Successfully set process isolation to {status_string}"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set process isolation to {status_string} on {gpu_string}") from e
|
||||
|
||||
self.logger.store_output(args.gpu, 'process_isolation', result)
|
||||
if args.clear_sram_data:
|
||||
try:
|
||||
# Only 1 can be used for now.
|
||||
amdsmi_interface.amdsmi_set_gpu_clear_sram_data(args.gpu, 1)
|
||||
result = 'Successfully clear GPU SRAM data'
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to clear SRAM data on GPU {gpu_id}") from e
|
||||
self.logger.store_output(args.gpu, 'clear_sram_data', result)
|
||||
|
||||
if multiple_devices:
|
||||
self.logger.store_multiple_device_output()
|
||||
@@ -3513,7 +3561,8 @@ class AMDSMICommands():
|
||||
cpu=None, cpu_pwr_limit=None, cpu_xgmi_link_width=None, cpu_lclk_dpm_level=None,
|
||||
cpu_pwr_eff_mode=None, cpu_gmi3_link_width=None, cpu_pcie_link_rate=None,
|
||||
cpu_df_pstate_range=None, cpu_enable_apb=None, cpu_disable_apb=None,
|
||||
soc_boost_limit=None, core=None, core_boost_limit=None, dpm_policy=None, xgmi_plpd=None):
|
||||
soc_boost_limit=None, core=None, core_boost_limit=None, dpm_policy=None, xgmi_plpd=None,
|
||||
process_isolation=None, clear_sram_data=None):
|
||||
"""Issue reset commands to target gpu(s)
|
||||
|
||||
Args:
|
||||
@@ -3544,7 +3593,8 @@ class AMDSMICommands():
|
||||
core_boost_limit (int, optional): Value override for args.core_boost_limit. Defaults to None
|
||||
dpm_policy (int, optional): Value override for args.dpm_policy. Defaults to None.
|
||||
xgmi_plpd (int, optional): Value override for args.xgmi_plpd. Defaults to None.
|
||||
|
||||
process_isolation (int, optional): Value override for args.process_isolation. Defaults to None.
|
||||
clear_sram_data (int, optional): Value override for args.clear_sram_data. Defaults to None.
|
||||
Raises:
|
||||
ValueError: Value error if no gpu value is provided
|
||||
IndexError: Index error if gpu list is empty
|
||||
@@ -3564,7 +3614,8 @@ class AMDSMICommands():
|
||||
# Check if a GPU argument has been set
|
||||
gpu_args_enabled = False
|
||||
gpu_attributes = ["fan", "perf_level", "profile", "perf_determinism", "compute_partition",
|
||||
"memory_partition", "power_cap", "dpm_policy", "xgmi_plpd"]
|
||||
"memory_partition", "power_cap", "dpm_policy", "xgmi_plpd", "process_isolation",
|
||||
"clear_sram_data"]
|
||||
for attr in gpu_attributes:
|
||||
if hasattr(args, attr):
|
||||
if getattr(args, attr) is not None:
|
||||
@@ -3620,7 +3671,8 @@ class AMDSMICommands():
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
self.set_gpu(args, multiple_devices, gpu, fan, perf_level,
|
||||
profile, perf_determinism, compute_partition,
|
||||
memory_partition, power_cap, dpm_policy, xgmi_plpd)
|
||||
memory_partition, power_cap, dpm_policy, xgmi_plpd,
|
||||
process_isolation, clear_sram_data)
|
||||
elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized
|
||||
if args.cpu == None and args.core == None:
|
||||
raise ValueError('No CPU or CORE provided, specific target(s) are needed')
|
||||
@@ -3639,7 +3691,8 @@ class AMDSMICommands():
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
self.set_gpu(args, multiple_devices, gpu, fan, perf_level,
|
||||
profile, perf_determinism, compute_partition,
|
||||
memory_partition, power_cap, dpm_policy, xgmi_plpd)
|
||||
memory_partition, power_cap, dpm_policy, xgmi_plpd,
|
||||
process_isolation, clear_sram_data)
|
||||
|
||||
|
||||
def reset(self, args, multiple_devices=False, gpu=None, gpureset=None,
|
||||
@@ -3660,7 +3713,6 @@ class AMDSMICommands():
|
||||
compute_partition (bool, optional): Value override for args.compute_partition. Defaults to None.
|
||||
memory_partition (bool, optional): Value override for args.memory_partition. Defaults to None.
|
||||
power_cap (int, optional): Value override for args.power_cap. Defaults to None.
|
||||
|
||||
Raises:
|
||||
ValueError: Value error if no gpu value is provided
|
||||
IndexError: Index error if gpu list is empty
|
||||
|
||||
@@ -545,6 +545,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
board_help = "All board information"
|
||||
dpm_policy_help = "The available DPM policy"
|
||||
xgmi_plpd_help = "The available XGMI per-link power down policy"
|
||||
process_isolation_help = "The process isolation status"
|
||||
|
||||
# Options arguments help text for Hypervisors and Baremetal
|
||||
ras_help = "Displays RAS features information"
|
||||
@@ -586,6 +587,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
static_parser.add_argument('-l', '--limit', action='store_true', required=False, help=limit_help)
|
||||
static_parser.add_argument('-P', '--policy', action='store_true', required=False, help=dpm_policy_help)
|
||||
static_parser.add_argument('-x', '--xgmi-plpd', action='store_true', required=False, help=xgmi_plpd_help)
|
||||
static_parser.add_argument('-R', '--process-isolation', action='store_true', required=False, help=process_isolation_help)
|
||||
|
||||
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
|
||||
static_parser.add_argument('-u', '--numa', action='store_true', required=False, help=numa_help)
|
||||
@@ -967,8 +969,9 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_compute_partition_help = f"Set one of the following the compute partition modes:\n\t{compute_partition_choices_str}"
|
||||
set_memory_partition_help = f"Set one of the following the memory partition modes:\n\t{memory_partition_choices_str}"
|
||||
set_power_cap_help = "Set power capacity limit"
|
||||
set_dpm_policy_help = f"Set the GPU DPM policy using policy id\n"
|
||||
set_xgmi_plpd_help = f"Set the GPU XGMI per-link power down policy using policy id\n"
|
||||
set_dpm_policy_help = "Set the GPU DPM policy using policy id\n"
|
||||
set_xgmi_plpd_help = "Set the GPU XGMI per-link power down policy using policy id\n"
|
||||
set_process_isolation_help = "Enable or disable the GPU process isolation: 0 for disable and 1 for enable.\n"
|
||||
|
||||
# Help text for CPU set options
|
||||
set_cpu_pwr_limit_help = "Set power limit for the given socket. Input parameter is power limit value."
|
||||
@@ -982,6 +985,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_cpu_enable_apb_help = "Enables the DF p-state performance boost algorithm"
|
||||
set_cpu_disable_apb_help = "Disables the DF p-state performance boost algorithm. Input parameter is DFPstate (0-3)"
|
||||
set_soc_boost_limit_help = "Sets the boost limit for the given socket. Input parameter is socket BOOST_LIMIT value"
|
||||
run_gpu_clear_sram_data_help = f"Clear the GPU SRAM data\n"
|
||||
|
||||
# Help text for CPU Core set options
|
||||
set_core_boost_limit_help = "Sets the boost limit for the given core. Input parameter is core BOOST_LIMIT value"
|
||||
@@ -1006,6 +1010,8 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_value_parser.add_argument('-o', '--power-cap', action='store', type=self._positive_int, required=False, help=set_power_cap_help, metavar='WATTS')
|
||||
set_value_parser.add_argument('-p', '--dpm-policy', action='store', required=False, type=self._not_negative_int, help=set_dpm_policy_help, metavar='POLICY_ID')
|
||||
set_value_parser.add_argument('-x', '--xgmi-plpd', action='store', required=False, type=self._not_negative_int, help=set_xgmi_plpd_help, metavar='POLICY_ID')
|
||||
set_value_parser.add_argument('-R', '--process-isolation', action='store', choices=[0,1], type=self._not_negative_int, required=False, help=set_process_isolation_help, metavar='STATUS')
|
||||
set_value_parser.add_argument('-c', '--clear-sram-data', action='store_true', required=False, help=run_gpu_clear_sram_data_help)
|
||||
|
||||
if self.helpers.is_amd_hsmp_initialized():
|
||||
# Optional CPU Args
|
||||
|
||||
@@ -3455,6 +3455,68 @@ amdsmi_status_t amdsmi_get_xgmi_plpd(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_status_t amdsmi_set_xgmi_plpd(amdsmi_processor_handle processor_handle,
|
||||
uint32_t plpd_id);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Get the status of the Process Isolation
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{guest_1vf}
|
||||
*
|
||||
* @details Given a processor handle @p processor_handle, this function will write
|
||||
* the current process isolation status to @p pisolate. A value of 0 means process
|
||||
* isolation is disabled, and a value of 1 means it is enabled.
|
||||
*
|
||||
* @param[in] processor_handle a processor handle
|
||||
*
|
||||
* @param[in, out] pisolate the process isolation status.
|
||||
* If this parameter is nullptr, this function will return
|
||||
* ::AMDSMI_STATUS_INVAL
|
||||
*
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t amdsmi_get_gpu_process_isolation(amdsmi_processor_handle processor_handle,
|
||||
uint32_t* pisolate);
|
||||
|
||||
/**
|
||||
* @brief Enable/disable the system Process Isolation
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{guest_1vf}
|
||||
*
|
||||
* @details Given a processor handle @p processor_handle and a process isolation flag
|
||||
* @p pisolate, this function will set the process isolation for this processor. A value
|
||||
* of 0 disables process isolation, and a value of 1 enables it.
|
||||
*
|
||||
* @note This function requires root access
|
||||
*
|
||||
* @param[in] processor_handle a processor handle
|
||||
*
|
||||
* @param[in] pisolate the process isolation status to set.
|
||||
*
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle processor_handle,
|
||||
uint32_t pisolate);
|
||||
|
||||
/**
|
||||
* @brief Clear the GPU SRAM data
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{guest_1vf}
|
||||
*
|
||||
* @details Given a processor handle @p processor_handle, and a sclean flag @p sclean,
|
||||
* this function will clear the SRAM data of this processor. This can be called between
|
||||
* user logins to prevent information leak.
|
||||
*
|
||||
* @note This function requires root access
|
||||
*
|
||||
* @param[in] processor_handle a processor handle
|
||||
*
|
||||
* @param[in] sclean the clean flag. Only 1 takes effect; all other values
|
||||
* are reserved for future use.
|
||||
*
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t amdsmi_set_gpu_clear_sram_data(amdsmi_processor_handle processor_handle,
|
||||
uint32_t sclean);
|
||||
|
||||
/** @} End PerfCont */
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
@@ -1963,6 +1963,98 @@ except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
### amdsmi_get_gpu_process_isolation
|
||||
|
||||
Description: Get the status of the Process Isolation
|
||||
|
||||
Input parameters:
|
||||
|
||||
* `processor_handle` handle for the given device
|
||||
|
||||
Output: integer corresponding to isolation_status; 0 - disabled, 1 - enabled
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_get_gpu_process_isolation` function:
|
||||
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_processor_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
isolate = amdsmi_get_gpu_process_isolation(device)
|
||||
print("Process Isolation Status: ", isolate)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
### amdsmi_set_gpu_process_isolation
|
||||
Description: Enable/disable the system Process Isolation for the given device handle.
|
||||
|
||||
Input parameters:
|
||||
|
||||
* `processor_handle` handle for the given device
|
||||
* `pisolate` the process isolation status to set: 0 disables process isolation, and 1 enables it.
|
||||
|
||||
Output: None
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_set_gpu_process_isolation` function:
|
||||
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_processor_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_set_gpu_process_isolation(device, 1)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
### amdsmi_set_gpu_clear_sram_data
|
||||
Description: Clear the SRAM data of the given device. This can be called between user logins to prevent information leak.
|
||||
|
||||
Input parameters:
|
||||
|
||||
* `processor_handle` handle for the given device
|
||||
* `sclean` the clean flag. Only 1 takes effect; all other values are reserved for future use.
|
||||
|
||||
Output: None
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_set_gpu_clear_sram_data` function:
|
||||
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_processor_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_set_gpu_clear_sram_data(device, 1)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
|
||||
### amdsmi_get_gpu_overdrive_level
|
||||
|
||||
Description: Get the overdrive percent associated with the device with provided
|
||||
@@ -2602,6 +2694,75 @@ except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
### amdsmi_get_dpm_policy
|
||||
|
||||
Description: Get dpm policy information.
|
||||
|
||||
Input parameters:
|
||||
|
||||
* `processor_handle` handle for the given device
|
||||
* `policy_id` the policy id to set.
|
||||
|
||||
Output: Dictionary with fields
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`num_supported` | total number of supported policies
|
||||
`current_id` | current policy id
|
||||
`policies` | list of dictionaries containing possible policies
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_get_dpm_policy` function:
|
||||
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_processor_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
dpm_policies = amdsmi_get_dpm_policy(device)
|
||||
print(dpm_policies)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
### amdsmi_set_dpm_policy
|
||||
|
||||
Description: Set the DPM policy to the given policy_id. Typical values are 0 (default), 1, 2, and 3.
|
||||
|
||||
Input parameters:
|
||||
|
||||
* `processor_handle` handle for the given device
|
||||
* `policy_id` the policy id to set.
|
||||
|
||||
Output: None
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_set_dpm_policy` function:
|
||||
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
try:
|
||||
devices = amdsmi_get_processor_handles()
|
||||
if len(devices) == 0:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_set_dpm_policy(device, 0)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
### amdsmi_set_xgmi_plpd
|
||||
|
||||
Description: Set the xgmi per-link power down policy parameter for the processor
|
||||
|
||||
@@ -2734,6 +2734,7 @@ def amdsmi_set_clk_freq(
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def amdsmi_set_dpm_policy(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
|
||||
policy_id: int,
|
||||
@@ -2748,6 +2749,7 @@ def amdsmi_set_dpm_policy(
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def amdsmi_set_xgmi_plpd(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
|
||||
policy_id: int,
|
||||
@@ -2762,6 +2764,37 @@ def amdsmi_set_xgmi_plpd(
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def amdsmi_set_gpu_process_isolation(
    processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
    pisolate: int,
):
    """Enable or disable process isolation on the given processor.

    Args:
        processor_handle: handle of the target device.
        pisolate: value forwarded unchanged to the library call; the C header
            documents 0 as "disabled" and 1 as "enabled".

    Raises:
        AmdSmiParameterException: if ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_set_gpu_process_isolation(processor_handle, pisolate)
    # NOTE(review): _check_res is defined outside this view; presumably it
    # raises on a non-success status - confirm.
    _check_res(ret)
|
||||
|
||||
|
||||
def amdsmi_set_gpu_clear_sram_data(
    processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
    sclean: int,
):
    """Ask the library to clear the SRAM data of the given processor.

    Args:
        processor_handle: handle of the target device.
        sclean: clean flag forwarded unchanged to the library call; the C
            header states that only 1 takes effect.

    Raises:
        AmdSmiParameterException: if ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_set_gpu_clear_sram_data(processor_handle, sclean)
    # NOTE(review): _check_res is defined outside this view; presumably it
    # raises on a non-success status - confirm.
    _check_res(ret)
|
||||
|
||||
|
||||
def amdsmi_set_gpu_overdrive_level(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle, overdrive_value: int
|
||||
):
|
||||
@@ -2793,6 +2826,7 @@ def amdsmi_get_gpu_bdf_id(processor_handle: amdsmi_wrapper.amdsmi_processor_hand
|
||||
|
||||
return bdfid.value
|
||||
|
||||
|
||||
def amdsmi_set_gpu_pci_bandwidth(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle, bitmask: int
|
||||
) -> None:
|
||||
@@ -3089,7 +3123,6 @@ def amdsmi_set_gpu_od_volt_info(
|
||||
)
|
||||
|
||||
|
||||
|
||||
def amdsmi_get_gpu_fan_rpms(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle, sensor_idx: int
|
||||
) -> int:
|
||||
@@ -3320,6 +3353,7 @@ def amdsmi_get_clk_freq(
|
||||
"frequency": list(freq.frequency)[: freq.num_supported - 1],
|
||||
}
|
||||
|
||||
|
||||
def amdsmi_get_dpm_policy(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
|
||||
) -> Dict[str, Any]:
|
||||
@@ -3351,6 +3385,7 @@ def amdsmi_get_dpm_policy(
|
||||
"policies": polices,
|
||||
}
|
||||
|
||||
|
||||
def amdsmi_get_xgmi_plpd(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
|
||||
) -> Dict[str, Any]:
|
||||
@@ -3382,6 +3417,25 @@ def amdsmi_get_xgmi_plpd(
|
||||
"plpds": polices,
|
||||
}
|
||||
|
||||
|
||||
def amdsmi_get_gpu_process_isolation(
    processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
) -> int:
    """Return the process isolation status of a GPU.

    Parameters:
        processor_handle: handle of the GPU to query.

    Returns:
        The unsigned 32-bit value written by the native
        ``amdsmi_get_gpu_process_isolation`` call. The native header
        describes 0 as isolation disabled and 1 as isolation enabled.

    Raises:
        AmdSmiParameterException: if ``processor_handle`` is not an
            ``amdsmi_processor_handle``.

    The status returned by the native call is passed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Output slot filled in by the native library through a pointer.
    isolation_status = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_process_isolation(
        processor_handle, ctypes.byref(isolation_status)
    )
    _check_res(status)

    return isolation_status.value
|
||||
|
||||
|
||||
def amdsmi_get_gpu_od_volt_info(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
|
||||
) -> Dict[str, Any]:
|
||||
|
||||
@@ -2076,6 +2076,15 @@ amdsmi_get_xgmi_plpd.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_
|
||||
amdsmi_set_xgmi_plpd = _libraries['libamd_smi.so'].amdsmi_set_xgmi_plpd
|
||||
amdsmi_set_xgmi_plpd.restype = amdsmi_status_t
|
||||
amdsmi_set_xgmi_plpd.argtypes = [amdsmi_processor_handle, uint32_t]
|
||||
# Process isolation query: writes the status through a uint32 pointer.
amdsmi_get_gpu_process_isolation = _libraries['libamd_smi.so'].amdsmi_get_gpu_process_isolation
amdsmi_get_gpu_process_isolation.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32)]
amdsmi_get_gpu_process_isolation.restype = amdsmi_status_t

# Process isolation control: takes the requested status by value.
amdsmi_set_gpu_process_isolation = _libraries['libamd_smi.so'].amdsmi_set_gpu_process_isolation
amdsmi_set_gpu_process_isolation.argtypes = [amdsmi_processor_handle, uint32_t]
amdsmi_set_gpu_process_isolation.restype = amdsmi_status_t

# SRAM clear request: takes the clean flag by value.
amdsmi_set_gpu_clear_sram_data = _libraries['libamd_smi.so'].amdsmi_set_gpu_clear_sram_data
amdsmi_set_gpu_clear_sram_data.argtypes = [amdsmi_processor_handle, uint32_t]
amdsmi_set_gpu_clear_sram_data.restype = amdsmi_status_t
|
||||
amdsmi_get_lib_version = _libraries['libamd_smi.so'].amdsmi_get_lib_version
|
||||
amdsmi_get_lib_version.restype = amdsmi_status_t
|
||||
amdsmi_get_lib_version.argtypes = [ctypes.POINTER(struct_amdsmi_version_t)]
|
||||
@@ -2589,7 +2598,7 @@ __all__ = \
|
||||
'amdsmi_get_gpu_pci_throughput', 'amdsmi_get_gpu_perf_level',
|
||||
'amdsmi_get_gpu_pm_metrics_info',
|
||||
'amdsmi_get_gpu_power_profile_presets',
|
||||
'amdsmi_get_gpu_process_list',
|
||||
'amdsmi_get_gpu_process_isolation', 'amdsmi_get_gpu_process_list',
|
||||
'amdsmi_get_gpu_ras_block_features_enabled',
|
||||
'amdsmi_get_gpu_ras_feature_info',
|
||||
'amdsmi_get_gpu_reg_table_info', 'amdsmi_get_gpu_revision',
|
||||
@@ -2646,18 +2655,19 @@ __all__ = \
|
||||
'amdsmi_set_cpu_socket_boostlimit',
|
||||
'amdsmi_set_cpu_socket_lclk_dpm_level',
|
||||
'amdsmi_set_cpu_socket_power_cap', 'amdsmi_set_cpu_xgmi_width',
|
||||
'amdsmi_set_dpm_policy', 'amdsmi_set_gpu_clk_range',
|
||||
'amdsmi_set_gpu_compute_partition',
|
||||
'amdsmi_set_dpm_policy', 'amdsmi_set_gpu_clear_sram_data',
|
||||
'amdsmi_set_gpu_clk_range', 'amdsmi_set_gpu_compute_partition',
|
||||
'amdsmi_set_gpu_event_notification_mask',
|
||||
'amdsmi_set_gpu_fan_speed', 'amdsmi_set_gpu_memory_partition',
|
||||
'amdsmi_set_gpu_od_clk_info', 'amdsmi_set_gpu_od_volt_info',
|
||||
'amdsmi_set_gpu_overdrive_level', 'amdsmi_set_gpu_pci_bandwidth',
|
||||
'amdsmi_set_gpu_perf_determinism_mode',
|
||||
'amdsmi_set_gpu_perf_level', 'amdsmi_set_gpu_power_profile',
|
||||
'amdsmi_set_power_cap', 'amdsmi_set_xgmi_plpd',
|
||||
'amdsmi_shut_down', 'amdsmi_smu_fw_version_t',
|
||||
'amdsmi_socket_handle', 'amdsmi_status_code_to_string',
|
||||
'amdsmi_status_t', 'amdsmi_stop_gpu_event_notification',
|
||||
'amdsmi_set_gpu_process_isolation', 'amdsmi_set_power_cap',
|
||||
'amdsmi_set_xgmi_plpd', 'amdsmi_shut_down',
|
||||
'amdsmi_smu_fw_version_t', 'amdsmi_socket_handle',
|
||||
'amdsmi_status_code_to_string', 'amdsmi_status_t',
|
||||
'amdsmi_stop_gpu_event_notification',
|
||||
'amdsmi_temp_range_refresh_rate_t', 'amdsmi_temperature_metric_t',
|
||||
'amdsmi_temperature_type_t', 'amdsmi_topo_get_link_type',
|
||||
'amdsmi_topo_get_link_weight', 'amdsmi_topo_get_numa_node_number',
|
||||
|
||||
@@ -3362,7 +3362,7 @@ rsmi_status_t rsmi_dev_dpm_policy_get(uint32_t dv_ind,
|
||||
*
|
||||
* @note This function requires root access
|
||||
*
|
||||
* @param[in] processor_handle a processor handle
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] policy_id the dpm policy will be modified
|
||||
*
|
||||
@@ -3410,6 +3410,61 @@ rsmi_status_t rsmi_dev_xgmi_plpd_get(uint32_t dv_ind,
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_xgmi_plpd_set(uint32_t dv_ind,
|
||||
uint32_t plpd_id);
|
||||
|
||||
/**
|
||||
* @brief Get the status of the Process Isolation
|
||||
*
|
||||
* @details Given a device index @p dv_ind, this function will write the
* current process isolation status to @p pisolate. A value of 0 means process
* isolation is disabled, and a value of 1 means it is enabled.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in, out] pisolate the process isolation status.
|
||||
* If this parameter is nullptr, this function will return
|
||||
* ::RSMI_STATUS_INVAL
|
||||
*
|
||||
* @return ::RSMI_STATUS_SUCCESS is returned upon successful call, non-zero on fail
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_process_isolation_get(uint32_t dv_ind,
|
||||
uint32_t* pisolate);
|
||||
|
||||
/**
|
||||
* @brief Enable/disable the system Process Isolation
|
||||
*
|
||||
* @details Given a device index @p dv_ind and a process isolation flag
* @p pisolate, this function will set the process isolation status for this
* device. A value of 0 disables process isolation, and a value of 1 enables it.
|
||||
*
|
||||
* @note This function requires root access
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] pisolate the process isolation status to set.
|
||||
*
|
||||
* @return ::RSMI_STATUS_SUCCESS is returned upon successful call, non-zero on fail
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind,
|
||||
uint32_t pisolate);
|
||||
|
||||
/**
|
||||
* @brief Clear the GPU SRAM data
|
||||
*
|
||||
*
|
||||
* @details Given a device index @p dv_ind, this function will clear the
|
||||
* GPU SRAM data of this device. This can be called between user logins to prevent information leak.
|
||||
*
|
||||
* @note This function requires root access
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] sclean the clean flag. Only 1 will take effect; other values
* are reserved for future use.
|
||||
*
|
||||
* @return ::RSMI_STATUS_SUCCESS is returned upon successful call, non-zero on fail
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_gpu_clear_sram_data(uint32_t dv_ind, uint32_t sclean);
|
||||
|
||||
/** @} */ // end of PerfCont
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
@@ -101,6 +101,8 @@ enum DevKFDNodePropTypes {
|
||||
|
||||
enum DevInfoTypes {
|
||||
kDevPerfLevel,
|
||||
kDevProcessIsolation,
|
||||
kDevShaderClean,
|
||||
kDevOverDriveLevel,
|
||||
kDevMemOverDriveLevel,
|
||||
kDevDevID,
|
||||
@@ -222,6 +224,7 @@ class Device {
|
||||
void set_drm_render_minor(uint32_t minor) {drm_render_minor_ = minor;}
|
||||
static rsmi_dev_perf_level perfLvlStrToEnum(std::string s);
|
||||
uint64_t bdfid(void) const {return bdfid_;}
|
||||
int get_partition_id() const {return (bdfid_ >> 28) & 0xf; } // location_id[31:28]
|
||||
void set_bdfid(uint64_t val) {bdfid_ = val;}
|
||||
pthread_mutex_t *mutex(void) {return mutex_.ptr;}
|
||||
evt::dev_evt_grp_set_t* supported_event_groups(void) {
|
||||
|
||||
@@ -1974,6 +1974,121 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
|
||||
}
|
||||
|
||||
|
||||
// Reads the enforce_isolation sysfs attribute of device dv_ind and writes the
// enable flag recorded for this device's partition to *pisolate.
//
// Returns RSMI_STATUS_SUCCESS when a "<partition_id> <flag>" line matching the
// device's partition id is found and parsed, RSMI_STATUS_NOT_SUPPORTED when
// the attribute cannot be read (file error), RSMI_STATUS_UNEXPECTED_DATA when
// the matching line carries a non-integer flag, RSMI_STATUS_NOT_FOUND when no
// line matches the partition id, and any other status reported by
// GetDevValueVec() unchanged.
rsmi_status_t rsmi_dev_process_isolation_get(uint32_t dv_ind,
                                             uint32_t* pisolate) {
  // Same TRY/CATCH guard as the sibling setters in this file, so exceptions
  // are handled the same way for the getter.
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << "| ======= start ======= dev_ind:"
     << dv_ind;
  LOG_TRACE(ss);
  CHK_SUPPORT_NAME_ONLY(pisolate)
  DEVICE_MUTEX

  // The enforce_isolation sysfs is in this format: <partition_id enable_flag>.
  // Get the partition_id. For SPX, the partition_id will be 0.
  const int partition_id = dev->get_partition_id();

  std::vector<std::string> val_vec;
  rsmi_status_t ret =
      GetDevValueVec(amd::smi::kDevProcessIsolation, dv_ind, &val_vec);
  if (ret == RSMI_STATUS_FILE_ERROR) {
    ss << __PRETTY_FUNCTION__ << " | ======= end ======="
       << ", GetDevValueVec() ret was RSMI_STATUS_FILE_ERROR "
       << "-> reporting RSMI_STATUS_NOT_SUPPORTED";
    LOG_ERROR(ss);
    return RSMI_STATUS_NOT_SUPPORTED;
  }
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__ << " | ======= end ======="
       << ", GetDevValueVec() ret was not RSMI_STATUS_SUCCESS"
       << " -> reporting " << amd::smi::getRSMIStatusString(ret);
    LOG_ERROR(ss);
    return ret;
  }

  /*
    For TPX system where partition0 is enabled, but partition1 and partition2
    are disabled, it will be in this format:
    0 1
    1 0
    2 0
  */
  for (auto& raw_line : val_vec) {
    auto current_line = amd::smi::trim(raw_line);

    // Split on any run of whitespace so tabs or repeated spaces between the
    // two fields do not produce empty or merged tokens.
    std::vector<std::string> tokens;
    std::istringstream line_stream(current_line);
    for (std::string token; line_stream >> token;) {
      tokens.push_back(token);
    }
    if (tokens.size() != 2) {
      continue;  // not a <integer> <integer> line
    }

    int cur_part_id = 0;
    if (!amd::smi::stringToInteger(tokens[0], cur_part_id) ||
        cur_part_id != partition_id) {
      continue;  // unparsable id or a different partition
    }

    int isolate_status = 0;
    if (!amd::smi::stringToInteger(tokens[1], isolate_status)) {
      ss << __PRETTY_FUNCTION__ << " | ======= end ======="
         << ", the sysfs line " << current_line
         << " should be in <integer> <integer> format";
      LOG_ERROR(ss);
      return RSMI_STATUS_UNEXPECTED_DATA;
    }

    *pisolate = static_cast<uint32_t>(isolate_status);
    return RSMI_STATUS_SUCCESS;
  }

  ss << __PRETTY_FUNCTION__ << " | ======= end ======="
     << ", cannot find the partition_id " << partition_id
     << " from sysfs";
  LOG_ERROR(ss);
  return RSMI_STATUS_NOT_FOUND;
  CATCH
}
|
||||
|
||||
// Writes "<partition_id> <pisolate>" to the enforce_isolation sysfs attribute
// of device dv_ind and returns the write result converted from an errno value
// to an rsmi_status_t.
rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind,
                                             uint32_t pisolate) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======";
  LOG_TRACE(ss);
  REQUIRE_ROOT_ACCESS
  DEVICE_MUTEX
  GET_DEV_FROM_INDX

  // The enforce_isolation sysfs is in this format: <partition_id enable_flag>.
  // The smi will always pass partition_id. For SPX, the partition_id will be 0.
  const int partition_id = dev->get_partition_id();
  const std::string value =
      std::to_string(partition_id) + " " + std::to_string(pisolate);

  // writeDevInfo() reports an errno-style int; translate it for the caller.
  const int write_err =
      dev->writeDevInfo(amd::smi::kDevProcessIsolation, value);
  return amd::smi::ErrnoToRsmiStatus(write_err);

  CATCH
}
|
||||
|
||||
// Writes the decimal value of sclean to the run_cleaner_shader sysfs attribute
// of device dv_ind and returns the write result converted from an errno value
// to an rsmi_status_t.
rsmi_status_t rsmi_dev_gpu_clear_sram_data(uint32_t dv_ind,
                                           uint32_t sclean) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======";
  LOG_TRACE(ss);
  REQUIRE_ROOT_ACCESS
  DEVICE_MUTEX
  GET_DEV_FROM_INDX

  const std::string value = std::to_string(sclean);

  // writeDevInfo() reports an errno-style int; translate it for the caller.
  const int write_err = dev->writeDevInfo(amd::smi::kDevShaderClean, value);
  return amd::smi::ErrnoToRsmiStatus(write_err);

  CATCH
}
|
||||
|
||||
rsmi_status_t
|
||||
rsmi_dev_dpm_policy_set(uint32_t dv_ind,
|
||||
uint32_t policy_id) {
|
||||
|
||||
@@ -82,6 +82,8 @@ static const char *kDevPCieVendorIDFName = "vendor";
|
||||
|
||||
// Device sysfs file names
|
||||
static const char *kDevPerfLevelFName = "power_dpm_force_performance_level";
|
||||
static const char *kDevProcessIsolationFName = "enforce_isolation";
|
||||
static const char *kDevShaderCleanFName = "run_cleaner_shader";
|
||||
static const char *kDevDevProdNameFName = "product_name";
|
||||
static const char *kDevDevProdNumFName = "product_number";
|
||||
static const char *kDevDevIDFName = "device";
|
||||
@@ -317,6 +319,8 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
|
||||
{kDevGpuMetrics, kDevGpuMetricsFName},
|
||||
{kDevPmMetrics, kDevPmMetricsFName},
|
||||
{kDevDPMPolicy, kDevDPMPolicyFName},
|
||||
{kDevProcessIsolation, kDevProcessIsolationFName},
|
||||
{kDevShaderClean, kDevShaderCleanFName},
|
||||
{kDevRegMetrics, kDevRegMetricsFName},
|
||||
{kDevGpuReset, kDevGpuResetFName},
|
||||
{kDevAvailableComputePartition, kDevAvailableComputePartitionFName},
|
||||
@@ -475,6 +479,8 @@ Device::devInfoTypesStrings = {
|
||||
{kDevMemoryPartition, "kDevMemoryPartition"},
|
||||
{kDevPCieVendorID, "kDevPCieVendorID"},
|
||||
{kDevDPMPolicy, "kDevDPMPolicy"},
|
||||
{kDevProcessIsolation, "kDevProcessIsolation"},
|
||||
{kDevShaderClean, "kDevShaderClean"},
|
||||
};
|
||||
|
||||
static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
|
||||
@@ -516,6 +522,9 @@ static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
|
||||
{"rsmi_dev_perf_level_set", {{kDevPerfLevelFName}, {}}},
|
||||
{"rsmi_dev_perf_level_set_v1", {{kDevPerfLevelFName}, {}}},
|
||||
{"rsmi_dev_perf_level_get", {{kDevPerfLevelFName}, {}}},
|
||||
{"rsmi_dev_process_isolation_set", {{kDevProcessIsolationFName}, {}}},
|
||||
{"rsmi_dev_process_isolation_get", {{kDevProcessIsolationFName}, {}}},
|
||||
{"rsmi_dev_gpu_shader_clean", {{kDevShaderCleanFName}, {}}},
|
||||
{"rsmi_perf_determinism_mode_set", {{kDevPerfLevelFName,
|
||||
kDevPowerODVoltageFName}, {}}},
|
||||
{"rsmi_dev_overdrive_level_set", {{kDevOverDriveLevelFName}, {}}},
|
||||
@@ -939,6 +948,8 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
|
||||
sysfs_path += kDevAttribNameMap.at(type);
|
||||
switch (type) {
|
||||
case kDevGPUMClk:
|
||||
case kDevProcessIsolation:
|
||||
case kDevShaderClean:
|
||||
case kDevDCEFClk:
|
||||
case kDevFClk:
|
||||
case kDevGPUSClk:
|
||||
@@ -1212,6 +1223,7 @@ int Device::readDevInfo(DevInfoTypes type, std::vector<std::string> *val) {
|
||||
|
||||
switch (type) {
|
||||
case kDevGPUMClk:
|
||||
case kDevProcessIsolation:
|
||||
case kDevGPUSClk:
|
||||
case kDevDCEFClk:
|
||||
case kDevFClk:
|
||||
@@ -1279,6 +1291,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
|
||||
case kDevMemoryPartition:
|
||||
case kDevNumaNode:
|
||||
case kDevXGMIPhysicalID:
|
||||
case kDevProcessIsolation:
|
||||
return readDevInfoStr(type, val);
|
||||
break;
|
||||
|
||||
|
||||
@@ -1385,6 +1385,30 @@ amdsmi_status_t amdsmi_get_xgmi_plpd(amdsmi_processor_handle processor_handle,
|
||||
reinterpret_cast<rsmi_dpm_policy_t*>(policy));
|
||||
}
|
||||
|
||||
// Forwards a process isolation query for processor_handle to
// rsmi_dev_process_isolation_get() through rsmi_wrapper(); the status is
// written to *pisolate by the callee.
amdsmi_status_t amdsmi_get_gpu_process_isolation(
    amdsmi_processor_handle processor_handle, uint32_t* pisolate) {
  AMDSMI_CHECK_INIT();

  const auto status = rsmi_wrapper(rsmi_dev_process_isolation_get,
                                   processor_handle, pisolate);
  return status;
}
|
||||
|
||||
// Forwards a process isolation change for processor_handle to
// rsmi_dev_process_isolation_set() through rsmi_wrapper().
amdsmi_status_t amdsmi_set_gpu_process_isolation(
    amdsmi_processor_handle processor_handle, uint32_t pisolate) {
  AMDSMI_CHECK_INIT();

  const auto status = rsmi_wrapper(rsmi_dev_process_isolation_set,
                                   processor_handle, pisolate);
  return status;
}
|
||||
|
||||
// Forwards an SRAM clear request for processor_handle to
// rsmi_dev_gpu_clear_sram_data() through rsmi_wrapper().
amdsmi_status_t amdsmi_set_gpu_clear_sram_data(
    amdsmi_processor_handle processor_handle, uint32_t sclean) {
  AMDSMI_CHECK_INIT();

  const auto status = rsmi_wrapper(rsmi_dev_gpu_clear_sram_data,
                                   processor_handle, sclean);
  return status;
}
|
||||
|
||||
amdsmi_status_t
|
||||
amdsmi_get_gpu_memory_reserved_pages(amdsmi_processor_handle processor_handle,
|
||||
uint32_t *num_pages,
|
||||
|
||||
Reference in New Issue
Block a user