diff --git a/projects/amdsmi/amdsmi_cli/README.md b/projects/amdsmi/amdsmi_cli/README.md
index 3273f8077f..f9c0c06766 100644
--- a/projects/amdsmi/amdsmi_cli/README.md
+++ b/projects/amdsmi/amdsmi_cli/README.md
@@ -280,7 +280,7 @@ usage: amd-smi metric [-h] [-g GPU [GPU ...] | -U CPU [CPU ...] | -O CORE [CORE
[--core-curr-active-freq-core-limit] [--core-energy]
[--json | --csv] [--file FILE] [--loglevel LEVEL]
-If no GPU is specified, returns metric information for all GPUs on the system.
+If no GPU is specified, returns metric information for all GPUs on the system.
If no metric argument is provided all metric information will be displayed.
Metric arguments:
@@ -325,16 +325,16 @@ CPU Arguments:
--cpu-c0-res Displays C0 residency
--cpu-lclk-dpm-level NBIOID Displays lclk dpm level range. Requires socket ID and NBOID as inputs
--cpu-pwr-svi-telemtry-rails Displays svi based telemetry for all rails
- --cpu-io-bandwidth IO_BW LINKID_NAME Displays current IO bandwidth for the selected CPU.
- input parameters are bandwidth type(1) and link ID encodings
+ --cpu-io-bandwidth IO_BW LINKID_NAME Displays current IO bandwidth for the selected CPU.
+ input parameters are bandwidth type(1) and link ID encodings
i.e. P2, P3, G0 - G7
- --cpu-xgmi-bandwidth XGMI_BW LINKID_NAME Displays current XGMI bandwidth for the selected CPU
- input parameters are bandwidth type(1,2,4) and link ID encodings
+ --cpu-xgmi-bandwidth XGMI_BW LINKID_NAME Displays current XGMI bandwidth for the selected CPU
+ input parameters are bandwidth type(1,2,4) and link ID encodings
i.e. P2, P3, G0 - G7
--cpu-metrics-ver Displays metrics table version
--cpu-metrics-table Displays metric table
--cpu-socket-energy Displays socket energy for the selected CPU socket
- --cpu-ddr-bandwidth Displays per socket max ddr bw, current utilized bw,
+ --cpu-ddr-bandwidth Displays per socket max ddr bw, current utilized bw,
and current utilized ddr bw in percentage
--cpu-temp Displays cpu socket temperature
--cpu-dimm-temp-range-rate DIMM_ADDR Displays dimm temperature range and refresh rate
@@ -437,7 +437,7 @@ usage: amd-smi topology [-h] [--json | --csv] [--file FILE] [--loglevel LEVEL]
[-g GPU [GPU ...] | -U CPU [CPU ...] | -O CORE [CORE ...]] [-a]
[-w] [-o] [-t] [-b]
-If no GPU is specified, returns information for all GPUs on the system.
+If no GPU is specified, returns information for all GPUs on the system.
If no topology argument is provided all topology information will be displayed.
Topology arguments:
@@ -483,7 +483,7 @@ usage: amd-smi set [-h] (-g GPU [GPU ...] | -U CPU [CPU ...] | -O CORE [CORE ...
[--core-boost-limit BOOST_LIMIT] [--json | --csv] [--file FILE]
[--loglevel LEVEL]
-A GPU must be specified to set a configuration.
+A GPU must be specified to set a configuration.
A set argument must be provided; Multiple set arguments are accepted
Set Arguments:
@@ -513,11 +513,12 @@ Set Arguments:
NPS1, NPS2, NPS4, NPS8
-o, --power-cap WATTS Set power capacity limit
-p, --dpm-policy POLICY_ID Set the GPU DPM policy using policy id
+ -x, --xgmi-plpd POLICY_ID Set the GPU XGMI per-link power down policy using policy id
CPU Arguments:
--cpu-pwr-limit PWR_LIMIT Set power limit for the given socket. Input parameter is power limit value.
--cpu-xgmi-link-width MIN_WIDTH MAX_WIDTH Set max and Min linkwidth. Input parameters are min and max link width values
- --cpu-lclk-dpm-level NBIOID MIN_DPM MAX_DPM Sets the max and min dpm level on a given NBIO.
+ --cpu-lclk-dpm-level NBIOID MIN_DPM MAX_DPM Sets the max and min dpm level on a given NBIO.
Input parameters are die_index, min dpm, max dpm.
--cpu-pwr-eff-mode MODE Sets the power efficency mode policy. Input parameter is mode.
--cpu-gmi3-link-width MIN_LW MAX_LW Sets max and min gmi3 link width range
@@ -675,7 +676,7 @@ GPU: 0
PARTITION:
COMPUTE_PARTITION: SPX
MEMORY_PARTITION: NPS1
- POLICY:
+ DPM_POLICY:
NUM_SUPPORTED: 4
CURRENT_ID: 1
POLICIES:
@@ -687,6 +688,16 @@ GPU: 0
POLICY_DESCRIPTION: soc_pstate_1
POLICY_ID: 3
POLICY_DESCRIPTION: soc_pstate_2
+ XGMI_PLPD:
+ NUM_SUPPORTED: 3
+ CURRENT_ID: 1
+ PLPDS:
+ POLICY_ID: 0
+ POLICY_DESCRIPTION: plpd_disallow
+ POLICY_ID: 1
+ POLICY_DESCRIPTION: plpd_default
+ POLICY_ID: 2
+ POLICY_DESCRIPTION: plpd_optimized
NUMA:
NODE: 0
AFFINITY: 0
@@ -783,7 +794,7 @@ GPU: 1
PARTITION:
COMPUTE_PARTITION: SPX
MEMORY_PARTITION: NPS1
- POLICY:
+ DPM_POLICY:
NUM_SUPPORTED: 4
CURRENT_ID: 1
POLICIES:
@@ -795,6 +806,16 @@ GPU: 1
POLICY_DESCRIPTION: soc_pstate_1
POLICY_ID: 3
POLICY_DESCRIPTION: soc_pstate_2
+ XGMI_PLPD:
+ NUM_SUPPORTED: 3
+ CURRENT_ID: 1
+ PLPDS:
+ POLICY_ID: 0
+ POLICY_DESCRIPTION: plpd_disallow
+ POLICY_ID: 1
+ POLICY_DESCRIPTION: plpd_default
+ POLICY_ID: 2
+ POLICY_DESCRIPTION: plpd_optimized
NUMA:
NODE: 1
AFFINITY: 1
@@ -891,7 +912,7 @@ GPU: 2
PARTITION:
COMPUTE_PARTITION: SPX
MEMORY_PARTITION: NPS1
- POLICY:
+ DPM_POLICY:
NUM_SUPPORTED: 4
CURRENT_ID: 1
POLICIES:
@@ -903,6 +924,16 @@ GPU: 2
POLICY_DESCRIPTION: soc_pstate_1
POLICY_ID: 3
POLICY_DESCRIPTION: soc_pstate_2
+ XGMI_PLPD:
+ NUM_SUPPORTED: 3
+ CURRENT_ID: 1
+ PLPDS:
+ POLICY_ID: 0
+ POLICY_DESCRIPTION: plpd_disallow
+ POLICY_ID: 1
+ POLICY_DESCRIPTION: plpd_default
+ POLICY_ID: 2
+ POLICY_DESCRIPTION: plpd_optimized
NUMA:
NODE: 2
AFFINITY: 2
@@ -999,7 +1030,7 @@ GPU: 3
PARTITION:
COMPUTE_PARTITION: SPX
MEMORY_PARTITION: NPS1
- POLICY:
+ DPM_POLICY:
NUM_SUPPORTED: 4
CURRENT_ID: 1
POLICIES:
@@ -1011,6 +1042,16 @@ GPU: 3
POLICY_DESCRIPTION: soc_pstate_1
POLICY_ID: 3
POLICY_DESCRIPTION: soc_pstate_2
+ XGMI_PLPD:
+ NUM_SUPPORTED: 3
+ CURRENT_ID: 1
+ PLPDS:
+ POLICY_ID: 0
+ POLICY_DESCRIPTION: plpd_disallow
+ POLICY_ID: 1
+ POLICY_DESCRIPTION: plpd_default
+ POLICY_ID: 2
+ POLICY_DESCRIPTION: plpd_optimized
NUMA:
NODE: 3
AFFINITY: 3
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
index dbdc16acb3..689b3fa55f 100644
--- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
+++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py
@@ -244,7 +244,8 @@ class AMDSMICommands():
def static_gpu(self, args, multiple_devices=False, gpu=None, asic=None, bus=None, vbios=None,
limit=None, driver=None, ras=None, board=None, numa=None, vram=None,
- cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None, policy=None):
+ cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None,
+ policy=None, xgmi_plpd=None):
"""Get Static information for target gpu
Args:
@@ -268,6 +269,7 @@ class AMDSMICommands():
fb_info (bool, optional): Value override for args.fb_info. Defaults to None.
num_vf (bool, optional): Value override for args.num_vf. Defaults to None.
policy (bool, optional): Value override for args.policy. Defaults to None.
+ xgmi_plpd (bool, optional): Value override for args.xgmi_plpd. Defaults to None.
Returns:
None: Print output via AMDSMILogger to destination
"""
@@ -302,8 +304,10 @@ class AMDSMICommands():
args.limit = limit
if policy:
args.policy = policy
- current_platform_args += ["ras", "limit", "partition", "policy"]
- current_platform_values += [args.ras, args.limit, args.partition, args.policy]
+ if xgmi_plpd:
+ args.xgmi_plpd = xgmi_plpd
+ current_platform_args += ["ras", "limit", "partition", "policy", "xgmi_plpd"]
+ current_platform_values += [args.ras, args.limit, args.partition, args.policy, args.xgmi_plpd]
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
if numa:
@@ -630,6 +634,15 @@ class AMDSMICommands():
logging.debug("Failed to get policy info for gpu %s | %s", gpu_id, e.get_error_info())
static_dict['dpm_policy'] = policy_info
+ if 'xgmi_plpd' in current_platform_args:
+ if args.xgmi_plpd:
+ try:
+ policy_info = amdsmi_interface.amdsmi_get_xgmi_plpd(args.gpu)
+ except amdsmi_exception.AmdSmiLibraryException as e:
+ policy_info = "N/A"
+ logging.debug("Failed to get xgmi_plpd info for gpu %s | %s", gpu_id, e.get_error_info())
+
+ static_dict['xgmi_plpd'] = policy_info
if 'numa' in current_platform_args:
if args.numa:
try:
@@ -766,7 +779,7 @@ class AMDSMICommands():
bus=None, vbios=None, limit=None, driver=None, ras=None,
board=None, numa=None, vram=None, cache=None, partition=None,
dfc_ucode=None, fb_info=None, num_vf=None, cpu=None,
- interface_ver=None, policy=None):
+ interface_ver=None, policy=None, xgmi_plpd = None):
"""Get Static information for target gpu and cpu
Args:
@@ -790,6 +803,7 @@ class AMDSMICommands():
cpu (cpu_handle, optional): cpu_handle for target device. Defaults to None.
interface_ver (bool, optional): Value override for args.interface_ver. Defaults to None
policy (bool, optional): Value override for args.policy. Defaults to None.
+ xgmi_plpd (bool, optional): Value override for args.xgmi_plpd. Defaults to None.
Raises:
IndexError: Index error if gpu list is empty
@@ -815,7 +829,7 @@ class AMDSMICommands():
gpu_args_enabled = False
gpu_attributes = ["asic", "bus", "vbios", "limit", "driver", "ras",
"board", "numa", "vram", "cache", "partition",
- "dfc_ucode", "fb_info", "num_vf", "policy"]
+ "dfc_ucode", "fb_info", "num_vf", "policy", "xgmi_plpd"]
for attr in gpu_attributes:
if hasattr(args, attr):
if getattr(args, attr):
@@ -859,7 +873,7 @@ class AMDSMICommands():
self.static_gpu(args, multiple_devices, gpu, asic,
bus, vbios, limit, driver, ras,
board, numa, vram, cache, partition,
- dfc_ucode, fb_info, num_vf, policy)
+ dfc_ucode, fb_info, num_vf, policy, xgmi_plpd)
def firmware(self, args, multiple_devices=False, gpu=None, fw_list=True):
@@ -3090,7 +3104,7 @@ class AMDSMICommands():
def set_gpu(self, args, multiple_devices=False, gpu=None, fan=None, perf_level=None,
profile=None, perf_determinism=None, compute_partition=None,
- memory_partition=None, power_cap=None, dpm_policy=None):
+ memory_partition=None, power_cap=None, dpm_policy=None, xgmi_plpd = None):
"""Issue reset commands to target gpu(s)
Args:
@@ -3105,6 +3119,7 @@ class AMDSMICommands():
memory_partition (amdsmi_interface.AmdSmiMemoryPartitionType, optional): Value override for args.memory_partition. Defaults to None.
power_cap (int, optional): Value override for args.power_cap. Defaults to None.
dpm_policy (int, optional): Value override for args.dpm_policy. Defaults to None.
+ xgmi_plpd (int, optional): Value override for args.xgmi_plpd. Defaults to None.
Raises:
ValueError: Value error if no gpu value is provided
@@ -3132,6 +3147,8 @@ class AMDSMICommands():
args.power_cap = power_cap
if dpm_policy:
args.dpm_policy = dpm_policy
+ if xgmi_plpd:
+ args.xgmi_plpd = xgmi_plpd
# Handle No GPU passed
if args.gpu == None:
raise ValueError('No GPU provided, specific GPU target(s) are needed')
@@ -3151,7 +3168,8 @@ class AMDSMICommands():
args.memory_partition,
args.perf_determinism is not None,
args.power_cap,
- args.dpm_policy]):
+ args.dpm_policy,
+ args.xgmi_plpd is not None]):  # plpd id 0 is valid, so test for presence rather than truthiness
command = " ".join(sys.argv[1:])
raise AmdSmiRequiredCommandException(command, self.logger.format)
@@ -3225,6 +3243,15 @@ class AMDSMICommands():
raise ValueError(f"Unable to set dpm policy to {args.dpm_policy} on {gpu_string}") from e
self.logger.store_output(args.gpu, 'dpmpolicy', f"Successfully set dpm policy to id {args.dpm_policy}")
+ if isinstance(args.xgmi_plpd, int):  # same presence test as power_cap; a bare truthiness check would skip id 0
+ try:
+ amdsmi_interface.amdsmi_set_xgmi_plpd(args.gpu, args.xgmi_plpd)
+ except amdsmi_exception.AmdSmiLibraryException as e:
+ if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
+ raise PermissionError('Command requires elevation') from e
+ raise ValueError(f"Unable to set XGMI policy to {args.xgmi_plpd} on {gpu_string}") from e
+ self.logger.store_output(args.gpu, 'xgmiplpd', f"Successfully set per-link power down policy to id {args.xgmi_plpd}")
+
if isinstance(args.power_cap, int):
try:
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
@@ -3264,7 +3291,7 @@ class AMDSMICommands():
cpu=None, cpu_pwr_limit=None, cpu_xgmi_link_width=None, cpu_lclk_dpm_level=None,
cpu_pwr_eff_mode=None, cpu_gmi3_link_width=None, cpu_pcie_link_rate=None,
cpu_df_pstate_range=None, cpu_enable_apb=None, cpu_disable_apb=None,
- soc_boost_limit=None, core=None, core_boost_limit=None, dpm_policy=None):
+ soc_boost_limit=None, core=None, core_boost_limit=None, dpm_policy=None, xgmi_plpd=None):
"""Issue reset commands to target gpu(s)
Args:
@@ -3294,6 +3321,7 @@ class AMDSMICommands():
core (device_handle, optional): device_handle for target core. Defaults to None.
core_boost_limit (int, optional): Value override for args.core_boost_limit. Defaults to None
dpm_policy (int, optional): Value override for args.dpm_policy. Defaults to None.
+ xgmi_plpd (int, optional): Value override for args.xgmi_plpd. Defaults to None.
Raises:
ValueError: Value error if no gpu value is provided
@@ -3314,7 +3342,7 @@ class AMDSMICommands():
# Check if a GPU argument has been set
gpu_args_enabled = False
gpu_attributes = ["fan", "perf_level", "profile", "perf_determinism", "compute_partition",
- "memory_partition", "power_cap", "dpm_policy"]
+ "memory_partition", "power_cap", "dpm_policy", "xgmi_plpd"]
for attr in gpu_attributes:
if hasattr(args, attr):
if getattr(args, attr) is not None:
@@ -3370,7 +3398,7 @@ class AMDSMICommands():
self.logger.clear_multiple_devices_ouput()
self.set_gpu(args, multiple_devices, gpu, fan, perf_level,
profile, perf_determinism, compute_partition,
- memory_partition, power_cap, dpm_policy)
+ memory_partition, power_cap, dpm_policy, xgmi_plpd)
elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized
if args.cpu == None and args.core == None:
raise ValueError('No CPU or CORE provided, specific target(s) are needed')
@@ -3389,7 +3417,7 @@ class AMDSMICommands():
self.logger.clear_multiple_devices_ouput()
self.set_gpu(args, multiple_devices, gpu, fan, perf_level,
profile, perf_determinism, compute_partition,
- memory_partition, power_cap, dpm_policy)
+ memory_partition, power_cap, dpm_policy, xgmi_plpd)
def reset(self, args, multiple_devices=False, gpu=None, gpureset=None,
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py
index 5341b27486..adaa91c34e 100644
--- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py
+++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py
@@ -544,6 +544,7 @@ class AMDSMIParser(argparse.ArgumentParser):
cache_help = "All cache information"
board_help = "All board information"
dpm_policy_help = "The available DPM policy"
+ xgmi_plpd_help = "The available XGMI per-link power down policy"
# Options arguments help text for Hypervisors and Baremetal
ras_help = "Displays RAS features information"
@@ -584,6 +585,7 @@ class AMDSMIParser(argparse.ArgumentParser):
static_parser.add_argument('-p', '--partition', action='store_true', required=False, help=partition_help)
static_parser.add_argument('-l', '--limit', action='store_true', required=False, help=limit_help)
static_parser.add_argument('-P', '--policy', action='store_true', required=False, help=dpm_policy_help)
+ static_parser.add_argument('-x', '--xgmi-plpd', action='store_true', required=False, help=xgmi_plpd_help)
if self.helpers.is_linux() and not self.helpers.is_virtual_os():
static_parser.add_argument('-u', '--numa', action='store_true', required=False, help=numa_help)
@@ -966,6 +968,7 @@ class AMDSMIParser(argparse.ArgumentParser):
set_memory_partition_help = f"Set one of the following the memory partition modes:\n\t{memory_partition_choices_str}"
set_power_cap_help = "Set power capacity limit"
set_dpm_policy_help = f"Set the GPU DPM policy using policy id\n"
+ set_xgmi_plpd_help = f"Set the GPU XGMI per-link power down policy using policy id\n"
# Help text for CPU set options
set_cpu_pwr_limit_help = "Set power limit for the given socket. Input parameter is power limit value."
@@ -1002,6 +1005,7 @@ class AMDSMIParser(argparse.ArgumentParser):
set_value_parser.add_argument('-M', '--memory-partition', action='store', choices=self.helpers.get_memory_partition_types(), type=str.upper, required=False, help=set_memory_partition_help, metavar='PARTITION')
set_value_parser.add_argument('-o', '--power-cap', action='store', type=self._positive_int, required=False, help=set_power_cap_help, metavar='WATTS')
set_value_parser.add_argument('-p', '--dpm-policy', action='store', required=False, type=self._not_negative_int, help=set_dpm_policy_help, metavar='POLICY_ID')
+ set_value_parser.add_argument('-x', '--xgmi-plpd', action='store', required=False, type=self._not_negative_int, help=set_xgmi_plpd_help, metavar='POLICY_ID')
if self.helpers.is_amd_hsmp_initialized():
# Optional CPU Args
diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h
index 861709b98d..64bdb1253a 100644
--- a/projects/amdsmi/include/amd_smi/amdsmi.h
+++ b/projects/amdsmi/include/amd_smi/amdsmi.h
@@ -3405,6 +3405,49 @@ amdsmi_status_t amdsmi_get_dpm_policy(amdsmi_processor_handle processor_handle,
*/
amdsmi_status_t amdsmi_set_dpm_policy(amdsmi_processor_handle processor_handle,
uint32_t policy_id);
+
+/**
+ * @brief Get the xgmi per-link power down policy parameter for the processor
+ *
+ * @platform{gpu_bm_linux}
+ *
+ * @details Given a processor handle @p processor_handle, this function will write
+ * current xgmi plpd settings to @p xgmi_plpd. All the processors at the same socket
+ * will have the same policy.
+ *
+ * @param[in] processor_handle a processor handle
+ *
+ * @param[in, out] xgmi_plpd the xgmi plpd for this processor.
+ * If this parameter is nullptr, this function will return
+ * ::AMDSMI_STATUS_INVAL
+ *
+ * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
+ */
+amdsmi_status_t amdsmi_get_xgmi_plpd(amdsmi_processor_handle processor_handle,
+ amdsmi_dpm_policy_t* xgmi_plpd);
+
+/**
+ * @brief Set the xgmi per-link power down policy parameter for the processor
+ *
+ * @platform{gpu_bm_linux}
+ *
+ * @details Given a processor handle @p processor_handle and an xgmi plpd id @p plpd_id,
+ * this function will set the xgmi plpd for this processor. All the processors at
+ * the same socket will be set to the same policy.
+ *
+ * @note This function requires root access
+ *
+ * @param[in] processor_handle a processor handle
+ *
+ * @param[in] plpd_id the xgmi plpd id to set. The id is the id in
+ * amdsmi_dpm_policy_entry_t, which can be obtained by calling
+ * amdsmi_get_xgmi_plpd()
+ *
+ * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
+ */
+amdsmi_status_t amdsmi_set_xgmi_plpd(amdsmi_processor_handle processor_handle,
+ uint32_t plpd_id);
+
/** @} End PerfCont */
/*****************************************************************************/
diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md
index 7d9fd5908e..82f8ca974a 100644
--- a/projects/amdsmi/py-interface/README.md
+++ b/projects/amdsmi/py-interface/README.md
@@ -909,8 +909,8 @@ Field | Description
`name` | Name of process
`pid` | Process ID
`mem` | Process memory usage
-`engine_usage`|
| Subfield | Description |
| `gfx` | GFX engine usage in ns |
| `enc` | Encode engine usage in ns |
-`memory_usage`| | Subfield | Description |
| `gtt_mem` | GTT memory usage |
| `cpu_mem` | CPU memory usage |
| `vram_mem` | VRAM memory usage |
+`engine_usage` | | Subfield | Description |
| `gfx` | GFX engine usage in ns |
| `enc` | Encode engine usage in ns |
+`memory_usage` | | Subfield | Description |
| `gtt_mem` | GTT memory usage |
| `cpu_mem` | CPU memory usage |
| `vram_mem` | VRAM memory usage |
Exceptions that can be thrown by `amdsmi_get_gpu_process_info` function:
@@ -2612,6 +2612,74 @@ except AmdSmiException as e:
print(e)
```
+### amdsmi_set_xgmi_plpd
+
+Description: Set the xgmi per-link power down policy parameter for the processor
+
+Input parameters:
+
+* `processor_handle` handle for the given device
+* `policy_id` the xgmi plpd id to set.
+
+Output: None
+
+Exceptions that can be thrown by `amdsmi_set_xgmi_plpd` function:
+
+* `AmdSmiLibraryException`
+* `AmdSmiRetryException`
+* `AmdSmiParameterException`
+
+Example:
+
+```python
+try:
+ devices = amdsmi_get_processor_handles()
+ if len(devices) == 0:
+ print("No GPUs on machine")
+ else:
+ for device in devices:
+ amdsmi_set_xgmi_plpd(device, 0)
+except AmdSmiException as e:
+ print(e)
+```
+
+### amdsmi_get_xgmi_plpd
+
+Description: Get the xgmi per-link power down policy parameter for the processor
+
+Input parameters:
+
+* `processor_handle` handle for the given device
+
+Output: Dict containing information about xgmi per-link power down policy
+
+Field | Description
+---|---
+`num_supported` | The number of supported policies
+`current_id` | The policy id of the currently active policy
+`plpds` | List of policies, each a dict with `policy_id` and `policy_description`
+
+Exceptions that can be thrown by `amdsmi_get_xgmi_plpd` function:
+
+* `AmdSmiLibraryException`
+* `AmdSmiRetryException`
+* `AmdSmiParameterException`
+
+Example:
+
+```python
+try:
+ devices = amdsmi_get_processor_handles()
+ if len(devices) == 0:
+ print("No GPUs on machine")
+ else:
+ for device in devices:
+ xgmi_plpd = amdsmi_get_xgmi_plpd(device)
+ print(xgmi_plpd)
+except AmdSmiException as e:
+ print(e)
+```
+
### amdsmi_set_gpu_overdrive_level
Description: **deprecated** Set the overdrive percent associated with the
diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py
index e27451dab4..c9e773b88f 100644
--- a/projects/amdsmi/py-interface/amdsmi_interface.py
+++ b/projects/amdsmi/py-interface/amdsmi_interface.py
@@ -2746,6 +2746,20 @@ def amdsmi_set_dpm_policy(
)
)
+def amdsmi_set_xgmi_plpd(  # Set the XGMI per-link power down policy for one processor
+ processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
+ policy_id: int,  # a policy_id value as listed by amdsmi_get_xgmi_plpd()
+):
+ if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
+ raise AmdSmiParameterException(
+ processor_handle, amdsmi_wrapper.amdsmi_processor_handle
+ )
+ _check_res(  # NOTE(review): presumably raises on a non-success status - confirm in _check_res
+ amdsmi_wrapper.amdsmi_set_xgmi_plpd(
+ processor_handle, policy_id
+ )
+ )
+
def amdsmi_set_gpu_overdrive_level(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle, overdrive_value: int
):
@@ -3335,6 +3349,37 @@ def amdsmi_get_dpm_policy(
"policies": polices,
}
+def amdsmi_get_xgmi_plpd(  # Query the supported XGMI per-link power down policies and the active one
+ processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
+) -> Dict[str, Any]:
+ if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
+ raise AmdSmiParameterException(
+ processor_handle, amdsmi_wrapper.amdsmi_processor_handle
+ )
+
+ policy = amdsmi_wrapper.amdsmi_dpm_policy_t()  # out-parameter passed by reference to the native call below
+ _check_res(
+ amdsmi_wrapper.amdsmi_get_xgmi_plpd(
+ processor_handle, ctypes.byref(policy)
+ )
+ )
+
+ polices = []  # one dict per supported policy, in the order reported by the library
+ for i in range(0, policy.num_supported):
+ id = policy.policies[i].policy_id  # NOTE(review): 'id' shadows the Python builtin inside this function
+ desc = policy.policies[i].policy_description
+ polices.append({
+ 'policy_id' : id,
+ 'policy_description': desc.decode()
+ })
+ current_id = policy.policies[policy.current].policy_id  # policy.current indexes policies[]; report that entry's id, not the index
+
+ return {
+ "num_supported": policy.num_supported,
+ "current_id": current_id,
+ "plpds": polices,
+ }
+
def amdsmi_get_gpu_od_volt_info(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
) -> Dict[str, Any]:
diff --git a/projects/amdsmi/py-interface/amdsmi_wrapper.py b/projects/amdsmi/py-interface/amdsmi_wrapper.py
index f718dcfa87..13cd2062ac 100644
--- a/projects/amdsmi/py-interface/amdsmi_wrapper.py
+++ b/projects/amdsmi/py-interface/amdsmi_wrapper.py
@@ -746,19 +746,6 @@ amdsmi_card_form_factor_t = ctypes.c_uint32 # enum
class struct_amdsmi_pcie_info_t(Structure):
pass
-class struct_pcie_static_(Structure):
- pass
-
-struct_pcie_static_._pack_ = 1 # source:False
-struct_pcie_static_._fields_ = [
- ('max_pcie_width', ctypes.c_uint16),
- ('PADDING_0', ctypes.c_ubyte * 2),
- ('max_pcie_speed', ctypes.c_uint32),
- ('pcie_interface_version', ctypes.c_uint32),
- ('slot_type', amdsmi_card_form_factor_t),
- ('reserved', ctypes.c_uint64 * 10),
-]
-
class struct_pcie_metric_(Structure):
pass
@@ -777,6 +764,19 @@ struct_pcie_metric_._fields_ = [
('reserved', ctypes.c_uint64 * 13),
]
+class struct_pcie_static_(Structure):
+ pass
+
+struct_pcie_static_._pack_ = 1 # source:False
+struct_pcie_static_._fields_ = [
+ ('max_pcie_width', ctypes.c_uint16),
+ ('PADDING_0', ctypes.c_ubyte * 2),
+ ('max_pcie_speed', ctypes.c_uint32),
+ ('pcie_interface_version', ctypes.c_uint32),
+ ('slot_type', amdsmi_card_form_factor_t),
+ ('reserved', ctypes.c_uint64 * 10),
+]
+
struct_amdsmi_pcie_info_t._pack_ = 1 # source:False
struct_amdsmi_pcie_info_t._fields_ = [
('pcie_static', struct_pcie_static_),
@@ -2058,6 +2058,12 @@ amdsmi_get_dpm_policy.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct
amdsmi_set_dpm_policy = _libraries['libamd_smi.so'].amdsmi_set_dpm_policy
amdsmi_set_dpm_policy.restype = amdsmi_status_t
amdsmi_set_dpm_policy.argtypes = [amdsmi_processor_handle, uint32_t]
+amdsmi_get_xgmi_plpd = _libraries['libamd_smi.so'].amdsmi_get_xgmi_plpd
+amdsmi_get_xgmi_plpd.restype = amdsmi_status_t
+amdsmi_get_xgmi_plpd.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_dpm_policy_t)]
+amdsmi_set_xgmi_plpd = _libraries['libamd_smi.so'].amdsmi_set_xgmi_plpd
+amdsmi_set_xgmi_plpd.restype = amdsmi_status_t
+amdsmi_set_xgmi_plpd.argtypes = [amdsmi_processor_handle, uint32_t]
amdsmi_get_lib_version = _libraries['libamd_smi.so'].amdsmi_get_lib_version
amdsmi_get_lib_version.restype = amdsmi_status_t
amdsmi_get_lib_version.argtypes = [ctypes.POINTER(struct_amdsmi_version_t)]
@@ -2594,8 +2600,9 @@ __all__ = \
'amdsmi_get_processor_info', 'amdsmi_get_processor_type',
'amdsmi_get_socket_handles', 'amdsmi_get_socket_info',
'amdsmi_get_temp_metric', 'amdsmi_get_utilization_count',
- 'amdsmi_get_xgmi_info', 'amdsmi_gpu_block_t',
- 'amdsmi_gpu_cache_info_t', 'amdsmi_gpu_control_counter',
+ 'amdsmi_get_xgmi_info', 'amdsmi_get_xgmi_plpd',
+ 'amdsmi_gpu_block_t', 'amdsmi_gpu_cache_info_t',
+ 'amdsmi_gpu_control_counter',
'amdsmi_gpu_counter_group_supported', 'amdsmi_gpu_create_counter',
'amdsmi_gpu_destroy_counter', 'amdsmi_gpu_metrics_t',
'amdsmi_gpu_read_counter', 'amdsmi_gpu_xgmi_error_status',
@@ -2636,10 +2643,10 @@ __all__ = \
'amdsmi_set_gpu_overdrive_level', 'amdsmi_set_gpu_pci_bandwidth',
'amdsmi_set_gpu_perf_determinism_mode',
'amdsmi_set_gpu_perf_level', 'amdsmi_set_gpu_power_profile',
- 'amdsmi_set_power_cap', 'amdsmi_shut_down',
- 'amdsmi_smu_fw_version_t', 'amdsmi_socket_handle',
- 'amdsmi_status_code_to_string', 'amdsmi_status_t',
- 'amdsmi_stop_gpu_event_notification',
+ 'amdsmi_set_power_cap', 'amdsmi_set_xgmi_plpd',
+ 'amdsmi_shut_down', 'amdsmi_smu_fw_version_t',
+ 'amdsmi_socket_handle', 'amdsmi_status_code_to_string',
+ 'amdsmi_status_t', 'amdsmi_stop_gpu_event_notification',
'amdsmi_temp_range_refresh_rate_t', 'amdsmi_temperature_metric_t',
'amdsmi_temperature_type_t', 'amdsmi_topo_get_link_type',
'amdsmi_topo_get_link_weight', 'amdsmi_topo_get_numa_node_number',
diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h
index 1265421355..e10ab49b34 100755
--- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h
+++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h
@@ -3364,6 +3364,45 @@ rsmi_status_t rsmi_dev_dpm_policy_get(uint32_t dv_ind,
rsmi_status_t rsmi_dev_dpm_policy_set(uint32_t dv_ind,
uint32_t policy_id);
+/**
+ * @brief Get the xgmi per-link power down policy parameter for a device
+ *
+ *
+ * @details Given a device index @p dv_ind, this function will write
+ * current xgmi plpd settings to @p xgmi_plpd. All the processors at the same socket
+ * will have the same policy.
+ *
+ * @param[in] dv_ind a device index
+ *
+ * @param[in, out] xgmi_plpd the xgmi_plpd policy for this device.
+ * If this parameter is nullptr, this function will return
+ * ::RSMI_STATUS_INVALID_ARGS
+ *
+ * @return ::RSMI_STATUS_SUCCESS is returned upon successful call, non-zero on fail
+ */
+rsmi_status_t rsmi_dev_xgmi_plpd_get(uint32_t dv_ind,
+ rsmi_dpm_policy_t* xgmi_plpd);
+
+/**
+ * @brief Set the xgmi per-link power down policy parameter for a device
+ *
+ *
+ * @details Given a device index @p dv_ind, and an xgmi plpd id @p plpd_id,
+ * this function will set the xgmi plpd for this device. All the processors at
+ * the same socket will be set to the same policy.
+ *
+ * @note This function requires root access
+ *
+ * @param[in] dv_ind a device index
+ *
+ * @param[in] plpd_id the xgmi plpd id to set. The id is the id in
+ * rsmi_dpm_policy_entry_t, which can be obtained by calling
+ * rsmi_dev_xgmi_plpd_get()
+ *
+ * @return ::RSMI_STATUS_SUCCESS is returned upon successful call, non-zero on fail
+ */
+rsmi_status_t rsmi_dev_xgmi_plpd_set(uint32_t dv_ind,
+ uint32_t plpd_id);
/** @} */ // end of PerfCont
/*****************************************************************************/
diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi.cc b/projects/amdsmi/rocm_smi/src/rocm_smi.cc
index 91c8ddbb69..6aa0d86fce 100755
--- a/projects/amdsmi/rocm_smi/src/rocm_smi.cc
+++ b/projects/amdsmi/rocm_smi/src/rocm_smi.cc
@@ -2038,6 +2038,130 @@ rsmi_dev_dpm_policy_set(uint32_t dv_ind,
CATCH
}
+rsmi_status_t
+rsmi_dev_xgmi_plpd_get(uint32_t dv_ind,
+ rsmi_dpm_policy_t* policy) {
+ rsmi_status_t ret;
+ std::vector val_vec;
+
+ if (policy == nullptr) {
+ return RSMI_STATUS_INVALID_ARGS;
+ }
+
+ *policy = {};  // clear the output before parsing
+
+ TRY
+ std::ostringstream ss;
+ ss << __PRETTY_FUNCTION__ << " | ======= start =======";
+ LOG_TRACE(ss);
+ DEVICE_MUTEX
+
+ ret = GetDevValueVec(amd::smi::kDevDPMPolicy, dv_ind, &val_vec);
+ if (ret == RSMI_STATUS_FILE_ERROR) {
+ ss << __PRETTY_FUNCTION__ << " | ======= end ======="
+ << ", GetDevValueVec() ret was RSMI_STATUS_FILE_ERROR "
+ << "-> reporting RSMI_STATUS_NOT_SUPPORTED";
+ LOG_ERROR(ss);
+ return RSMI_STATUS_NOT_SUPPORTED;
+ }
+ if (ret != RSMI_STATUS_SUCCESS) {
+ ss << __PRETTY_FUNCTION__ << " | ======= end ======="
+ << ", GetDevValueVec() ret was not RSMI_STATUS_SUCCESS"
+ << " -> reporting " << amd::smi::getRSMIStatusString(ret);
+ LOG_ERROR(ss);
+ return ret;
+ }
+ /*
+ It will rely on the number but not the string, as the names may vary from soc to soc.
+ The current xgmi plpd is marked with *
+ xgmi plpd
+ 0 : plpd_disallow
+ 1 : plpd_default
+ 2 : plpd_optimized*
+ */
+ bool see_plpd_pstate = false;
+ bool see_current = false;
+ policy->num_supported = 0;
+ for (uint32_t i = 0; i < val_vec.size(); ++i) {
+ auto current_line = amd::smi::trim(val_vec[i]);
+ if (current_line == "xgmi plpd") {
+ see_plpd_pstate = true;
+ continue;
+ }
+ if (see_plpd_pstate == false) continue;
+
+ // Get tokens: id : description
+ std::vector tokens;
+ std::istringstream f(current_line);
+ std::string s;
+ while (getline(f, s, ':')) {
+ tokens.push_back(s);
+ }
+
+ int value = 0;
+ // Stop at the first line after the header that is not "id : description"
+ if (tokens.size() < 2 || !amd::smi::stringToInteger(tokens[0], value)) {
+ break;
+ }
+
+ if (value < 0 || policy->num_supported >= RSMI_MAX_NUM_PM_POLICIES) {
+ ss << __PRETTY_FUNCTION__ << " | ======= end ======="
+ << ", Unexpected pstat data: the id is negative or too many plpd policies.";
+ LOG_ERROR(ss);
+ return RSMI_STATUS_UNEXPECTED_DATA;
+ }
+
+ policy->policies[policy->num_supported].policy_id = value;
+ std::string description = amd::smi::trim(tokens[1]);
+ if (current_line.back() == '*') { // current policy
+ description.pop_back(); // remove last *
+ description = amd::smi::trim(description);
+ policy->current = policy->num_supported;  // stored as an index into policies[], not a policy id
+ see_current = true;
+ }
+ strncpy(policy->policies[policy->num_supported].policy_description,
+ description.c_str(),
+ RSMI_MAX_POLICY_NAME-1);  // assumes policy_description holds RSMI_MAX_POLICY_NAME chars - TODO confirm
+ policy->num_supported++;
+ } // end for
+
+ if (!see_plpd_pstate) {
+ return RSMI_STATUS_NOT_SUPPORTED;
+ }
+
+ if (!see_current) {
+ ss << __PRETTY_FUNCTION__ << " | ======= end ======="
+ << ", Unexpected pstat data: cannot find the current plpd policy.";
+ LOG_ERROR(ss);
+ return RSMI_STATUS_UNEXPECTED_DATA;
+ }
+ // The "xgmi plpd" section was found and its current policy identified
+ return RSMI_STATUS_SUCCESS;
+
+ CATCH
+}
+
+rsmi_status_t
+rsmi_dev_xgmi_plpd_set(uint32_t dv_ind,
+ uint32_t plpd_id) {
+ rsmi_status_t ret;  // NOTE(review): appears unused; an 'int ret' is declared again below - confirm and drop
+
+ TRY
+ std::ostringstream ss;
+ ss << __PRETTY_FUNCTION__ << " | ======= start =======";
+ LOG_TRACE(ss);
+ REQUIRE_ROOT_ACCESS
+ DEVICE_MUTEX
+ GET_DEV_FROM_INDX
+
+ std::string value("xgmi ");  // the string written is "xgmi " followed by the decimal plpd id
+ value += std::to_string(plpd_id);
+ int ret = dev->writeDevInfo(amd::smi::kDevDPMPolicy , value);
+ return amd::smi::ErrnoToRsmiStatus(ret);  // convert writeDevInfo's int result to an rsmi_status_t
+
+ CATCH
+}
+
rsmi_status_t
rsmi_dev_dpm_policy_get(uint32_t dv_ind,
rsmi_dpm_policy_t* policy) {
@@ -2107,7 +2231,7 @@ rsmi_dev_dpm_policy_get(uint32_t dv_ind,
if (value < 0 || policy->num_supported >= RSMI_MAX_NUM_PM_POLICIES) {
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
- << ", Unexpeced pstat data: the id is negative or too many policies.";
+ << ", Unexpected pstat data: the id is negative or too many policies.";
LOG_ERROR(ss);
return RSMI_STATUS_UNEXPECTED_DATA;
}
@@ -2132,7 +2256,7 @@ rsmi_dev_dpm_policy_get(uint32_t dv_ind,
if (!see_current) {
ss << __PRETTY_FUNCTION__ << " | ======= end ======="
- << ", Unexpeced pstat data: cannot find the current policy.";
+ << ", Unexpected pstat data: cannot find the current policy.";
LOG_ERROR(ss);
return RSMI_STATUS_UNEXPECTED_DATA;
}
diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_device.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_device.cc
index 3e63659c82..92de58c6a1 100755
--- a/projects/amdsmi/rocm_smi/src/rocm_smi_device.cc
+++ b/projects/amdsmi/rocm_smi/src/rocm_smi_device.cc
@@ -536,8 +536,10 @@ static const std::map kDevFuncDependsMap = {
{"rsmi_topo_numa_affinity_get", {{kDevNumaNodeFName}, {}}},
{"rsmi_dev_gpu_metrics_info_get", {{kDevGpuMetricsFName}, {}}},
{"rsmi_dev_pm_metrics_info_get", {{kDevPmMetricsFName}, {}}},
- {"rsmi_dev_dpm_policy_get", {{kDevDPMPolicyFName}, {}}},
- {"rsmi_dev_dpm_policy_set", {{kDevDPMPolicyFName}, {}}},
+ {"rsmi_dev_dpm_policy_get", {{kDevDPMPolicyFName}, {}}},
+ {"rsmi_dev_dpm_policy_set", {{kDevDPMPolicyFName}, {}}},
+ {"rsmi_dev_xgmi_plpd_get", {{kDevDPMPolicyFName}, {}}},
+ {"rsmi_dev_xgmi_plpd_set", {{kDevDPMPolicyFName}, {}}},
{"rsmi_dev_reg_table_info_get", {{kDevRegMetricsFName}, {}}},
{"rsmi_dev_gpu_reset", {{kDevGpuResetFName}, {}}},
{"rsmi_dev_compute_partition_get", {{kDevComputePartitionFName}, {}}},
diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc
index e57ae30cbb..1dafee87ff 100644
--- a/projects/amdsmi/src/amd_smi/amd_smi.cc
+++ b/projects/amdsmi/src/amd_smi/amd_smi.cc
@@ -1369,6 +1369,22 @@ amdsmi_status_t amdsmi_get_dpm_policy(amdsmi_processor_handle processor_handle,
reinterpret_cast(policy));
}
+amdsmi_status_t amdsmi_set_xgmi_plpd(amdsmi_processor_handle processor_handle,
+ uint32_t policy) {  // 'policy' is the plpd id (declared as plpd_id in amdsmi.h)
+ AMDSMI_CHECK_INIT();
+
+ return rsmi_wrapper(rsmi_dev_xgmi_plpd_set, processor_handle,  // forwards to the ROCm SMI setter
+ policy);
+}
+
+amdsmi_status_t amdsmi_get_xgmi_plpd(amdsmi_processor_handle processor_handle,
+ amdsmi_dpm_policy_t* policy) {  // out-parameter (declared as xgmi_plpd in amdsmi.h)
+ AMDSMI_CHECK_INIT();
+
+ return rsmi_wrapper(rsmi_dev_xgmi_plpd_get, processor_handle,  // forwards to the ROCm SMI getter
+ reinterpret_cast(policy));  // pointer is reinterpreted for the rsmi call, which takes rsmi_dpm_policy_t*
+}
+
amdsmi_status_t
amdsmi_get_gpu_memory_reserved_pages(amdsmi_processor_handle processor_handle,
uint32_t *num_pages,