[SWDEV-532769] amd-smi APIs mismatch with documentation (#428)
* Populated socket_power to get power info --------- Signed-off-by: josnarlo <Joseph.Narlo@amd.com> Signed-off-by: Maisam Arif <Maisam.Arif@amd.com> Co-authored-by: Maisam Arif <Maisam.Arif@amd.com>
This commit is contained in:
@@ -88,6 +88,11 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
|
||||
- **Added `amdsmi_get_cpu_affinity_with_scope()`**.
|
||||
|
||||
- **Added `socket power` to `amdsmi_get_power_info`**
|
||||
- Previously the C API had this value in the `amdsmi_power_info_t` structure, but it was unused.
|
||||
- Now we populate the value in both C & Python APIs
|
||||
- The value is representative of the socket's power, agnostic of the GPU version.
|
||||
|
||||
### Changed
|
||||
|
||||
- **Padded `asic_serial` in `amdsmi_get_asic_info` with 0s**.
|
||||
@@ -162,6 +167,12 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
|
||||
- **Removed `amdsmi_io_link_type_t` and replaced it with `amdsmi_link_type_t`**.
|
||||
- The IO Link type is no longer needed as the link type is sufficient.
|
||||
- Mapping from `amdsmi_io_link_type_t` to `amdsmi_link_type_t` is as follows:
|
||||
```shell
|
||||
AMDSMI_IOLINK_TYPE_UNDEFINED == AMDSMI_LINK_TYPE_INTERNAL
|
||||
AMDSMI_IOLINK_TYPE_PCIEXPRESS == AMDSMI_LINK_TYPE_PCIE
|
||||
AMDSMI_IOLINK_TYPE_XGMI == AMDSMI_LINK_TYPE_XGMI
|
||||
```
|
||||
|
||||
- **Removed `amdsmi_get_power_info_v2()`**.
|
||||
- `amdsmi_get_power_info()` has been unified, so the v2 function is no longer needed or used.
|
||||
|
||||
@@ -1844,22 +1844,16 @@ class AMDSMICommands():
|
||||
power_unit = "W"
|
||||
power_info = amdsmi_interface.amdsmi_get_power_info(args.gpu)
|
||||
for key, value in power_info.items():
|
||||
if value == 0xFFFF:
|
||||
power_info[key] = "N/A"
|
||||
elif "voltage" in key:
|
||||
if "voltage" in key:
|
||||
power_info[key] = self.helpers.unit_format(self.logger,
|
||||
value,
|
||||
voltage_unit)
|
||||
elif "power" in key:
|
||||
if ((key == "current_socket_power" or key == "average_socket_power")
|
||||
and value != "N/A"):
|
||||
power_dict['socket_power'] = self.helpers.unit_format(self.logger,
|
||||
value,
|
||||
power_unit)
|
||||
value,
|
||||
voltage_unit)
|
||||
elif key == "socket_power":
|
||||
power_info[key] = self.helpers.unit_format(self.logger,
|
||||
value,
|
||||
power_unit)
|
||||
value,
|
||||
power_unit)
|
||||
|
||||
power_dict['socket_power'] = power_info['socket_power']
|
||||
power_dict['gfx_voltage'] = power_info['gfx_voltage']
|
||||
power_dict['soc_voltage'] = power_info['soc_voltage']
|
||||
power_dict['mem_voltage'] = power_info['mem_voltage']
|
||||
|
||||
@@ -460,12 +460,12 @@ try:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
power_info = amdsmi_get_power_cap_info(device)
|
||||
print(power_info['power_cap'])
|
||||
print(power_info['dpm_cap'])
|
||||
print(power_info['default_power_cap'])
|
||||
print(power_info['min_power_cap'])
|
||||
print(power_info['max_power_cap'])
|
||||
power_cap_info = amdsmi_get_power_cap_info(device)
|
||||
print(power_cap_info['power_cap'])
|
||||
print(power_cap_info['dpm_cap'])
|
||||
print(power_cap_info['default_power_cap'])
|
||||
print(power_cap_info['min_power_cap'])
|
||||
print(power_cap_info['max_power_cap'])
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
@@ -736,18 +736,18 @@ It is not supported on virtual machine guest
|
||||
Input parameters:
|
||||
|
||||
* `processor_handle` device which to query
|
||||
* `sensor_ind` optional argument that defaults to 0
|
||||
|
||||
Output: Dictionary with fields
|
||||
|
||||
Field | Description
|
||||
---|---
|
||||
`current_socket_power` | current socket power; Mi300+ Series Cards
|
||||
`average_socket_power` | average socket power; Navi + Mi 200 and earlier Series cards
|
||||
`gfx_voltage` | voltage gfx
|
||||
`soc_voltage` | voltage soc
|
||||
`mem_voltage` | voltage mem
|
||||
`power_limit` | power limit
|
||||
Field | Description | Units
|
||||
---|---|---
|
||||
`socket_power` | socket power; equals `current_socket_power` when that value is valid, otherwise `average_socket_power` | W
|
||||
`current_socket_power` | current socket power; Mi300+ Series Cards | W
|
||||
`average_socket_power` | average socket power; Navi + Mi 200 and earlier Series cards | W
|
||||
`gfx_voltage` | voltage gfx | mV
|
||||
`soc_voltage` | voltage soc | mV
|
||||
`mem_voltage` | voltage mem | mV
|
||||
`power_limit` | power limit | W
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_get_power_info` function:
|
||||
|
||||
@@ -764,15 +764,13 @@ try:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
power_measure = amdsmi_get_power_info(device)
|
||||
# Example with using sensor_ind
|
||||
# power_measure = amdsmi_get_power_info(device, 0)
|
||||
print(power_measure['current_socket_power'])
|
||||
print(power_measure['average_socket_power'])
|
||||
print(power_measure['gfx_voltage'])
|
||||
print(power_measure['soc_voltage'])
|
||||
print(power_measure['mem_voltage'])
|
||||
print(power_measure['power_limit'])
|
||||
power_info = amdsmi_get_power_info(device)
|
||||
print(power_info['current_socket_power'])
|
||||
print(power_info['average_socket_power'])
|
||||
print(power_info['gfx_voltage'])
|
||||
print(power_info['soc_voltage'])
|
||||
print(power_info['mem_voltage'])
|
||||
print(power_info['power_limit'])
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
+16
-15
@@ -352,19 +352,20 @@ typedef enum {
|
||||
* @cond @tag{gpu_bm_linux} @tag{host} @tag{guest_windows} @endcond
|
||||
*/
|
||||
typedef enum {
|
||||
AMDSMI_CLK_TYPE_SYS = 0x0, //!< System clock
|
||||
AMDSMI_CLK_TYPE_SYS = 0x0, //!< Graphics clock
|
||||
AMDSMI_CLK_TYPE_FIRST = AMDSMI_CLK_TYPE_SYS,
|
||||
AMDSMI_CLK_TYPE_GFX = AMDSMI_CLK_TYPE_SYS,
|
||||
AMDSMI_CLK_TYPE_DF, //!< Data Fabric clock (for ASICs
|
||||
//!< running on a separate clock)
|
||||
AMDSMI_CLK_TYPE_DCEF, //!< Display Controller Engine clock
|
||||
AMDSMI_CLK_TYPE_SOC,
|
||||
AMDSMI_CLK_TYPE_MEM,
|
||||
AMDSMI_CLK_TYPE_PCIE,
|
||||
AMDSMI_CLK_TYPE_VCLK0,
|
||||
AMDSMI_CLK_TYPE_VCLK1,
|
||||
AMDSMI_CLK_TYPE_DCLK0,
|
||||
AMDSMI_CLK_TYPE_DCLK1,
|
||||
AMDSMI_CLK_TYPE_GFX = AMDSMI_CLK_TYPE_SYS, //!< Graphics clock
|
||||
AMDSMI_CLK_TYPE_DF, /**< Data Fabric clock (for ASICs
|
||||
running on a separate clock) */
|
||||
AMDSMI_CLK_TYPE_DCEF, /**< Display Controller Engine Front clock,
|
||||
timing/bandwidth signals to display */
|
||||
AMDSMI_CLK_TYPE_SOC, //!< System On Chip clock, integrated circuit frequency
|
||||
AMDSMI_CLK_TYPE_MEM, //!< Memory clock speed, system operating frequency
|
||||
AMDSMI_CLK_TYPE_PCIE, //!< PCI Express clock, high bandwidth peripherals
|
||||
AMDSMI_CLK_TYPE_VCLK0, //!< Video 0 clock, video processing units
|
||||
AMDSMI_CLK_TYPE_VCLK1, //!< Video 1 clock, video processing units
|
||||
AMDSMI_CLK_TYPE_DCLK0, //!< Display 1 clock, timing signals for display output
|
||||
AMDSMI_CLK_TYPE_DCLK1, //!< Display 2 clock, timing signals for display output
|
||||
AMDSMI_CLK_TYPE__MAX = AMDSMI_CLK_TYPE_DCLK1
|
||||
} amdsmi_clk_type_t;
|
||||
|
||||
@@ -1027,9 +1028,9 @@ typedef struct {
|
||||
* @cond @tag{gpu_bm_linux} @tag{host} @endcond
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t socket_power; //!< Units in uW {@host}, Host only
|
||||
uint32_t current_socket_power; //!< Units in W {@linux_bm}, Linux only, Mi 300+ Series cards
|
||||
uint32_t average_socket_power; //!< Units in W {@linux_bm}, Linux only, Navi + Mi 200 and earlier Series cards
|
||||
uint64_t socket_power; //!< Socket power in W {@linux_bm}, uW {@host}
|
||||
uint32_t current_socket_power; //!< Current socket power in W {@linux_bm}, Linux only, Mi 300+ Series cards
|
||||
uint32_t average_socket_power; //!< Average socket power in W {@linux_bm}, Linux only, Navi + Mi 200 and earlier Series cards
|
||||
uint64_t gfx_voltage; //!< GFX voltage measurement in mV {@linux_bm} or V {@host}
|
||||
uint64_t soc_voltage; //!< SOC voltage measurement in mV {@linux_bm} or V {@host}
|
||||
uint64_t mem_voltage; //!< MEM voltage measurement in mV {@linux_bm} or V {@host}
|
||||
|
||||
@@ -1958,18 +1958,18 @@ def amdsmi_get_power_cap_info(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
|
||||
power_info = amdsmi_wrapper.amdsmi_power_cap_info_t()
|
||||
power_cap_info = amdsmi_wrapper.amdsmi_power_cap_info_t()
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_get_power_cap_info(
|
||||
processor_handle, ctypes.c_uint32(0), ctypes.byref(power_info)
|
||||
processor_handle, ctypes.c_uint32(0), ctypes.byref(power_cap_info)
|
||||
)
|
||||
)
|
||||
|
||||
return {"power_cap": power_info.power_cap,
|
||||
"default_power_cap": power_info.default_power_cap,
|
||||
"dpm_cap": power_info.dpm_cap,
|
||||
"min_power_cap": power_info.min_power_cap,
|
||||
"max_power_cap": power_info.max_power_cap}
|
||||
return {"power_cap": power_cap_info.power_cap,
|
||||
"default_power_cap": power_cap_info.default_power_cap,
|
||||
"dpm_cap": power_cap_info.dpm_cap,
|
||||
"min_power_cap": power_cap_info.min_power_cap,
|
||||
"max_power_cap": power_cap_info.max_power_cap}
|
||||
|
||||
|
||||
def amdsmi_get_gpu_pm_metrics_info(
|
||||
@@ -2733,20 +2733,21 @@ def amdsmi_get_power_info(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
|
||||
power_measure = amdsmi_wrapper.amdsmi_power_info_t()
|
||||
power_info = amdsmi_wrapper.amdsmi_power_info_t()
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_get_power_info(
|
||||
processor_handle, ctypes.byref(power_measure)
|
||||
processor_handle, ctypes.byref(power_info)
|
||||
)
|
||||
)
|
||||
|
||||
power_info_dict = {
|
||||
"current_socket_power": power_measure.current_socket_power,
|
||||
"average_socket_power": power_measure.average_socket_power,
|
||||
"gfx_voltage": power_measure.gfx_voltage,
|
||||
"soc_voltage": power_measure.soc_voltage,
|
||||
"mem_voltage": power_measure.mem_voltage,
|
||||
"power_limit" : power_measure.power_limit,
|
||||
"socket_power": power_info.socket_power,
|
||||
"current_socket_power": power_info.current_socket_power,
|
||||
"average_socket_power": power_info.average_socket_power,
|
||||
"gfx_voltage": power_info.gfx_voltage,
|
||||
"soc_voltage": power_info.soc_voltage,
|
||||
"mem_voltage": power_info.mem_voltage,
|
||||
"power_limit" : power_info.power_limit,
|
||||
}
|
||||
|
||||
for key, value in power_info_dict.items():
|
||||
|
||||
@@ -4224,6 +4224,7 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_inf
|
||||
if (status != AMDSMI_STATUS_SUCCESS)
|
||||
return status;
|
||||
|
||||
info->socket_power = 0xFFFF;
|
||||
info->current_socket_power = 0xFFFF;
|
||||
info->average_socket_power = 0xFFFF;
|
||||
info->gfx_voltage = 0xFFFF;
|
||||
@@ -4241,6 +4242,12 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_inf
|
||||
info->mem_voltage = metrics.voltage_mem;
|
||||
}
|
||||
|
||||
if (metrics.current_socket_power != 0xFFFF) {
|
||||
info->socket_power = metrics.current_socket_power;
|
||||
} else if (metrics.average_socket_power != 0xFFFF) {
|
||||
info->socket_power = metrics.average_socket_power;
|
||||
}
|
||||
|
||||
int power_limit = 0;
|
||||
status = smi_amdgpu_get_power_cap(gpu_device, &power_limit);
|
||||
if (status == AMDSMI_STATUS_SUCCESS) {
|
||||
|
||||
Reference in New Issue
Block a user