diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index b064d2732d..f13b2e7cf3 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -88,6 +88,11 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr - **Added `amdsmi_get_cpu_affinity_with_scope()`**. +- **Added `socket_power` to `amdsmi_get_power_info`** + - Previously the C API had the value in the `amdsmi_power_info_t` structure, but was unused + - Now we populate the value in both C & Python APIs + - The value is representative of the socket's power agnostic of the GPU version. + ### Changed - **Padded `asic_serial` in `amdsmi_get_asic_info` with 0s**. @@ -162,6 +167,12 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr - **Removed `amdsmi_io_link_type_t` and replaced with amdsmi_link_type_t**. - The IO Link type is no longer needed as the link type is sufficient. + - Mapping from amdsmi_io_link_type_t to amdsmi_link_type_t is as follows: + ```shell + AMDSMI_IOLINK_TYPE_UNDEFINED == AMDSMI_LINK_TYPE_INTERNAL + AMDSMI_IOLINK_TYPE_PCIEXPRESS == AMDSMI_LINK_TYPE_PCIE + AMDSMI_IOLINK_TYPE_XGMI == AMDSMI_LINK_TYPE_XGMI + ``` - **Removed `amdsmi_get_power_info_v2()`**. - The amdsmi_get_power_info() has been unified and the v2 function is no longer needed/used. 
diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 09d7846c26..79539d5e8d 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -1844,22 +1844,16 @@ class AMDSMICommands(): power_unit = "W" power_info = amdsmi_interface.amdsmi_get_power_info(args.gpu) for key, value in power_info.items(): - if value == 0xFFFF: - power_info[key] = "N/A" - elif "voltage" in key: + if "voltage" in key: power_info[key] = self.helpers.unit_format(self.logger, - value, - voltage_unit) - elif "power" in key: - if ((key == "current_socket_power" or key == "average_socket_power") - and value != "N/A"): - power_dict['socket_power'] = self.helpers.unit_format(self.logger, - value, - power_unit) + value, + voltage_unit) + elif key == "socket_power": power_info[key] = self.helpers.unit_format(self.logger, - value, - power_unit) + value, + power_unit) + power_dict['socket_power'] = power_info['socket_power'] power_dict['gfx_voltage'] = power_info['gfx_voltage'] power_dict['soc_voltage'] = power_info['soc_voltage'] power_dict['mem_voltage'] = power_info['mem_voltage'] diff --git a/projects/amdsmi/docs/reference/amdsmi-py-api.md b/projects/amdsmi/docs/reference/amdsmi-py-api.md index 414630c55f..476d50076a 100644 --- a/projects/amdsmi/docs/reference/amdsmi-py-api.md +++ b/projects/amdsmi/docs/reference/amdsmi-py-api.md @@ -460,12 +460,12 @@ try: print("No GPUs on machine") else: for device in devices: - power_info = amdsmi_get_power_cap_info(device) - print(power_info['power_cap']) - print(power_info['dpm_cap']) - print(power_info['default_power_cap']) - print(power_info['min_power_cap']) - print(power_info['max_power_cap']) + power_cap_info = amdsmi_get_power_cap_info(device) + print(power_cap_info['power_cap']) + print(power_cap_info['dpm_cap']) + print(power_cap_info['default_power_cap']) + print(power_cap_info['min_power_cap']) + print(power_cap_info['max_power_cap']) 
except AmdSmiException as e: print(e) ``` @@ -736,18 +736,18 @@ It is not supported on virtual machine guest Input parameters: * `processor_handle` device which to query -* `sensor_ind` optional argument that defaults to 0 Output: Dictionary with fields -Field | Description ----|--- -`current_socket_power` | current socket power; Mi300+ Series Cards -`average_socket_power` | average socket power; Navi + Mi 200 and earlier Series cards -`gfx_voltage` | voltage gfx -`soc_voltage` | voltage soc -`mem_voltage` | voltage mem -`power_limit` | power limit +Field | Description | Units +---|---|--- +`socket_power` | socket power; matches current or average socket power | W +`current_socket_power` | current socket power; Mi300+ Series Cards | W +`average_socket_power` | average socket power; Navi + Mi 200 and earlier Series cards | W +`gfx_voltage` | voltage gfx | mV +`soc_voltage` | voltage soc | mV +`mem_voltage` | voltage mem | mV +`power_limit` | power limit | W Exceptions that can be thrown by `amdsmi_get_power_info` function: @@ -764,15 +764,13 @@ try: print("No GPUs on machine") else: for device in devices: - power_measure = amdsmi_get_power_info(device) - # Example with using sensor_ind - # power_measure = amdsmi_get_power_info(device, 0) - print(power_measure['current_socket_power']) - print(power_measure['average_socket_power']) - print(power_measure['gfx_voltage']) - print(power_measure['soc_voltage']) - print(power_measure['mem_voltage']) - print(power_measure['power_limit']) + power_info = amdsmi_get_power_info(device) + print(power_info['current_socket_power']) + print(power_info['average_socket_power']) + print(power_info['gfx_voltage']) + print(power_info['soc_voltage']) + print(power_info['mem_voltage']) + print(power_info['power_limit']) except AmdSmiException as e: print(e) ``` diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index d6c051d61f..8517ceb30b 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h 
+++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -352,19 +352,20 @@ typedef enum { * @cond @tag{gpu_bm_linux} @tag{host} @tag{guest_windows} @endcond */ typedef enum { - AMDSMI_CLK_TYPE_SYS = 0x0, //!< System clock + AMDSMI_CLK_TYPE_SYS = 0x0, //!< Graphics clock AMDSMI_CLK_TYPE_FIRST = AMDSMI_CLK_TYPE_SYS, - AMDSMI_CLK_TYPE_GFX = AMDSMI_CLK_TYPE_SYS, - AMDSMI_CLK_TYPE_DF, //!< Data Fabric clock (for ASICs - //!< running on a separate clock) - AMDSMI_CLK_TYPE_DCEF, //!< Display Controller Engine clock - AMDSMI_CLK_TYPE_SOC, - AMDSMI_CLK_TYPE_MEM, - AMDSMI_CLK_TYPE_PCIE, - AMDSMI_CLK_TYPE_VCLK0, - AMDSMI_CLK_TYPE_VCLK1, - AMDSMI_CLK_TYPE_DCLK0, - AMDSMI_CLK_TYPE_DCLK1, + AMDSMI_CLK_TYPE_GFX = AMDSMI_CLK_TYPE_SYS, //!< Graphics clock + AMDSMI_CLK_TYPE_DF, /**< Data Fabric clock (for ASICs + running on a separate clock) */ + AMDSMI_CLK_TYPE_DCEF, /**< Display Controller Engine Front clock, + timing/bandwidth signals to display */ + AMDSMI_CLK_TYPE_SOC, //!< System On Chip clock, integrated circuit frequency + AMDSMI_CLK_TYPE_MEM, //!< Memory clock speed, system operating frequency + AMDSMI_CLK_TYPE_PCIE, //!< PCI Express clock, high bandwidth peripherals + AMDSMI_CLK_TYPE_VCLK0, //!< Video 0 clock, video processing units + AMDSMI_CLK_TYPE_VCLK1, //!< Video 1 clock, video processing units + AMDSMI_CLK_TYPE_DCLK0, //!< Display 1 clock, timing signals for display output + AMDSMI_CLK_TYPE_DCLK1, //!< Display 2 clock, timing signals for display output AMDSMI_CLK_TYPE__MAX = AMDSMI_CLK_TYPE_DCLK1 } amdsmi_clk_type_t; @@ -1027,9 +1028,9 @@ typedef struct { * @cond @tag{gpu_bm_linux} @tag{host} @endcond */ typedef struct { - uint64_t socket_power; //!< Units in uW {@host}, Host only - uint32_t current_socket_power; //!< Units in W {@linux_bm}, Linux only, Mi 300+ Series cards - uint32_t average_socket_power; //!< Units in W {@linux_bm}, Linux only, Navi + Mi 200 and earlier Series cards + uint64_t socket_power; //!< Socket power in W {@linux_bm}, uW {@host} + uint32_t 
current_socket_power; //!< Current socket power in W {@linux_bm}, Linux only, Mi 300+ Series cards + uint32_t average_socket_power; //!< Average socket power in W {@linux_bm}, Linux only, Navi + Mi 200 and earlier Series cards uint64_t gfx_voltage; //!< GFX voltage measurement in mV {@linux_bm} or V {@host} uint64_t soc_voltage; //!< SOC voltage measurement in mV {@linux_bm} or V {@host} uint64_t mem_voltage; //!< MEM voltage measurement in mV {@linux_bm} or V {@host} diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py index 2230df59dc..d4787f46ff 100644 --- a/projects/amdsmi/py-interface/amdsmi_interface.py +++ b/projects/amdsmi/py-interface/amdsmi_interface.py @@ -1958,18 +1958,18 @@ def amdsmi_get_power_cap_info( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) - power_info = amdsmi_wrapper.amdsmi_power_cap_info_t() + power_cap_info = amdsmi_wrapper.amdsmi_power_cap_info_t() _check_res( amdsmi_wrapper.amdsmi_get_power_cap_info( - processor_handle, ctypes.c_uint32(0), ctypes.byref(power_info) + processor_handle, ctypes.c_uint32(0), ctypes.byref(power_cap_info) ) ) - return {"power_cap": power_info.power_cap, - "default_power_cap": power_info.default_power_cap, - "dpm_cap": power_info.dpm_cap, - "min_power_cap": power_info.min_power_cap, - "max_power_cap": power_info.max_power_cap} + return {"power_cap": power_cap_info.power_cap, + "default_power_cap": power_cap_info.default_power_cap, + "dpm_cap": power_cap_info.dpm_cap, + "min_power_cap": power_cap_info.min_power_cap, + "max_power_cap": power_cap_info.max_power_cap} def amdsmi_get_gpu_pm_metrics_info( @@ -2733,20 +2733,21 @@ def amdsmi_get_power_info( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) - power_measure = amdsmi_wrapper.amdsmi_power_info_t() + power_info = amdsmi_wrapper.amdsmi_power_info_t() _check_res( amdsmi_wrapper.amdsmi_get_power_info( - processor_handle, ctypes.byref(power_measure) + processor_handle, 
ctypes.byref(power_info) ) ) power_info_dict = { - "current_socket_power": power_measure.current_socket_power, - "average_socket_power": power_measure.average_socket_power, - "gfx_voltage": power_measure.gfx_voltage, - "soc_voltage": power_measure.soc_voltage, - "mem_voltage": power_measure.mem_voltage, - "power_limit" : power_measure.power_limit, + "socket_power": power_info.socket_power, + "current_socket_power": power_info.current_socket_power, + "average_socket_power": power_info.average_socket_power, + "gfx_voltage": power_info.gfx_voltage, + "soc_voltage": power_info.soc_voltage, + "mem_voltage": power_info.mem_voltage, + "power_limit" : power_info.power_limit, } for key, value in power_info_dict.items(): diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index c8af0d020f..104eca1c1d 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -4224,6 +4224,7 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_inf if (status != AMDSMI_STATUS_SUCCESS) return status; + info->socket_power = 0xFFFF; info->current_socket_power = 0xFFFF; info->average_socket_power = 0xFFFF; info->gfx_voltage = 0xFFFF; @@ -4241,6 +4242,12 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_inf info->mem_voltage = metrics.voltage_mem; } + if (metrics.current_socket_power != 0xFFFF) { + info->socket_power = metrics.current_socket_power; + } else if (metrics.average_socket_power != 0xFFFF) { + info->socket_power = metrics.average_socket_power; + } + int power_limit = 0; status = smi_amdgpu_get_power_cap(gpu_device, &power_limit); if (status == AMDSMI_STATUS_SUCCESS) {