Make amdsmi_get_power_info backwards compatible

Change-Id: Ie5b4c35265827e78934caa94c142d31efce597e4
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>


[ROCm/amdsmi commit: 4a3c70136f]
This commit is contained in:
Galantsev, Dmitrii
2025-03-18 18:38:36 +00:00
کامیت شده توسط Galantsev, Dmitrii
والد fff2d21baf
کامیت 633d2a8890
12فایلهای تغییر یافته به همراه114 افزوده شده و 17 حذف شده
+15 -2
مشاهده پرونده
@@ -18,6 +18,19 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
- Increasing available JPEG engines to 40.
Current ASICs may not support all 40. These will be indicated as UINT16_MAX or N/A in CLI.
## amd_smi_lib for ROCm 6.4.1
### Removed
- **Removed `sensor_ind` in `amdsmi_get_power_info()` for backwards compatibility**.
- This change breaks the C API introduced in 6.4.0, but restores backwards compatibility with 6.3
- Python API still accepts `sensor_ind` as an optional argument
- Changed AMDSMI version from 25.2 to 25.3
### Added
- **Added `amdsmi_get_power_info_v2()` with `sensor_ind`**.
## amd_smi_lib for ROCm 6.4.0
### Added
@@ -31,7 +44,7 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
uint32_t drm_card; // the graphic card device under /sys/class/drm/card*
uint32_t hsa_id; // the HSA enumeration ID
uint32_t hip_id; // the HIP enumeration ID
char hip_uuid[AMDSMI_MAX_STRING_LENGTH]; // the HIP unique identifer
char hip_uuid[AMDSMI_MAX_STRING_LENGTH]; // the HIP unique identifier
} amdsmi_enumeration_info_t;
```
@@ -198,7 +211,7 @@ Updated `amdsmi_get_gpu_metrics_info()` and structure `amdsmi_gpu_metrics_t` to
- **Added an additional argument `sensor_ind` to `amdsmi_get_power_info()`**.
- This change breaks previous C API calls and will require a change
- Python API now accepts `sensor_ind` as an optional argument, does not imapact previous usage
- Python API now accepts `sensor_ind` as an optional argument, does not impact previous usage
- **Deprecated enum `AMDSMI_NORMAL_STRING_LENGTH` in favor of `AMDSMI_MAX_STRING_LENGTH`**.
@@ -28,7 +28,7 @@ find_program(GIT NAMES git)
## Setup the package version based on git tags.
set(PKG_VERSION_GIT_TAG_PREFIX "amdsmi_pkg_ver")
get_package_version_number("25.2.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
get_package_version_number("25.3.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
message("Package version: ${PKG_VERSION_STR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR "${CPACK_PACKAGE_VERSION_MAJOR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MINOR "${CPACK_PACKAGE_VERSION_MINOR}")
@@ -48,7 +48,7 @@ PROJECT_NAME = AMD SMI
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = "25.2.0"
PROJECT_NUMBER = "25.3.0"
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
@@ -35,7 +35,7 @@ detected:
~$ amd-smi
usage: amd-smi [-h] ...
AMD System Management Interface | Version: 25.2.0 | ROCm version: 6.4.0 | Platform: Linux Baremetal
AMD System Management Interface | Version: 25.3.0 | ROCm version: 6.4.0 | Platform: Linux Baremetal
options:
-h, --help show this help message and exit
@@ -566,7 +566,7 @@ int main() {
// Get power measure
amdsmi_power_info_t power_measure = {};
ret = amdsmi_get_power_info(processor_handles[j], 0, &power_measure);
ret = amdsmi_get_power_info(processor_handles[j], &power_measure);
CHK_AMDSMI_RET(ret)
printf(" Output of amdsmi_get_power_info:\n");
printf("\tCurrent GFX Voltage: %d\n",
@@ -467,7 +467,7 @@ uint64_t goamdsmi_gpu_dev_power_get(uint32_t dv_ind)
uint64_t gpu_power_temp = GOAMDSMI_UINT64_MAX;
amdsmi_power_info_t amdsmi_power_info_temp = {0};
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_power_info(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], 0, &amdsmi_power_info_temp)))
if((dv_ind < num_gpu_devices_inAllSocket) && (AMDSMI_STATUS_SUCCESS == amdsmi_get_power_info(amdsmi_processor_handle_all_gpu_device_across_socket[dv_ind], &amdsmi_power_info_temp)))
{
gpu_power_temp = amdsmi_power_info_temp.average_socket_power;
if (enable_debug_level(GOAMDSMI_DEBUG_LEVEL_2)) {printf("AMDSMI, Success for Gpu:%d, GpuPowerAverage:%llu, GpuPowerAverageinWatt:%.6f\n", dv_ind, (unsigned long long)(gpu_power_temp), ((double)(gpu_power_temp))/1000000);}
@@ -202,7 +202,7 @@ typedef enum {
#define AMDSMI_LIB_VERSION_MAJOR 25
//! Minor version should be updated for each API change, but without changing headers
#define AMDSMI_LIB_VERSION_MINOR 2
#define AMDSMI_LIB_VERSION_MINOR 3
//! Release version should be set to 0 as default and can be updated by the PMs for each CSP point release
#define AMDSMI_LIB_VERSION_RELEASE 0
@@ -5969,7 +5969,24 @@ amdsmi_get_gpu_activity(amdsmi_processor_handle processor_handle, amdsmi_engine_
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t amdsmi_get_power_info(amdsmi_processor_handle processor_handle, uint32_t sensor_ind, amdsmi_power_info_t *info);
amdsmi_status_t amdsmi_get_power_info_v2(amdsmi_processor_handle processor_handle, uint32_t sensor_ind, amdsmi_power_info_t *info);
/**
* @brief Returns the current power and voltage of the GPU.
*
* @ingroup tagGPUMonitor
*
* @platform{gpu_bm_linux} @platform{host} @platform{guest_windows}
*
* @note amdsmi_power_info_t::socket_power metric can rarely spike above the socket power limit in some cases
*
* @note This signature takes no sensor_ind argument. The implementation forwards to
* amdsmi_get_power_info_v2() with sensor_ind = 0; call amdsmi_get_power_info_v2()
* directly when a sensor index has to be supplied.
*
* @param[in] processor_handle PF of a processor for which to query
*
* @param[out] info Reference to the gpu power structure. Must be allocated by user.
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_info_t *info);
/**
* @brief Returns whether power management is enabled
@@ -2378,6 +2378,8 @@ def amdsmi_get_gpu_driver_info(
return driver_info
# NOTE: this uses amdsmi_get_power_info_v2 under the hood because the C api
# needs to be backwards compatible
def amdsmi_get_power_info(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
sensor_ind: int = 0
@@ -2389,7 +2391,7 @@ def amdsmi_get_power_info(
power_measure = amdsmi_wrapper.amdsmi_power_info_t()
_check_res(
amdsmi_wrapper.amdsmi_get_power_info(
amdsmi_wrapper.amdsmi_get_power_info_v2(
processor_handle, sensor_ind, ctypes.byref(power_measure)
)
)
@@ -2619,9 +2619,12 @@ amdsmi_get_gpu_vbios_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(st
amdsmi_get_gpu_activity = _libraries['libamd_smi.so'].amdsmi_get_gpu_activity
amdsmi_get_gpu_activity.restype = amdsmi_status_t
amdsmi_get_gpu_activity.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_engine_usage_t)]
amdsmi_get_power_info_v2 = _libraries['libamd_smi.so'].amdsmi_get_power_info_v2
amdsmi_get_power_info_v2.restype = amdsmi_status_t
amdsmi_get_power_info_v2.argtypes = [amdsmi_processor_handle, uint32_t, ctypes.POINTER(struct_amdsmi_power_info_t)]
amdsmi_get_power_info = _libraries['libamd_smi.so'].amdsmi_get_power_info
amdsmi_get_power_info.restype = amdsmi_status_t
amdsmi_get_power_info.argtypes = [amdsmi_processor_handle, uint32_t, ctypes.POINTER(struct_amdsmi_power_info_t)]
amdsmi_get_power_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_power_info_t)]
amdsmi_is_gpu_power_management_enabled = _libraries['libamd_smi.so'].amdsmi_is_gpu_power_management_enabled
amdsmi_is_gpu_power_management_enabled.restype = amdsmi_status_t
amdsmi_is_gpu_power_management_enabled.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_bool)]
@@ -3063,7 +3066,7 @@ __all__ = \
'amdsmi_get_link_metrics', 'amdsmi_get_link_topology_nearest',
'amdsmi_get_minmax_bandwidth_between_processors',
'amdsmi_get_pcie_info', 'amdsmi_get_power_cap_info',
'amdsmi_get_power_info',
'amdsmi_get_power_info', 'amdsmi_get_power_info_v2',
'amdsmi_get_processor_count_from_handles',
'amdsmi_get_processor_handle_from_bdf',
'amdsmi_get_processor_handles',
@@ -5723,7 +5723,60 @@ pub fn amdsmi_get_gpu_activity(
/// let sensor_ind = 0
///
/// // Retrieve the power information
/// match amdsmi_get_power_info(processor_handle, sensor_ind) {
/// match amdsmi_get_power_info_v2(processor_handle, sensor_ind) {
/// Ok(info) => println!("Power information: {:?}", info),
/// Err(e) => panic!("Failed to get power information: {}", e),
/// }
/// #
/// # // Shut down the AMD SMI library
/// # amdsmi_shut_down().expect("Failed to shut down AMD SMI");
/// # }
/// ```
///
/// # Errors
///
/// This function will return the error in [`AmdsmiStatusT`] if the underlying `amdsmi_wrapper::amdsmi_get_power_info_v2` call fails.
pub fn amdsmi_get_power_info_v2(
    processor_handle: AmdsmiProcessorHandle,
    sensor_ind: u32,
) -> AmdsmiResult<AmdsmiPowerInfoT> {
    // Out-parameter storage handed to the C side; nothing is written here on the Rust side.
    let mut power_info = MaybeUninit::<AmdsmiPowerInfoT>::uninit();

    // NOTE(review): `call_unsafe!` is defined elsewhere; presumably it wraps the call in
    // `unsafe` and returns early with an error on a non-success status - confirm.
    call_unsafe!(amdsmi_wrapper::amdsmi_get_power_info_v2(
        processor_handle,
        sensor_ind,
        power_info.as_mut_ptr()
    ));

    // SAFETY: assumes the C call populated the struct before control reaches this
    // point - verify against the `call_unsafe!` definition.
    Ok(unsafe { power_info.assume_init() })
}
/// Get the power information for the device with the specified processor handle.
///
/// Given a processor handle `processor_handle`, this function retrieves the power information
/// for the specified processor.
///
/// # Arguments
///
/// * `processor_handle` - A handle to the processor for which the power information is being queried.
///
/// # Returns
///
/// * `AmdsmiResult<AmdsmiPowerInfoT>` - Returns `Ok(AmdsmiPowerInfoT)` containing the [`AmdsmiPowerInfoT`] if successful, or an error if it fails.
///
/// # Example
///
/// ```rust
/// # use amdsmi::*;
/// #
/// # fn main() {
/// # // Initialize the AMD SMI library
/// # amdsmi_init(AmdsmiInitFlagsT::AmdsmiInitAmdGpus).expect("Failed to initialize AMD SMI");
/// #
/// // Example processor_handle, assuming the number of processors is greater than zero
/// let processor_handle = amdsmi_get_processor_handles!()[0];
///
/// // Retrieve the power information
/// match amdsmi_get_power_info(processor_handle) {
/// Ok(info) => println!("Power information: {:?}", info),
/// Err(e) => panic!("Failed to get power information: {}", e),
/// }
@@ -5738,12 +5791,10 @@ pub fn amdsmi_get_gpu_activity(
/// This function will return the error in [`AmdsmiStatusT`] if the underlying `amdsmi_wrapper::amdsmi_get_power_info` call fails.
pub fn amdsmi_get_power_info(
processor_handle: AmdsmiProcessorHandle,
sensor_ind: u32,
) -> AmdsmiResult<AmdsmiPowerInfoT> {
let mut info = MaybeUninit::<AmdsmiPowerInfoT>::uninit();
call_unsafe!(amdsmi_wrapper::amdsmi_get_power_info(
processor_handle,
sensor_ind,
info.as_mut_ptr()
));
let info = unsafe { info.assume_init() };
@@ -3018,12 +3018,18 @@ extern "C" {
) -> AmdsmiStatusT;
}
extern "C" {
pub fn amdsmi_get_power_info(
pub fn amdsmi_get_power_info_v2(
processor_handle: AmdsmiProcessorHandle,
sensor_ind: u32,
info: *mut AmdsmiPowerInfoT,
) -> AmdsmiStatusT;
}
extern "C" {
/// Raw FFI declaration of the C symbol `amdsmi_get_power_info`.
///
/// This is the variant without a sensor index: it takes only the processor handle and
/// a raw pointer to an [`AmdsmiPowerInfoT`], and yields an [`AmdsmiStatusT`].
/// The sensor-indexed counterpart is declared as `amdsmi_get_power_info_v2`.
///
/// NOTE(review): `info` is presumably an out-pointer that the C side writes through,
/// so it must point to writable storage for one `AmdsmiPowerInfoT` - confirm against
/// the C header.
pub fn amdsmi_get_power_info(
processor_handle: AmdsmiProcessorHandle,
info: *mut AmdsmiPowerInfoT,
) -> AmdsmiStatusT;
}
extern "C" {
pub fn amdsmi_is_gpu_power_management_enabled(
processor_handle: AmdsmiProcessorHandle,
@@ -3588,7 +3588,7 @@ amdsmi_get_gpu_process_list(amdsmi_processor_handle processor_handle, uint32_t *
}
amdsmi_status_t
amdsmi_get_power_info(amdsmi_processor_handle processor_handle, __attribute__((unused)) uint32_t sensor_ind, amdsmi_power_info_t *info) {
amdsmi_get_power_info_v2(amdsmi_processor_handle processor_handle, __attribute__((unused)) uint32_t sensor_ind, amdsmi_power_info_t *info) {
AMDSMI_CHECK_INIT();
@@ -3628,6 +3628,11 @@ amdsmi_get_power_info(amdsmi_processor_handle processor_handle, __attribute__((u
return status;
}
/**
 * Sensor-less entry point for querying GPU power information.
 *
 * Delegates to amdsmi_get_power_info_v2() and returns its status unchanged.
 * Callers of this signature cannot pick a sensor, so index 0 is always passed.
 */
amdsmi_status_t
amdsmi_get_power_info(amdsmi_processor_handle processor_handle, amdsmi_power_info_t *info) {
    // Fixed sensor index used for every call made through this signature.
    const uint32_t default_sensor_ind = 0;

    const amdsmi_status_t status =
        amdsmi_get_power_info_v2(processor_handle, default_sensor_ind, info);
    return status;
}
amdsmi_status_t amdsmi_get_gpu_driver_info(amdsmi_processor_handle processor_handle,
amdsmi_driver_info_t *info) {
AMDSMI_CHECK_INIT();