[SWDEV-539078] Add missing API definitions to python interface (#525)

Added the following APIs to amdsmi_interface.py.
	amdsmi_get_cpu_handle()
	amdsmi_get_esmi_err_msg()
	amdsmi_get_gpu_event_notification()
	amdsmi_get_processor_count_from_handles()
	amdsmi_get_processor_handles_by_type()
	amdsmi_gpu_validate_ras_eeprom()
	amdsmi_init_gpu_event_notification()
	amdsmi_set_gpu_event_notification_mask()
	amdsmi_stop_gpu_event_notification()
	amdsmi_get_gpu_busy_percent()

Added additional return value to API amdsmi_get_xgmi_plpd().
	The entry policies is added to the end of the dictionary to match API definition.
	The entry plpds is marked for deprecation as it has the same information as policies.

---------

Signed-off-by: josnarlo <Joseph.Narlo@amd.com>
Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
This commit is contained in:
Narlo, Joseph
2025-10-06 14:50:00 -05:00
committad av GitHub
förälder faf0024135
incheckning 7decbc67a1
8 ändrade filer med 2662 tillägg och 3061 borttagningar
+19
Visa fil
@@ -8,6 +8,22 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
### Added
- **Added the following APIs to amdsmi_interface.py**.
- amdsmi_get_cpu_handle()
- amdsmi_get_esmi_err_msg()
- amdsmi_get_gpu_event_notification()
- amdsmi_get_processor_count_from_handles()
- amdsmi_get_processor_handles_by_type()
- amdsmi_gpu_validate_ras_eeprom()
- amdsmi_init_gpu_event_notification()
- amdsmi_set_gpu_event_notification_mask()
- amdsmi_stop_gpu_event_notification()
- amdsmi_get_gpu_busy_percent()
- **Added an additional return value to the `amdsmi_get_xgmi_plpd()` API**.
- The `policies` entry is added to the end of the returned dictionary to match the API definition.
- The `plpds` entry is marked for deprecation because it carries the same information as `policies`.
- **Added `amdsmi_get_gpu_revision()` to Python API**
- This function retrieves the GPU revision ID. Available in `amdsmi_interface.py` as `amdsmi_get_gpu_revision()`.
@@ -190,6 +206,9 @@ GPU: 0
### Resolved Issues
- **Fixed an issue where `amdsmi_get_gpu_od_volt_info()` returned a reference to a Python object**.
- The returned dictionary was changed to return values in all fields.
- **Fixed an issue where using `amd-smi ras --folder <folder_name>` was forcing the created folder's name to be lowercase**.
- This fix also allows all string input options to be case insensitive.
+27 -27
Visa fil
@@ -99,6 +99,19 @@ class AMDSMICommands():
logging.error('Unable to detect any CPU devices, check amd_hsmp version and module status (sudo modprobe amd_hsmp)')
exit_flag = True
self.convert_clock_type = {
"sys": amdsmi_interface.AmdSmiClkType.SYS,
"mem": amdsmi_interface.AmdSmiClkType.MEM,
"df": amdsmi_interface.AmdSmiClkType.DF,
"soc": amdsmi_interface.AmdSmiClkType.SOC,
"dcef": amdsmi_interface.AmdSmiClkType.DCEF,
# vclk and dclk currently do not support levels so average clk is given for frequency levels
"vclk0": amdsmi_interface.AmdSmiClkType.VCLK0,
"vclk1": amdsmi_interface.AmdSmiClkType.VCLK1,
"dclk0": amdsmi_interface.AmdSmiClkType.DCLK0,
"dclk1": amdsmi_interface.AmdSmiClkType.DCLK1
}
if exit_flag:
version_args = argparse.Namespace()
version_args.gpu_version = False
@@ -1041,28 +1054,9 @@ class AMDSMICommands():
for clk in list(clk_dict.keys()):
if clk not in args.clock:
del clk_dict[clk]
for clk in args.clock:
clk_type = clk.lower()
if clk_type == "sys":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.SYS
elif clk_type == "mem":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.MEM
elif clk_type == "df":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DF
elif clk_type == "soc":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.SOC
elif clk_type == "dcef":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCEF
# vclk and dclk currently do not support levels so average clk is given for frequency levels
elif clk_type == "vclk0":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.VCLK0
elif clk_type == "vclk1":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.VCLK1
elif clk_type == "dclk0":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCLK0
elif clk_type == "dclk1":
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCLK1
if clk in self.convert_clock_type:
clk_type_conversion = self.convert_clock_type[clk]
else:
clk_type_conversion = "N/A"
output_format = self.helpers.get_output_format()
@@ -2471,10 +2465,10 @@ class AMDSMICommands():
# Populate voltage point values
for point in range(amdsmi_interface.AMDSMI_NUM_VOLTAGE_CURVE_POINTS):
if isinstance(od_volt, dict):
logging.debug(f"point_{point} frequency: {od_volt['curve.vc_points'][point].frequency}")
logging.debug(f"point_{point} voltage: {od_volt['curve.vc_points'][point].voltage}")
frequency = int(od_volt["curve.vc_points"][point].frequency / 1000000)
voltage = int(od_volt["curve.vc_points"][point].voltage)
logging.debug(f"point_{point} frequency: {od_volt['curve.vc_points'][point]['frequency']}")
logging.debug(f"point_{point} voltage: {od_volt['curve.vc_points'][point]['voltage']}")
frequency = int(od_volt["curve.vc_points"][point]['frequency'] / 1000000)
voltage = int(od_volt["curve.vc_points"][point]['voltage'])
else:
frequency = "N/A"
voltage = "N/A"
@@ -4875,6 +4869,11 @@ class AMDSMICommands():
return
else:
# For non-pcie clocks
if clk_type in self.convert_clock_type:
clk_type_conversion = self.convert_clock_type[clk_type]
else:
clk_type_conversion = "N/A"
try:
amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clk_type, freq_bitmask)
results_clk_lvl['set_clock'] = f"Successfully set {clk_type} perf level(s) to {perf_levels_str}"
@@ -4959,6 +4958,7 @@ class AMDSMICommands():
clk_tuple = amdsmi_interface.amdsmi_get_clock_info(args.gpu, amdsmi_clk_type)
if lim_type == "min":
amdsmi_lim_type = amdsmi_interface.AmdSmiClkLimitType.MIN
if val > clk_tuple['max_clk']:
self.logger.store_output(args.gpu, 'clk_limit', f"Cannot set {args.clk_limit.clk_type} min value greater than max ({clk_tuple['max_clk']}MHz)")
self.logger.print_output()
@@ -4967,8 +4967,8 @@ class AMDSMICommands():
if val == clk_tuple['min_clk']:
val_changed = False # Clock limit value did not changed
if lim_type == "max":
elif lim_type == "max":
amdsmi_lim_type = amdsmi_interface.AmdSmiClkLimitType.MAX
if val < clk_tuple['min_clk']:
self.logger.store_output(args.gpu, 'clk_limit', f"Cannot set {args.clk_limit.clk_type} max value less than min ({clk_tuple['min_clk']}MHz)")
self.logger.print_output()
+1 -1
Visa fil
@@ -857,7 +857,7 @@ class AMDSMIHelpers():
xgmi_plpd_info = amdsmi_interface.amdsmi_get_xgmi_plpd(dev)
except amdsmi_interface.AmdSmiLibraryException as e:
continue
for policy in xgmi_plpd_info['plpds']:
for policy in xgmi_plpd_info['policies']:
policy_string = f"{policy['policy_id']}: {policy['policy_description']}"
if not policy_string in xgmi_plpd_profile_list:
xgmi_plpd_profile_list.append(policy_string)
+1 -1
Visa fil
@@ -3325,7 +3325,7 @@ Field | Description
---|---
`num_supported` | The number of supported policies
`current_id` | The current policy index
`plpds` | List of policies.
`policies` | List of policies. (`plpds` marked for deprecation in next major release)
Exceptions that can be thrown by `amdsmi_get_xgmi_plpd` function:
+17 -1
Visa fil
@@ -29,6 +29,8 @@ from .amdsmi_interface import amdsmi_get_processor_type
from .amdsmi_interface import amdsmi_get_processor_handles
from .amdsmi_interface import amdsmi_get_socket_handles
from .amdsmi_interface import amdsmi_get_socket_info
from .amdsmi_interface import amdsmi_get_processor_count_from_handles
from .amdsmi_interface import amdsmi_get_processor_handles_by_type
# ESMI Dependent Functions
try:
@@ -78,6 +80,7 @@ try:
from .amdsmi_interface import amdsmi_get_cpu_family
from .amdsmi_interface import amdsmi_get_cpu_model
from .amdsmi_interface import amdsmi_get_cpu_model_name
from .amdsmi_interface import amdsmi_get_cpu_handles
except AttributeError:
pass
@@ -101,6 +104,7 @@ from .amdsmi_interface import amdsmi_get_power_cap_info
from .amdsmi_interface import amdsmi_get_gpu_vram_info
from .amdsmi_interface import amdsmi_get_gpu_cache_info
from .amdsmi_interface import amdsmi_get_gpu_xcd_counter
from .amdsmi_interface import amdsmi_get_gpu_revision
# # Microcode and VBIOS Information
from .amdsmi_interface import amdsmi_get_gpu_vbios_info
@@ -111,13 +115,19 @@ from .amdsmi_interface import amdsmi_get_gpu_activity
from .amdsmi_interface import amdsmi_get_gpu_vram_usage
from .amdsmi_interface import amdsmi_get_power_info
from .amdsmi_interface import amdsmi_get_clock_info
from .amdsmi_interface import amdsmi_get_gpu_busy_percent
from .amdsmi_interface import amdsmi_get_pcie_info
from .amdsmi_interface import amdsmi_get_gpu_bad_page_info
from .amdsmi_interface import amdsmi_get_gpu_bad_page_threshold
from .amdsmi_interface import amdsmi_get_violation_status
from .amdsmi_interface import amdsmi_get_gpu_xgmi_link_status
from .amdsmi_interface import amdsmi_get_gpu_revision
# # Event Notification
from .amdsmi_interface import amdsmi_init_gpu_event_notification
from .amdsmi_interface import amdsmi_set_gpu_event_notification_mask
from .amdsmi_interface import amdsmi_get_gpu_event_notification
from .amdsmi_interface import amdsmi_stop_gpu_event_notification
# # Process Information
from .amdsmi_interface import amdsmi_get_gpu_process_list
@@ -132,6 +142,7 @@ from .amdsmi_interface import amdsmi_get_gpu_board_info
from .amdsmi_interface import amdsmi_get_gpu_ras_feature_info
from .amdsmi_interface import amdsmi_get_gpu_ras_block_features_enabled
from .amdsmi_interface import amdsmi_get_gpu_cper_entries
from .amdsmi_interface import amdsmi_gpu_validate_ras_eeprom
# # Unsupported Functions In Virtual Environment
from .amdsmi_interface import amdsmi_set_gpu_pci_bandwidth
@@ -150,9 +161,12 @@ from .amdsmi_interface import amdsmi_set_gpu_fan_speed
from .amdsmi_interface import amdsmi_reset_gpu_fan
from .amdsmi_interface import amdsmi_set_clk_freq
from .amdsmi_interface import amdsmi_set_gpu_overdrive_level
from .amdsmi_interface import amdsmi_get_soc_pstate
from .amdsmi_interface import amdsmi_set_soc_pstate
from .amdsmi_interface import amdsmi_set_xgmi_plpd
from .amdsmi_interface import amdsmi_get_xgmi_plpd
from .amdsmi_interface import amdsmi_clean_gpu_local_data
from .amdsmi_interface import amdsmi_get_gpu_process_isolation
from .amdsmi_interface import amdsmi_set_gpu_process_isolation
# # Physical State Queries
@@ -193,6 +207,7 @@ from .amdsmi_interface import amdsmi_get_gpu_compute_process_info_by_pid
from .amdsmi_interface import amdsmi_get_gpu_compute_process_gpus
from .amdsmi_interface import amdsmi_gpu_xgmi_error_status
from .amdsmi_interface import amdsmi_reset_gpu_xgmi_error
from .amdsmi_interface import amdsmi_get_esmi_err_msg
# # PCIE information
from .amdsmi_interface import amdsmi_get_gpu_bdf_id
@@ -255,6 +270,7 @@ from .amdsmi_interface import amdsmi_get_lib_version
from .amdsmi_interface import amdsmi_get_rocm_version
# # Enums
from .amdsmi_interface import AmdSmiStatus
from .amdsmi_interface import AmdSmiInitFlags
from .amdsmi_interface import AmdSmiContainerTypes
from .amdsmi_interface import AmdSmiDeviceType
Filskillnaden har hållits tillbaka eftersom den är för stor Load Diff
+2 -4
Visa fil
@@ -63,7 +63,6 @@
#include "rocm_smi/rocm_smi_utils.h"
#include "rocm_smi/rocm_smi_kfd.h"
// a global instance of std::mutex to protect data passed during threads
std::mutex myMutex;
@@ -495,8 +494,7 @@ amdsmi_status_t amdsmi_get_processor_count_from_handles(amdsmi_processor_handle*
}
for (uint32_t i = 0; i < *processor_count; i++) {
amdsmi_status_t r = amdsmi_get_processor_type(processor_handles[i],
&processor_type);
amdsmi_status_t r = amdsmi_get_processor_type(processor_handles[i], &processor_type);
if (r != AMDSMI_STATUS_SUCCESS) return r;
if(processor_type == AMDSMI_PROCESSOR_TYPE_AMD_CPU) {
@@ -546,7 +544,7 @@ amdsmi_status_t amdsmi_get_processor_handles_by_type(amdsmi_socket_handle socket
#endif
amdsmi_status_t amdsmi_get_processor_type(amdsmi_processor_handle processor_handle ,
amdsmi_status_t amdsmi_get_processor_type(amdsmi_processor_handle processor_handle,
processor_type_t* processor_type) {
AMDSMI_CHECK_INIT();
Filskillnaden har hållits tillbaka eftersom den är för stor Load Diff