[SWDEV-539078] Add missing API definitions to python interface (#525)
Added the following APIs to amdsmi_interface.py: amdsmi_get_cpu_handle(), amdsmi_get_esmi_err_msg(), amdsmi_get_gpu_event_notification(), amdsmi_get_processor_count_from_handles(), amdsmi_get_processor_handles_by_type(), amdsmi_gpu_validate_ras_eeprom(), amdsmi_init_gpu_event_notification(), amdsmi_set_gpu_event_notification_mask(), amdsmi_stop_gpu_event_notification(), amdsmi_get_gpu_busy_percent(). Added additional return value to API amdsmi_get_xgmi_plpd(). The entry policies is added to the end of the dictionary to match the API definition. The entry plpds is marked for deprecation as it has the same information as policies. --------- Signed-off-by: josnarlo <Joseph.Narlo@amd.com> Signed-off-by: Maisam Arif <Maisam.Arif@amd.com>
This commit is contained in:
committad av
GitHub
förälder
faf0024135
incheckning
7decbc67a1
+19
@@ -8,6 +8,22 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
|
||||
|
||||
### Added
|
||||
|
||||
- **Added the following APIs to amdsmi_interface.py**.
|
||||
- amdsmi_get_cpu_handle()
|
||||
- amdsmi_get_esmi_err_msg()
|
||||
- amdsmi_get_gpu_event_notification()
|
||||
- amdsmi_get_processor_count_from_handles()
|
||||
- amdsmi_get_processor_handles_by_type()
|
||||
- amdsmi_gpu_validate_ras_eeprom()
|
||||
- amdsmi_init_gpu_event_notification()
|
||||
- amdsmi_set_gpu_event_notification_mask()
|
||||
- amdsmi_stop_gpu_event_notification()
|
||||
- amdsmi_get_gpu_busy_percent()
|
||||
|
||||
- **Added additional return value to API amdsmi_get_xgmi_plpd()**.
|
||||
- The entry `policies` is added to the end of the dictionary to match the API definition.
|
||||
- The entry `plpds` is marked for deprecation as it has the same information as `policies`.
|
||||
|
||||
- **Added `amdsmi_get_gpu_revision()` to Python API**
|
||||
- This function retrieves the GPU revision ID. Available in `amdsmi_interface.py` as `amdsmi_get_gpu_revision()`.
|
||||
|
||||
@@ -190,6 +206,9 @@ GPU: 0
|
||||
|
||||
### Resolved Issues
|
||||
|
||||
- **Fixed an issue where amdsmi_get_gpu_od_volt_info() returned a reference to a Python object**.
|
||||
- The returned dictionary was changed to return values in all fields.
|
||||
|
||||
- **Fixed an issue where using `amd-smi ras --folder <folder_name>` was forcing the created folder's name to be lowercase**.
|
||||
- This fix also allows all string input options to be case insensitive.
|
||||
|
||||
|
||||
@@ -99,6 +99,19 @@ class AMDSMICommands():
|
||||
logging.error('Unable to detect any CPU devices, check amd_hsmp version and module status (sudo modprobe amd_hsmp)')
|
||||
exit_flag = True
|
||||
|
||||
self.convert_clock_type = {
|
||||
"sys": amdsmi_interface.AmdSmiClkType.SYS,
|
||||
"mem": amdsmi_interface.AmdSmiClkType.MEM,
|
||||
"df": amdsmi_interface.AmdSmiClkType.DF,
|
||||
"soc": amdsmi_interface.AmdSmiClkType.SOC,
|
||||
"dcef": amdsmi_interface.AmdSmiClkType.DCEF,
|
||||
# vclk and dclk currently do not support levels so average clk is given for frequency levels
|
||||
"vclk0": amdsmi_interface.AmdSmiClkType.VCLK0,
|
||||
"vclk1": amdsmi_interface.AmdSmiClkType.VCLK1,
|
||||
"dclk0": amdsmi_interface.AmdSmiClkType.DCLK0,
|
||||
"dclk1": amdsmi_interface.AmdSmiClkType.DCLK1
|
||||
}
|
||||
|
||||
if exit_flag:
|
||||
version_args = argparse.Namespace()
|
||||
version_args.gpu_version = False
|
||||
@@ -1041,28 +1054,9 @@ class AMDSMICommands():
|
||||
for clk in list(clk_dict.keys()):
|
||||
if clk not in args.clock:
|
||||
del clk_dict[clk]
|
||||
|
||||
for clk in args.clock:
|
||||
clk_type = clk.lower()
|
||||
if clk_type == "sys":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.SYS
|
||||
elif clk_type == "mem":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.MEM
|
||||
elif clk_type == "df":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DF
|
||||
elif clk_type == "soc":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.SOC
|
||||
elif clk_type == "dcef":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCEF
|
||||
# vclk and dclk currently do not support levels so average clk is given for frequency levels
|
||||
elif clk_type == "vclk0":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.VCLK0
|
||||
elif clk_type == "vclk1":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.VCLK1
|
||||
elif clk_type == "dclk0":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCLK0
|
||||
elif clk_type == "dclk1":
|
||||
clk_type_conversion = amdsmi_interface.AmdSmiClkType.DCLK1
|
||||
if clk in self.convert_clock_type:
|
||||
clk_type_conversion = self.convert_clock_type[clk]
|
||||
else:
|
||||
clk_type_conversion = "N/A"
|
||||
output_format = self.helpers.get_output_format()
|
||||
@@ -2471,10 +2465,10 @@ class AMDSMICommands():
|
||||
# Populate voltage point values
|
||||
for point in range(amdsmi_interface.AMDSMI_NUM_VOLTAGE_CURVE_POINTS):
|
||||
if isinstance(od_volt, dict):
|
||||
logging.debug(f"point_{point} frequency: {od_volt['curve.vc_points'][point].frequency}")
|
||||
logging.debug(f"point_{point} voltage: {od_volt['curve.vc_points'][point].voltage}")
|
||||
frequency = int(od_volt["curve.vc_points"][point].frequency / 1000000)
|
||||
voltage = int(od_volt["curve.vc_points"][point].voltage)
|
||||
logging.debug(f"point_{point} frequency: {od_volt['curve.vc_points'][point]['frequency']}")
|
||||
logging.debug(f"point_{point} voltage: {od_volt['curve.vc_points'][point]['voltage']}")
|
||||
frequency = int(od_volt["curve.vc_points"][point]['frequency'] / 1000000)
|
||||
voltage = int(od_volt["curve.vc_points"][point]['voltage'])
|
||||
else:
|
||||
frequency = "N/A"
|
||||
voltage = "N/A"
|
||||
@@ -4875,6 +4869,11 @@ class AMDSMICommands():
|
||||
return
|
||||
else:
|
||||
# For non-pcie clocks
|
||||
if clk_type in self.convert_clock_type:
|
||||
clk_type_conversion = self.convert_clock_type[clk_type]
|
||||
else:
|
||||
clk_type_conversion = "N/A"
|
||||
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_clk_freq(args.gpu, clk_type, freq_bitmask)
|
||||
results_clk_lvl['set_clock'] = f"Successfully set {clk_type} perf level(s) to {perf_levels_str}"
|
||||
@@ -4959,6 +4958,7 @@ class AMDSMICommands():
|
||||
clk_tuple = amdsmi_interface.amdsmi_get_clock_info(args.gpu, amdsmi_clk_type)
|
||||
|
||||
if lim_type == "min":
|
||||
amdsmi_lim_type = amdsmi_interface.AmdSmiClkLimitType.MIN
|
||||
if val > clk_tuple['max_clk']:
|
||||
self.logger.store_output(args.gpu, 'clk_limit', f"Cannot set {args.clk_limit.clk_type} min value greater than max ({clk_tuple['max_clk']}MHz)")
|
||||
self.logger.print_output()
|
||||
@@ -4967,8 +4967,8 @@ class AMDSMICommands():
|
||||
|
||||
if val == clk_tuple['min_clk']:
|
||||
val_changed = False # Clock limit value did not changed
|
||||
|
||||
if lim_type == "max":
|
||||
elif lim_type == "max":
|
||||
amdsmi_lim_type = amdsmi_interface.AmdSmiClkLimitType.MAX
|
||||
if val < clk_tuple['min_clk']:
|
||||
self.logger.store_output(args.gpu, 'clk_limit', f"Cannot set {args.clk_limit.clk_type} max value less than min ({clk_tuple['min_clk']}MHz)")
|
||||
self.logger.print_output()
|
||||
|
||||
@@ -857,7 +857,7 @@ class AMDSMIHelpers():
|
||||
xgmi_plpd_info = amdsmi_interface.amdsmi_get_xgmi_plpd(dev)
|
||||
except amdsmi_interface.AmdSmiLibraryException as e:
|
||||
continue
|
||||
for policy in xgmi_plpd_info['plpds']:
|
||||
for policy in xgmi_plpd_info['policies']:
|
||||
policy_string = f"{policy['policy_id']}: {policy['policy_description']}"
|
||||
if not policy_string in xgmi_plpd_profile_list:
|
||||
xgmi_plpd_profile_list.append(policy_string)
|
||||
|
||||
@@ -3325,7 +3325,7 @@ Field | Description
|
||||
---|---
|
||||
`num_supported` | The number of supported policies
|
||||
`current_id` | The current policy index
|
||||
`plpds` | List of policies.
|
||||
`policies` | List of policies. (`plpds` marked for deprecation in next major release)
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_get_xgmi_plpd` function:
|
||||
|
||||
|
||||
+17
-1
@@ -29,6 +29,8 @@ from .amdsmi_interface import amdsmi_get_processor_type
|
||||
from .amdsmi_interface import amdsmi_get_processor_handles
|
||||
from .amdsmi_interface import amdsmi_get_socket_handles
|
||||
from .amdsmi_interface import amdsmi_get_socket_info
|
||||
from .amdsmi_interface import amdsmi_get_processor_count_from_handles
|
||||
from .amdsmi_interface import amdsmi_get_processor_handles_by_type
|
||||
|
||||
# ESMI Dependent Functions
|
||||
try:
|
||||
@@ -78,6 +80,7 @@ try:
|
||||
from .amdsmi_interface import amdsmi_get_cpu_family
|
||||
from .amdsmi_interface import amdsmi_get_cpu_model
|
||||
from .amdsmi_interface import amdsmi_get_cpu_model_name
|
||||
from .amdsmi_interface import amdsmi_get_cpu_handles
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
@@ -101,6 +104,7 @@ from .amdsmi_interface import amdsmi_get_power_cap_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_vram_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_cache_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_xcd_counter
|
||||
from .amdsmi_interface import amdsmi_get_gpu_revision
|
||||
|
||||
# # Microcode and VBIOS Information
|
||||
from .amdsmi_interface import amdsmi_get_gpu_vbios_info
|
||||
@@ -111,13 +115,19 @@ from .amdsmi_interface import amdsmi_get_gpu_activity
|
||||
from .amdsmi_interface import amdsmi_get_gpu_vram_usage
|
||||
from .amdsmi_interface import amdsmi_get_power_info
|
||||
from .amdsmi_interface import amdsmi_get_clock_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_busy_percent
|
||||
|
||||
from .amdsmi_interface import amdsmi_get_pcie_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_bad_page_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_bad_page_threshold
|
||||
from .amdsmi_interface import amdsmi_get_violation_status
|
||||
from .amdsmi_interface import amdsmi_get_gpu_xgmi_link_status
|
||||
from .amdsmi_interface import amdsmi_get_gpu_revision
|
||||
|
||||
# # Event Notification
|
||||
from .amdsmi_interface import amdsmi_init_gpu_event_notification
|
||||
from .amdsmi_interface import amdsmi_set_gpu_event_notification_mask
|
||||
from .amdsmi_interface import amdsmi_get_gpu_event_notification
|
||||
from .amdsmi_interface import amdsmi_stop_gpu_event_notification
|
||||
|
||||
# # Process Information
|
||||
from .amdsmi_interface import amdsmi_get_gpu_process_list
|
||||
@@ -132,6 +142,7 @@ from .amdsmi_interface import amdsmi_get_gpu_board_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_ras_feature_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_ras_block_features_enabled
|
||||
from .amdsmi_interface import amdsmi_get_gpu_cper_entries
|
||||
from .amdsmi_interface import amdsmi_gpu_validate_ras_eeprom
|
||||
|
||||
# # Unsupported Functions In Virtual Environment
|
||||
from .amdsmi_interface import amdsmi_set_gpu_pci_bandwidth
|
||||
@@ -150,9 +161,12 @@ from .amdsmi_interface import amdsmi_set_gpu_fan_speed
|
||||
from .amdsmi_interface import amdsmi_reset_gpu_fan
|
||||
from .amdsmi_interface import amdsmi_set_clk_freq
|
||||
from .amdsmi_interface import amdsmi_set_gpu_overdrive_level
|
||||
from .amdsmi_interface import amdsmi_get_soc_pstate
|
||||
from .amdsmi_interface import amdsmi_set_soc_pstate
|
||||
from .amdsmi_interface import amdsmi_set_xgmi_plpd
|
||||
from .amdsmi_interface import amdsmi_get_xgmi_plpd
|
||||
from .amdsmi_interface import amdsmi_clean_gpu_local_data
|
||||
from .amdsmi_interface import amdsmi_get_gpu_process_isolation
|
||||
from .amdsmi_interface import amdsmi_set_gpu_process_isolation
|
||||
|
||||
# # Physical State Queries
|
||||
@@ -193,6 +207,7 @@ from .amdsmi_interface import amdsmi_get_gpu_compute_process_info_by_pid
|
||||
from .amdsmi_interface import amdsmi_get_gpu_compute_process_gpus
|
||||
from .amdsmi_interface import amdsmi_gpu_xgmi_error_status
|
||||
from .amdsmi_interface import amdsmi_reset_gpu_xgmi_error
|
||||
from .amdsmi_interface import amdsmi_get_esmi_err_msg
|
||||
|
||||
# # PCIE information
|
||||
from .amdsmi_interface import amdsmi_get_gpu_bdf_id
|
||||
@@ -255,6 +270,7 @@ from .amdsmi_interface import amdsmi_get_lib_version
|
||||
from .amdsmi_interface import amdsmi_get_rocm_version
|
||||
|
||||
# # Enums
|
||||
from .amdsmi_interface import AmdSmiStatus
|
||||
from .amdsmi_interface import AmdSmiInitFlags
|
||||
from .amdsmi_interface import AmdSmiContainerTypes
|
||||
from .amdsmi_interface import AmdSmiDeviceType
|
||||
|
||||
+583
-332
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
@@ -63,7 +63,6 @@
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
|
||||
|
||||
// a global instance of std::mutex to protect data passed during threads
|
||||
std::mutex myMutex;
|
||||
|
||||
@@ -495,8 +494,7 @@ amdsmi_status_t amdsmi_get_processor_count_from_handles(amdsmi_processor_handle*
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < *processor_count; i++) {
|
||||
amdsmi_status_t r = amdsmi_get_processor_type(processor_handles[i],
|
||||
&processor_type);
|
||||
amdsmi_status_t r = amdsmi_get_processor_type(processor_handles[i], &processor_type);
|
||||
if (r != AMDSMI_STATUS_SUCCESS) return r;
|
||||
|
||||
if(processor_type == AMDSMI_PROCESSOR_TYPE_AMD_CPU) {
|
||||
@@ -546,7 +544,7 @@ amdsmi_status_t amdsmi_get_processor_handles_by_type(amdsmi_socket_handle socket
|
||||
|
||||
#endif
|
||||
|
||||
amdsmi_status_t amdsmi_get_processor_type(amdsmi_processor_handle processor_handle ,
|
||||
amdsmi_status_t amdsmi_get_processor_type(amdsmi_processor_handle processor_handle,
|
||||
processor_type_t* processor_type) {
|
||||
|
||||
AMDSMI_CHECK_INIT();
|
||||
|
||||
+2012
-2695
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
Referens i nytt ärende
Block a user