[SWDEV-531902] python docs need exception type updated (#1895)

* add parameter checks

* remove AmdSmiRetryException and AMDSMI_STATUS_RETRY

* remove bdf exception

* revert retry exception

* add parameter checks

* remove AmdSmiRetryException and AMDSMI_STATUS_RETRY

* remove bdf exception

* revert retry exception

* wip

* wip

* add missing error codes

* wip

* Updated amdsmi-py-api.md file and amdsmi_exception.py

* Updated amdsmi-py-api.md file

* "Deleted backup related files"

* updated amdsmi_interface.py file

* amdsmi_interface.py file changes

* updated amdsmi_interface.py file to fix check issues

* updated amdsmi-py-api.md file

* Reverted AmdSmiBdfFormatException definition

---------

Co-authored-by: Oosman Saeed <oossaeed@amd.com>
Co-authored-by: ssaka_amdeng <SitharamMurthy.Saka@amd.com>
Co-authored-by: systems-assistant[bot] <systems-assistant[bot]@users.noreply.github.com>
Co-authored-by: gabrpham <Gabriel.Pham@amd.com>
このコミットが含まれているのは:
systems-assistant[bot]
2025-12-08 12:57:23 -06:00
committed by GitHub
コミット eb357fcd45
7個のファイルの変更1333行の追加191行の削除
+22 -14
ファイルの表示
@@ -125,26 +125,34 @@ Exceptions that can be thrown by AMD SMI are:
When this exception is thrown, `err_code` and `err_info` are set. `err_code` is an integer that corresponds to errors that can occur
in amdsmi-lib and `err_info` is a string that explains the error that occurred.
For example:
```python
try:
num_of_GPUs = len(amdsmi_get_processor_handles())
if num_of_GPUs == 0:
print("No GPUs on machine")
except AmdSmiException as e:
print("Error code: {}".format(e.err_code))
if e.err_code == amdsmi_wrapper.AMDSMI_STATUS_RETRY:
print("Error info: {}".format(e.err_info))
except amdsmi_exception.AmdSmiLibraryException as e:
print("Unable to get processor handles, error: {} {}".format(str(e.get_error_code()), e.err_info))
```
* `AmdSmiRetryException` : Derives `AmdSmiLibraryException` class and signals
that the device is busy and the call should be retried.
* `AmdSmiTimeoutException` : Derives `AmdSmiLibraryException` class and
indicates that the call timed out.
* `AmdSmiParameterException`: Derives base `AmdSmiException` class and
represents errors related to invalid parameters passed to functions. When this
exception is thrown, `err_msg` is set and explains the actual and
expected types of the parameters.
* `AmdSmiBdfFormatException`: Derives base `AmdSmiException` class and
represents an invalid BDF format.
For example:
```python
try:
processor_handles = amdsmi_get_cpusocket_handles()
if len(processor_handles) == 0:
print("No CPU sockets on machine")
else:
for processor in processor_handles:
temperature = amdsmi_get_cpu_socket_temperature(processor)
print(temperature)
except amdsmi_exception.AmdSmiParameterException as e:
print("Invalid parameter error: {} {}".format(str(e.get_error_code()), e.err_msg))
except amdsmi_exception.AmdSmiLibraryException as e:
print("Unable to get processor handles, error: {} {}".format(str(e.get_error_code()), e.err_info))
```
ファイル差分が大きすぎるため省略します 差分を読み込み
-1
ファイルの表示
@@ -319,6 +319,5 @@ from .amdsmi_exception import AmdSmiLibraryException
from .amdsmi_exception import AmdSmiRetryException
from .amdsmi_exception import AmdSmiParameterException
from .amdsmi_exception import AmdSmiKeyException
from .amdsmi_exception import AmdSmiBdfFormatException
from .amdsmi_exception import AmdSmiTimeoutException
from .amdsmi_exception import AmdSmiException
+3 -3
ファイルの表示
@@ -79,6 +79,7 @@ class AmdSmiLibraryException(AmdSmiException):
amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT : "AMDSMI_STATUS_NOT_INIT - Device not initialized",
amdsmi_wrapper.AMDSMI_STATUS_NO_SLOT : "AMDSMI_STATUS_NO_SLOT - No more free slot",
amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED : "AMDSMI_STATUS_DRIVER_NOT_LOADED - Driver not loaded",
amdsmi_wrapper.AMDSMI_STATUS_MORE_DATA : "AMDSMI_STATUS_MORE_DATA - There is more data than the buffer size the user passed",
amdsmi_wrapper.AMDSMI_STATUS_NO_DATA : "AMDSMI_STATUS_NO_DATA - No data was found for given input",
amdsmi_wrapper.AMDSMI_STATUS_INSUFFICIENT_SIZE : "AMDSMI_STATUS_INSUFFICIENT_SIZE - Insufficient size for operation",
amdsmi_wrapper.AMDSMI_STATUS_UNEXPECTED_SIZE : "AMDSMI_STATUS_UNEXPECTED_SIZE - unexpected size of data was read",
@@ -93,10 +94,10 @@ class AmdSmiLibraryException(AmdSmiException):
amdsmi_wrapper.AMDSMI_STATUS_NO_DRV : "AMDSMI_STATUS_NO_DRV - No Energy and HSMP driver present",
amdsmi_wrapper.AMDSMI_STATUS_FILE_NOT_FOUND : "AMDSMI_STATUS_FILE_NOT_FOUND - File or directory not found",
amdsmi_wrapper.AMDSMI_STATUS_ARG_PTR_NULL : "AMDSMI_STATUS_ARG_PTR_NULL - Parsed argument is invalid",
amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR : "AMDSMI_STATUS_MAP_ERROR - The internal library error did not map to a status code",
amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR: "AMDSMI_STATUS_AMDGPU_RESTART_ERR - AMDGPU restart failed, please check dmsg for errors",
amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE: "AMDSMI_STATUS_SETTING_UNAVAILABLE - Setting is not available",
amdsmi_wrapper.AMDSMI_STATUS_CORRUPTED_EEPROM: "AMDSMI_STATUS_CORRUPTED_EEPROM - Setting is not available",
amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR : "AMDSMI_STATUS_MAP_ERROR - The internal library error did not map to a status code",
amdsmi_wrapper.AMDSMI_STATUS_UNKNOWN_ERROR : "AMDSMI_STATUS_UNKNOWN_ERROR - An unknown error occurred"
}
@@ -146,7 +147,6 @@ class AmdSmiKeyException(AmdSmiException):
def __str__(self):
return self.err_msg
class AmdSmiBdfFormatException(AmdSmiException):
def __init__(self, bdf):
super().__init__()
@@ -161,4 +161,4 @@ class AmdSmiBdfFormatException(AmdSmiException):
+ "\t<bus> is 2 hex digits long from 00-FF interval\n"
+ "\t<device> is 2 hex digits long from 00-1F interval\n"
+ "\t<function> is 1 hex digit long from 0-7 interval"
).format(self.bdf)
).format(self.bdf)
+44 -14
ファイルの表示
@@ -1728,6 +1728,10 @@ def amdsmi_get_cpu_current_io_bandwidth(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(encoding, int):
raise AmdSmiParameterException(encoding, int)
if not isinstance(link_name, str):
raise AmdSmiParameterException(link_name, str)
link = amdsmi_wrapper.amdsmi_link_id_bw_type_t()
link.bw_type = ctypes.c_uint32(encoding)
@@ -1750,6 +1754,10 @@ def amdsmi_get_cpu_current_xgmi_bw(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(encoding, int):
raise AmdSmiParameterException(encoding, int)
if not isinstance(link_name, str):
raise AmdSmiParameterException(link_name, str)
link = amdsmi_wrapper.amdsmi_link_id_bw_type_t()
link.bw_type = ctypes.c_uint32(encoding)
@@ -2304,6 +2312,8 @@ def amdsmi_get_gpu_reg_table_info(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(reg_type, AmdSmiRegType):
raise AmdSmiParameterException(reg_type, AmdSmiRegType)
reg_metrics = POINTER(amdsmi_wrapper.amdsmi_name_value_t)()
num_regs = ctypes.c_uint32(0)
@@ -2709,16 +2719,6 @@ def amdsmi_get_gpu_total_ecc_count(
"deferred_count": ec.deferred_count,
}
def notifyTypeToString(notify_type_b):
idx = 0
guid = []
for i in notify_type_b:
guid.append(format(i, '02x'))
if idx == 7:
break
idx = idx +1
return "".join(guid[::-1])
def amdsmi_get_gpu_cper_entries(
processor_handle: processor_handle_t,
severity_mask: int,
@@ -2730,6 +2730,12 @@ def amdsmi_get_gpu_cper_entries(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(severity_mask, int):
raise AmdSmiParameterException(severity_mask, int)
if not isinstance(buffer_size, int):
raise AmdSmiParameterException(buffer_size, int)
if not isinstance(cursor, int):
raise AmdSmiParameterException(cursor, int)
# Allocate a buffer for CPER data.
buf = ctypes.create_string_buffer(buffer_size)
@@ -2840,6 +2846,11 @@ def amdsmi_get_afids_from_cper(
"bytes": list(cper_afid_data),
"size": len(cper_afid_data)
}]
elif isinstance(cper_afid_data, List[Dict[str, Any]]):
cper_records = cper_afid_data
else:
raise AmdSmiParameterException(cper_afid_data, bytes)
all_afids: List[int] = []
for record in cper_records:
@@ -3229,7 +3240,6 @@ def amdsmi_get_processor_handle_from_bdf(bdf):
amdsmi_bdf, ctypes.byref(processor_handle)))
return processor_handle
def amdsmi_get_gpu_vendor_name(
processor_handle: processor_handle_t,
) -> str:
@@ -3520,7 +3530,6 @@ def amdsmi_is_P2P_accessible(
return accessible.value
def amdsmi_get_gpu_compute_partition(processor_handle: processor_handle_t):
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
raise AmdSmiParameterException(
@@ -4035,6 +4044,8 @@ def amdsmi_set_soc_pstate(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(policy_id, int):
raise AmdSmiParameterException(policy_id, int)
_check_res(
amdsmi_wrapper.amdsmi_set_soc_pstate(
processor_handle, policy_id
@@ -4050,6 +4061,8 @@ def amdsmi_set_xgmi_plpd(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(policy_id, int):
raise AmdSmiParameterException(policy_id, int)
_check_res(
amdsmi_wrapper.amdsmi_set_xgmi_plpd(
processor_handle, policy_id
@@ -4066,6 +4079,8 @@ def amdsmi_set_gpu_process_isolation(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(pisolate, int):
raise AmdSmiParameterException(pisolate, int)
_check_res(
amdsmi_wrapper.amdsmi_set_gpu_process_isolation(
processor_handle, pisolate
@@ -4326,6 +4341,10 @@ def amdsmi_set_gpu_clk_limit(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(clk_type, str):
raise AmdSmiParameterException(clk_type, str)
if not isinstance(limit_type, str):
raise AmdSmiParameterException(limit_type, str)
if not isinstance(value, int):
raise AmdSmiParameterException(value, int)
if clk_type.lower() == "sclk":
@@ -5510,6 +5529,13 @@ def amdsmi_get_link_topology_nearest(
link_type: AmdSmiLinkType,
)-> Dict[str, Any]:
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
if not isinstance(link_type, AmdSmiLinkType):
raise AmdSmiParameterException(link_type, AmdSmiLinkType)
topology_nearest_list = amdsmi_wrapper.amdsmi_topology_nearest_t()
_check_res(
amdsmi_wrapper.amdsmi_get_link_topology_nearest(
@@ -5532,6 +5558,11 @@ def amdsmi_get_gpu_virtualization_mode(
processor_handle: processor_handle_t
) -> Dict[str, AmdSmiVirtualizationMode]:
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
# make info struct here
mode = amdsmi_wrapper.amdsmi_virtualization_mode_t()
@@ -5896,5 +5927,4 @@ def amdsmi_get_gpu_busy_percent(processor_handle: processor_handle_t):
gpu_busy_percent = ctypes.c_uint32(0)
_check_res(amdsmi_wrapper.amdsmi_get_gpu_busy_percent(processor_handle, ctypes.byref(gpu_busy_percent)))
return gpu_busy_percent.value
return gpu_busy_percent.value
+1 -2
ファイルの表示
@@ -304,7 +304,6 @@ amdsmi_status_t__enumvalues = {
6: 'AMDSMI_STATUS_DRM_ERROR',
7: 'AMDSMI_STATUS_API_FAILED',
8: 'AMDSMI_STATUS_TIMEOUT',
9: 'AMDSMI_STATUS_RETRY',
10: 'AMDSMI_STATUS_NO_PERM',
11: 'AMDSMI_STATUS_INTERRUPT',
12: 'AMDSMI_STATUS_IO',
@@ -3334,7 +3333,7 @@ __all__ = \
'AMDSMI_STATUS_NO_HSMP_SUP', 'AMDSMI_STATUS_NO_MSR_DRV',
'AMDSMI_STATUS_NO_PERM', 'AMDSMI_STATUS_NO_SLOT',
'AMDSMI_STATUS_OUT_OF_RESOURCES',
'AMDSMI_STATUS_REFCOUNT_OVERFLOW', 'AMDSMI_STATUS_RETRY',
'AMDSMI_STATUS_REFCOUNT_OVERFLOW',
'AMDSMI_STATUS_SETTING_UNAVAILABLE', 'AMDSMI_STATUS_SUCCESS',
'AMDSMI_STATUS_TIMEOUT', 'AMDSMI_STATUS_UNEXPECTED_DATA',
'AMDSMI_STATUS_UNEXPECTED_SIZE', 'AMDSMI_STATUS_UNKNOWN_ERROR',
+2
ファイルの表示
@@ -156,6 +156,8 @@ static auto amdsmi_read_cper_file(const std::string &filepath) -> CperFileCtx {
GUID_INIT(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA, \
0x24, 0x2B, 0x6E, 0x1D)
static amdsmi_cper_guid_t mce = CPER_NOTIFY_MCE;
static amdsmi_cper_guid_t cmc = CPER_NOTIFY_CMC;
static amdsmi_cper_guid_t bt = BOOT_TYPE;
static amdsmi_cper_guid_t cr = AMD_OOB_CRASHDUMP;
static amdsmi_cper_guid_t nonstd = AMD_GPU_NONSTANDARD_ERROR;