[SWDEV-531902] python docs need exception type updated (#1895)
* add parameter checks * remove AmdSmiRetryException and AMDSMI_STATUS_RETRY * remove bdf exception * revert retry exception * add parameter checks * remove AmdSmiRetryException and AMDSMI_STATUS_RETRY * remove bdf exception * revert retry exception * wip * wip * add missing error codes * wip * Updated amdsmi-py-api.md file and amdsmi_exception.py * Updated amdsmi-py-api.md file * "Deleted backup related files" * updated amdsmi_interface.py file * amdsmi_interface.py file changes * updated amdsmi_interface.py file to fix check issues * updated amdsmi-py-api.md file * Reverted AmdSmiBdfFormatException definition --------- Co-authored-by: Oosman Saeed <oossaeed@amd.com> Co-authored-by: ssaka_amdeng <SitharamMurthy.Saka@amd.com> Co-authored-by: systems-assistant[bot] <systems-assistant[bot]@users.noreply.github.com> Co-authored-by: gabrpham <Gabriel.Pham@amd.com>
このコミットが含まれているのは:
@@ -125,26 +125,34 @@ Exceptions that can be thrown by AMD SMI are:
|
||||
When this exception is thrown, `err_code` and `err_info` are set. `err_code` is an integer that corresponds to errors that can occur
|
||||
in amdsmi-lib and `err_info` is a string that explains the error that occurred.
|
||||
|
||||
For example:
|
||||
|
||||
```python
|
||||
try:
|
||||
num_of_GPUs = len(amdsmi_get_processor_handles())
|
||||
if num_of_GPUs == 0:
|
||||
print("No GPUs on machine")
|
||||
except AmdSmiException as e:
|
||||
print("Error code: {}".format(e.err_code))
|
||||
if e.err_code == amdsmi_wrapper.AMDSMI_STATUS_RETRY:
|
||||
print("Error info: {}".format(e.err_info))
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
print("Unable to get processor handles, error: {} {}".format(str(e.get_error_code()), e.err_info))
|
||||
```
|
||||
|
||||
* `AmdSmiRetryException` : Derives `AmdSmiLibraryException` class and signals
|
||||
that the device is busy and the call should be retried.
|
||||
* `AmdSmiTimeoutException` : Derives `AmdSmiLibraryException` class and
|
||||
indicates that the call has timed out.
|
||||
|
||||
* `AmdSmiParameterException`: Derives base `AmdSmiException` class and
|
||||
represents errors related to invalid parameters passed to functions. When this
|
||||
exception is thrown, `err_msg` is set and it explains what is the actual and
|
||||
expected type of the parameters.
|
||||
* `AmdSmiBdfFormatException`: Derives base `AmdSmiException` class and
|
||||
represents an invalid BDF format.
|
||||
|
||||
For example:
|
||||
|
||||
```python
|
||||
try:
|
||||
processor_handles = amdsmi_get_cpusocket_handles()
|
||||
if len(processor_handles) == 0:
|
||||
print("No CPU sockets on machine")
|
||||
else:
|
||||
for processor in processor_handles:
|
||||
temperature = amdsmi_get_cpu_socket_temperature(processor)
|
||||
print(temperature)
|
||||
except amdsmi_exception.AmdSmiParameterException as e:
|
||||
print("Invalid parameter error: {} {}".format(str(e.get_error_code()), e.err_msg))
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
print("Unable to get processor handles, error: {} {}".format(str(e.get_error_code()), e.err_info))
|
||||
```
|
||||
|
||||
|
||||
|
||||
ファイル差分が大きすぎるため省略します
差分を読み込み
@@ -319,6 +319,5 @@ from .amdsmi_exception import AmdSmiLibraryException
|
||||
from .amdsmi_exception import AmdSmiRetryException
|
||||
from .amdsmi_exception import AmdSmiParameterException
|
||||
from .amdsmi_exception import AmdSmiKeyException
|
||||
from .amdsmi_exception import AmdSmiBdfFormatException
|
||||
from .amdsmi_exception import AmdSmiTimeoutException
|
||||
from .amdsmi_exception import AmdSmiException
|
||||
|
||||
@@ -79,6 +79,7 @@ class AmdSmiLibraryException(AmdSmiException):
|
||||
amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT : "AMDSMI_STATUS_NOT_INIT - Device not initialized",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_NO_SLOT : "AMDSMI_STATUS_NO_SLOT - No more free slot",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED : "AMDSMI_STATUS_DRIVER_NOT_LOADED - Driver not loaded",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_MORE_DATA : "AMDSMI_STATUS_MORE_DATA - There is more data than the buffer size the user passed",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_NO_DATA : "AMDSMI_STATUS_NO_DATA - No data was found for given input",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_INSUFFICIENT_SIZE : "AMDSMI_STATUS_INSUFFICIENT_SIZE - Insufficient size for operation",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_UNEXPECTED_SIZE : "AMDSMI_STATUS_UNEXPECTED_SIZE - unexpected size of data was read",
|
||||
@@ -93,10 +94,10 @@ class AmdSmiLibraryException(AmdSmiException):
|
||||
amdsmi_wrapper.AMDSMI_STATUS_NO_DRV : "AMDSMI_STATUS_NO_DRV - No Energy and HSMP driver present",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_FILE_NOT_FOUND : "AMDSMI_STATUS_FILE_NOT_FOUND - File or directory not found",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_ARG_PTR_NULL : "AMDSMI_STATUS_ARG_PTR_NULL - Parsed argument is invalid",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR : "AMDSMI_STATUS_MAP_ERROR - The internal library error did not map to a status code",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR: "AMDSMI_STATUS_AMDGPU_RESTART_ERR - AMDGPU restart failed, please check dmsg for errors",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE: "AMDSMI_STATUS_SETTING_UNAVAILABLE - Setting is not available",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_CORRUPTED_EEPROM: "AMDSMI_STATUS_CORRUPTED_EEPROM - Setting is not available",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR : "AMDSMI_STATUS_MAP_ERROR - The internal library error did not map to a status code",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_UNKNOWN_ERROR : "AMDSMI_STATUS_UNKNOWN_ERROR - An unknown error occurred"
|
||||
}
|
||||
|
||||
@@ -146,7 +147,6 @@ class AmdSmiKeyException(AmdSmiException):
|
||||
def __str__(self):
|
||||
return self.err_msg
|
||||
|
||||
|
||||
class AmdSmiBdfFormatException(AmdSmiException):
|
||||
def __init__(self, bdf):
|
||||
super().__init__()
|
||||
@@ -161,4 +161,4 @@ class AmdSmiBdfFormatException(AmdSmiException):
|
||||
+ "\t<bus> is 2 hex digits long from 00-FF interval\n"
|
||||
+ "\t<device> is 2 hex digits long from 00-1F interval\n"
|
||||
+ "\t<function> is 1 hex digit long from 0-7 interval"
|
||||
).format(self.bdf)
|
||||
).format(self.bdf)
|
||||
@@ -1728,6 +1728,10 @@ def amdsmi_get_cpu_current_io_bandwidth(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(encoding, int):
|
||||
raise AmdSmiParameterException(encoding, int)
|
||||
if not isinstance(link_name, str):
|
||||
raise AmdSmiParameterException(link_name, str)
|
||||
|
||||
link = amdsmi_wrapper.amdsmi_link_id_bw_type_t()
|
||||
link.bw_type = ctypes.c_uint32(encoding)
|
||||
@@ -1750,6 +1754,10 @@ def amdsmi_get_cpu_current_xgmi_bw(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(encoding, int):
|
||||
raise AmdSmiParameterException(encoding, int)
|
||||
if not isinstance(link_name, str):
|
||||
raise AmdSmiParameterException(link_name, str)
|
||||
|
||||
link = amdsmi_wrapper.amdsmi_link_id_bw_type_t()
|
||||
link.bw_type = ctypes.c_uint32(encoding)
|
||||
@@ -2304,6 +2312,8 @@ def amdsmi_get_gpu_reg_table_info(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(reg_type, AmdSmiRegType):
|
||||
raise AmdSmiParameterException(reg_type, AmdSmiRegType)
|
||||
|
||||
reg_metrics = POINTER(amdsmi_wrapper.amdsmi_name_value_t)()
|
||||
num_regs = ctypes.c_uint32(0)
|
||||
@@ -2709,16 +2719,6 @@ def amdsmi_get_gpu_total_ecc_count(
|
||||
"deferred_count": ec.deferred_count,
|
||||
}
|
||||
|
||||
def notifyTypeToString(notify_type_b):
    """Return the first eight items of *notify_type_b* as a reversed hex string.

    Each of the first (at most) eight items is formatted as a two-digit
    lowercase hexadecimal value; the formatted items are then concatenated
    in reverse order, so the eighth item comes first in the result.

    Args:
        notify_type_b: Iterable of integers (for example a ``bytes`` object).
            Items beyond the eighth are never read.
            NOTE(review): presumably the raw bytes of a CPER notify-type
            GUID, judging by the function name - confirm against the caller.

    Returns:
        str: Up to 16 hex characters; an empty string for empty input.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from itertools import islice

    # islice stops after eight items without pulling a ninth from the
    # iterable, matching the original counter-and-break loop.
    leading_hex = [format(item, '02x') for item in islice(notify_type_b, 8)]
    return "".join(reversed(leading_hex))
|
||||
|
||||
def amdsmi_get_gpu_cper_entries(
|
||||
processor_handle: processor_handle_t,
|
||||
severity_mask: int,
|
||||
@@ -2730,6 +2730,12 @@ def amdsmi_get_gpu_cper_entries(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(severity_mask, int):
|
||||
raise AmdSmiParameterException(severity_mask, int)
|
||||
if not isinstance(buffer_size, int):
|
||||
raise AmdSmiParameterException(buffer_size, int)
|
||||
if not isinstance(cursor, int):
|
||||
raise AmdSmiParameterException(cursor, int)
|
||||
|
||||
# Allocate a buffer for CPER data.
|
||||
buf = ctypes.create_string_buffer(buffer_size)
|
||||
@@ -2840,6 +2846,11 @@ def amdsmi_get_afids_from_cper(
|
||||
"bytes": list(cper_afid_data),
|
||||
"size": len(cper_afid_data)
|
||||
}]
|
||||
elif isinstance(cper_afid_data, List[Dict[str, Any]]):
|
||||
cper_records = cper_afid_data
|
||||
else:
|
||||
raise AmdSmiParameterException(cper_afid_data, bytes)
|
||||
|
||||
all_afids: List[int] = []
|
||||
|
||||
for record in cper_records:
|
||||
@@ -3229,7 +3240,6 @@ def amdsmi_get_processor_handle_from_bdf(bdf):
|
||||
amdsmi_bdf, ctypes.byref(processor_handle)))
|
||||
return processor_handle
|
||||
|
||||
|
||||
def amdsmi_get_gpu_vendor_name(
|
||||
processor_handle: processor_handle_t,
|
||||
) -> str:
|
||||
@@ -3520,7 +3530,6 @@ def amdsmi_is_P2P_accessible(
|
||||
|
||||
return accessible.value
|
||||
|
||||
|
||||
def amdsmi_get_gpu_compute_partition(processor_handle: processor_handle_t):
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
raise AmdSmiParameterException(
|
||||
@@ -4035,6 +4044,8 @@ def amdsmi_set_soc_pstate(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(policy_id, int):
|
||||
raise AmdSmiParameterException(policy_id, int)
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_set_soc_pstate(
|
||||
processor_handle, policy_id
|
||||
@@ -4050,6 +4061,8 @@ def amdsmi_set_xgmi_plpd(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(policy_id, int):
|
||||
raise AmdSmiParameterException(policy_id, int)
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_set_xgmi_plpd(
|
||||
processor_handle, policy_id
|
||||
@@ -4066,6 +4079,8 @@ def amdsmi_set_gpu_process_isolation(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(pisolate, int):
|
||||
raise AmdSmiParameterException(pisolate, int)
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_set_gpu_process_isolation(
|
||||
processor_handle, pisolate
|
||||
@@ -4326,6 +4341,10 @@ def amdsmi_set_gpu_clk_limit(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(clk_type, str):
|
||||
raise AmdSmiParameterException(clk_type, str)
|
||||
if not isinstance(limit_type, str):
|
||||
raise AmdSmiParameterException(limit_type, str)
|
||||
if not isinstance(value, int):
|
||||
raise AmdSmiParameterException(value, int)
|
||||
if clk_type.lower() == "sclk":
|
||||
@@ -5510,6 +5529,13 @@ def amdsmi_get_link_topology_nearest(
|
||||
link_type: AmdSmiLinkType,
|
||||
)-> Dict[str, Any]:
|
||||
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
if not isinstance(link_type, AmdSmiLinkType):
|
||||
raise AmdSmiParameterException(link_type, AmdSmiLinkType)
|
||||
|
||||
topology_nearest_list = amdsmi_wrapper.amdsmi_topology_nearest_t()
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_get_link_topology_nearest(
|
||||
@@ -5532,6 +5558,11 @@ def amdsmi_get_gpu_virtualization_mode(
|
||||
processor_handle: processor_handle_t
|
||||
) -> Dict[str, AmdSmiVirtualizationMode]:
|
||||
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
|
||||
# make info struct here
|
||||
mode = amdsmi_wrapper.amdsmi_virtualization_mode_t()
|
||||
|
||||
@@ -5896,5 +5927,4 @@ def amdsmi_get_gpu_busy_percent(processor_handle: processor_handle_t):
|
||||
|
||||
gpu_busy_percent = ctypes.c_uint32(0)
|
||||
_check_res(amdsmi_wrapper.amdsmi_get_gpu_busy_percent(processor_handle, ctypes.byref(gpu_busy_percent)))
|
||||
return gpu_busy_percent.value
|
||||
|
||||
return gpu_busy_percent.value
|
||||
@@ -304,7 +304,6 @@ amdsmi_status_t__enumvalues = {
|
||||
6: 'AMDSMI_STATUS_DRM_ERROR',
|
||||
7: 'AMDSMI_STATUS_API_FAILED',
|
||||
8: 'AMDSMI_STATUS_TIMEOUT',
|
||||
9: 'AMDSMI_STATUS_RETRY',
|
||||
10: 'AMDSMI_STATUS_NO_PERM',
|
||||
11: 'AMDSMI_STATUS_INTERRUPT',
|
||||
12: 'AMDSMI_STATUS_IO',
|
||||
@@ -3334,7 +3333,7 @@ __all__ = \
|
||||
'AMDSMI_STATUS_NO_HSMP_SUP', 'AMDSMI_STATUS_NO_MSR_DRV',
|
||||
'AMDSMI_STATUS_NO_PERM', 'AMDSMI_STATUS_NO_SLOT',
|
||||
'AMDSMI_STATUS_OUT_OF_RESOURCES',
|
||||
'AMDSMI_STATUS_REFCOUNT_OVERFLOW', 'AMDSMI_STATUS_RETRY',
|
||||
'AMDSMI_STATUS_REFCOUNT_OVERFLOW',
|
||||
'AMDSMI_STATUS_SETTING_UNAVAILABLE', 'AMDSMI_STATUS_SUCCESS',
|
||||
'AMDSMI_STATUS_TIMEOUT', 'AMDSMI_STATUS_UNEXPECTED_DATA',
|
||||
'AMDSMI_STATUS_UNEXPECTED_SIZE', 'AMDSMI_STATUS_UNKNOWN_ERROR',
|
||||
|
||||
@@ -156,6 +156,8 @@ static auto amdsmi_read_cper_file(const std::string &filepath) -> CperFileCtx {
|
||||
GUID_INIT(0xDC3EA0B0, 0xA144, 0x4797, 0xB9, 0x5B, 0x53, 0xFA, \
|
||||
0x24, 0x2B, 0x6E, 0x1D)
|
||||
|
||||
static amdsmi_cper_guid_t mce = CPER_NOTIFY_MCE;
|
||||
static amdsmi_cper_guid_t cmc = CPER_NOTIFY_CMC;
|
||||
static amdsmi_cper_guid_t bt = BOOT_TYPE;
|
||||
static amdsmi_cper_guid_t cr = AMD_OOB_CRASHDUMP;
|
||||
static amdsmi_cper_guid_t nonstd = AMD_GPU_NONSTANDARD_ERROR;
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする