Renamed API amdsmi_dev_reset_xgmi_error
amdsmi_dev_reset_xgmi_error -> amdsmi_reset_gpu_xgmi_error
grep -rli 'amdsmi_dev_reset_xgmi_error' * | xargs -i@ sed -i \
    's/amdsmi_dev_reset_xgmi_error/amdsmi_reset_gpu_xgmi_error/g' @
Change-Id: Ic7e4c4b345fdf6187aed42d53fb7ae8536c2edea
[ROCm/amdsmi commit: 6256bf6f1a]
这个提交包含在:
@@ -3107,7 +3107,7 @@ amdsmi_gpu_xgmi_error_status(amdsmi_processor_handle processor_handle, amdsmi_xg
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_dev_reset_xgmi_error(amdsmi_processor_handle processor_handle);
|
||||
amdsmi_reset_gpu_xgmi_error(amdsmi_processor_handle processor_handle);
|
||||
|
||||
/** @} End SysInfo */
|
||||
|
||||
|
||||
@@ -2849,7 +2849,7 @@ except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
## amdsmi_dev_reset_xgmi_error
|
||||
## amdsmi_reset_gpu_xgmi_error
|
||||
Description: Reset the XGMI error status for a device
|
||||
|
||||
Input parameters:
|
||||
@@ -2857,7 +2857,7 @@ Input parameters:
|
||||
|
||||
Output: None
|
||||
|
||||
Exceptions that can be thrown by `amdsmi_dev_reset_xgmi_error` function:
|
||||
Exceptions that can be thrown by `amdsmi_reset_gpu_xgmi_error` function:
|
||||
* `AmdSmiLibraryException`
|
||||
* `AmdSmiRetryException`
|
||||
* `AmdSmiParameterException`
|
||||
@@ -2870,7 +2870,7 @@ try:
|
||||
print("No GPUs on machine")
|
||||
else:
|
||||
for device in devices:
|
||||
amdsmi_dev_reset_xgmi_error(device)
|
||||
amdsmi_reset_gpu_xgmi_error(device)
|
||||
except AmdSmiException as e:
|
||||
print(e)
|
||||
```
|
||||
|
||||
@@ -134,7 +134,7 @@ from .amdsmi_interface import amdsmi_get_gpu_compute_process_info
|
||||
from .amdsmi_interface import amdsmi_get_gpu_compute_process_info_by_pid
|
||||
from .amdsmi_interface import amdsmi_get_gpu_compute_process_gpus
|
||||
from .amdsmi_interface import amdsmi_gpu_xgmi_error_status
|
||||
from .amdsmi_interface import amdsmi_dev_reset_xgmi_error
|
||||
from .amdsmi_interface import amdsmi_reset_gpu_xgmi_error
|
||||
|
||||
# # PCIE information
|
||||
from .amdsmi_interface import amdsmi_get_gpu_pci_id
|
||||
|
||||
@@ -2620,7 +2620,7 @@ def amdsmi_gpu_xgmi_error_status(
|
||||
return AmdSmiXgmiStatus(status.value)
|
||||
|
||||
|
||||
def amdsmi_dev_reset_xgmi_error(
|
||||
def amdsmi_reset_gpu_xgmi_error(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
|
||||
) -> None:
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
@@ -2628,7 +2628,7 @@ def amdsmi_dev_reset_xgmi_error(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
|
||||
_check_res(amdsmi_wrapper.amdsmi_dev_reset_xgmi_error(processor_handle))
|
||||
_check_res(amdsmi_wrapper.amdsmi_reset_gpu_xgmi_error(processor_handle))
|
||||
|
||||
|
||||
def amdsmi_get_gpu_memory_reserved_pages(
|
||||
|
||||
@@ -1619,9 +1619,9 @@ amdsmi_get_gpu_compute_process_gpus.argtypes = [uint32_t, ctypes.POINTER(ctypes.
|
||||
amdsmi_gpu_xgmi_error_status = _libraries['libamd_smi.so'].amdsmi_gpu_xgmi_error_status
|
||||
amdsmi_gpu_xgmi_error_status.restype = amdsmi_status_t
|
||||
amdsmi_gpu_xgmi_error_status.argtypes = [amdsmi_processor_handle, ctypes.POINTER(c__EA_amdsmi_xgmi_status_t)]
|
||||
amdsmi_dev_reset_xgmi_error = _libraries['libamd_smi.so'].amdsmi_dev_reset_xgmi_error
|
||||
amdsmi_dev_reset_xgmi_error.restype = amdsmi_status_t
|
||||
amdsmi_dev_reset_xgmi_error.argtypes = [amdsmi_processor_handle]
|
||||
amdsmi_reset_gpu_xgmi_error = _libraries['libamd_smi.so'].amdsmi_reset_gpu_xgmi_error
|
||||
amdsmi_reset_gpu_xgmi_error.restype = amdsmi_status_t
|
||||
amdsmi_reset_gpu_xgmi_error.argtypes = [amdsmi_processor_handle]
|
||||
amdsmi_topo_get_numa_node_number = _libraries['libamd_smi.so'].amdsmi_topo_get_numa_node_number
|
||||
amdsmi_topo_get_numa_node_number.restype = amdsmi_status_t
|
||||
amdsmi_topo_get_numa_node_number.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32)]
|
||||
@@ -1874,7 +1874,7 @@ __all__ = \
|
||||
'amdsmi_dev_open_supported_variant_iterator',
|
||||
'amdsmi_dev_perf_level_t', 'amdsmi_dev_perf_level_t__enumvalues',
|
||||
'amdsmi_reset_gpu_fan', 'amdsmi_reset_gpu',
|
||||
'amdsmi_dev_reset_xgmi_error', 'amdsmi_set_clk_freq',
|
||||
'amdsmi_reset_gpu_xgmi_error', 'amdsmi_set_clk_freq',
|
||||
'amdsmi_set_gpu_clk_range', 'amdsmi_set_gpu_fan_speed',
|
||||
'amdsmi_set_gpu_od_clk_info', 'amdsmi_set_gpu_od_volt_info',
|
||||
'amdsmi_set_gpu_overdrive_level',
|
||||
|
||||
@@ -329,7 +329,7 @@ class Formatter:
|
||||
| """ + self.style.text("40 Get compute process info by pid. Api: amdsmi_get_gpu_compute_process_info_by_pid <pid>") + """ |
|
||||
| """ + self.style.text("41 Get compute process gpus. Api: amdsmi_get_gpu_compute_process_gpus <pid>") + """ |
|
||||
| """ + self.style.text("42 Get device xgmi_error_status. Api: amdsmi_gpu_xgmi_error_status <bdf>") + """ |
|
||||
| """ + self.style.text("43 Get device xgmi error reset. Api: amdsmi_dev_reset_xgmi_error <bdf>") + """ |
|
||||
| """ + self.style.text("43 Get device xgmi error reset. Api: amdsmi_reset_gpu_xgmi_error <bdf>") + """ |
|
||||
| """ + self.style.text("44 Get topo get numa node number. Api: amdsmi_topo_get_numa_node_number <bdf>") + """ |
|
||||
| """ + self.style.text("45 Get topo get link weight. Api: amdsmi_topo_get_link_weight <bdf><bdf>") + """ |
|
||||
| """ + self.style.text("46 Get minmax_bandwidth_get. Api: amdsmi_get_minmax_bandwidth <bdf><bdf>") + """ |
|
||||
@@ -849,7 +849,7 @@ commands = {
|
||||
42: [smi_api.amdsmi_gpu_xgmi_error_status, {
|
||||
"device_identifier1": [None, True]
|
||||
}],
|
||||
43: [smi_api.amdsmi_dev_reset_xgmi_error, {
|
||||
43: [smi_api.amdsmi_reset_gpu_xgmi_error, {
|
||||
"device_identifier1": [None, True]
|
||||
}],
|
||||
44: [smi_api.amdsmi_topo_get_numa_node_number, {
|
||||
|
||||
@@ -830,7 +830,7 @@ amdsmi_gpu_xgmi_error_status(amdsmi_processor_handle processor_handle, amdsmi_xg
|
||||
}
|
||||
|
||||
amdsmi_status_t
|
||||
amdsmi_dev_reset_xgmi_error(amdsmi_processor_handle processor_handle) {
|
||||
amdsmi_reset_gpu_xgmi_error(amdsmi_processor_handle processor_handle) {
|
||||
return rsmi_wrapper(rsmi_dev_xgmi_error_reset, processor_handle);
|
||||
}
|
||||
|
||||
@@ -926,7 +926,7 @@ amdsmi_get_func_iter_value(amdsmi_func_id_iter_handle_t handle,
|
||||
{"rsmi_dev_counter_group_supported", "amdsmi_gpu_counter_group_supported"},
|
||||
{"rsmi_dev_counter_create", "amdsmi_gpu_create_counter"},
|
||||
{"rsmi_dev_xgmi_error_status", "amdsmi_gpu_xgmi_error_status"},
|
||||
{"rsmi_dev_xgmi_error_reset", "amdsmi_dev_reset_xgmi_error"},
|
||||
{"rsmi_dev_xgmi_error_reset", "amdsmi_reset_gpu_xgmi_error"},
|
||||
{"rsmi_dev_memory_reserved_pages_get", "amdsmi_get_gpu_memory_reserved_pages"},
|
||||
{"rsmi_topo_numa_affinity_get", "amdsmi_get_gpu_topo_numa_affinity"},
|
||||
{"rsmi_dev_gpu_metrics_info_get", " amdsmi_get_gpu_metrics_info"},
|
||||
|
||||
@@ -302,7 +302,7 @@ void TestMutualExclusion::Run(void) {
|
||||
amdsmi_gpu_counter_group_supported
|
||||
amdsmi_get_gpu_memory_reserved_pages
|
||||
amdsmi_gpu_xgmi_error_status
|
||||
amdsmi_dev_reset_xgmi_error
|
||||
amdsmi_reset_gpu_xgmi_error
|
||||
amdsmi_dev_xgmi_hive_id_get
|
||||
amdsmi_topo_get_link_weight
|
||||
amdsmi_set_gpu_event_notification_mask
|
||||
|
||||
@@ -140,7 +140,7 @@ void TestXGMIReadWrite::Run(void) {
|
||||
|
||||
// TODO(cfree) We need to find a way to generate xgmi errors so this
|
||||
// test won't be meaningless
|
||||
err = amdsmi_dev_reset_xgmi_error(device);
|
||||
err = amdsmi_reset_gpu_xgmi_error(device);
|
||||
CHK_ERR_ASRT(err)
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Successfully reset XGMI Error Status: " << std::endl;
|
||||
|
||||
在新工单中引用
屏蔽一个用户