diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index b43da2919c..348a41cfe2 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -3107,7 +3107,7 @@ amdsmi_gpu_xgmi_error_status(amdsmi_processor_handle processor_handle, amdsmi_xg * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ amdsmi_status_t -amdsmi_dev_reset_xgmi_error(amdsmi_processor_handle processor_handle); +amdsmi_reset_gpu_xgmi_error(amdsmi_processor_handle processor_handle); /** @} End SysInfo */ diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md index e97aa59e52..9977a11e08 100644 --- a/projects/amdsmi/py-interface/README.md +++ b/projects/amdsmi/py-interface/README.md @@ -2849,7 +2849,7 @@ except AmdSmiException as e: print(e) ``` -## amdsmi_dev_reset_xgmi_error +## amdsmi_reset_gpu_xgmi_error Description: Reset the XGMI error status for a device Input parameters: @@ -2857,7 +2857,7 @@ Input parameters: Output: None -Exceptions that can be thrown by `amdsmi_dev_reset_xgmi_error` function: +Exceptions that can be thrown by `amdsmi_reset_gpu_xgmi_error` function: * `AmdSmiLibraryException` * `AmdSmiRetryException` * `AmdSmiParameterException` @@ -2870,7 +2870,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_dev_reset_xgmi_error(device) + amdsmi_reset_gpu_xgmi_error(device) except AmdSmiException as e: print(e) ``` diff --git a/projects/amdsmi/py-interface/__init__.py b/projects/amdsmi/py-interface/__init__.py index 19892b78d5..03b4e5f210 100644 --- a/projects/amdsmi/py-interface/__init__.py +++ b/projects/amdsmi/py-interface/__init__.py @@ -134,7 +134,7 @@ from .amdsmi_interface import amdsmi_get_gpu_compute_process_info from .amdsmi_interface import amdsmi_get_gpu_compute_process_info_by_pid from .amdsmi_interface import amdsmi_get_gpu_compute_process_gpus from .amdsmi_interface import amdsmi_gpu_xgmi_error_status -from .amdsmi_interface import amdsmi_dev_reset_xgmi_error +from .amdsmi_interface import amdsmi_reset_gpu_xgmi_error # # PCIE information from .amdsmi_interface import amdsmi_get_gpu_pci_id diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py index 8946d4b12f..c4d4d4924a 100644 --- a/projects/amdsmi/py-interface/amdsmi_interface.py +++ b/projects/amdsmi/py-interface/amdsmi_interface.py @@ -2620,7 +2620,7 @@ def amdsmi_gpu_xgmi_error_status( return AmdSmiXgmiStatus(status.value) -def amdsmi_dev_reset_xgmi_error( +def amdsmi_reset_gpu_xgmi_error( processor_handle: amdsmi_wrapper.amdsmi_processor_handle, ) -> None: if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): @@ -2628,7 +2628,7 @@ def amdsmi_dev_reset_xgmi_error( processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) - _check_res(amdsmi_wrapper.amdsmi_dev_reset_xgmi_error(processor_handle)) + _check_res(amdsmi_wrapper.amdsmi_reset_gpu_xgmi_error(processor_handle)) def amdsmi_get_gpu_memory_reserved_pages( diff --git a/projects/amdsmi/py-interface/amdsmi_wrapper.py b/projects/amdsmi/py-interface/amdsmi_wrapper.py index 2ba92c0b0e..64eb00d497 100644 --- a/projects/amdsmi/py-interface/amdsmi_wrapper.py +++ b/projects/amdsmi/py-interface/amdsmi_wrapper.py @@ -1619,9 +1619,9 @@ amdsmi_get_gpu_compute_process_gpus.argtypes = [uint32_t, ctypes.POINTER(ctypes. amdsmi_gpu_xgmi_error_status = _libraries['libamd_smi.so'].amdsmi_gpu_xgmi_error_status amdsmi_gpu_xgmi_error_status.restype = amdsmi_status_t amdsmi_gpu_xgmi_error_status.argtypes = [amdsmi_processor_handle, ctypes.POINTER(c__EA_amdsmi_xgmi_status_t)] -amdsmi_dev_reset_xgmi_error = _libraries['libamd_smi.so'].amdsmi_dev_reset_xgmi_error -amdsmi_dev_reset_xgmi_error.restype = amdsmi_status_t -amdsmi_dev_reset_xgmi_error.argtypes = [amdsmi_processor_handle] +amdsmi_reset_gpu_xgmi_error = _libraries['libamd_smi.so'].amdsmi_reset_gpu_xgmi_error +amdsmi_reset_gpu_xgmi_error.restype = amdsmi_status_t +amdsmi_reset_gpu_xgmi_error.argtypes = [amdsmi_processor_handle] amdsmi_topo_get_numa_node_number = _libraries['libamd_smi.so'].amdsmi_topo_get_numa_node_number amdsmi_topo_get_numa_node_number.restype = amdsmi_status_t amdsmi_topo_get_numa_node_number.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32)] @@ -1874,7 +1874,7 @@ __all__ = \ 'amdsmi_dev_open_supported_variant_iterator', 'amdsmi_dev_perf_level_t', 'amdsmi_dev_perf_level_t__enumvalues', 'amdsmi_reset_gpu_fan', 'amdsmi_reset_gpu', - 'amdsmi_dev_reset_xgmi_error', 'amdsmi_set_clk_freq', + 'amdsmi_reset_gpu_xgmi_error', 'amdsmi_set_clk_freq', 'amdsmi_set_gpu_clk_range', 'amdsmi_set_gpu_fan_speed', 'amdsmi_set_gpu_od_clk_info', 'amdsmi_set_gpu_od_volt_info', 'amdsmi_set_gpu_overdrive_level', diff --git a/projects/amdsmi/py-interface/rocm_smi_tool.py b/projects/amdsmi/py-interface/rocm_smi_tool.py index 34fffee4f1..f02bb83db1 100644 --- a/projects/amdsmi/py-interface/rocm_smi_tool.py +++ b/projects/amdsmi/py-interface/rocm_smi_tool.py @@ -329,7 +329,7 @@ class Formatter: | """ + self.style.text("40 Get compute process info by pid. Api: amdsmi_get_gpu_compute_process_info_by_pid ") + """ | | """ + self.style.text("41 Get compute process gpus. Api: amdsmi_get_gpu_compute_process_gpus ") + """ | | """ + self.style.text("42 Get device xgmi_error_status. Api: amdsmi_gpu_xgmi_error_status ") + """ | - | """ + self.style.text("43 Get device xgmi error reset. Api: amdsmi_dev_reset_xgmi_error ") + """ | + | """ + self.style.text("43 Get device xgmi error reset. Api: amdsmi_reset_gpu_xgmi_error ") + """ | | """ + self.style.text("44 Get topo get numa node number. Api: amdsmi_topo_get_numa_node_number ") + """ | | """ + self.style.text("45 Get topo get link weight. Api: amdsmi_topo_get_link_weight ") + """ | | """ + self.style.text("46 Get minmax_bandwidth_get. Api: amdsmi_get_minmax_bandwidth ") + """ | @@ -849,7 +849,7 @@ commands = { 42: [smi_api.amdsmi_gpu_xgmi_error_status, { "device_identifier1": [None, True] }], - 43: [smi_api.amdsmi_dev_reset_xgmi_error, { + 43: [smi_api.amdsmi_reset_gpu_xgmi_error, { "device_identifier1": [None, True] }], 44: [smi_api.amdsmi_topo_get_numa_node_number, { diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index 82623cfb34..c73f2d37a6 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -830,7 +830,7 @@ amdsmi_gpu_xgmi_error_status(amdsmi_processor_handle processor_handle, amdsmi_xg } amdsmi_status_t -amdsmi_dev_reset_xgmi_error(amdsmi_processor_handle processor_handle) { +amdsmi_reset_gpu_xgmi_error(amdsmi_processor_handle processor_handle) { return rsmi_wrapper(rsmi_dev_xgmi_error_reset, processor_handle); } @@ -926,7 +926,7 @@ amdsmi_get_func_iter_value(amdsmi_func_id_iter_handle_t handle, {"rsmi_dev_counter_group_supported", "amdsmi_gpu_counter_group_supported"}, {"rsmi_dev_counter_create", "amdsmi_gpu_create_counter"}, {"rsmi_dev_xgmi_error_status", "amdsmi_gpu_xgmi_error_status"}, - {"rsmi_dev_xgmi_error_reset", "amdsmi_dev_reset_xgmi_error"}, + {"rsmi_dev_xgmi_error_reset", "amdsmi_reset_gpu_xgmi_error"}, {"rsmi_dev_memory_reserved_pages_get", "amdsmi_get_gpu_memory_reserved_pages"}, {"rsmi_topo_numa_affinity_get", "amdsmi_get_gpu_topo_numa_affinity"}, {"rsmi_dev_gpu_metrics_info_get", " amdsmi_get_gpu_metrics_info"}, diff --git a/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc b/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc index 404968ba17..26a2d97590 100755 --- a/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc @@ -302,7 +302,7 @@ void TestMutualExclusion::Run(void) { amdsmi_gpu_counter_group_supported amdsmi_get_gpu_memory_reserved_pages amdsmi_gpu_xgmi_error_status - amdsmi_dev_reset_xgmi_error + amdsmi_reset_gpu_xgmi_error amdsmi_dev_xgmi_hive_id_get amdsmi_topo_get_link_weight amdsmi_set_gpu_event_notification_mask diff --git a/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.cc b/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.cc index 926e88ac54..756e793db3 100755 --- a/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/xgmi_read_write.cc @@ -140,7 +140,7 @@ void TestXGMIReadWrite::Run(void) { // TODO(cfree) We need to find a way to generate xgmi errors so this // test won't be meaningless - err = amdsmi_dev_reset_xgmi_error(device); + err = amdsmi_reset_gpu_xgmi_error(device); CHK_ERR_ASRT(err) IF_VERB(STANDARD) { std::cout << "\t**Successfully reset XGMI Error Status: " << std::endl;