Renamed APIs

amdsmi_dev_get_gpu_ecc_status -> amdsmi_get_gpu_ecc_status
amdsmi_dev_get_gpu_ecc_enabled -> amdsmi_get_gpu_ecc_enabled
amdsmi_dev_get_gpu_ecc_count -> amdsmi_get_gpu_ecc_count

Change-Id: I84e6489f82bae115e1a13c9e4fce8029888ca379


[ROCm/amdsmi commit: d9ba131f73]
This commit is contained in:
Suma Hegde
2023-02-27 10:39:42 -05:00
committed by Naveen Krishna Chatradhi
parent e0d2d9b909
commit 0be5a5fb82
9 changed files with 55 additions and 55 deletions
+3 -3
View File
@@ -2679,7 +2679,7 @@ amdsmi_get_version_str(amdsmi_sw_component_t component, char *ver_str,
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t amdsmi_dev_get_gpu_ecc_count(amdsmi_processor_handle processor_handle,
amdsmi_status_t amdsmi_get_gpu_ecc_count(amdsmi_processor_handle processor_handle,
amdsmi_gpu_block_t block, amdsmi_error_count_t *ec);
/**
@@ -2706,7 +2706,7 @@ amdsmi_status_t amdsmi_dev_get_gpu_ecc_count(amdsmi_processor_handle processor_
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t amdsmi_dev_get_gpu_ecc_enabled(amdsmi_processor_handle processor_handle,
amdsmi_status_t amdsmi_get_gpu_ecc_enabled(amdsmi_processor_handle processor_handle,
uint64_t *enabled_blocks);
/**
@@ -2730,7 +2730,7 @@ amdsmi_status_t amdsmi_dev_get_gpu_ecc_enabled(amdsmi_processor_handle processo
*
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
*/
amdsmi_status_t amdsmi_dev_get_gpu_ecc_status(amdsmi_processor_handle processor_handle, amdsmi_gpu_block_t block,
amdsmi_status_t amdsmi_get_gpu_ecc_status(amdsmi_processor_handle processor_handle, amdsmi_gpu_block_t block,
amdsmi_ras_err_state_t *state);
/**
+9 -9
View File
@@ -2631,7 +2631,7 @@ except AmdSmiException as e:
print(e)
```
## amdsmi_dev_get_gpu_ecc_count
## amdsmi_get_gpu_ecc_count
Description: Retrieve the error counts for a GPU block
Input parameters:
@@ -2645,7 +2645,7 @@ Field | Description
`correctable_count`| Count of correctable errors
`uncorrectable_count`| Count of uncorrectable errors
Exceptions that can be thrown by ` amdsmi_dev_get_gpu_ecc_count` function:
Exceptions that can be thrown by `amdsmi_get_gpu_ecc_count` function:
* `AmdSmiLibraryException`
* `AmdSmiRetryException`
* `AmdSmiParameterException`
@@ -2658,13 +2658,13 @@ try:
print("No GPUs on machine")
else:
for device in devices:
ecc_count = amdsmi_dev_get_gpu_ecc_count(device, AmdSmiGpuBlock.UMC)
ecc_count = amdsmi_get_gpu_ecc_count(device, AmdSmiGpuBlock.UMC)
print(ecc_count)
except AmdSmiException as e:
print(e)
```
## amdsmi_dev_get_gpu_ecc_enabled
## amdsmi_get_gpu_ecc_enabled
Description: Retrieve the enabled ECC bit-mask
Input parameters:
@@ -2672,7 +2672,7 @@ Input parameters:
Output: Enabled ECC bit-mask
Exceptions that can be thrown by ` amdsmi_dev_get_gpu_ecc_enabled` function:
Exceptions that can be thrown by `amdsmi_get_gpu_ecc_enabled` function:
* `AmdSmiLibraryException`
* `AmdSmiRetryException`
* `AmdSmiParameterException`
@@ -2685,13 +2685,13 @@ try:
print("No GPUs on machine")
else:
for device in devices:
enabled = amdsmi_dev_get_gpu_ecc_enabled(device)
enabled = amdsmi_get_gpu_ecc_enabled(device)
print(enabled)
except AmdSmiException as e:
print(e)
```
## amdsmi_dev_get_gpu_ecc_status
## amdsmi_get_gpu_ecc_status
Description: Retrieve the ECC status for a GPU block
Input parameters:
@@ -2700,7 +2700,7 @@ Input parameters:
Output: ECC status for a requested GPU block
Exceptions that can be thrown by ` amdsmi_dev_get_gpu_ecc_status` function:
Exceptions that can be thrown by `amdsmi_get_gpu_ecc_status` function:
* `AmdSmiLibraryException`
* `AmdSmiRetryException`
* `AmdSmiParameterException`
@@ -2713,7 +2713,7 @@ try:
print("No GPUs on machine")
else:
for device in devices:
status = amdsmi_dev_get_gpu_ecc_status(device, AmdSmiGpuBlock.UMC)
status = amdsmi_get_gpu_ecc_status(device, AmdSmiGpuBlock.UMC)
print(status)
except AmdSmiException as e:
print(e)
+3 -3
View File
@@ -124,9 +124,9 @@ from .amdsmi_interface import amdsmi_gpu_read_counter
from .amdsmi_interface import amdsmi_get_gpu_available_counters
# # Error Query
from .amdsmi_interface import amdsmi_dev_get_gpu_ecc_count
from .amdsmi_interface import amdsmi_dev_get_gpu_ecc_enabled
from .amdsmi_interface import amdsmi_dev_get_gpu_ecc_status
from .amdsmi_interface import amdsmi_get_gpu_ecc_count
from .amdsmi_interface import amdsmi_get_gpu_ecc_enabled
from .amdsmi_interface import amdsmi_get_gpu_ecc_status
from .amdsmi_interface import amdsmi_status_string
# # System Information Query
@@ -2462,7 +2462,7 @@ def amdsmi_get_gpu_power_profile_presets(
}
def amdsmi_dev_get_gpu_ecc_count(
def amdsmi_get_gpu_ecc_count(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle, block: AmdSmiGpuBlock
) -> Dict[str, int]:
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
@@ -2475,7 +2475,7 @@ def amdsmi_dev_get_gpu_ecc_count(
ec = amdsmi_wrapper.amdsmi_error_count_t()
_check_res(
amdsmi_wrapper. amdsmi_dev_get_gpu_ecc_count(
amdsmi_wrapper. amdsmi_get_gpu_ecc_count(
processor_handle, block, ctypes.byref(ec))
)
@@ -2485,7 +2485,7 @@ def amdsmi_dev_get_gpu_ecc_count(
}
def amdsmi_dev_get_gpu_ecc_enabled(
def amdsmi_get_gpu_ecc_enabled(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
) -> int:
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
@@ -2495,14 +2495,14 @@ def amdsmi_dev_get_gpu_ecc_enabled(
blocks = ctypes.c_uint64(0)
_check_res(
amdsmi_wrapper. amdsmi_dev_get_gpu_ecc_enabled(
amdsmi_wrapper. amdsmi_get_gpu_ecc_enabled(
processor_handle, ctypes.byref(blocks))
)
return blocks.value
def amdsmi_dev_get_gpu_ecc_status(
def amdsmi_get_gpu_ecc_status(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle, block: AmdSmiGpuBlock
) -> AmdSmiRasErrState:
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
@@ -2515,7 +2515,7 @@ def amdsmi_dev_get_gpu_ecc_status(
state = amdsmi_wrapper.amdsmi_ras_err_state_t()
_check_res(
amdsmi_wrapper. amdsmi_dev_get_gpu_ecc_status(
amdsmi_wrapper. amdsmi_get_gpu_ecc_status(
processor_handle, block, ctypes.byref(state)
)
)
+11 -11
View File
@@ -1577,15 +1577,15 @@ amdsmi_get_version.argtypes = [ctypes.POINTER(struct_c__SA_amdsmi_version_t)]
amdsmi_get_version_str = _libraries['libamd_smi.so'].amdsmi_get_version_str
amdsmi_get_version_str.restype = amdsmi_status_t
amdsmi_get_version_str.argtypes = [amdsmi_sw_component_t, ctypes.POINTER(ctypes.c_char), uint32_t]
amdsmi_dev_get_gpu_ecc_count = _libraries['libamd_smi.so'].amdsmi_dev_get_gpu_ecc_count
amdsmi_dev_get_gpu_ecc_count.restype = amdsmi_status_t
amdsmi_dev_get_gpu_ecc_count.argtypes = [amdsmi_processor_handle, amdsmi_gpu_block_t, ctypes.POINTER(struct_c__SA_amdsmi_error_count_t)]
amdsmi_dev_get_gpu_ecc_enabled = _libraries['libamd_smi.so'].amdsmi_dev_get_gpu_ecc_enabled
amdsmi_dev_get_gpu_ecc_enabled.restype = amdsmi_status_t
amdsmi_dev_get_gpu_ecc_enabled.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint64)]
amdsmi_dev_get_gpu_ecc_status = _libraries['libamd_smi.so'].amdsmi_dev_get_gpu_ecc_status
amdsmi_dev_get_gpu_ecc_status.restype = amdsmi_status_t
amdsmi_dev_get_gpu_ecc_status.argtypes = [amdsmi_processor_handle, amdsmi_gpu_block_t, ctypes.POINTER(c__EA_amdsmi_ras_err_state_t)]
amdsmi_get_gpu_ecc_count = _libraries['libamd_smi.so'].amdsmi_get_gpu_ecc_count
amdsmi_get_gpu_ecc_count.restype = amdsmi_status_t
amdsmi_get_gpu_ecc_count.argtypes = [amdsmi_processor_handle, amdsmi_gpu_block_t, ctypes.POINTER(struct_c__SA_amdsmi_error_count_t)]
amdsmi_get_gpu_ecc_enabled = _libraries['libamd_smi.so'].amdsmi_get_gpu_ecc_enabled
amdsmi_get_gpu_ecc_enabled.restype = amdsmi_status_t
amdsmi_get_gpu_ecc_enabled.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint64)]
amdsmi_get_gpu_ecc_status = _libraries['libamd_smi.so'].amdsmi_get_gpu_ecc_status
amdsmi_get_gpu_ecc_status.restype = amdsmi_status_t
amdsmi_get_gpu_ecc_status.argtypes = [amdsmi_processor_handle, amdsmi_gpu_block_t, ctypes.POINTER(c__EA_amdsmi_ras_err_state_t)]
amdsmi_status_string = _libraries['libamd_smi.so'].amdsmi_status_string
amdsmi_status_string.restype = amdsmi_status_t
amdsmi_status_string.argtypes = [amdsmi_status_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
@@ -1852,8 +1852,8 @@ __all__ = \
'amdsmi_dev_close_supported_func_iterator',
'amdsmi_gpu_counter_group_supported', 'amdsmi_gpu_create_counter',
'amdsmi_gpu_destroy_counter', 'amdsmi_get_busy_percent',
'amdsmi_get_gpu_drm_render_minor', 'amdsmi_dev_get_gpu_ecc_count',
'amdsmi_dev_get_gpu_ecc_enabled', 'amdsmi_dev_get_gpu_ecc_status',
'amdsmi_get_gpu_drm_render_minor', 'amdsmi_get_gpu_ecc_count',
'amdsmi_get_gpu_ecc_enabled', 'amdsmi_get_gpu_ecc_status',
'amdsmi_get_energy_count', 'amdsmi_get_gpu_fan_rpms',
'amdsmi_get_gpu_fan_speed', 'amdsmi_get_gpu_fan_speed_max',
'amdsmi_get_clk_freq', 'amdsmi_get_gpu_metrics_info',
@@ -321,9 +321,9 @@ class Formatter:
| """ + self.style.text("32 Get device power profile presets. Api: amdsmi_get_gpu_power_profile_presets <bdf><sensor_idx>") + """ |
| """ + self.style.text("33 Get the build version. Api: amdsmi_get_version <None>") + """ |
| """ + self.style.text("34 Get version string. Api: amdsmi_get_version_str <None>") + """ |
| """ + self.style.text("35 Get device ecc counter. Api: amdsmi_dev_get_gpu_ecc_count <bdf>") + """ |
| """ + self.style.text("36 Get device ecc enable. Api: amdsmi_dev_get_gpu_ecc_enabled <bdf>") + """ |
| """ + self.style.text("37 Get device ecc status. Api: amdsmi_dev_get_gpu_ecc_status <bdf>") + """ |
| """ + self.style.text("35 Get device ecc counter. Api: amdsmi_get_gpu_ecc_count <bdf>") + """ |
| """ + self.style.text("36 Get device ecc enable. Api: amdsmi_get_gpu_ecc_enabled <bdf>") + """ |
| """ + self.style.text("37 Get device ecc status. Api: amdsmi_get_gpu_ecc_status <bdf>") + """ |
| """ + self.style.text("38 Get status string. Api: amdsmi_status_string <status>") + """ |
| """ + self.style.text("39 Get compute process info. Api: amdsmi_get_gpu_compute_process_info <None>") + """ |
| """ + self.style.text("40 Get compute process info by pid. Api: amdsmi_get_gpu_compute_process_info_by_pid <pid>") + """ |
@@ -509,7 +509,7 @@ def amdsmi_tool_dev_ecc_count_get(dev):
result = {}
for gpu_block in smi_api.AmdSmiGpuBlock:
try:
value = smi_api. amdsmi_dev_get_gpu_ecc_count(dev, gpu_block)
value = smi_api. amdsmi_get_gpu_ecc_count(dev, gpu_block)
result.update({gpu_block.name: value})
except smi_api.AmdSmiException as e:
print("{}:\t{}".format(gpu_block.name, e))
@@ -520,7 +520,7 @@ def amdsmi_tool_dev_ecc_status_get(dev):
result = {}
for gpu_block in smi_api.AmdSmiGpuBlock:
try:
value = smi_api. amdsmi_dev_get_gpu_ecc_status(dev, gpu_block)
value = smi_api. amdsmi_get_gpu_ecc_status(dev, gpu_block)
result.update({gpu_block.name: value})
except smi_api.AmdSmiException as e:
print("{}:\t{}".format(gpu_block.name, e))
@@ -830,7 +830,7 @@ commands = {
35: [amdsmi_tool_dev_ecc_count_get, {
"device_identifier1": [None, True]
}],
36: [smi_api. amdsmi_dev_get_gpu_ecc_enabled, {
36: [smi_api. amdsmi_get_gpu_ecc_enabled, {
"device_identifier1": [None, True]
}],
37: [amdsmi_tool_dev_ecc_status_get, {
+6 -6
View File
@@ -921,8 +921,8 @@ amdsmi_get_func_iter_value(amdsmi_func_id_iter_handle_t handle,
{"rsmi_dev_od_volt_info_get", " amdsmi_get_gpu_od_volt_info"},
{"rsmi_dev_od_volt_info_set", " amdsmi_set_gpu_od_volt_info"},
{"rsmi_dev_od_volt_curve_regions_get", " amdsmi_get_gpu_od_volt_curve_regions"},
{"rsmi_dev_ecc_enabled_get", " amdsmi_dev_get_gpu_ecc_enabled"},
{"rsmi_dev_ecc_status_get", " amdsmi_dev_get_gpu_ecc_status"},
{"rsmi_dev_ecc_enabled_get", " amdsmi_get_gpu_ecc_enabled"},
{"rsmi_dev_ecc_status_get", " amdsmi_get_gpu_ecc_status"},
{"rsmi_dev_counter_group_supported", "amdsmi_gpu_counter_group_supported"},
{"rsmi_dev_counter_create", "amdsmi_gpu_create_counter"},
{"rsmi_dev_xgmi_error_status", "amdsmi_gpu_xgmi_error_status"},
@@ -936,7 +936,7 @@ amdsmi_get_func_iter_value(amdsmi_func_id_iter_handle_t handle,
{"rsmi_dev_gpu_clk_freq_get", " amdsmi_get_clk_freq"},
{"rsmi_dev_gpu_clk_freq_set", " amdsmi_set_clk_freq"},
{"rsmi_dev_firmware_version_get", "amdsmi_get_fw_info"},
{"rsmi_dev_ecc_count_get", " amdsmi_dev_get_gpu_ecc_count"},
{"rsmi_dev_ecc_count_get", " amdsmi_get_gpu_ecc_count"},
{"rsmi_counter_available_counters_get", " amdsmi_get_gpu_available_counters"},
{"rsmi_dev_power_ave_get", "amdsmi_get_power_ave"},
{"rsmi_dev_power_cap_get", "amdsmi_get_power_cap_info"},
@@ -1008,7 +1008,7 @@ amdsmi_get_gpu_compute_process_gpus(uint32_t pid, uint32_t *dv_indices,
return amd::smi::rsmi_to_amdsmi_status(r);
}
amdsmi_status_t amdsmi_dev_get_gpu_ecc_count(amdsmi_processor_handle processor_handle,
amdsmi_status_t amdsmi_get_gpu_ecc_count(amdsmi_processor_handle processor_handle,
amdsmi_gpu_block_t block, amdsmi_error_count_t *ec) {
AMDSMI_CHECK_INIT();
@@ -1018,7 +1018,7 @@ amdsmi_status_t amdsmi_dev_get_gpu_ecc_count(amdsmi_processor_handle processor_
static_cast<rsmi_gpu_block_t>(block),
reinterpret_cast<rsmi_error_count_t*>(ec));
}
amdsmi_status_t amdsmi_dev_get_gpu_ecc_enabled(amdsmi_processor_handle processor_handle,
amdsmi_status_t amdsmi_get_gpu_ecc_enabled(amdsmi_processor_handle processor_handle,
uint64_t *enabled_blocks) {
AMDSMI_CHECK_INIT();
@@ -1027,7 +1027,7 @@ amdsmi_status_t amdsmi_dev_get_gpu_ecc_enabled(amdsmi_processor_handle processo
return rsmi_wrapper(rsmi_dev_ecc_enabled_get, processor_handle,
enabled_blocks);
}
amdsmi_status_t amdsmi_dev_get_gpu_ecc_status(amdsmi_processor_handle processor_handle,
amdsmi_status_t amdsmi_get_gpu_ecc_status(amdsmi_processor_handle processor_handle,
amdsmi_gpu_block_t block,
amdsmi_ras_err_state_t *state) {
AMDSMI_CHECK_INIT();
@@ -100,7 +100,7 @@ void TestErrCntRead::Run(void) {
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
PrintDeviceHeader(processor_handles_[i]);
err = amdsmi_dev_get_gpu_ecc_enabled(processor_handles_[i], &enabled_mask);
err = amdsmi_get_gpu_ecc_enabled(processor_handles_[i], &enabled_mask);
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout <<
@@ -108,7 +108,7 @@ void TestErrCntRead::Run(void) {
<< std::endl;
}
// Verify api support checking functionality is working
err = amdsmi_dev_get_gpu_ecc_enabled(processor_handles_[i], nullptr);
err = amdsmi_get_gpu_ecc_enabled(processor_handles_[i], nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
continue;
@@ -116,7 +116,7 @@ void TestErrCntRead::Run(void) {
CHK_ERR_ASRT(err)
// Verify api support checking functionality is working
err = amdsmi_dev_get_gpu_ecc_enabled(processor_handles_[i], nullptr);
err = amdsmi_get_gpu_ecc_enabled(processor_handles_[i], nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
IF_VERB(STANDARD) {
@@ -126,7 +126,7 @@ void TestErrCntRead::Run(void) {
}
for (uint32_t b = AMDSMI_GPU_BLOCK_FIRST;
b <= AMDSMI_GPU_BLOCK_LAST; b = b*2) {
err = amdsmi_dev_get_gpu_ecc_status(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b),
err = amdsmi_get_gpu_ecc_status(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b),
&err_state);
CHK_ERR_ASRT(err)
IF_VERB(STANDARD) {
@@ -135,11 +135,11 @@ void TestErrCntRead::Run(void) {
" block: " << GetErrStateNameStr(err_state) << std::endl;
}
// Verify api support checking functionality is working
err = amdsmi_dev_get_gpu_ecc_status(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b),
err = amdsmi_get_gpu_ecc_status(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b),
nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
err = amdsmi_dev_get_gpu_ecc_count(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b), &ec);
err = amdsmi_get_gpu_ecc_count(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b), &ec);
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
@@ -148,7 +148,7 @@ void TestErrCntRead::Run(void) {
": Not supported for this device" << std::endl;
}
// Verify api support checking functionality is working
err = amdsmi_dev_get_gpu_ecc_count(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b),
err = amdsmi_get_gpu_ecc_count(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b),
nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
@@ -164,7 +164,7 @@ void TestErrCntRead::Run(void) {
<< std::endl;
}
// Verify api support checking functionality is working
err = amdsmi_dev_get_gpu_ecc_count(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b),
err = amdsmi_get_gpu_ecc_count(processor_handles_[i], static_cast<amdsmi_gpu_block_t>(b),
nullptr);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
}
@@ -249,11 +249,11 @@ void TestMutualExclusion::Run(void) {
CHECK_RET(ret, AMDSMI_STATUS_BUSY);
ret = amdsmi_set_clk_freq(processor_handles_[0], CLK_TYPE_SYS, 0);
CHECK_RET(ret, AMDSMI_STATUS_BUSY);
ret = amdsmi_dev_get_gpu_ecc_count(processor_handles_[0], AMDSMI_GPU_BLOCK_UMC, &dmy_err_cnt);
ret = amdsmi_get_gpu_ecc_count(processor_handles_[0], AMDSMI_GPU_BLOCK_UMC, &dmy_err_cnt);
CHECK_RET(ret, AMDSMI_STATUS_BUSY);
ret = amdsmi_dev_get_gpu_ecc_enabled(processor_handles_[0], &dmy_ui64);
ret = amdsmi_get_gpu_ecc_enabled(processor_handles_[0], &dmy_ui64);
CHECK_RET(ret, AMDSMI_STATUS_BUSY);
ret = amdsmi_dev_get_gpu_ecc_status(processor_handles_[0], AMDSMI_GPU_BLOCK_UMC, &dmy_ras_err_st);
ret = amdsmi_get_gpu_ecc_status(processor_handles_[0], AMDSMI_GPU_BLOCK_UMC, &dmy_ras_err_st);
CHECK_RET(ret, AMDSMI_STATUS_BUSY);
/* Other functions holding device mutexes. Listed for reference.