diff --git a/include/rocm_smi/kfd_ioctl.h b/include/rocm_smi/kfd_ioctl.h index 3b781ce129..6477f44898 100755 --- a/include/rocm_smi/kfd_ioctl.h +++ b/include/rocm_smi/kfd_ioctl.h @@ -553,6 +553,7 @@ enum kfd_smi_event { KFD_SMI_EVENT_THERMAL_THROTTLE = 2, KFD_SMI_EVENT_GPU_PRE_RESET = 3, KFD_SMI_EVENT_GPU_POST_RESET = 4, + KFD_SMI_EVENT_RING_HANG = 5, }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index 963d1e0c28..0ecfc20acc 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -316,8 +316,9 @@ typedef enum { RSMI_EVT_NOTIF_THERMAL_THROTTLE = KFD_SMI_EVENT_THERMAL_THROTTLE, RSMI_EVT_NOTIF_GPU_PRE_RESET = KFD_SMI_EVENT_GPU_PRE_RESET, RSMI_EVT_NOTIF_GPU_POST_RESET = KFD_SMI_EVENT_GPU_POST_RESET, + RSMI_EVT_NOTIF_RING_HANG = KFD_SMI_EVENT_RING_HANG, - RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_GPU_POST_RESET + RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_RING_HANG } rsmi_evt_notification_type_t; /** diff --git a/python_smi_tools/rsmiBindings.py b/python_smi_tools/rsmiBindings.py index 0483dacf59..3dfab10feb 100644 --- a/python_smi_tools/rsmiBindings.py +++ b/python_smi_tools/rsmiBindings.py @@ -102,16 +102,18 @@ class rsmi_dev_perf_level_t(c_int): RSMI_DEV_PERF_LEVEL_UNKNOWN = 0x100 -notification_type_names = ['VM_FAULT', 'THERMAL_THROTTLE', 'GPU_RESET'] +notification_type_names = ['VM_FAULT', 'THERMAL_THROTTLE', 'GPU_PRE_RESET', 'GPU_POST_RESET', 'RING_HANG'] class rsmi_evt_notification_type_t(c_int): - RSMI_EVT_NOTIF_VMFAULT = 0 - RSMI_EVT_NOTIF_FIRST = RSMI_EVT_NOTIF_VMFAULT - RSMI_EVT_NOTIF_THERMAL_THROTTLE = 1 - RSMI_EVT_NOTIF_GPU_PRE_RESET = 2 - RSMI_EVT_NOTIF_GPU_POST_RESET = 3 - RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_GPU_POST_RESET + RSMI_EVT_NOTIF_NONE = 0 + RSMI_EVT_NOTIF_FIRST = RSMI_EVT_NOTIF_NONE + RSMI_EVT_NOTIF_VMFAULT = 1 + RSMI_EVT_NOTIF_THERMAL_THROTTLE = 2 + RSMI_EVT_NOTIF_GPU_PRE_RESET = 3 + RSMI_EVT_NOTIF_GPU_POST_RESET = 4 + RSMI_EVT_NOTIF_RING_HANG = 5 + RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_RING_HANG class rsmi_voltage_metric_t(c_int): diff --git a/python_smi_tools/rsmiBindings.py.in b/python_smi_tools/rsmiBindings.py.in index d53010f4df..b24665cf9a 100644 --- a/python_smi_tools/rsmiBindings.py.in +++ b/python_smi_tools/rsmiBindings.py.in @@ -138,12 +138,13 @@ notification_type_names = ['VM_FAULT', 'THERMAL_THROTTLE', 'GPU_RESET'] class rsmi_evt_notification_type_t(c_int): - RSMI_EVT_NOTIF_VMFAULT = 0 + RSMI_EVT_NOTIF_VMFAULT = 1 RSMI_EVT_NOTIF_FIRST = RSMI_EVT_NOTIF_VMFAULT - RSMI_EVT_NOTIF_THERMAL_THROTTLE = 1 - RSMI_EVT_NOTIF_GPU_PRE_RESET = 2 - RSMI_EVT_NOTIF_GPU_POST_RESET = 3 - RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_GPU_POST_RESET + RSMI_EVT_NOTIF_THERMAL_THROTTLE = 2 + RSMI_EVT_NOTIF_GPU_PRE_RESET = 3 + RSMI_EVT_NOTIF_GPU_POST_RESET = 4 + RSMI_EVT_NOTIF_RING_HANG = 5 + RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_RING_HANG class rsmi_voltage_metric_t(c_int): diff --git a/tests/rocm_smi_test/test_utils.cc b/tests/rocm_smi_test/test_utils.cc index d27f185bba..c844cd2581 100755 --- a/tests/rocm_smi_test/test_utils.cc +++ b/tests/rocm_smi_test/test_utils.cc @@ -85,6 +85,7 @@ static const std::map {RSMI_EVT_NOTIF_THERMAL_THROTTLE, "RSMI_EVT_NOTIF_THERMAL_THROTTLE"}, {RSMI_EVT_NOTIF_GPU_PRE_RESET, "RSMI_EVT_NOTIF_GPU_PRE_RESET"}, {RSMI_EVT_NOTIF_GPU_POST_RESET, "RSMI_EVT_NOTIF_GPU_POST_RESET"}, + {RSMI_EVT_NOTIF_RING_HANG, "RSMI_EVT_NOTIF_RING_HANG"}, }; const char * NameFromEvtNotifType(rsmi_evt_notification_type_t evt) {