From 8b95705e6ffec2a5a43a99657bcba43abb7c0d42 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 24 Aug 2020 22:28:19 -0400 Subject: [PATCH] Add support for GPU reset SMI events Add handling for both pre GPU reset and post GPU reset SMI events. Change-Id: I64d5e006bef58cb28b1c580c75f482a4590427da --- include/rocm_smi/kfd_ioctl.h | 2 ++ include/rocm_smi/rocm_smi.h | 4 +++- tests/rocm_smi_test/test_utils.cc | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/rocm_smi/kfd_ioctl.h b/include/rocm_smi/kfd_ioctl.h index 1af726d75a..5817833eae 100755 --- a/include/rocm_smi/kfd_ioctl.h +++ b/include/rocm_smi/kfd_ioctl.h @@ -545,6 +545,8 @@ enum kfd_smi_event { KFD_SMI_EVENT_NONE = 0, /* not used */ KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ KFD_SMI_EVENT_THERMAL_THROTTLE = 2, + KFD_SMI_EVENT_GPU_PRE_RESET = 3, + KFD_SMI_EVENT_GPU_POST_RESET = 4, }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index ffe77cd49b..2e544e7c02 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -281,8 +281,10 @@ typedef enum { RSMI_EVT_NOTIF_VMFAULT = KFD_SMI_EVENT_VMFAULT, //!< VM page fault RSMI_EVT_NOTIF_FIRST = RSMI_EVT_NOTIF_VMFAULT, RSMI_EVT_NOTIF_THERMAL_THROTTLE = KFD_SMI_EVENT_THERMAL_THROTTLE, + RSMI_EVT_NOTIF_GPU_PRE_RESET = KFD_SMI_EVENT_GPU_PRE_RESET, + RSMI_EVT_NOTIF_GPU_POST_RESET = KFD_SMI_EVENT_GPU_POST_RESET, - RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_THERMAL_THROTTLE + RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_GPU_POST_RESET } rsmi_evt_notification_type_t; /* diff --git a/tests/rocm_smi_test/test_utils.cc b/tests/rocm_smi_test/test_utils.cc index 9618b8f0e3..c1b9a8de86 100755 --- a/tests/rocm_smi_test/test_utils.cc +++ b/tests/rocm_smi_test/test_utils.cc @@ -81,6 +81,8 @@ static const std::map<rsmi_evt_notification_type_t, const char *> kEvtNotifEvntNameMap = { {RSMI_EVT_NOTIF_VMFAULT, "RSMI_EVT_NOTIF_VMFAULT"}, {RSMI_EVT_NOTIF_THERMAL_THROTTLE, 
"RSMI_EVT_NOTIF_THERMAL_THROTTLE"}, + {RSMI_EVT_NOTIF_GPU_PRE_RESET, "RSMI_EVT_NOTIF_GPU_PRE_RESET"}, + {RSMI_EVT_NOTIF_GPU_POST_RESET, "RSMI_EVT_NOTIF_GPU_POST_RESET"}, }; const char * NameFromEvtNotifType(rsmi_evt_notification_type_t evt) {