From 406859ca8a40ec8cd0a7a80ad2b13fee2da0b2ad Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 7 Aug 2020 13:13:37 -0400 Subject: [PATCH] Update KFD SMI event notification handling The event bitmask in the KFD SMI event is now replaced with an event index in the SMI event message. Sending an event bitmask, which was a 64-bit field with only 1 bit set, was quite wasteful of memory and also potentially limiting to 64 events. Instead, the kernel now sends the event index in the SMI event message. As a result, update the KFD SMI event handling to expect the event index in the message. Change-Id: I3e74620788d3c1f7c0bdaa69e9d9ab3d1aba2c92 --- include/rocm_smi/kfd_ioctl.h | 8 ++++++-- include/rocm_smi/rocm_smi.h | 15 +++++++++++++-- src/rocm_smi.cc | 4 ++-- .../functional/evt_notif_read_write.cc | 6 +++--- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/include/rocm_smi/kfd_ioctl.h b/include/rocm_smi/kfd_ioctl.h index 9356cd1600..da9b985304 100755 --- a/include/rocm_smi/kfd_ioctl.h +++ b/include/rocm_smi/kfd_ioctl.h @@ -541,8 +541,12 @@ struct kfd_ioctl_import_dmabuf_args { /* * KFD SMI(System Management Interface) events */ -/* Event type (defined by bitmask) */ -#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 +enum kfd_smi_event { + KFD_SMI_EVENT_NONE = 0, /* not used */ + KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */ +}; + +#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) struct kfd_ioctl_smi_events_args { __u32 gpuid; /* to KFD */ diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index e933d55c98..ef1025bebc 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -284,6 +284,11 @@ typedef enum { RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_VMFAULT } rsmi_evt_notification_type_t; +/* + * Macro to generate event bitmask from event id + */ +#define RSMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) + //! 
Maximum number of characters an event notification message will be #define MAX_EVENT_NOTIFICATION_MSG_SIZE 64 @@ -3217,8 +3222,14 @@ rsmi_event_notification_init(uint32_t dv_ind); * @param dv_ind a device index corresponding to the device on which to * listen for events * - * @param mask 0 or more elements of ::rsmi_evt_notification_type_t OR'd - * together that indicate which event types to listen for. + * @param mask Bitmask generated by OR'ing together the result of + * ::RSMI_EVENT_MASK_FROM_INDEX for 1 or more elements of + * ::rsmi_evt_notification_type_t, indicating which event types to listen for. + * Each rsmi_evt_notification_type_t value names a bit position, counted from 1. + * For example, a mask of 0x0000000000000003 has bit 1 and bit 2 set (bit + * positions start from 1), which indicates interest in receiving + * RSMI_EVT_NOTIF_VMFAULT (which has a value of 1) and the next event in that + * list, which has a value of 2. * * @retval ::RSMI_STATUS_INIT_ERROR is returned if * ::rsmi_event_notification_init() has not been called before a call to this diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index 177532e250..60451f0865 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -3595,8 +3595,8 @@ rsmi_event_notification_get(int timeout_ms, data_item = reinterpret_cast(&data[*num_elem]); - uint64_t event; - while (fscanf(anon_fp, "%lx %63s\n", &event, + uint32_t event; + while (fscanf(anon_fp, "%x %63s\n", &event, reinterpret_cast(&data_item->message)) == 2) { /* Output is in format as "event information\n" * Both event are expressed in hex. 
diff --git a/tests/rocm_smi_test/functional/evt_notif_read_write.cc b/tests/rocm_smi_test/functional/evt_notif_read_write.cc index 520bd5ab05..cab9f07b4a 100755 --- a/tests/rocm_smi_test/functional/evt_notif_read_write.cc +++ b/tests/rocm_smi_test/functional/evt_notif_read_write.cc @@ -101,11 +101,11 @@ void TestEvtNotifReadWrite::Run(void) { } rsmi_evt_notification_type_t evt_type = RSMI_EVT_NOTIF_FIRST; - uint64_t mask = evt_type; + uint64_t mask = RSMI_EVENT_MASK_FROM_INDEX(evt_type); while (evt_type != RSMI_EVT_NOTIF_LAST) { - mask |= evt_type; + mask |= RSMI_EVENT_MASK_FROM_INDEX(evt_type); evt_type = static_cast( - static_cast(evt_type)*2); + static_cast(evt_type)+1); } for (dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {