libhsakmt: add event age tracking

Keep the last signaled event age to avoid race conditions
for HSA_EVENTTYPE_SIGNAL events when the initial event age value is non-zero.

Change-Id: Ifb9a11a6868e5762a9f92f579e45a0a2c8fa1017
Signed-off-by: James Zhu <James.Zhu@amd.com>


[ROCm/ROCR-Runtime commit: a0cbf90b90]
Цей коміт міститься в:
James Zhu
2023-05-17 16:30:30 -04:00
джерело 0f44742bc4
коміт 498b718e83
3 змінених файлів з 78 додано та 3 видалено
+43
Переглянути файл
@@ -243,6 +243,25 @@ hsaKmtWaitOnEvent(
HSAuint32 Milliseconds //IN
);
/**
Checks the current state of the event object. If the object's state is
nonsignaled, the calling thread enters the wait state.

Extended variant of hsaKmtWaitOnEvent() that additionally tracks the event
age, which helps avoid race conditions on HSA_EVENTTYPE_SIGNAL events.

The function returns when one of the following occurs:
- The specified event object is in the signaled state.
- The time-out interval elapses.

event_age may be NULL, in which case no event age is tracked (this is how
hsaKmtWaitOnEvent() calls this function). When it is not NULL and the event
type is HSA_EVENTTYPE_SIGNAL, the pointed-to value is handed to KFD as the
last event age known to the caller, and it is overwritten on return with the
last event age found in the KFD event data.

Returns HSAKMT_STATUS_INVALID_HANDLE if Event is NULL.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnEvent_Ext(
HsaEvent* Event, //IN: event to wait on, must not be NULL
HSAuint32 Milliseconds, //IN: time-out interval
uint64_t *event_age //IN/OUT: last event age of Event, may be NULL
);
/**
Checks the current state of multiple event objects.
@@ -264,6 +283,30 @@ hsaKmtWaitOnMultipleEvents(
HSAuint32 Milliseconds //IN
);
/**
Checks the current state of multiple event objects.

Extended variant of hsaKmtWaitOnMultipleEvents() that additionally tracks
event ages, which helps avoid race conditions on HSA_EVENTTYPE_SIGNAL events.

The function returns when one of the following occurs:
- Either any one or all of the specified objects are in the signaled state
- if "WaitOnAll" is "true" the function returns when the state of all
objects in array is signaled
- if "WaitOnAll" is "false" the function returns when the state of any
one of the objects is set to signaled
- The time-out interval elapses.

event_age may be NULL, in which case no event age is tracked (this is how
hsaKmtWaitOnMultipleEvents() calls this function). When it is not NULL it is
indexed in parallel with Events[], so it must provide NumEvents entries.
Only entries whose event type is HSA_EVENTTYPE_SIGNAL are used: each one is
handed to KFD as the last event age known to the caller and is overwritten
on return with the last event age found in the KFD event data. Entries that
belong to other event types are neither read nor written.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtWaitOnMultipleEvents_Ext(
HsaEvent* Events[], //IN: events to wait on
HSAuint32 NumEvents, //IN: number of entries in Events[] (and in event_age[])
bool WaitOnAll, //IN: true = wait for all events, false = wait for any one
HSAuint32 Milliseconds, //IN: time-out interval
uint64_t *event_age //IN/OUT: per-event last event ages, may be NULL
);
/**
new TEMPORARY function definition - to be used only on "Trinity + Southern Islands" platform
If used on other platforms the function will return HSAKMT_STATUS_ERROR
+11 -2
Переглянути файл
@@ -39,9 +39,10 @@
* - 1.11 - Add unified memory for ctx save/restore area
* - 1.12 - Add DMA buf export ioctl
* - 1.13 - Add debugger API
* - 1.14 - Update kfd_event_data
*/
/*
 * Current KFD ioctl interface version. The minor version matches the newest
 * entry of the change log above (1.14 - Update kfd_event_data); it must be
 * defined exactly once, since redefining a macro with a different value is
 * an incompatible redefinition.
 */
#define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 14
/*
* Debug revision change log
@@ -979,12 +980,20 @@ struct kfd_hsa_hw_exception_data {
__u32 gpu_id;
};
/*
 * hsa signal event data
 *
 * Per-event payload exchanged with KFD for signal events. It is carried
 * inside struct kfd_event_data.
 */
struct kfd_hsa_signal_event_data {
__u64 last_event_age; /* to and from KFD: last event age known to user space on the way in, last event age stored by KFD on the way out */
};
/* Event data */
struct kfd_event_data {
union {
/* From KFD */
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
}; /* From KFD */
/* To and From KFD */
struct kfd_hsa_signal_event_data signal_event_data;
};
__u64 kfd_event_data_ext; /* pointer to an extension structure
for future exception types */
__u32 event_id; /* to KFD */
+24 -1
Переглянути файл
@@ -213,11 +213,17 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtQueryEventState(HsaEvent *Event)
/*
 * Wait on a single event without event-age tracking.
 *
 * Thin compatibility wrapper: forwards Event and Milliseconds unchanged to
 * hsaKmtWaitOnEvent_Ext() and passes a NULL event age, so the result is
 * whatever the extended call returns.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent(HsaEvent *Event, HSAuint32 Milliseconds)
{
	/* This entry point has no event age to report or update. */
	uint64_t *const untracked_age = NULL;

	return hsaKmtWaitOnEvent_Ext(Event, Milliseconds, untracked_age);
}
/*
 * Wait on a single event, optionally tracking its event age.
 *
 * Event        - event to wait on; a NULL pointer is rejected with
 *                HSAKMT_STATUS_INVALID_HANDLE.
 * Milliseconds - time-out interval, forwarded unchanged.
 * event_age    - optional (may be NULL) last event age for Event; forwarded
 *                unchanged to the multi-event wait.
 *
 * Returns the status of hsaKmtWaitOnMultipleEvents_Ext() for the one-element
 * wait.
 *
 * The wait must go through hsaKmtWaitOnMultipleEvents_Ext(): returning via the
 * non-extended hsaKmtWaitOnMultipleEvents() first would leave the extended
 * call unreachable and silently drop event_age.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnEvent_Ext(HsaEvent *Event,
					      HSAuint32 Milliseconds,
					      uint64_t *event_age)
{
	if (!Event)
		return HSAKMT_STATUS_INVALID_HANDLE;

	/* &Event is a one-element event array; event_age is its one-entry age array. */
	return hsaKmtWaitOnMultipleEvents_Ext(&Event, 1, true, Milliseconds, event_age);
}
static HSAKMT_STATUS get_mem_info_svm_api(uint64_t address, uint32_t gpu_id)
@@ -365,6 +371,15 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
HSAuint32 NumEvents,
bool WaitOnAll,
HSAuint32 Milliseconds)
{
/*
 * Compatibility wrapper without event-age tracking: all arguments are
 * forwarded unchanged to hsaKmtWaitOnMultipleEvents_Ext() and the event_age
 * argument is NULL, so the extended function neither reads nor writes any
 * event age.
 */
return hsaKmtWaitOnMultipleEvents_Ext(Events, NumEvents, WaitOnAll, Milliseconds, NULL);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents_Ext(HsaEvent *Events[],
HSAuint32 NumEvents,
bool WaitOnAll,
HSAuint32 Milliseconds,
uint64_t *event_age)
{
CHECK_KFD_OPEN();
@@ -376,6 +391,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
for (HSAuint32 i = 0; i < NumEvents; i++) {
event_data[i].event_id = Events[i]->EventId;
event_data[i].kfd_event_data_ext = (uint64_t)(uintptr_t)NULL;
if (event_age && Events[i]->EventData.EventType == HSA_EVENTTYPE_SIGNAL)
event_data[i].signal_event_data.last_event_age = event_age[i];
}
struct kfd_ioctl_wait_events_args args = {0};
@@ -413,6 +430,12 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtWaitOnMultipleEvents(HsaEvent *Events[],
}
}
out:
for (HSAuint32 i = 0; i < NumEvents; i++) {
if (event_age && Events[i]->EventData.EventType == HSA_EVENTTYPE_SIGNAL)
event_age[i] = event_data[i].signal_event_data.last_event_age;
}
free(event_data);
return result;