Separate AsyncEventsLoop into two separate threads
This fixes an issue for missing HW events when out of HW events.
We cannot determine whether a HW event has occurred unless we call the
underlying drivers with hsaKmtWaitOnMultipleEvents_Ext. Previous logic
in Signal::WaitAny would switch to ACTIVE_WAIT state if we run out of
hardware events (signal->EopEvent() == NULL) and this would cause the
hsaKmtWaitOnMultipleEvents_Ext call to be skipped. But also, when we
have some signals without hardware events, calling
hsaKmtWaitOnMultipleEvents_Ext with a timeout of 0 so that we can poll
for remaining signals adds overhead with an IOCTL call and may cause
extra delay. Separating AsyncEventLoop into two separate threads so
that:
1. We can have a new Signal::WaitAnyExceptions to wait for HW events
This function can be simpler as it does not have to perform all the
timer calculations because it is expected to be always waiting on
hsaKmtWaitOnMultipleEvents_Ext through the lifetime of a process.
2. Signal::WaitAny does not need to have extra code to check for HW
exceptions as it only needs to handle HSA_EVENTTYPE_SIGNAL events. It
can also skip the calls to hsaKmtWaitOnMultipleEvents_Ext if needed.
Change-Id: I52ba99fd6e483e0cb477b7931a0dcc03520aa523
Signed-off-by: David Yat Sin <David.YatSin@amd.com>
[ROCm/ROCR-Runtime commit: 88eaa834d0]
Dieser Commit ist enthalten in:
@@ -669,11 +669,15 @@ class Runtime {
|
||||
// Deprecated HSA Region API GPU (for legacy APU support only)
|
||||
Agent* region_gpu_;
|
||||
|
||||
AsyncEventsControl async_events_control_;
|
||||
struct AsyncEventsInfo {
|
||||
AsyncEventsControl control;
|
||||
AsyncEvents events;
|
||||
AsyncEvents new_events;
|
||||
bool monitor_exceptions;
|
||||
};
|
||||
|
||||
AsyncEvents async_events_;
|
||||
|
||||
AsyncEvents new_async_events_;
|
||||
struct AsyncEventsInfo asyncSignals_;
|
||||
struct AsyncEventsInfo asyncExceptions_;
|
||||
|
||||
// System clock frequency.
|
||||
uint64_t sys_clock_freq_;
|
||||
|
||||
@@ -356,6 +356,20 @@ class Signal {
|
||||
uint64_t timeout_hint, hsa_wait_state_t wait_hint,
|
||||
hsa_signal_value_t* satisfying_value);
|
||||
|
||||
/// @brief Dedicated funtion to wait on signals that are not of type HSA_EVENTTYPE_SIGNAL
|
||||
/// these events can only be received by calling the underlying driver (i.e via the hsaKmtWaitOnMultipleEvents_Ext
|
||||
/// function call). We still need to have 1 signal of type HSA_EVENT_TYPE_SIGNAL attached to the list of signals
|
||||
/// to be able to force hsaKmtWaitOnMultipleEvents_Ext to return.
|
||||
/// @param signal_count Number of hsa_signals
|
||||
/// @param hsa_signals Pointer to array of signals. All signals should have a valid EopEvent()
|
||||
/// @param conds list of conditions
|
||||
/// @param values list of values
|
||||
/// @param satisfying_value value to be satisfied
|
||||
/// @return index of signal that satisfies condition
|
||||
static uint32_t WaitAnyExceptions(uint32_t signal_count, const hsa_signal_t* hsa_signals,
|
||||
const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
|
||||
hsa_signal_value_t* satisfying_value);
|
||||
|
||||
__forceinline bool IsType(rtti_t id) { return _IsA(id); }
|
||||
|
||||
/// @brief Prevents the signal from being destroyed until the matching Release().
|
||||
|
||||
@@ -802,35 +802,44 @@ hsa_status_t Runtime::SetAsyncSignalHandler(hsa_signal_t signal,
|
||||
hsa_signal_value_t value,
|
||||
hsa_amd_signal_handler handler,
|
||||
void* arg) {
|
||||
// Indicate that this signal is in use.
|
||||
if (signal.handle != 0) hsa_signal_handle(signal)->Retain();
|
||||
|
||||
ScopedAcquire<HybridMutex> scope_lock(&async_events_control_.lock);
|
||||
struct AsyncEventsInfo* asyncInfo = &asyncSignals_;
|
||||
|
||||
if (signal.handle != 0) {
|
||||
// Indicate that this signal is in use.
|
||||
hsa_signal_handle(signal)->Retain();
|
||||
|
||||
core::Signal* coreSignal = core::Signal::Convert(signal);
|
||||
if (coreSignal->EopEvent() && coreSignal->EopEvent()->EventData.EventType != HSA_EVENTTYPE_SIGNAL)
|
||||
asyncInfo = &asyncExceptions_;
|
||||
}
|
||||
|
||||
ScopedAcquire<HybridMutex> scope_lock(&asyncInfo->control.lock);
|
||||
|
||||
// Lazy initializer
|
||||
if (async_events_control_.async_events_thread_ == NULL) {
|
||||
if (asyncInfo->control.async_events_thread_ == NULL) {
|
||||
// Create monitoring thread control signal
|
||||
auto err = HSA::hsa_signal_create(0, 0, NULL, &async_events_control_.wake);
|
||||
auto err = HSA::hsa_signal_create(0, 0, NULL, &asyncInfo->control.wake);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
assert(false && "Asyncronous events control signal creation error.");
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
async_events_.PushBack(async_events_control_.wake, HSA_SIGNAL_CONDITION_NE,
|
||||
0, NULL, NULL);
|
||||
asyncInfo->events.PushBack(asyncInfo->control.wake, HSA_SIGNAL_CONDITION_NE,
|
||||
0, NULL, NULL);
|
||||
|
||||
// Start event monitoring thread
|
||||
async_events_control_.exit = false;
|
||||
async_events_control_.async_events_thread_ =
|
||||
os::CreateThread(AsyncEventsLoop, NULL);
|
||||
if (async_events_control_.async_events_thread_ == NULL) {
|
||||
asyncInfo->control.exit = false;
|
||||
asyncInfo->control.async_events_thread_ =
|
||||
os::CreateThread(AsyncEventsLoop, asyncInfo);
|
||||
if (asyncInfo->control.async_events_thread_ == NULL) {
|
||||
assert(false && "Asyncronous events thread creation error.");
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
new_async_events_.PushBack(signal, cond, value, handler, arg);
|
||||
asyncInfo->new_events.PushBack(signal, cond, value, handler, arg);
|
||||
|
||||
hsa_signal_handle(async_events_control_.wake)->StoreRelease(1);
|
||||
hsa_signal_handle(asyncInfo->control.wake)->StoreRelease(1);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -1546,18 +1555,35 @@ hsa_status_t Runtime::IPCDetach(void* ptr) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void Runtime::AsyncEventsLoop(void*) {
|
||||
auto& async_events_control_ = runtime_singleton_->async_events_control_;
|
||||
auto& async_events_ = runtime_singleton_->async_events_;
|
||||
auto& new_async_events_ = runtime_singleton_->new_async_events_;
|
||||
void Runtime::AsyncEventsLoop(void* _eventsInfo) {
|
||||
struct AsyncEventsInfo* eventsInfo = reinterpret_cast<struct AsyncEventsInfo*>(_eventsInfo);
|
||||
|
||||
auto& async_events_control_ = eventsInfo->control;
|
||||
auto& async_events_ = eventsInfo->events;
|
||||
auto& new_async_events_ = eventsInfo->new_events;
|
||||
|
||||
while (!async_events_control_.exit) {
|
||||
// Wait for a signal
|
||||
hsa_signal_value_t value;
|
||||
uint32_t index = AMD::hsa_amd_signal_wait_any(
|
||||
uint32_t(async_events_.Size()), &async_events_.signal_[0],
|
||||
&async_events_.cond_[0], &async_events_.value_[0], uint64_t(-1),
|
||||
HSA_WAIT_STATE_BLOCKED, &value);
|
||||
uint32_t index = 0;
|
||||
|
||||
if (eventsInfo->monitor_exceptions) {
|
||||
index = Signal::WaitAnyExceptions(
|
||||
uint32_t(async_events_.Size()),
|
||||
&async_events_.signal_[0],
|
||||
&async_events_.cond_[0],
|
||||
&async_events_.value_[0],
|
||||
&value);
|
||||
} else {
|
||||
index = AMD::hsa_amd_signal_wait_any(
|
||||
uint32_t(async_events_.Size()),
|
||||
&async_events_.signal_[0],
|
||||
&async_events_.cond_[0],
|
||||
&async_events_.value_[0],
|
||||
uint64_t(-1),
|
||||
HSA_WAIT_STATE_BLOCKED,
|
||||
&value);
|
||||
}
|
||||
|
||||
// Reset the control signal
|
||||
if (index == 0) {
|
||||
@@ -1922,7 +1948,11 @@ Runtime::Runtime()
|
||||
hw_exception_event_(nullptr),
|
||||
hw_exception_signal_(nullptr),
|
||||
ref_count_(0),
|
||||
kfd_version{} {}
|
||||
kfd_version{} {
|
||||
|
||||
asyncSignals_.monitor_exceptions = false;
|
||||
asyncExceptions_.monitor_exceptions = true;
|
||||
}
|
||||
|
||||
hsa_status_t Runtime::Load() {
|
||||
os::cpuid_t cpuinfo;
|
||||
@@ -2000,7 +2030,8 @@ void Runtime::Unload() {
|
||||
std::for_each(disabled_gpu_agents_.begin(), disabled_gpu_agents_.end(), DeleteObject());
|
||||
disabled_gpu_agents_.clear();
|
||||
|
||||
async_events_control_.Shutdown();
|
||||
asyncSignals_.control.Shutdown();
|
||||
asyncExceptions_.control.Shutdown();
|
||||
|
||||
if (vm_fault_signal_ != nullptr) {
|
||||
vm_fault_signal_->DestroySignal();
|
||||
|
||||
@@ -255,23 +255,8 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
|
||||
while (true) {
|
||||
// Cannot mwaitx - polling multiple signals
|
||||
for (uint32_t i = 0; i < signal_count; i++) {
|
||||
if (!signals[i]->IsValid()) return uint32_t(-1);
|
||||
|
||||
// Handling special event.
|
||||
if (signals[i]->EopEvent() != NULL) {
|
||||
const HSA_EVENTTYPE event_type =
|
||||
signals[i]->EopEvent()->EventData.EventType;
|
||||
if (event_type == HSA_EVENTTYPE_MEMORY) {
|
||||
const HsaMemoryAccessFault& fault =
|
||||
signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault;
|
||||
if (fault.Flags == HSA_EVENTID_MEMORY_FATAL_PROCESS) {
|
||||
return i;
|
||||
}
|
||||
} else if (event_type == HSA_EVENTTYPE_HW_EXCEPTION) {
|
||||
const HsaHwException& exception = signals[i]->EopEvent()->EventData.EventData.HwException;
|
||||
if (exception.MemoryLost) return i;
|
||||
}
|
||||
}
|
||||
if (!signals[i]->IsValid())
|
||||
return uint32_t(-1);
|
||||
|
||||
value =
|
||||
atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);
|
||||
@@ -325,6 +310,111 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Special handler to wait listen for exceptions from underlying driver.
|
||||
*/
|
||||
uint32_t Signal::WaitAnyExceptions(uint32_t signal_count, const hsa_signal_t* hsa_signals,
|
||||
const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
|
||||
hsa_signal_value_t* satisfying_value) {
|
||||
|
||||
uint32_t wait_ms = uint32_t(-1);
|
||||
hsa_signal_handle* signals =
|
||||
reinterpret_cast<hsa_signal_handle*>(const_cast<hsa_signal_t*>(hsa_signals));
|
||||
|
||||
for (uint32_t i = 0; i < signal_count; i++) signals[i]->Retain();
|
||||
|
||||
MAKE_SCOPE_GUARD([&]() {
|
||||
for (uint32_t i = 0; i < signal_count; i++) signals[i]->Release();
|
||||
});
|
||||
|
||||
uint32_t prior = 0;
|
||||
for (uint32_t i = 0; i < signal_count; i++) prior = Max(prior, signals[i]->waiting_++);
|
||||
|
||||
|
||||
MAKE_SCOPE_GUARD([&]() {
|
||||
for (uint32_t i = 0; i < signal_count; i++) signals[i]->waiting_--;
|
||||
});
|
||||
|
||||
if (!core::Runtime::runtime_singleton_->KfdVersion().supports_event_age)
|
||||
// Allow only the first waiter to sleep. Without event age tracking,
|
||||
// race condition can cause some threads to sleep without wakeup since missing interrupt.
|
||||
if (prior != 0) wait_ms = 0;
|
||||
|
||||
HsaEvent** evts = new HsaEvent* [signal_count];
|
||||
MAKE_SCOPE_GUARD([&]() { delete[] evts; });
|
||||
|
||||
uint32_t unique_evts = 0;
|
||||
|
||||
for (uint32_t i = 0; i < signal_count; i++) {
|
||||
assert(signals[i]->EopEvent() != NULL);
|
||||
evts[i] = signals[i]->EopEvent();
|
||||
}
|
||||
|
||||
std::sort(evts, evts + signal_count);
|
||||
HsaEvent** end = std::unique(evts, evts + signal_count);
|
||||
unique_evts = uint32_t(end - evts);
|
||||
|
||||
uint64_t event_age[unique_evts];
|
||||
memset(event_age, 0, unique_evts * sizeof(uint64_t));
|
||||
if (core::Runtime::runtime_singleton_->KfdVersion().supports_event_age)
|
||||
for (uint32_t i = 0; i < unique_evts; i++)
|
||||
event_age[i] = 1;
|
||||
|
||||
int64_t value;
|
||||
|
||||
bool condition_met = false;
|
||||
while (true) {
|
||||
// Cannot mwaitx - polling multiple signals
|
||||
|
||||
for (uint32_t i = 0; i < signal_count; i++) {
|
||||
if (!signals[i]->IsValid())
|
||||
return uint32_t(-1);
|
||||
|
||||
const HSA_EVENTTYPE event_type = signals[i]->EopEvent()->EventData.EventType;
|
||||
if (event_type == HSA_EVENTTYPE_MEMORY) {
|
||||
const HsaMemoryAccessFault& fault =
|
||||
signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault;
|
||||
if (fault.Flags == HSA_EVENTID_MEMORY_FATAL_PROCESS) return i;
|
||||
} else if (event_type == HSA_EVENTTYPE_HW_EXCEPTION) {
|
||||
const HsaHwException& exception =
|
||||
signals[i]->EopEvent()->EventData.EventData.HwException;
|
||||
if (exception.MemoryLost) return i;
|
||||
}
|
||||
|
||||
value = atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);
|
||||
|
||||
switch (conds[i]) {
|
||||
case HSA_SIGNAL_CONDITION_EQ: {
|
||||
condition_met = (value == values[i]);
|
||||
break;
|
||||
}
|
||||
case HSA_SIGNAL_CONDITION_NE: {
|
||||
condition_met = (value != values[i]);
|
||||
break;
|
||||
}
|
||||
case HSA_SIGNAL_CONDITION_GTE: {
|
||||
condition_met = (value >= values[i]);
|
||||
break;
|
||||
}
|
||||
case HSA_SIGNAL_CONDITION_LT: {
|
||||
condition_met = (value < values[i]);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
return uint32_t(-1);
|
||||
}
|
||||
}
|
||||
if (condition_met) {
|
||||
if (satisfying_value != NULL) *satisfying_value = value;
|
||||
// Some other signal in the list satisfied condition
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
hsaKmtWaitOnMultipleEvents_Ext(evts, unique_evts, false, wait_ms, event_age);
|
||||
} //while
|
||||
}
|
||||
|
||||
SignalGroup::SignalGroup(uint32_t num_signals, const hsa_signal_t* hsa_signals)
|
||||
: count(num_signals) {
|
||||
if (count != 0) {
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren