diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h index 6bbff2121c..b9af49cac3 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/runtime.h @@ -669,11 +669,15 @@ class Runtime { // Deprecated HSA Region API GPU (for legacy APU support only) Agent* region_gpu_; - AsyncEventsControl async_events_control_; + struct AsyncEventsInfo { + AsyncEventsControl control; + AsyncEvents events; + AsyncEvents new_events; + bool monitor_exceptions; + }; - AsyncEvents async_events_; - - AsyncEvents new_async_events_; + struct AsyncEventsInfo asyncSignals_; + struct AsyncEventsInfo asyncExceptions_; // System clock frequency. uint64_t sys_clock_freq_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/signal.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/signal.h index 39e5321867..f960bc34b1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/signal.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/signal.h @@ -356,6 +356,20 @@ class Signal { uint64_t timeout_hint, hsa_wait_state_t wait_hint, hsa_signal_value_t* satisfying_value); + /// @brief Dedicated funtion to wait on signals that are not of type HSA_EVENTTYPE_SIGNAL + /// these events can only be received by calling the underlying driver (i.e via the hsaKmtWaitOnMultipleEvents_Ext + /// function call). We still need to have 1 signal of type HSA_EVENT_TYPE_SIGNAL attached to the list of signals + /// to be able to force hsaKmtWaitOnMultipleEvents_Ext to return. + /// @param signal_count Number of hsa_signals + /// @param hsa_signals Pointer to array of signals. All signals should have a valid EopEvent() + /// @param conds list of conditions + /// @param values list of values + /// @param satisfying_value value to be satisfied + /// @return index of signal that satisfies condition + static uint32_t WaitAnyExceptions(uint32_t signal_count, const hsa_signal_t* hsa_signals, + const hsa_signal_condition_t* conds, const hsa_signal_value_t* values, + hsa_signal_value_t* satisfying_value); + __forceinline bool IsType(rtti_t id) { return _IsA(id); } /// @brief Prevents the signal from being destroyed until the matching Release(). diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index ed136e4038..92a5eb7912 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -802,35 +802,44 @@ hsa_status_t Runtime::SetAsyncSignalHandler(hsa_signal_t signal, hsa_signal_value_t value, hsa_amd_signal_handler handler, void* arg) { - // Indicate that this signal is in use. - if (signal.handle != 0) hsa_signal_handle(signal)->Retain(); - ScopedAcquire scope_lock(&async_events_control_.lock); + struct AsyncEventsInfo* asyncInfo = &asyncSignals_; + + if (signal.handle != 0) { + // Indicate that this signal is in use. + hsa_signal_handle(signal)->Retain(); + + core::Signal* coreSignal = core::Signal::Convert(signal); + if (coreSignal->EopEvent() && coreSignal->EopEvent()->EventData.EventType != HSA_EVENTTYPE_SIGNAL) + asyncInfo = &asyncExceptions_; + } + + ScopedAcquire scope_lock(&asyncInfo->control.lock); // Lazy initializer - if (async_events_control_.async_events_thread_ == NULL) { + if (asyncInfo->control.async_events_thread_ == NULL) { // Create monitoring thread control signal - auto err = HSA::hsa_signal_create(0, 0, NULL, &async_events_control_.wake); + auto err = HSA::hsa_signal_create(0, 0, NULL, &asyncInfo->control.wake); if (err != HSA_STATUS_SUCCESS) { assert(false && "Asyncronous events control signal creation error."); return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } - async_events_.PushBack(async_events_control_.wake, HSA_SIGNAL_CONDITION_NE, - 0, NULL, NULL); + asyncInfo->events.PushBack(asyncInfo->control.wake, HSA_SIGNAL_CONDITION_NE, + 0, NULL, NULL); // Start event monitoring thread - async_events_control_.exit = false; - async_events_control_.async_events_thread_ = - os::CreateThread(AsyncEventsLoop, NULL); - if (async_events_control_.async_events_thread_ == NULL) { + asyncInfo->control.exit = false; + asyncInfo->control.async_events_thread_ = + os::CreateThread(AsyncEventsLoop, asyncInfo); + if (asyncInfo->control.async_events_thread_ == NULL) { assert(false && "Asyncronous events thread creation error."); return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } } - new_async_events_.PushBack(signal, cond, value, handler, arg); + asyncInfo->new_events.PushBack(signal, cond, value, handler, arg); - hsa_signal_handle(async_events_control_.wake)->StoreRelease(1); + hsa_signal_handle(asyncInfo->control.wake)->StoreRelease(1); return HSA_STATUS_SUCCESS; } @@ -1546,18 +1555,35 @@ hsa_status_t Runtime::IPCDetach(void* ptr) { return HSA_STATUS_SUCCESS; } -void Runtime::AsyncEventsLoop(void*) { - auto& async_events_control_ = runtime_singleton_->async_events_control_; - auto& async_events_ = runtime_singleton_->async_events_; - auto& new_async_events_ = runtime_singleton_->new_async_events_; +void Runtime::AsyncEventsLoop(void* _eventsInfo) { + struct AsyncEventsInfo* eventsInfo = reinterpret_cast(_eventsInfo); + + auto& async_events_control_ = eventsInfo->control; + auto& async_events_ = eventsInfo->events; + auto& new_async_events_ = eventsInfo->new_events; while (!async_events_control_.exit) { // Wait for a signal hsa_signal_value_t value; - uint32_t index = AMD::hsa_amd_signal_wait_any( - uint32_t(async_events_.Size()), &async_events_.signal_[0], - &async_events_.cond_[0], &async_events_.value_[0], uint64_t(-1), - HSA_WAIT_STATE_BLOCKED, &value); + uint32_t index = 0; + + if (eventsInfo->monitor_exceptions) { + index = Signal::WaitAnyExceptions( + uint32_t(async_events_.Size()), + &async_events_.signal_[0], + &async_events_.cond_[0], + &async_events_.value_[0], + &value); + } else { + index = AMD::hsa_amd_signal_wait_any( + uint32_t(async_events_.Size()), + &async_events_.signal_[0], + &async_events_.cond_[0], + &async_events_.value_[0], + uint64_t(-1), + HSA_WAIT_STATE_BLOCKED, + &value); + } // Reset the control signal if (index == 0) { @@ -1922,7 +1948,11 @@ Runtime::Runtime() hw_exception_event_(nullptr), hw_exception_signal_(nullptr), ref_count_(0), - kfd_version{} {} + kfd_version{} { + + asyncSignals_.monitor_exceptions = false; + asyncExceptions_.monitor_exceptions = true; +} hsa_status_t Runtime::Load() { os::cpuid_t cpuinfo; @@ -2000,7 +2030,8 @@ void Runtime::Unload() { std::for_each(disabled_gpu_agents_.begin(), disabled_gpu_agents_.end(), DeleteObject()); disabled_gpu_agents_.clear(); - async_events_control_.Shutdown(); + asyncSignals_.control.Shutdown(); + asyncExceptions_.control.Shutdown(); if (vm_fault_signal_ != nullptr) { vm_fault_signal_->DestroySignal(); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/signal.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/signal.cpp index 065f8ccc9f..e8fbe76993 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/signal.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/signal.cpp @@ -255,23 +255,8 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals, while (true) { // Cannot mwaitx - polling multiple signals for (uint32_t i = 0; i < signal_count; i++) { - if (!signals[i]->IsValid()) return uint32_t(-1); - - // Handling special event. - if (signals[i]->EopEvent() != NULL) { - const HSA_EVENTTYPE event_type = - signals[i]->EopEvent()->EventData.EventType; - if (event_type == HSA_EVENTTYPE_MEMORY) { - const HsaMemoryAccessFault& fault = - signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault; - if (fault.Flags == HSA_EVENTID_MEMORY_FATAL_PROCESS) { - return i; - } - } else if (event_type == HSA_EVENTTYPE_HW_EXCEPTION) { - const HsaHwException& exception = signals[i]->EopEvent()->EventData.EventData.HwException; - if (exception.MemoryLost) return i; - } - } + if (!signals[i]->IsValid()) + return uint32_t(-1); value = atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed); @@ -325,6 +310,111 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals, } } +/* + * Special handler to wait listen for exceptions from underlying driver. + */ +uint32_t Signal::WaitAnyExceptions(uint32_t signal_count, const hsa_signal_t* hsa_signals, + const hsa_signal_condition_t* conds, const hsa_signal_value_t* values, + hsa_signal_value_t* satisfying_value) { + + uint32_t wait_ms = uint32_t(-1); + hsa_signal_handle* signals = + reinterpret_cast(const_cast(hsa_signals)); + + for (uint32_t i = 0; i < signal_count; i++) signals[i]->Retain(); + + MAKE_SCOPE_GUARD([&]() { + for (uint32_t i = 0; i < signal_count; i++) signals[i]->Release(); + }); + + uint32_t prior = 0; + for (uint32_t i = 0; i < signal_count; i++) prior = Max(prior, signals[i]->waiting_++); + + + MAKE_SCOPE_GUARD([&]() { + for (uint32_t i = 0; i < signal_count; i++) signals[i]->waiting_--; + }); + + if (!core::Runtime::runtime_singleton_->KfdVersion().supports_event_age) + // Allow only the first waiter to sleep. Without event age tracking, + // race condition can cause some threads to sleep without wakeup since missing interrupt. + if (prior != 0) wait_ms = 0; + + HsaEvent** evts = new HsaEvent* [signal_count]; + MAKE_SCOPE_GUARD([&]() { delete[] evts; }); + + uint32_t unique_evts = 0; + + for (uint32_t i = 0; i < signal_count; i++) { + assert(signals[i]->EopEvent() != NULL); + evts[i] = signals[i]->EopEvent(); + } + + std::sort(evts, evts + signal_count); + HsaEvent** end = std::unique(evts, evts + signal_count); + unique_evts = uint32_t(end - evts); + + uint64_t event_age[unique_evts]; + memset(event_age, 0, unique_evts * sizeof(uint64_t)); + if (core::Runtime::runtime_singleton_->KfdVersion().supports_event_age) + for (uint32_t i = 0; i < unique_evts; i++) + event_age[i] = 1; + + int64_t value; + + bool condition_met = false; + while (true) { + // Cannot mwaitx - polling multiple signals + + for (uint32_t i = 0; i < signal_count; i++) { + if (!signals[i]->IsValid()) + return uint32_t(-1); + + const HSA_EVENTTYPE event_type = signals[i]->EopEvent()->EventData.EventType; + if (event_type == HSA_EVENTTYPE_MEMORY) { + const HsaMemoryAccessFault& fault = + signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault; + if (fault.Flags == HSA_EVENTID_MEMORY_FATAL_PROCESS) return i; + } else if (event_type == HSA_EVENTTYPE_HW_EXCEPTION) { + const HsaHwException& exception = + signals[i]->EopEvent()->EventData.EventData.HwException; + if (exception.MemoryLost) return i; + } + + value = atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed); + + switch (conds[i]) { + case HSA_SIGNAL_CONDITION_EQ: { + condition_met = (value == values[i]); + break; + } + case HSA_SIGNAL_CONDITION_NE: { + condition_met = (value != values[i]); + break; + } + case HSA_SIGNAL_CONDITION_GTE: { + condition_met = (value >= values[i]); + break; + } + case HSA_SIGNAL_CONDITION_LT: { + condition_met = (value < values[i]); + break; + } + default: { + return uint32_t(-1); + } + } + if (condition_met) { + if (satisfying_value != NULL) *satisfying_value = value; + // Some other signal in the list satisfied condition + return i; + } + } + + hsaKmtWaitOnMultipleEvents_Ext(evts, unique_evts, false, wait_ms, event_age); + } //while +} + SignalGroup::SignalGroup(uint32_t num_signals, const hsa_signal_t* hsa_signals) : count(num_signals) { if (count != 0) {