2
0

SWDEV-284671 - Add HW event wait to improve hipDeviceSynchronize

If an AMD event contains a reference to a HW event, the runtime can
check or wait on that HW event directly. The CPU-side status update will
still occur later, after the HSA signal callback, but that does not affect the result.

Change-Id: I591391a953bbdba6a25ac07e2cd98aeb17cd4596


[ROCm/clr commit: 85c70a7495]
Este cometimento está contido em:
German Andryeyev
2021-06-07 15:56:47 -04:00
cometido por Maneesh Gupta
ascendente 8d7a745246
cometimento a9abd850ea
5 ficheiros modificados com 41 adições e 8 eliminações
+9
Ver ficheiro
@@ -1696,6 +1696,15 @@ class Device : public RuntimeObject {
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
return true;
};
//! Returns the status of the HW event associated with an amd::Event.
//! This default implementation does not inspect anything and always returns false.
//!   event - AMD event for HW status validation
//!   wait  - if true, the caller requests that the event completion is forced (default: false)
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const { return false; }
//! Reports the online flag: TRUE means the device is available for computations
bool isOnline() const {
  return online_;
}
+15
Ver ficheiro
@@ -2517,6 +2517,20 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI
return result;
}
// ================================================================================================
//! Checks, and optionally waits for, the HW event attached to an amd::Event.
//! Returns false when no HW event is available. With \p wait set, WaitForSignal()
//! is called on the HW signal and true is returned afterwards. Otherwise the signal
//! is sampled once and true is returned when its value is <= 0.
bool Device::IsHwEventReady(const amd::Event& event, bool wait) const {
  // Prefer the HW event of the notification event when one is attached,
  // otherwise fall back to the HW event stored on the event itself
  const auto notifier = event.NotifyEvent();
  void* const handle = (notifier == nullptr) ? event.HwEvent() : notifier->HwEvent();

  // Nothing to query without a HW event, so report "not ready"
  if (handle == nullptr) {
    return false;
  }

  // The opaque HW event handle is interpreted as a ProfilingSignal
  auto& hsa_sig = reinterpret_cast<ProfilingSignal*>(handle)->signal_;

  if (!wait) {
    // Non-blocking query of the current signal value
    return hsa_signal_load_relaxed(hsa_sig) <= 0;
  }

  // Wait requested: the event is reported as ready once WaitForSignal() returns
  WaitForSignal(hsa_sig);
  return true;
}
// ================================================================================================
static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) {
if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) {
// Abort on device exceptions.
@@ -2528,6 +2542,7 @@ static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) {
}
}
// ================================================================================================
hsa_queue_t* Device::getQueueFromPool(const uint qIndex) {
if (qIndex < QueuePriority::Total && queuePool_[qIndex].size() > 0) {
typedef decltype(queuePool_)::value_type::const_reference PoolRef;
+7 -1
Ver ficheiro
@@ -232,7 +232,11 @@ class NullDevice : public amd::Device {
return true;
}
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
//! Clock mode stub: neither argument is used and true is always returned
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
                          cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
  return true;
}
//! HW event query stub: returns false regardless of the event or the wait request
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const {
  return false;
}
protected:
//! Initialize compiler instance and handle
@@ -400,6 +404,8 @@ class Device : public NullDevice {
//! Declaration only; the body is defined out of line.
//! NOTE(review): presumably applies the requested clock mode and fills the output structure - confirm
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
//! Returns the status of the HW event associated with the given amd::Event.
//! Declaration only; the body is defined out of line. \p wait defaults to false.
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const;
//! Allocate host memory in terms of numa policy set by user
void* hostNumaAlloc(size_t size, size_t alignment, bool atomics = false) const;
+4 -1
Ver ficheiro
@@ -65,6 +65,10 @@ Event::~Event() {
delete callback;
callback = next;
}
// Release the notify event
if (notify_event_ != nullptr) {
notify_event_->release();
}
}
// ================================================================================================
@@ -272,7 +276,6 @@ bool Event::notifyCmdQueue() {
}
ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue);
command->enqueue();
command->release();
// Save notification, associated with the current event
notify_event_ = command;
}
+6 -6
Ver ficheiro
@@ -107,10 +107,6 @@ void HostQueue::finish() {
Command* command = nullptr;
if (IS_HIP) {
command = getLastQueuedCommand(true);
if (nullptr != command) {
command->awaitCompletion();
command->release();
}
}
if (nullptr == command) {
// Send a finish to make sure we finished all commands
@@ -120,9 +116,13 @@ void HostQueue::finish() {
}
ClPrint(LOG_DEBUG, LOG_CMD, "marker is queued");
command->enqueue();
command->awaitCompletion();
command->release();
}
// Check HW status of the ROCclr event. Note: not all ROCclr modes support HW status
static constexpr bool kWaitCompletion = true;
if (!device().IsHwEventReady(command->event(), kWaitCompletion)) {
command->awaitCompletion();
}
command->release();
ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished");
}