SWDEV-284671 - Add HW event wait to improve hipDeviceSynchronize
If an AMD event contains a reference to a HW event, then the runtime
can check or wait for that HW event directly. The CPU status update will
occur later, after the HSA signal callback, but that does not affect the result.
Change-Id: I591391a953bbdba6a25ac07e2cd98aeb17cd4596
[ROCm/clr commit: 85c70a7495]
Este cometimento está contido em:
cometido por
Maneesh Gupta
ascendente
8d7a745246
cometimento
a9abd850ea
@@ -1696,6 +1696,15 @@ class Device : public RuntimeObject {
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
|
||||
return true;
|
||||
};
|
||||
|
||||
// Returns the status of HW event, associated with amd::Event
|
||||
virtual bool IsHwEventReady(
|
||||
const amd::Event& event, //!< AMD event for HW status validation
|
||||
bool wait = false //!< If true then forces the event completion
|
||||
) const {
|
||||
return false;
|
||||
};
|
||||
|
||||
//! Returns TRUE if the device is available for computations
|
||||
bool isOnline() const { return online_; }
|
||||
|
||||
|
||||
@@ -2517,6 +2517,20 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI
|
||||
return result;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
//! Checks, or waits for, the HW event attached to an amd::Event.
//!
//! The HW event is taken from the notification event when the AMD event has one,
//! otherwise from the AMD event itself.
//!
//! @param event  AMD event whose HW status is validated
//! @param wait   If true, waits on the HW signal instead of just sampling it
//! @return false when no HW event is available; true after a completed wait;
//!         otherwise true only if the sampled signal value is <= 0
bool Device::IsHwEventReady(const amd::Event& event, bool wait) const {
  // Pick the source of the HW event: notification event first, then the event itself
  void* hw_event = nullptr;
  if (event.NotifyEvent() != nullptr) {
    hw_event = event.NotifyEvent()->HwEvent();
  } else {
    hw_event = event.HwEvent();
  }

  // Without a HW event the HW status can't be reported
  if (hw_event == nullptr) {
    return false;
  }

  ProfilingSignal* prof_signal = static_cast<ProfilingSignal*>(hw_event);

  if (wait) {
    // Blocking path: the result of the wait itself is not inspected
    WaitForSignal(prof_signal->signal_);
    return true;
  }

  // Non-blocking path: sample the current signal value
  return hsa_signal_load_relaxed(prof_signal->signal_) <= 0;
}
|
||||
|
||||
// ================================================================================================
|
||||
static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) {
|
||||
if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) {
|
||||
// Abort on device exceptions.
|
||||
@@ -2528,6 +2542,7 @@ static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) {
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
hsa_queue_t* Device::getQueueFromPool(const uint qIndex) {
|
||||
if (qIndex < QueuePriority::Total && queuePool_[qIndex].size() > 0) {
|
||||
typedef decltype(queuePool_)::value_type::const_reference PoolRef;
|
||||
|
||||
@@ -232,7 +232,11 @@ class NullDevice : public amd::Device {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
|
||||
virtual bool SetClockMode(
|
||||
const cl_set_device_clock_mode_input_amd setClockModeInput,
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
|
||||
|
||||
//! Always returns false; neither the event nor the wait flag is inspected here.
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const {
  return false;
}
|
||||
|
||||
protected:
|
||||
//! Initialize compiler instance and handle
|
||||
@@ -400,6 +404,8 @@ class Device : public NullDevice {
|
||||
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
|
||||
|
||||
//! Returns the status of the HW event associated with the given amd::Event.
//! If wait is true, the call waits for the HW event instead of only sampling it.
//! Returns false when the event carries no HW event.
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const;
|
||||
|
||||
//! Allocate host memory in terms of numa policy set by user
|
||||
void* hostNumaAlloc(size_t size, size_t alignment, bool atomics = false) const;
|
||||
|
||||
|
||||
@@ -65,6 +65,10 @@ Event::~Event() {
|
||||
delete callback;
|
||||
callback = next;
|
||||
}
|
||||
// Release the notify event
|
||||
if (notify_event_ != nullptr) {
|
||||
notify_event_->release();
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
@@ -272,7 +276,6 @@ bool Event::notifyCmdQueue() {
|
||||
}
|
||||
ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue);
|
||||
command->enqueue();
|
||||
command->release();
|
||||
// Save notification, associated with the current event
|
||||
notify_event_ = command;
|
||||
}
|
||||
|
||||
@@ -107,10 +107,6 @@ void HostQueue::finish() {
|
||||
Command* command = nullptr;
|
||||
if (IS_HIP) {
|
||||
command = getLastQueuedCommand(true);
|
||||
if (nullptr != command) {
|
||||
command->awaitCompletion();
|
||||
command->release();
|
||||
}
|
||||
}
|
||||
if (nullptr == command) {
|
||||
// Send a finish to make sure we finished all commands
|
||||
@@ -120,9 +116,13 @@ void HostQueue::finish() {
|
||||
}
|
||||
ClPrint(LOG_DEBUG, LOG_CMD, "marker is queued");
|
||||
command->enqueue();
|
||||
command->awaitCompletion();
|
||||
command->release();
|
||||
}
|
||||
// Check HW status of the ROCclr event. Note: not all ROCclr modes support HW status
|
||||
static constexpr bool kWaitCompletion = true;
|
||||
if (!device().IsHwEventReady(command->event(), kWaitCompletion)) {
|
||||
command->awaitCompletion();
|
||||
}
|
||||
command->release();
|
||||
ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished");
|
||||
}
|
||||
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador