From a9abd850ea5b3a18f6cc80de100806e42a83b030 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Mon, 7 Jun 2021 15:56:47 -0400 Subject: [PATCH] SWDEV-284671 - Add HW event wait to improve hipDeviceSynchronize If AMD event contains a reference to a HW event, then runtime could check/wait for HW event. CPU status update will occur later after HSA signal callback, but it's not important for the result. Change-Id: I591391a953bbdba6a25ac07e2cd98aeb17cd4596 [ROCm/clr commit: 85c70a74959e22371683ecd9c4bbf9af4c13c91d] --- projects/clr/rocclr/device/device.hpp | 9 +++++++++ projects/clr/rocclr/device/rocm/rocdevice.cpp | 15 +++++++++++++++ projects/clr/rocclr/device/rocm/rocdevice.hpp | 8 +++++++- projects/clr/rocclr/platform/command.cpp | 5 ++++- projects/clr/rocclr/platform/commandqueue.cpp | 12 ++++++------ 5 files changed, 41 insertions(+), 8 deletions(-) diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 991cfc4bc1..53c2e39fcb 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1696,6 +1696,15 @@ class Device : public RuntimeObject { cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }; + + // Returns the status of HW event, associated with amd::Event + virtual bool IsHwEventReady( + const amd::Event& event, //!< AMD event for HW status validation + bool wait = false //!< If true then forces the event completion + ) const { + return false; + }; + //! 
Returns TRUE if the device is available for computations bool isOnline() const { return online_; } diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 512f8e74f9..3ece9bfcf2 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -2517,6 +2517,20 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI return result; } +// ================================================================================================ +bool Device::IsHwEventReady(const amd::Event& event, bool wait) const { + void* hw_event = (event.NotifyEvent() != nullptr) ? + event.NotifyEvent()->HwEvent() : event.HwEvent(); + if (hw_event == nullptr) { + return false; + } else if (wait) { + WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_); + return true; + } + return (hsa_signal_load_relaxed(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_) <= 0); +} + +// ================================================================================================ static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) { if (status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) { // Abort on device exceptions. 
@@ -2528,6 +2542,7 @@ static void callbackQueue(hsa_status_t status, hsa_queue_t* queue, void* data) { } } +// ================================================================================================ hsa_queue_t* Device::getQueueFromPool(const uint qIndex) { if (qIndex < QueuePriority::Total && queuePool_[qIndex].size() > 0) { typedef decltype(queuePool_)::value_type::const_reference PoolRef; diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index b3d009c523..78cfbb6241 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -232,7 +232,11 @@ class NullDevice : public amd::Device { return true; } - virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } + virtual bool SetClockMode( + const cl_set_device_clock_mode_input_amd setClockModeInput, + cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } + + virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const { return false; } protected: //! Initialize compiler instance and handle @@ -400,6 +404,8 @@ class Device : public NullDevice { virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput); + virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const; + //! 
Allocate host memory in terms of numa policy set by user void* hostNumaAlloc(size_t size, size_t alignment, bool atomics = false) const; diff --git a/projects/clr/rocclr/platform/command.cpp b/projects/clr/rocclr/platform/command.cpp index 7fccab9e60..389a73ad02 100644 --- a/projects/clr/rocclr/platform/command.cpp +++ b/projects/clr/rocclr/platform/command.cpp @@ -65,6 +65,10 @@ Event::~Event() { delete callback; callback = next; } + // Release the notify event + if (notify_event_ != nullptr) { + notify_event_->release(); + } } // ================================================================================================ @@ -272,7 +276,6 @@ bool Event::notifyCmdQueue() { } ClPrint(LOG_DEBUG, LOG_CMD, "queue marker to command queue: %p", queue); command->enqueue(); - command->release(); // Save notification, associated with the current event notify_event_ = command; } diff --git a/projects/clr/rocclr/platform/commandqueue.cpp b/projects/clr/rocclr/platform/commandqueue.cpp index 358e79df73..85bb08979c 100644 --- a/projects/clr/rocclr/platform/commandqueue.cpp +++ b/projects/clr/rocclr/platform/commandqueue.cpp @@ -107,10 +107,6 @@ void HostQueue::finish() { Command* command = nullptr; if (IS_HIP) { command = getLastQueuedCommand(true); - if (nullptr != command) { - command->awaitCompletion(); - command->release(); - } } if (nullptr == command) { // Send a finish to make sure we finished all commands @@ -120,9 +116,13 @@ void HostQueue::finish() { } ClPrint(LOG_DEBUG, LOG_CMD, "marker is queued"); command->enqueue(); - command->awaitCompletion(); - command->release(); } + // Check HW status of the ROCclr event. Note: not all ROCclr modes support HW status + static constexpr bool kWaitCompletion = true; + if (!device().IsHwEventReady(command->event(), kWaitCompletion)) { + command->awaitCompletion(); + } + command->release(); ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished"); }