diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index c3662a9982..a85867047b 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1287,6 +1287,9 @@ class VirtualDevice : public amd::HeapObject { //! Returns true if device has active wait setting bool ActiveWait() const; + //! Returns true if a queue handler callback is still pending + virtual bool isHandlerPending() const = 0; + //! Returns fence state of the VirtualGPU virtual bool isFenceDirty() const = 0; diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp index 808e6c2616..c43b7be6be 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -341,6 +341,8 @@ class VirtualGPU : public device::VirtualDevice { void profilerAttach(bool enable = false) {} + bool isHandlerPending() const { return false; } + bool isFenceDirty() const { return false; } inline bool dispatchAqlPacket(uint8_t* aqlpacket, amd::AccumulateCommand* vcmd = nullptr) { diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 609fb4cf7a..e3854538da 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -468,10 +468,12 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)", prof_signal->signal_.handle, prof_signal); } + SetHandlerPending(false); // Update the current command/marker with HW event prof_signal->retain(); ts->command().SetHwEvent(prof_signal); } else if (ts->command().profilingInfo().marker_ts_) { + SetHandlerPending(true); // Update the current command/marker with HW event prof_signal->retain(); ts->command().SetHwEvent(prof_signal); diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index 
bfae393726..609715934d 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -223,7 +223,7 @@ class VirtualGPU : public device::VirtualDevice { class HwQueueTracker : public amd::EmbeddedObject { public: - HwQueueTracker(const VirtualGPU& gpu): gpu_(gpu) {} + HwQueueTracker(const VirtualGPU& gpu): gpu_(gpu), handlerPending_(false) {} ~HwQueueTracker(); @@ -265,6 +265,12 @@ class VirtualGPU : public device::VirtualDevice { //! Empty check for external signals bool IsExternalSignalListEmpty() const { return external_signals_.empty(); } + //! Set the status to indicate a pending handler + void SetHandlerPending(bool pending) { handlerPending_ = pending; } + + //! Check if a callback handler is still pending + bool IsHandlerPending() const { return handlerPending_; } + //! Get/Set SDMA profiling bool GetSDMAProfiling() { return sdma_profiling_; } void SetSDMAProfiling(bool profile) { @@ -289,6 +295,7 @@ class VirtualGPU : public device::VirtualDevice { const VirtualGPU& gpu_; //!< VirtualGPU, associated with this tracker std::vector external_signals_; //!< External signals for a wait in this queue std::vector waiting_signals_; //!< Current waiting signals in this queue + bool handlerPending_; //!< True while a callback handler is still pending }; VirtualGPU(Device& device, bool profiling = false, bool cooperative = false, @@ -406,6 +413,10 @@ class VirtualGPU : public device::VirtualDevice { Timestamp* timestamp() const { return timestamp_; } + //! Indicates whether a callback handler is still pending. The callback processes the commands, + //! 
collects profiling data, and updates refcounts + bool isHandlerPending() const { return barriers_.IsHandlerPending(); } + void* allocKernArg(size_t size, size_t alignment); bool isFenceDirty() const { return fence_dirty_; } void resetFenceDirty() { fence_dirty_ = false; } diff --git a/projects/clr/rocclr/platform/commandqueue.cpp b/projects/clr/rocclr/platform/commandqueue.cpp index 4dd294c821..9375b4324f 100644 --- a/projects/clr/rocclr/platform/commandqueue.cpp +++ b/projects/clr/rocclr/platform/commandqueue.cpp @@ -131,7 +131,8 @@ void HostQueue::finish(bool cpu_wait) { (command->NotifyEvent() != nullptr) ? command->NotifyEvent()->HwEvent() : command->HwEvent(); force_marker = (hw_event == nullptr); } - if (nullptr == command || force_marker || vdev()->isFenceDirty()) { + if (nullptr == command || force_marker || + vdev()->isHandlerPending() || vdev()->isFenceDirty()) { if (nullptr != command) { command->release(); }