From 1338ff37e89b4d1fb36d23d48ccdf55998e15df5 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Wed, 1 Nov 2023 05:22:47 +0000 Subject: [PATCH] SWDEV-301667 - Cleanup unused paths - Refactor code and cleanup logic for callback saving for event records Change-Id: I5c56aa8e9c968a5bca70fb07ad1796da318e9e89 --- rocclr/device/device.hpp | 3 --- rocclr/device/pal/palvirtual.hpp | 2 -- rocclr/device/rocm/rocvirtual.cpp | 2 -- rocclr/device/rocm/rocvirtual.hpp | 13 +------------ rocclr/platform/command.cpp | 19 ++++--------------- rocclr/platform/commandqueue.cpp | 6 ++---- rocclr/platform/commandqueue.hpp | 12 ------------ 7 files changed, 7 insertions(+), 50 deletions(-) diff --git a/rocclr/device/device.hpp b/rocclr/device/device.hpp index 81bf3d0ed9..c1459d24cf 100644 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -1285,9 +1285,6 @@ class VirtualDevice : public amd::HeapObject { //! Returns true if device has active wait setting bool ActiveWait() const; - //! Returns the status of queue handler callback - virtual bool isHandlerPending() const = 0; - //! Returns fence state of the VirtualGPU virtual bool isFenceDirty() const = 0; virtual bool dispatchAqlPacket(uint8_t* aqlpacket) = 0; diff --git a/rocclr/device/pal/palvirtual.hpp b/rocclr/device/pal/palvirtual.hpp index f1c078d640..dbed5809db 100644 --- a/rocclr/device/pal/palvirtual.hpp +++ b/rocclr/device/pal/palvirtual.hpp @@ -340,8 +340,6 @@ class VirtualGPU : public device::VirtualDevice { void profilerAttach(bool enable = false) {} - bool isHandlerPending() const { return false; } - bool isFenceDirty() const { return false; } inline bool dispatchAqlPacket(uint8_t* aqlpacket) { return false; } diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index a0725e6b0e..e8a75e9df4 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -462,12 +462,10 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal( ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)", prof_signal->signal_.handle, prof_signal); } - SetHandlerPending(false); // Update the current command/marker with HW event prof_signal->retain(); ts->command().SetHwEvent(prof_signal); } else if (ts->command().profilingInfo().marker_ts_) { - SetHandlerPending(true); // Update the current command/marker with HW event prof_signal->retain(); ts->command().SetHwEvent(prof_signal); diff --git a/rocclr/device/rocm/rocvirtual.hpp b/rocclr/device/rocm/rocvirtual.hpp index c2638cadab..98071de0a0 100644 --- a/rocclr/device/rocm/rocvirtual.hpp +++ b/rocclr/device/rocm/rocvirtual.hpp @@ -223,7 +223,7 @@ class VirtualGPU : public device::VirtualDevice { class HwQueueTracker : public amd::EmbeddedObject { public: - HwQueueTracker(const VirtualGPU& gpu): gpu_(gpu), handlerPending_(false) {} + HwQueueTracker(const VirtualGPU& gpu): gpu_(gpu) {} ~HwQueueTracker(); @@ -265,12 +265,6 @@ class VirtualGPU : public device::VirtualDevice { //! Empty check for external signals bool IsExternalSignalListEmpty() const { return external_signals_.empty(); } - //! Set the status to indicate a pending handler - void SetHandlerPending(bool pending) { handlerPending_ = pending; } - - //! Check if callback has been queued - bool IsHandlerPending() const { return handlerPending_; } - //! Get/Set SDMA profiling bool GetSDMAProfiling() { return sdma_profiling_; } void SetSDMAProfiling(bool profile) { @@ -295,7 +289,6 @@ class VirtualGPU : public device::VirtualDevice { const VirtualGPU& gpu_; //!< VirtualGPU, associated with this tracker std::vector external_signals_; //!< External signals for a wait in this queue std::vector waiting_signals_; //!< Current waiting signals in this queue - bool handlerPending_; //!< This indicates if we have queued a callback handler }; VirtualGPU(Device& device, bool profiling = false, bool cooperative = false, @@ -413,10 +406,6 @@ class VirtualGPU : public device::VirtualDevice { Timestamp* timestamp() const { return timestamp_; } - //! Indicates the status of the callback handler. The callback would process the commands - //! and would collect profiling data, update refcounts - bool isHandlerPending() const { return barriers_.IsHandlerPending(); } - void* allocKernArg(size_t size, size_t alignment); bool isFenceDirty() const { return fence_dirty_; } void resetFenceDirty() { fence_dirty_ = false; } diff --git a/rocclr/platform/command.cpp b/rocclr/platform/command.cpp index d63ccf2663..c455acd58c 100644 --- a/rocclr/platform/command.cpp +++ b/rocclr/platform/command.cpp @@ -335,7 +335,6 @@ void Command::releaseResources() { } } -static constexpr uint32_t kMarkerTsCount = 1; // ================================================================================================ void Command::enqueue() { assert(queue_ != NULL && "Cannot be enqueued"); @@ -344,7 +343,8 @@ void Command::enqueue() { Agent::postEventCreate(as_cl(static_cast(this)), type_); } - ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p", getOclCommandKindString(this->type()), this); + ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p", + getOclCommandKindString(this->type()), this); // Direct dispatch logic below will submit the command immediately, but the command status // update will occur later after flush() with a wait @@ -362,21 +362,9 @@ void Command::enqueue() { ScopedLock sl(queue_->vdev()->execution()); queue_->FormSubmissionBatch(this); - bool isMarker = (type() == CL_COMMAND_MARKER || type() == 0); - if (isMarker) { + if (type() == CL_COMMAND_MARKER || type() == 0) { // The current HSA signal tracking logic requires profiling enabled for the markers EnableProfiling(); - } - - bool submitBatch = !profilingInfo().marker_ts_; - // Flush the batch if ther marker_ts have been continuously submitted until a threashold - // is reached. This helps recycling the commands and frees memory. - if (queue_->GetMarkerTsCount() >= kMarkerTsCount) { - submitBatch = true; - queue_->ResetMarkerTsCount(); - } - - if (isMarker && submitBatch) { // Update batch head for the current marker. Hence the status of all commands can be // updated upon the marker completion SetBatchHead(queue_->GetSubmittionBatch()); @@ -394,6 +382,7 @@ void Command::enqueue() { queue_->append(*this); queue_->flush(); } + if ((queue_->device().settings().waitCommand_ && (type_ != 0)) || ((commandWaitBits_ & 0x2) != 0)) { awaitCompletion(); diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp index a20aa74bca..4dd294c821 100644 --- a/rocclr/platform/commandqueue.cpp +++ b/rocclr/platform/commandqueue.cpp @@ -40,8 +40,7 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti lastEnqueueCommand_(nullptr), head_(nullptr), tail_(nullptr), - isActive_(false), - markerTsCount_(0) { + isActive_(false) { if (GPU_FORCE_QUEUE_PROFILING) { properties().set(CL_QUEUE_PROFILING_ENABLE); } @@ -132,8 +131,7 @@ void HostQueue::finish(bool cpu_wait) { (command->NotifyEvent() != nullptr) ? command->NotifyEvent()->HwEvent() : command->HwEvent(); force_marker = (hw_event == nullptr); } - if (nullptr == command || force_marker || - vdev()->isHandlerPending() || vdev()->isFenceDirty()) { + if (nullptr == command || force_marker || vdev()->isFenceDirty()) { if (nullptr != command) { command->release(); } diff --git a/rocclr/platform/commandqueue.hpp b/rocclr/platform/commandqueue.hpp index 3f3d4f5e1b..c149f171f7 100644 --- a/rocclr/platform/commandqueue.hpp +++ b/rocclr/platform/commandqueue.hpp @@ -264,9 +264,6 @@ class HostQueue : public CommandQueue { // an invalid access command->retain(); - if (command->profilingInfo().marker_ts_) { - markerTsCount_++; - } // Release the last command in the batch if (lastEnqueueCommand_ != nullptr) { lastEnqueueCommand_->release(); @@ -287,23 +284,14 @@ class HostQueue : public CommandQueue { //! Get queue status bool GetQueueStatus() { return isActive_; } - //! Get markerTsCount - uint32_t GetMarkerTsCount() const { return markerTsCount_; } - - //! Reset counter - void ResetMarkerTsCount() { markerTsCount_ = 0; } - private: Command* head_; //!< Head of the batch list Command* tail_; //!< Tail of the batch list //! True if this command queue is active bool isActive_; - - uint32_t markerTsCount_; //!< Count of TS markers }; - class DeviceQueue : public CommandQueue { public: DeviceQueue(Context& context, //!< Context object