diff --git a/rocclr/platform/command.cpp b/rocclr/platform/command.cpp index 7dd5aa27f4..6ad49c43e4 100644 --- a/rocclr/platform/command.cpp +++ b/rocclr/platform/command.cpp @@ -373,7 +373,7 @@ void Command::enqueue() { EnableProfiling(); // Update batch head for the current marker. Hence the status of all commands can be // updated upon the marker completion - SetBatchHead(queue_->GetSubmittionBatch()); + SetBatchHead(queue_->GetSubmissionBatch()); submit(*queue_->vdev()); @@ -381,6 +381,7 @@ void Command::enqueue() { queue_->ResetSubmissionBatch(); } else { submit(*queue_->vdev()); + queue_->FlushSubmissionBatch(this); } } else { queue_->append(*this); diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp index 33ee53c8c8..49f3029c36 100644 --- a/rocclr/platform/commandqueue.cpp +++ b/rocclr/platform/commandqueue.cpp @@ -66,7 +66,7 @@ bool HostQueue::terminate() { Command* lastCommand = getLastQueuedCommand(true); if (lastCommand != nullptr) { // Check if CPU batch wasn't flushed for completion with the last command - if (GetSubmittionBatch() != nullptr) { + if (GetSubmissionBatch() != nullptr) { auto command = new Marker(*this, false); if (command != nullptr) { ClPrint(LOG_DEBUG, LOG_CMD, "Marker queued to ensure finish"); @@ -137,13 +137,13 @@ void HostQueue::finish(bool cpu_wait) { if (IS_HIP) { command = getLastQueuedCommand(true); if (command == nullptr) { - assert(GetSubmittionBatch() == nullptr && + assert(GetSubmissionBatch() == nullptr && "Can't claim the queue is finished with the active batch!"); return; } } // Force marker if the batch wasn't sent for CPU update or fence is dirty - if (nullptr == command || (GetSubmittionBatch() != nullptr) || vdev()->isFenceDirty()) { + if (nullptr == command || (GetSubmissionBatch() != nullptr) || vdev()->isFenceDirty()) { if (nullptr != command) { command->release(); } diff --git a/rocclr/platform/commandqueue.hpp b/rocclr/platform/commandqueue.hpp index 9500fc883b..1c2118fe17 100644 --- 
a/rocclr/platform/commandqueue.hpp +++ b/rocclr/platform/commandqueue.hpp @@ -246,7 +246,7 @@ class HostQueue : public CommandQueue { Command* getLastQueuedCommand(bool retain); //! Get the submitted batch - Command* GetSubmittionBatch() const { return head_; } + Command* GetSubmissionBatch() const { return head_; } //! Insert a command into the linked list of submitted commands void FormSubmissionBatch(Command* command) { @@ -257,6 +257,7 @@ class HostQueue : public CommandQueue { tail_->setNext(command); tail_ = command; } + size_++; command->setStatus(CL_SUBMITTED); command->retain(); // @note: runtime needs double retain in order to maintain the batch, @@ -278,8 +279,14 @@ class HostQueue : public CommandQueue { lastEnqueueCommand_ = command; } + //! Calls notifyCmdQueue() on the given command once the batch holds more than DEBUG_CLR_MAX_BATCH_SIZE commands (presumably this flushes the pending batch; confirm against Command::notifyCmdQueue) + void FlushSubmissionBatch(Command* command) { + if (size_ > DEBUG_CLR_MAX_BATCH_SIZE) { + command->notifyCmdQueue(); + } + } //! Reset the command batch list - void ResetSubmissionBatch() { head_ = nullptr; } + void ResetSubmissionBatch() { head_ = nullptr; size_ = 0; } //! Set queue status void SetQueueStatus() { isActive_ = true; } @@ -288,8 +295,9 @@ class HostQueue : public CommandQueue { bool GetQueueStatus() { return isActive_; } private: - Command* head_; //!< Head of the batch list - Command* tail_; //!< Tail of the batch list + Command* head_; //!< Head of the batch list + Command* tail_; //!< Tail of the batch list + size_t size_ = 0; //!< Number of commands in the current batch (incremented in FormSubmissionBatch, zeroed in ResetSubmissionBatch) //! 
True if this command queue is active bool isActive_; diff --git a/rocclr/utils/flags.hpp b/rocclr/utils/flags.hpp index 0f50070442..5d39181b8f 100644 --- a/rocclr/utils/flags.hpp +++ b/rocclr/utils/flags.hpp @@ -259,6 +259,8 @@ release(bool, HIP_ALWAYS_USE_NEW_COMGR_UNBUNDLING_ACTION, false, \ "Force to always use new comgr unbundling action") \ release(uint, DEBUG_HIP_BLOCK_SYNC, 50, \ "Blocks synchronization on CPU until the callback processing is done")\ +release(uint, DEBUG_CLR_MAX_BATCH_SIZE, 1000, \ + "Forces the callback to clean-up CPU submission queue") \ release(bool, DEBUG_HIP_KERNARG_COPY_OPT, true, \ "Enable/Disable multiple kern arg copies") \ release(bool, DEBUG_CLR_USE_STDMUTEX_IN_AMD_MONITOR, false, \