diff --git a/rocclr/platform/command.cpp b/rocclr/platform/command.cpp index 9f8751401a..f3e7943414 100644 --- a/rocclr/platform/command.cpp +++ b/rocclr/platform/command.cpp @@ -258,17 +258,21 @@ void Command::enqueue() { } ClPrint(LOG_DEBUG, LOG_CMD, "command is enqueued: %p", this); + + // Direct dispatch logic below will submit the command immediately, but the command status + // update will occur later after flush() with a wait if (AMD_DIRECT_DISPATCH) { + // The batch update must be lock protected to avoid a race condition + // when multiple threads submit/flush/update the batch at the same time + ScopedLock sl(queue_->lock()); + queue_->FormSubmissionBatch(this); if (type() == CL_COMMAND_MARKER || type() == 0) { - setStatus(CL_SUBMITTED); - queue_->vdev()->flush(); - retain(); - setStatus(CL_COMPLETE); + // Flush the current batch and wait for the results, since it's a marker. + // @todo: The condition requires a reevaluation to determine if any marker needs a wait. + queue_->vdev()->flush(queue_->GetSubmittionBatch()); + queue_->ResetSubmissionBatch(); } else { - setStatus(CL_SUBMITTED); submit(*queue_->vdev()); - retain(); - setStatus(CL_COMPLETE); } } else { queue_->append(*this); diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp index 6f8d44393b..8ff9a67414 100644 --- a/rocclr/platform/commandqueue.cpp +++ b/rocclr/platform/commandqueue.cpp @@ -37,9 +37,11 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti uint queueRTCUs, Priority priority, const std::vector& cuMask) : CommandQueue(context, device, properties, device.info().queueProperties_, queueRTCUs, priority, cuMask), - lastEnqueueCommand_(nullptr) { + lastEnqueueCommand_(nullptr), + head_(nullptr), + tail_(nullptr) { if (AMD_DIRECT_DISPATCH) { - // Initialize the queue + // Initialize the queue thread_.Init(this); } else { if (thread_.state() >= Thread::INITIALIZED) { @@ -222,13 +224,25 @@ bool HostQueue::isEmpty() { } Command* HostQueue::getLastQueuedCommand(bool retain) { - // Get last submitted command - ScopedLock l(lastCmdLock_); + if (AMD_DIRECT_DISPATCH) { + // The batch update must be lock protected to avoid a race condition + // when multiple threads submit/flush/update the batch at the same time + ScopedLock sl(lock()); - // Since the lastCmdLock_ is acquired, it is safe to read and retain the lastEnqueueCommand. It is - // guaranteed that the pointer will not change. - if (retain && lastEnqueueCommand_ != nullptr) { - lastEnqueueCommand_->retain(); + // Since the lastCmdLock_ is acquired, it is safe to read and retain the lastEnqueueCommand. + // It is guaranteed that the pointer will not change. + if (retain && lastEnqueueCommand_ != nullptr) { + lastEnqueueCommand_->retain(); + } + } else { + // Get last submitted command + ScopedLock l(lastCmdLock_); + + // Since the lastCmdLock_ is acquired, it is safe to read and retain the lastEnqueueCommand. + // It is guaranteed that the pointer will not change. + if (retain && lastEnqueueCommand_ != nullptr) { + lastEnqueueCommand_->retain(); + } } return lastEnqueueCommand_; diff --git a/rocclr/platform/commandqueue.hpp b/rocclr/platform/commandqueue.hpp index 588485b2fe..a0e013c27f 100644 --- a/rocclr/platform/commandqueue.hpp +++ b/rocclr/platform/commandqueue.hpp @@ -111,6 +111,9 @@ class CommandQueue : public RuntimeObject { //! Returns the CU mask array const std::vector& cuMask() const { return cuMask_; } + //! Returns the queue lock + Monitor& lock() { return queueLock_; } + protected: //! CommandQueue constructor is protected //! to keep the CommandQueue class as a virtual interface @@ -241,6 +244,43 @@ class HostQueue : public CommandQueue { //! Get last enqueued command Command* getLastQueuedCommand(bool retain); + + //! Get the submitted batch + Command* GetSubmittionBatch() const { return head_; } + + //! Insert a command into the linked list of submitted commands + void FormSubmissionBatch(Command* command) { + // Insert the command to the linked list. + if (nullptr == head_) { // if the list is empty + head_ = tail_ = command; + } else { + tail_->setNext(command); + tail_ = command; + } + command->setStatus(CL_SUBMITTED); + command->retain(); + // @note: runtime needs double retain in order to maintain the batch, + // because setStatus(COMPLETE) releases command and batch update may have + // an invalid access + command->retain(); + + // Release the last command in the batch + if (lastEnqueueCommand_ != nullptr) { + lastEnqueueCommand_->release(); + } + + // Extra retain for the last command + command->retain(); + + lastEnqueueCommand_ = command; + } + + //! Reset the command batch list + void ResetSubmissionBatch() { head_ = nullptr; } + +private: + Command* head_; //!< Head of the batch list + Command* tail_; //!< Tail of the batch list }; @@ -271,9 +311,6 @@ class DeviceQueue : public CommandQueue { //! Returns virtual device for this device queue device::VirtualDevice* vDev() const { return virtualDevice_; } - //! Returns the queue lock - Monitor& lock() { return queueLock_; } - private: uint size_; //!< Device queue size device::VirtualDevice* virtualDevice_; //!< Virtual device for this queue