diff --git a/projects/clr/hipamd/src/hip_graph_internal.cpp b/projects/clr/hipamd/src/hip_graph_internal.cpp index 4c42eedf41..36abf8f47a 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.cpp +++ b/projects/clr/hipamd/src/hip_graph_internal.cpp @@ -66,6 +66,7 @@ std::unordered_set GraphExec::graphExecSet_; amd::Monitor GraphExec::graphExecSetLock_{"Guards global exec graph set"}; std::unordered_set UserObject::ObjectSet_; amd::Monitor UserObject::UserObjectLock_{"Guards global user object"}; +amd::Monitor GraphNode::WorkerThreadLock_{"Guards mem map add/remove against work thread"}; hipError_t GraphMemcpyNode1D::ValidateParams(void* dst, const void* src, size_t count, hipMemcpyKind kind) { diff --git a/projects/clr/hipamd/src/hip_graph_internal.hpp b/projects/clr/hipamd/src/hip_graph_internal.hpp index c06a376286..4e0cce378c 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.hpp +++ b/projects/clr/hipamd/src/hip_graph_internal.hpp @@ -221,6 +221,7 @@ struct GraphNode : public hipGraphNodeDOTAttribute { struct Graph* parentGraph_; static std::unordered_set nodeSet_; static amd::Monitor nodeSetLock_; + static amd::Monitor WorkerThreadLock_; unsigned int isEnabled_; bool signal_is_required_ = false; //!< This node requires a signal on the command std::vector gpuPackets_; //!< GPU Packet to enqueue during graph launch @@ -1544,7 +1545,13 @@ class GraphMemcpyNode1D : public GraphMemcpyNode { } commands_.reserve(1); amd::Command* command = nullptr; + if (!AMD_DIRECT_DISPATCH) { + WorkerThreadLock_.lock(); + } status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *stream); + if (!AMD_DIRECT_DISPATCH) { + WorkerThreadLock_.unlock(); + } commands_.emplace_back(command); return status; } @@ -2275,6 +2282,9 @@ class GraphMemAllocNode final : public GraphNode { virtual void submit(device::VirtualDevice& device) final { // Remove VA reference from the global mapping. Runtime has to keep a dummy reference for // validation logic during the capture or creation of the nodes + if (!AMD_DIRECT_DISPATCH) { + WorkerThreadLock_.lock(); + } if (amd::MemObjMap::FindMemObj(va_->getSvmPtr())) { amd::MemObjMap::RemoveMemObj(va_->getSvmPtr()); } @@ -2284,6 +2294,9 @@ class GraphMemAllocNode final : public GraphNode { auto dptr = graph_->AllocateMemory(aligned_size, static_cast(queue()), nullptr); if (dptr == nullptr) { setStatus(CL_INVALID_OPERATION); + if (!AMD_DIRECT_DISPATCH) { + WorkerThreadLock_.unlock(); + } return; } size_t offset = 0; @@ -2294,6 +2307,9 @@ class GraphMemAllocNode final : public GraphNode { size_ = aligned_size; // Execute the original mapping command VirtualMapCommand::submit(device); + if (!AMD_DIRECT_DISPATCH) { + WorkerThreadLock_.unlock(); + } amd::Memory* vaddr_sub_obj = amd::MemObjMap::FindMemObj(va_->getSvmPtr()); assert(vaddr_sub_obj != nullptr); queue()->device().SetMemAccess(vaddr_sub_obj->getSvmPtr(), aligned_size,