SWDEV-474937 - Fix race condition between main and work thread on windows.

Change-Id: I4d6b9de41d0e5a39094eb3babe47dffde72e0587


[ROCm/clr commit: 912de7ab44]
This commit is contained in:
Jaydeep Patel
2024-07-24 15:50:02 +00:00
committad av Jaydeepkumar Patel
förälder a381538161
incheckning c1f83df84c
2 ändrade filer med 17 tillägg och 0 borttagningar
@@ -66,6 +66,7 @@ std::unordered_set<GraphExec*> GraphExec::graphExecSet_;
amd::Monitor GraphExec::graphExecSetLock_{"Guards global exec graph set"};
std::unordered_set<UserObject*> UserObject::ObjectSet_;
amd::Monitor UserObject::UserObjectLock_{"Guards global user object"};
amd::Monitor GraphNode::WorkerThreadLock_{"Guards mem map add/remove against work thread"};
hipError_t GraphMemcpyNode1D::ValidateParams(void* dst, const void* src, size_t count,
hipMemcpyKind kind) {
@@ -221,6 +221,7 @@ struct GraphNode : public hipGraphNodeDOTAttribute {
struct Graph* parentGraph_;
static std::unordered_set<GraphNode*> nodeSet_;
static amd::Monitor nodeSetLock_;
static amd::Monitor WorkerThreadLock_;
unsigned int isEnabled_;
bool signal_is_required_ = false; //!< This node requires a signal on the command
std::vector<uint8_t *> gpuPackets_; //!< GPU Packet to enqueue during graph launch
@@ -1544,7 +1545,13 @@ class GraphMemcpyNode1D : public GraphMemcpyNode {
}
commands_.reserve(1);
amd::Command* command = nullptr;
if (!AMD_DIRECT_DISPATCH) {
WorkerThreadLock_.lock();
}
status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *stream);
if (!AMD_DIRECT_DISPATCH) {
WorkerThreadLock_.unlock();
}
commands_.emplace_back(command);
return status;
}
@@ -2275,6 +2282,9 @@ class GraphMemAllocNode final : public GraphNode {
virtual void submit(device::VirtualDevice& device) final {
// Remove VA reference from the global mapping. Runtime has to keep a dummy reference for
// validation logic during the capture or creation of the nodes
if (!AMD_DIRECT_DISPATCH) {
WorkerThreadLock_.lock();
}
if (amd::MemObjMap::FindMemObj(va_->getSvmPtr())) {
amd::MemObjMap::RemoveMemObj(va_->getSvmPtr());
}
@@ -2284,6 +2294,9 @@ class GraphMemAllocNode final : public GraphNode {
auto dptr = graph_->AllocateMemory(aligned_size, static_cast<hip::Stream*>(queue()), nullptr);
if (dptr == nullptr) {
setStatus(CL_INVALID_OPERATION);
if (!AMD_DIRECT_DISPATCH) {
WorkerThreadLock_.unlock();
}
return;
}
size_t offset = 0;
@@ -2294,6 +2307,9 @@ class GraphMemAllocNode final : public GraphNode {
size_ = aligned_size;
// Execute the original mapping command
VirtualMapCommand::submit(device);
if (!AMD_DIRECT_DISPATCH) {
WorkerThreadLock_.unlock();
}
amd::Memory* vaddr_sub_obj = amd::MemObjMap::FindMemObj(va_->getSvmPtr());
assert(vaddr_sub_obj != nullptr);
queue()->device().SetMemAccess(vaddr_sub_obj->getSvmPtr(), aligned_size,