diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp index 036a75a995..84858beb3a 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp @@ -365,6 +365,7 @@ Device::Device() , lockAsyncOps_(NULL) , lockAsyncOpsForInitHeap_(NULL) , vgpusAccess_(NULL) + , scratchAlloc_(NULL) , xferRead_(NULL) , xferWrite_(NULL) , vaCacheAccess_(NULL) @@ -425,6 +426,7 @@ Device::~Device() delete lockAsyncOps_; delete lockAsyncOpsForInitHeap_; delete vgpusAccess_; + delete scratchAlloc_; delete vaCacheAccess_; delete vaCacheList_; @@ -793,6 +795,12 @@ Device::create(CALuint ordinal) if (NULL == vgpusAccess_) { return false; } + + scratchAlloc_ = new amd::Monitor("Scratch Allocation Lock", true); + if (NULL == scratchAlloc_) { + return false; + } + vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true); if (NULL == vaCacheAccess_) { return false; @@ -2283,7 +2291,7 @@ Device::allocScratch(uint regNum, const VirtualGPU* vgpu) { if (regNum > 0) { // Serialize the scratch buffer allocation code - amd::ScopedLock lk(*lockAsyncOps_); + amd::ScopedLock lk(*scratchAlloc_); uint sb = vgpu->hwRing(); // Check if the current buffer isn't big enough diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp index ffcc5f056a..c5657e4cb2 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp @@ -609,6 +609,7 @@ private: amd::Monitor* lockAsyncOps_; //!< Lock to serialise all async ops on this device amd::Monitor* lockAsyncOpsForInitHeap_; //!< Lock to serialise all async ops on initialization heap operation amd::Monitor* vgpusAccess_; //!< Lock to serialise virtual gpu list access + amd::Monitor* scratchAlloc_; //!< Lock to serialise scratch allocation XferBuffers* xferRead_; //!< Transfer buffers read XferBuffers* xferWrite_; 
//!< Transfer buffers write diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index b436dcaf16..20564e3cdb 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -2782,6 +2782,12 @@ VirtualGPU::awaitCompletion(CommandBatch* cb, const amd::Event* waitingEvent) void VirtualGPU::flush(amd::Command* list, bool wait) { + //! @note: Even flush() requires a lock, because GSL can + //! defer destruction of internal memory objects and release them + //! on GSL flush. If the runtime calls another GSL flush at the same time, + //! then a double release can occur. + amd::ScopedLock lock(execution()); + CommandBatch* cb = NULL; bool gpuCommand = false; @@ -2805,8 +2811,8 @@ VirtualGPU::flush(amd::Command* list, bool wait) flushDMA(i); // Reset event so we won't try to wait again, // if runtime didn't submit any commands - // @note: it's safe to invalidate events, since - // we already saved them with the batch creation step above + //! @note: it's safe to invalidate events, since + //! we already saved them with the batch creation step above cal_.events_[i].invalidate(); }