diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
index 036a75a995..84858beb3a 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -365,6 +365,7 @@ Device::Device()
     , lockAsyncOps_(NULL)
     , lockAsyncOpsForInitHeap_(NULL)
     , vgpusAccess_(NULL)
+    , scratchAlloc_(NULL)
     , xferRead_(NULL)
     , xferWrite_(NULL)
     , vaCacheAccess_(NULL)
@@ -425,6 +426,7 @@ Device::~Device()
     delete lockAsyncOps_;
     delete lockAsyncOpsForInitHeap_;
     delete vgpusAccess_;
+    delete scratchAlloc_;
     delete vaCacheAccess_;
     delete vaCacheList_;
 
@@ -793,6 +795,12 @@ Device::create(CALuint ordinal)
     if (NULL == vgpusAccess_) {
         return false;
     }
+    
+    scratchAlloc_ = new amd::Monitor("Scratch Allocation Lock", true);
+    if (NULL == scratchAlloc_) {
+        return false;
+    }
+
     vaCacheAccess_ = new amd::Monitor("VA Cache Ops Lock", true);
     if (NULL == vaCacheAccess_) {
         return false;
@@ -2283,7 +2291,7 @@ Device::allocScratch(uint regNum, const VirtualGPU* vgpu)
 {
     if (regNum > 0) {
         // Serialize the scratch buffer allocation code
-        amd::ScopedLock lk(*lockAsyncOps_);
+        amd::ScopedLock lk(*scratchAlloc_);
         uint    sb = vgpu->hwRing();
 
         // Check if the current buffer isn't big enough
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
index ffcc5f056a..c5657e4cb2 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpudevice.hpp
@@ -609,6 +609,7 @@ private:
     amd::Monitor*   lockAsyncOps_;  //!< Lock to serialise all async ops on this device
     amd::Monitor*   lockAsyncOpsForInitHeap_;  //!< Lock to serialise all async ops on initialization heap operation
     amd::Monitor*   vgpusAccess_;   //!< Lock to serialise virtual gpu list access
+    amd::Monitor*   scratchAlloc_;  //!< Lock to serialise scratch allocation
 
     XferBuffers*    xferRead_;      //!< Transfer buffers read
     XferBuffers*    xferWrite_;     //!< Transfer buffers write
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index b436dcaf16..20564e3cdb 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -2782,6 +2782,12 @@ VirtualGPU::awaitCompletion(CommandBatch* cb, const amd::Event* waitingEvent)
 void
 VirtualGPU::flush(amd::Command* list, bool wait)
 {
+    //! @note: Even flush() requires a lock, because GSL can
+    //! defer destruction of internal memory objects and release them
+    //! on a GSL flush. If the runtime calls another GSL flush at the
+    //! same time, then a double release can occur.
+    amd::ScopedLock lock(execution());
+
     CommandBatch* cb = NULL;
     bool    gpuCommand = false;
 
@@ -2805,8 +2811,8 @@ VirtualGPU::flush(amd::Command* list, bool wait)
         flushDMA(i);
         // Reset event so we won't try to wait again,
         // if runtime didn't submit any commands
-        // @note: it's safe to invalidate events, since
-        // we already saved them with the batch creation step above
+        //! @note: it's safe to invalidate events, since
+        //! we already saved them with the batch creation step above
         cal_.events_[i].invalidate();
     }