diff --git a/rocclr/runtime/device/gpu/gpuvirtual.cpp b/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 20564e3cdb..7713f294c9 100644
--- a/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -2782,12 +2782,6 @@ VirtualGPU::awaitCompletion(CommandBatch* cb, const amd::Event* waitingEvent)
 void
 VirtualGPU::flush(amd::Command* list, bool wait)
 {
-    //! @note: Even flush() requires a lock, because GSL can
-    //! defer destruction of internal memory objects and releases them
-    //! on GSL flush. If runtime calls another GSL flush at the same time,
-    //! then double release can occur.
-    amd::ScopedLock lock(execution());
-
     CommandBatch* cb = NULL;
     bool    gpuCommand = false;
 
@@ -2807,13 +2801,20 @@ VirtualGPU::flush(amd::Command* list, bool wait)
         cb = new CommandBatch(list, cal()->events_, cal()->lastTS_);
     }
 
-    for (uint i = 0; i < AllEngines; ++i) {
-        flushDMA(i);
-        // Reset event so we won't try to wait again,
-        // if runtime didn't submit any commands
-        //! @note: it's safe to invalidate events, since
-        //! we already saved them with the batch creation step above
-        cal_.events_[i].invalidate();
+    {
+        //! @note: flushDMA() requires a lock, because GSL can
+        //! defer destruction of internal memory objects and releases them
+        //! on GSL flush. If runtime calls another GSL flush at the same time,
+        //! then double release can occur.
+        amd::ScopedLock lock(execution());
+        for (uint i = 0; i < AllEngines; ++i) {
+            flushDMA(i);
+            // Reset event so we won't try to wait again,
+            // if runtime didn't submit any commands
+            //! @note: it's safe to invalidate events, since
+            //! we already saved them with the batch creation step above
+            cal_.events_[i].invalidate();
+        }
     }
 
     // Mark last TS as NULL, so runtime won't process empty batches with the old TS