diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index 414046eb5b..363a96c6e6 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -87,7 +87,7 @@ VirtualGPU::MemoryDependency::validate( if (flushL1Cache) { // Flush cache - gpu.flushL1Cache(); + gpu.flushCUCaches(); // Clear memory dependency state const static bool All = true; @@ -1724,8 +1724,6 @@ VirtualGPU::submitKernelInternalHSA( if (hsaKernel.dynamicParallelism()) { // Make sure exculsive access to the device queue amd::ScopedLock(defQueue->lock()); - //! \todo Remove flush. We start parent earlier. - flushDMA(MainEngine); if (GPU_PRINT_CHILD_KERNEL != 0) { waitForEvent(&gpuEvent); @@ -1819,6 +1817,8 @@ VirtualGPU::submitKernelInternalHSA( *gpuDefQueue->virtualQueue_, *gpuDefQueue->schedParams_, gpuDefQueue->schedParamIdx_, gpuDefQueue->vqHeader_->aql_slot_num); + const static bool FlushL2 = true; + gpuDefQueue->flushCUCaches(FlushL2); // Get the address of PM4 template and add write it to params //! @note DMA flush must not occur between patch and the scheduler @@ -3290,7 +3290,7 @@ VirtualGPU::processMemObjectsHSA( *reinterpret_cast(params + desc.offset_)); if (!svmMem) { //!\todo Do we have to sync cache coherency or wait for SDMA? - flushL1Cache(); + flushCUCaches(); break; } } diff --git a/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLContext.cpp b/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLContext.cpp index 540aee8329..46c078df59 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLContext.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLContext.cpp @@ -464,9 +464,9 @@ CALGSLContext::flushIOCaches() const } void -CALGSLContext::flushL1Cache() const +CALGSLContext::flushCUCaches(bool flushL2) const { - m_cs->FlushL1Cache(); + m_cs->FlushCUCaches(flushL2); } gslProgramObject diff --git a/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLContext.h b/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLContext.h index d7fd3c565b..849101423e 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLContext.h +++ b/projects/clr/rocclr/runtime/device/gpu/gslbe/src/rt/GSLContext.h @@ -51,7 +51,7 @@ public: bool isDone(GpuEvent* event); void waitForEvent(GpuEvent* event); void flushIOCaches() const; - void flushL1Cache() const; + void flushCUCaches(bool flushL2 = false) const; void eventBegin(EngineType engId) { m_eventQueue[engId].begin();