From af4960039522f322c792162a55db7d8bf5edfb4a Mon Sep 17 00:00:00 2001 From: foreman Date: Thu, 1 Mar 2018 18:37:54 -0500 Subject: [PATCH] P4 to Git Change 1521567 by gandryey@gera-w8 on 2018/03/01 18:29:47 SWDEV-145716 - dGMA via OpenCL not working properly on AMD EPYC Ethanol platform with two WX9100/W9100s - Add a stall and cache flushes before a barrier write in GSL and PAL - Place temporary workarounds for CP prefetch of WRITE_DATA and the lack of SDMA marker in PAL Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#414 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#76 edit [ROCm/clr commit: 30eacbe2435e811c67e436c983267ddc679b05f9] --- projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp | 2 ++ projects/clr/rocclr/runtime/device/pal/palvirtual.cpp | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index cec860d7d2..0c0bf44400 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -2380,6 +2380,8 @@ void VirtualGPU::submitSignal(amd::SignalCommand& vcmd) { uint64_t markerOffset = markerAddr - surfAddr; cs()->p2pMarkerOp(gpuMemory->gslResource(), vcmd.markerValue(), markerOffset, false); } else if (vcmd.type() == CL_COMMAND_WRITE_SIGNAL_AMD) { + static constexpr bool FlushL2 = true; + flushCUCaches(FlushL2); cs()->p2pMarkerOp(gpuMemory->gslResource(), vcmd.markerValue(), vcmd.markerOffset(), true); } eventEnd(MainEngine, gpuEvent); diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index f80f730394..f7564b328f 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -2537,6 +2537,17 @@ void VirtualGPU::submitSignal(amd::SignalCommand& vcmd) { iCmd()->CmdWaitBusAddressableMemoryMarker(*(pGpuMemory->iMem()), value, 0xFFFFFFFF, Pal::CompareFunc::GreaterEqual); } else if (vcmd.type() == CL_COMMAND_WRITE_SIGNAL_AMD) { + // Make sure GPU finished operation and data reached memory before the marker write + static constexpr bool FlushL2 = true; + addBarrier(FlushL2); + // \todo: Implement the right changes in PAL + // Workarounds: for CP overfetch issues and the lack of SDMA sync + { + // Flush CB associated with the DGMA buffer + isDone(pGpuMemory->getGpuEvent(*this)); + // Make sure SDMA is done on the DGMA buffer + pGpuMemory->wait(*this, true); + } iCmd()->CmdUpdateBusAddressableMemoryMarker(*(pGpuMemory->iMem()), value); } eventEnd(MainEngine, gpuEvent);