From af4960039522f322c792162a55db7d8bf5edfb4a Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 1 Mar 2018 18:37:54 -0500
Subject: [PATCH] P4 to Git Change 1521567 by gandryey@gera-w8 on 2018/03/01
18:29:47
SWDEV-145716 - dGMA via OpenCL not working properly on AMD EPYC Ethanol platform with two WX9100/W9100s
- Add a stall and cache flushes before a barrier write in GSL and PAL
- Place temporary workarounds for CP prefetch of WRITE_DATA and the lack of SDMA marker in PAL
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#414 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#76 edit
[ROCm/clr commit: 30eacbe2435e811c67e436c983267ddc679b05f9]
---
projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp | 2 ++
projects/clr/rocclr/runtime/device/pal/palvirtual.cpp | 11 +++++++++++
2 files changed, 13 insertions(+)
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index cec860d7d2..0c0bf44400 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -2380,6 +2380,8 @@ void VirtualGPU::submitSignal(amd::SignalCommand& vcmd) {
uint64_t markerOffset = markerAddr - surfAddr;
cs()->p2pMarkerOp(gpuMemory->gslResource(), vcmd.markerValue(), markerOffset, false);
} else if (vcmd.type() == CL_COMMAND_WRITE_SIGNAL_AMD) {
+ static constexpr bool FlushL2 = true;
+ flushCUCaches(FlushL2);
cs()->p2pMarkerOp(gpuMemory->gslResource(), vcmd.markerValue(), vcmd.markerOffset(), true);
}
eventEnd(MainEngine, gpuEvent);
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index f80f730394..f7564b328f 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -2537,6 +2537,17 @@ void VirtualGPU::submitSignal(amd::SignalCommand& vcmd) {
iCmd()->CmdWaitBusAddressableMemoryMarker(*(pGpuMemory->iMem()), value, 0xFFFFFFFF,
Pal::CompareFunc::GreaterEqual);
} else if (vcmd.type() == CL_COMMAND_WRITE_SIGNAL_AMD) {
+ // Make sure the GPU has finished its operations and the data has reached memory before the marker write
+ static constexpr bool FlushL2 = true;
+ addBarrier(FlushL2);
+ // \todo: Implement the right changes in PAL
+ // Workarounds for CP overfetch issues and the lack of SDMA sync
+ {
+ // Flush CB associated with the DGMA buffer
+ isDone(pGpuMemory->getGpuEvent(*this));
+ // Make sure SDMA is done on the DGMA buffer
+ pGpuMemory->wait(*this, true);
+ }
iCmd()->CmdUpdateBusAddressableMemoryMarker(*(pGpuMemory->iMem()), value);
}
eventEnd(MainEngine, gpuEvent);