From 11d44f103c52a96d3fd32cdcd2ef07d5457bd01f Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 27 Mar 2018 15:17:14 -0400
Subject: [PATCH] P4 to Git Change 1533139 by todli@todli-win-opencl-kv1 on
2018/03/27 15:01:15
SWDEV-147368 - Choose proper engine for markerWrite to SDI ExternalPhysical Memory according to last copy engine
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#80 edit
[ROCm/clr commit: 0f6d9fea875fcf2772a0baba070dedfa4b2cc057]
---
.../rocclr/runtime/device/pal/palvirtual.cpp | 33 ++++++++++++-------
1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index fd4e3fa67b..dbfa84e012 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -2542,29 +2542,40 @@ void VirtualGPU::submitSignal(amd::SignalCommand& vcmd) {
pal::Memory* pGpuMemory = dev().getGpuMemory(&vcmd.memory());
GpuEvent gpuEvent;
- eventBegin(MainEngine);
-
uint32_t value = vcmd.markerValue();
- addVmMemory(pGpuMemory);
if (vcmd.type() == CL_COMMAND_WAIT_SIGNAL_AMD) {
+ eventBegin(MainEngine);
+ addVmMemory(pGpuMemory);
+
iCmd()->CmdWaitBusAddressableMemoryMarker(*(pGpuMemory->iMem()), value, 0xFFFFFFFF,
Pal::CompareFunc::GreaterEqual);
+ eventEnd(MainEngine, gpuEvent);
+
} else if (vcmd.type() == CL_COMMAND_WRITE_SIGNAL_AMD) {
+
+ EngineType activeEngineID = engineID_;
+ engineID_ = static_cast<EngineType>(pGpuMemory->getGpuEvent(*this)->engineId_);
+
// Make sure GPU finished operation and data reached memory before the marker write
static constexpr bool FlushL2 = true;
addBarrier(FlushL2);
- // \todo: Implement the right changes in PAL
- // Workarounds: for CP overfetch issues and the lack of SDMA sync
+ // Workarounds: We had systems where an extra delay was necessary.
{
- // Flush CB associated with the DGMA buffer
- isDone(pGpuMemory->getGpuEvent(*this));
- // Make sure SDMA is done on the DGMA buffer
- pGpuMemory->wait(*this, true);
+ // Flush CB associated with the DGMA buffer
+ isDone(pGpuMemory->getGpuEvent(*this));
}
- iCmd()->CmdUpdateBusAddressableMemoryMarker(*(pGpuMemory->iMem()), value);
+
+ eventBegin(engineID_);
+ queues_[engineID_]->addCmdMemRef(pGpuMemory->memRef());
+
+ queues_[engineID_]->iCmd()->
+ CmdUpdateBusAddressableMemoryMarker(*(pGpuMemory->iMem()), value);
+ eventEnd(engineID_, gpuEvent);
+
+ // Restore the original engine
+ engineID_ = activeEngineID;
}
- eventEnd(MainEngine, gpuEvent);
// Update the global GPU event
setGpuEvent(gpuEvent);