From fcf63c43c8ab213e7a6385118bd6e00a147100d2 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 2 Feb 2017 18:23:12 -0500
Subject: [PATCH] P4 to Git Change 1368703 by todli@todli-win-opencl-kv1 on
2017/02/02 18:12:31
SWDEV-96241 - Support SDI on PAL (runtime changes)
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#42 edit
[ROCm/clr commit: 1eda21898892da65786c260991678fe7737133a8]
---
.../rocclr/runtime/device/pal/palmemory.cpp | 1 -
.../rocclr/runtime/device/pal/palresource.cpp | 16 +++-
.../rocclr/runtime/device/pal/palsettings.cpp | 7 +-
.../rocclr/runtime/device/pal/palvirtual.cpp | 78 ++++++++-----------
4 files changed, 49 insertions(+), 53 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/pal/palmemory.cpp b/projects/clr/rocclr/runtime/device/pal/palmemory.cpp
index 8e1ab0a7ff..eb87409aef 100644
--- a/projects/clr/rocclr/runtime/device/pal/palmemory.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palmemory.cpp
@@ -168,7 +168,6 @@ Memory::create(
if (result) {
switch (memoryType()) {
case Resource::Pinned:
- case Resource::ExternalPhysical:
// Marks memory object for direct GPU access to the host memory
flags_ |= HostMemoryDirectAccess;
break;
diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
index b6557bf473..85663e5689 100644
--- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
@@ -399,9 +399,9 @@ Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo)
createInfo->heaps[0] = Pal::GpuHeapGartCacheable;
desc_.cardMemory_ = false;
break;
- case Shader:
- case BusAddressable:
case ExternalPhysical:
+ desc_.cardMemory_ = false;
+ case Shader:
// Fall through to process the memory allocation ...
case Local:
createInfo->heapCount = 2;
@@ -1075,6 +1075,18 @@ Resource::create(MemoryType memType, CreateParams* params)
createInfo.alignment = MaxGpuAlignment;
createInfo.vaRange = Pal::VaRange::Default;
createInfo.priority = Pal::GpuMemPriority::Normal;
+
+ if (memoryType() == ExternalPhysical){
+ cl_bus_address_amd bus_address =
+ (reinterpret_cast(params->owner_))->busAddress();
+ createInfo.surfaceBusAddr = bus_address.surface_bus_address;
+ createInfo.markerBusAddr = bus_address.marker_bus_address;
+ createInfo.flags.sdiExternal = true;
+ }
+ else if (memoryType() == BusAddressable){
+ createInfo.flags.busAddressable = true;
+ }
+
memTypeToHeap(&createInfo);
// createInfo.priority;
memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
index 284e14706d..54468881ee 100644
--- a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
@@ -353,13 +353,12 @@ Settings::create(
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
-//! @todo
-/*
- if (calAttr.totalSDIHeap > 0) {
+ if (palProp.gpuMemoryProperties.busAddressableMemSize > 0) {
//Enable bus addressable memory extension
enableExtension(ClAMDBusAddressableMemory);
}
-
+//! @todo
+/*
if (calAttr.longIdleDetect) {
// KMD is unable to detect if we map the visible memory for CPU access, so
// accessing persistent staged buffer may fail if LongIdleDetct is enabled.
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 54bee9dbfa..caac438bcb 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -1264,6 +1264,7 @@ VirtualGPU::copyMemory(cl_command_type type
// Check if HW can be used for memory copy
switch (type) {
+ case CL_COMMAND_MAKE_BUFFERS_RESIDENT_AMD:
case CL_COMMAND_SVM_MEMCPY:
case CL_COMMAND_COPY_BUFFER: {
amd::Coord3D realSrcOrigin(srcOrigin[0]);
@@ -2731,28 +2732,28 @@ VirtualGPU::submitSignal(amd::SignalCommand & vcmd)
{
amd::ScopedLock lock(execution());
profilingBegin(vcmd);
- pal::Memory* gpuMemory = dev().getGpuMemory(&vcmd.memory());
- Unimplemented();
-/*
+ pal::Memory* pGpuMemory = dev().getGpuMemory(&vcmd.memory());
+
+ GpuEvent gpuEvent;
+ eventBegin(MainEngine);
+
+ uint32_t value = vcmd.markerValue();
+ uint32_t size = vcmd.memory().getSize();
+
+ addVmMemory(pGpuMemory);
+
if (vcmd.type() == CL_COMMAND_WAIT_SIGNAL_AMD) {
- uint64_t surfAddr = gpuMemory->iMem()->getPhysicalAddress(cs());
- uint64_t markerAddr = gpuMemory->iMem()->getMarkerAddress(cs());
- uint64_t markerOffset = markerAddr - surfAddr;
- cs()->p2pMarkerOp(gpuMemory->iMem(), vcmd.markerValue(),
- markerOffset, false);
+ iCmd()->CmdWaitMemoryValue(*(pGpuMemory->iMem()), size, value, 0xFFFFFFFF, Pal::CompareFunc::GreaterEqual);
}
else if (vcmd.type() == CL_COMMAND_WRITE_SIGNAL_AMD) {
- GpuEvent gpuEvent;
- eventBegin(MainEngine);
- cs()->p2pMarkerOp(gpuMemory->iMem(), vcmd.markerValue(), vcmd.markerOffset(), true);
- //! @todo We don't need flush if an event is tracked.
- cs()->Flush();
- eventEnd(MainEngine, gpuEvent);
- gpuMemory->setBusy(*this, gpuEvent);
- // Update the global GPU event
- setGpuEvent(gpuEvent);
+ iCmd()->CmdUpdateMemory(*(pGpuMemory->iMem()), size, 4, &value);
}
-*/
+
+ eventEnd(MainEngine, gpuEvent);
+ pGpuMemory->setBusy(*this, gpuEvent);
+ // Update the global GPU event
+ setGpuEvent(gpuEvent);
+
profilingEnd(vcmd);
}
@@ -2761,39 +2762,24 @@ VirtualGPU::submitMakeBuffersResident(amd::MakeBuffersResidentCommand & vcmd)
{
amd::ScopedLock lock(execution());
profilingBegin(vcmd);
+
std::vector memObjects = vcmd.memObjects();
- cl_uint numObjects = memObjects.size();
- Pal::IGpuMemory** pGpuMemObjects = new Pal::IGpuMemory*[numObjects];
+ uint32_t numObjects = memObjects.size();
- for(cl_uint i = 0; i < numObjects; ++i)
+ for (int i = 0; i < numObjects; i++)
{
- pal::Memory* gpuMemory = dev().getGpuMemory(memObjects[i]);
- pGpuMemObjects[i] = gpuMemory->iMem();
- gpuMemory->syncCacheFromHost(*this);
- }
+ // Dummy write into each SDI surface so that KMD will be able to provide the bus addresses
+ uint dummy = 0;
+ static_cast(dev().xferMgr())
+ .writeRawData(*(dev().getGpuMemory(memObjects[i])), sizeof(dummy), &dummy);
- uint64_t* surfBusAddr = new uint64_t[numObjects];
- uint64_t* markerBusAddr = new uint64_t[numObjects];
- Unimplemented();
-/*
- gslErrorCode res = cs()->makeBuffersResident(numObjects, pGpuMemObjects,
- surfBusAddr, markerBusAddr);
- if(res != GSL_NO_ERROR) {
- LogError("MakeBuffersResident failed");
- vcmd.setStatus(CL_INVALID_OPERATION);
+ pal::Memory* pGpuMemory = dev().getGpuMemory(memObjects[i]);
+
+ pGpuMemory->syncCacheFromHost(*this);
+
+ vcmd.busAddress()[i].surface_bus_address = pGpuMemory->iMem()->Desc().surfaceBusAddr;
+ vcmd.busAddress()[i].marker_bus_address = pGpuMemory->iMem()->Desc().markerBusAddr;
}
- else {
- cl_bus_address_amd* busAddr = vcmd.busAddress();
- for(cl_uint i = 0; i < numObjects; ++i)
- {
- busAddr[i].surface_bus_address = surfBusAddr[i];
- busAddr[i].marker_bus_address = markerBusAddr[i];
- }
- }
-*/
- delete[] pGpuMemObjects;
- delete[] surfBusAddr;
- delete[] markerBusAddr;
profilingEnd(vcmd);
}