P4 to Git Change 1368703 by todli@todli-win-opencl-kv1 on 2017/02/02 18:12:31

SWDEV-96241 - Support SDI on PAL (runtime changes)

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#42 edit


[ROCm/clr commit: 1eda218988]
Этот коммит содержится в:
foreman
2017-02-02 18:23:12 -05:00
родитель 3db14ee4cb
Коммит fcf63c43c8
4 изменённых файлов: 49 добавлений и 53 удалений
-1
Просмотреть файл
@@ -168,7 +168,6 @@ Memory::create(
if (result) {
switch (memoryType()) {
case Resource::Pinned:
case Resource::ExternalPhysical:
// Marks memory object for direct GPU access to the host memory
flags_ |= HostMemoryDirectAccess;
break;
+14 -2
Просмотреть файл
@@ -399,9 +399,9 @@ Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo)
createInfo->heaps[0] = Pal::GpuHeapGartCacheable;
desc_.cardMemory_ = false;
break;
case Shader:
case BusAddressable:
case ExternalPhysical:
desc_.cardMemory_ = false;
case Shader:
// Fall through to process the memory allocation ...
case Local:
createInfo->heapCount = 2;
@@ -1075,6 +1075,18 @@ Resource::create(MemoryType memType, CreateParams* params)
createInfo.alignment = MaxGpuAlignment;
createInfo.vaRange = Pal::VaRange::Default;
createInfo.priority = Pal::GpuMemPriority::Normal;
if (memoryType() == ExternalPhysical){
cl_bus_address_amd bus_address =
(reinterpret_cast<amd::Buffer*>(params->owner_))->busAddress();
createInfo.surfaceBusAddr = bus_address.surface_bus_address;
createInfo.markerBusAddr = bus_address.marker_bus_address;
createInfo.flags.sdiExternal = true;
}
else if (memoryType() == BusAddressable){
createInfo.flags.busAddressable = true;
}
memTypeToHeap(&createInfo);
// createInfo.priority;
memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
+3 -4
Просмотреть файл
@@ -353,13 +353,12 @@ Settings::create(
}
#endif // !defined(WITH_LIGHTNING_COMPILER)
//! @todo
/*
if (calAttr.totalSDIHeap > 0) {
if (palProp.gpuMemoryProperties.busAddressableMemSize > 0) {
//Enable bus addressable memory extension
enableExtension(ClAMDBusAddressableMemory);
}
//! @todo
/*
if (calAttr.longIdleDetect) {
// KMD is unable to detect if we map the visible memory for CPU access, so
// accessing persistent staged buffer may fail if LongIdleDetct is enabled.
+32 -46
Просмотреть файл
@@ -1264,6 +1264,7 @@ VirtualGPU::copyMemory(cl_command_type type
// Check if HW can be used for memory copy
switch (type) {
case CL_COMMAND_MAKE_BUFFERS_RESIDENT_AMD:
case CL_COMMAND_SVM_MEMCPY:
case CL_COMMAND_COPY_BUFFER: {
amd::Coord3D realSrcOrigin(srcOrigin[0]);
@@ -2731,28 +2732,28 @@ VirtualGPU::submitSignal(amd::SignalCommand & vcmd)
{
amd::ScopedLock lock(execution());
profilingBegin(vcmd);
pal::Memory* gpuMemory = dev().getGpuMemory(&vcmd.memory());
Unimplemented();
/*
pal::Memory* pGpuMemory = dev().getGpuMemory(&vcmd.memory());
GpuEvent gpuEvent;
eventBegin(MainEngine);
uint32_t value = vcmd.markerValue();
uint32_t size = vcmd.memory().getSize();
addVmMemory(pGpuMemory);
if (vcmd.type() == CL_COMMAND_WAIT_SIGNAL_AMD) {
uint64_t surfAddr = gpuMemory->iMem()->getPhysicalAddress(cs());
uint64_t markerAddr = gpuMemory->iMem()->getMarkerAddress(cs());
uint64_t markerOffset = markerAddr - surfAddr;
cs()->p2pMarkerOp(gpuMemory->iMem(), vcmd.markerValue(),
markerOffset, false);
iCmd()->CmdWaitMemoryValue(*(pGpuMemory->iMem()), size, value, 0xFFFFFFFF, Pal::CompareFunc::GreaterEqual);
}
else if (vcmd.type() == CL_COMMAND_WRITE_SIGNAL_AMD) {
GpuEvent gpuEvent;
eventBegin(MainEngine);
cs()->p2pMarkerOp(gpuMemory->iMem(), vcmd.markerValue(), vcmd.markerOffset(), true);
//! @todo We don't need flush if an event is tracked.
cs()->Flush();
eventEnd(MainEngine, gpuEvent);
gpuMemory->setBusy(*this, gpuEvent);
// Update the global GPU event
setGpuEvent(gpuEvent);
iCmd()->CmdUpdateMemory(*(pGpuMemory->iMem()), size, 4, &value);
}
*/
eventEnd(MainEngine, gpuEvent);
pGpuMemory->setBusy(*this, gpuEvent);
// Update the global GPU event
setGpuEvent(gpuEvent);
profilingEnd(vcmd);
}
@@ -2761,39 +2762,24 @@ VirtualGPU::submitMakeBuffersResident(amd::MakeBuffersResidentCommand & vcmd)
{
amd::ScopedLock lock(execution());
profilingBegin(vcmd);
std::vector<amd::Memory*> memObjects = vcmd.memObjects();
cl_uint numObjects = memObjects.size();
Pal::IGpuMemory** pGpuMemObjects = new Pal::IGpuMemory*[numObjects];
uint32_t numObjects = memObjects.size();
for(cl_uint i = 0; i < numObjects; ++i)
for (int i = 0; i < numObjects; i++)
{
pal::Memory* gpuMemory = dev().getGpuMemory(memObjects[i]);
pGpuMemObjects[i] = gpuMemory->iMem();
gpuMemory->syncCacheFromHost(*this);
}
// dummy render into the SDI surfaces so that KMD will be able to provide the bus addresses
uint dummy = 0;
static_cast<const KernelBlitManager&>(dev().xferMgr())
.writeRawData(*(dev().getGpuMemory(memObjects[i])), sizeof(dummy), &dummy);
uint64_t* surfBusAddr = new uint64_t[numObjects];
uint64_t* markerBusAddr = new uint64_t[numObjects];
Unimplemented();
/*
gslErrorCode res = cs()->makeBuffersResident(numObjects, pGpuMemObjects,
surfBusAddr, markerBusAddr);
if(res != GSL_NO_ERROR) {
LogError("MakeBuffersResident failed");
vcmd.setStatus(CL_INVALID_OPERATION);
pal::Memory* pGpuMemory = dev().getGpuMemory(memObjects[i]);
pGpuMemory->syncCacheFromHost(*this);
vcmd.busAddress()[i].surface_bus_address = pGpuMemory->iMem()->Desc().surfaceBusAddr;
vcmd.busAddress()[i].marker_bus_address = pGpuMemory->iMem()->Desc().markerBusAddr;
}
else {
cl_bus_address_amd* busAddr = vcmd.busAddress();
for(cl_uint i = 0; i < numObjects; ++i)
{
busAddr[i].surface_bus_address = surfBusAddr[i];
busAddr[i].marker_bus_address = markerBusAddr[i];
}
}
*/
delete[] pGpuMemObjects;
delete[] surfBusAddr;
delete[] markerBusAddr;
profilingEnd(vcmd);
}