2014-07-04 16:17:05 -04:00
|
|
|
//! Implementation of GPU device memory management
|
|
|
|
|
|
|
|
|
|
#include "top.hpp"
|
|
|
|
|
#include "thread/thread.hpp"
|
|
|
|
|
#include "thread/monitor.hpp"
|
|
|
|
|
#include "device/device.hpp"
|
|
|
|
|
#include "device/gpu/gpudevice.hpp"
|
|
|
|
|
#include "device/gpu/gpublit.hpp"
|
|
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
|
#include <d3d10_1.h>
|
|
|
|
|
#include "amdocl/cl_d3d9_amd.hpp"
|
|
|
|
|
#include "amdocl/cl_d3d10_amd.hpp"
|
|
|
|
|
#include "amdocl/cl_d3d11_amd.hpp"
|
2017-04-13 13:56:38 -04:00
|
|
|
#endif //_WIN32
|
2014-07-04 16:17:05 -04:00
|
|
|
#include "amdocl/cl_gl_amd.hpp"
|
|
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <fstream>
|
|
|
|
|
#include <sstream>
|
|
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
|
|
//! Turn this on to enable sanity checks before and after every heap operation.
|
|
|
|
|
#if DEBUG
|
2017-04-13 13:56:38 -04:00
|
|
|
#define EXTRA_HEAP_CHECKS 1
|
|
|
|
|
#endif // DEBUG
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
namespace gpu {
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Constructs a GPU memory object for an abstraction-layer \a owner.
//!
//! The underlying Resource is sized in heap elements using a plain integer
//! division of \a size by Device::Heap::ElementSize.
//! NOTE(review): unlike the owner-less overload, \a size is not aligned up
//! before the division - confirm callers always pass an aligned size.
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t size)
    : device::Memory(owner),
      Resource(gpuDev, size / Device::Heap::ElementSize, Device::Heap::ElementType) {
  init();

  // An owner that hangs off a parent object is tracked as a sub-memory object
  const bool hasParent = (NULL != owner.parent());
  if (hasParent) {
    flags_ |= SubMemoryObject;
  }
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Constructs a GPU memory object that has no abstraction-layer owner.
//!
//! \a size is rounded up to a multiple of Device::Heap::ElementSize and then
//! converted to a heap element count for the underlying Resource.
Memory::Memory(const Device& gpuDev, size_t size)
    : device::Memory(size),
      Resource(gpuDev,
               amd::alignUp(size, Device::Heap::ElementSize) / Device::Heap::ElementSize,
               Device::Heap::ElementType) {
  // Reset the interop/pinning/parent bookkeeping
  init();
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Constructs a GPU memory object for \a owner as a resource of \a width
//! elements with the given surface \a format.
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, cmSurfFmt format)
    : device::Memory(owner), Resource(gpuDev, width, format) {
  init();

  // An owner that hangs off a parent object is tracked as a sub-memory object
  const bool hasParent = (NULL != owner.parent());
  if (hasParent) {
    flags_ |= SubMemoryObject;
  }
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Constructs an owner-less GPU memory object of \a size bytes, backed by a
//! resource of \a width elements with the given surface \a format.
Memory::Memory(const Device& gpuDev, size_t size, size_t width, cmSurfFmt format)
    : device::Memory(size), Resource(gpuDev, width, format) {
  // Reset the interop/pinning/parent bookkeeping
  init();
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Constructs a GPU image memory object for \a owner.
//!
//! All image parameters (dimensions, surface format, channel order, image
//! type and mip level count) are forwarded unchanged to the Resource base.
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, size_t height, size_t depth,
               cmSurfFmt format, gslChannelOrder chOrder, cl_mem_object_type imageType,
               uint mipLevels)
    : device::Memory(owner),
      Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels) {
  init();

  // An owner that hangs off a parent object is tracked as a sub-memory object
  const bool hasParent = (NULL != owner.parent());
  if (hasParent) {
    flags_ |= SubMemoryObject;
  }
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Constructs an owner-less GPU image memory object of \a size bytes.
//!
//! All image parameters (dimensions, surface format, channel order, image
//! type and mip level count) are forwarded unchanged to the Resource base.
Memory::Memory(const Device& gpuDev, size_t size, size_t width, size_t height, size_t depth,
               cmSurfFmt format, gslChannelOrder chOrder, cl_mem_object_type imageType,
               uint mipLevels)
    : device::Memory(size),
      Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels) {
  // Reset the interop/pinning/parent bookkeeping
  init();
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Puts the bookkeeping members shared by all constructors into their
//! initial state: no parent, no pinned or interop memory, no indirect maps.
void Memory::init() {
  // No linked objects yet
  parent_ = NULL;
  pinnedMemory_ = NULL;
  interopMemory_ = NULL;

  // Interop is disabled until createInterop() succeeds
  interopType_ = InteropNone;

  // No outstanding indirect maps
  indirectMapCount_ = 0;
}
|
|
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Retrieves the DXGI shared handle of a D3D resource.
//!
//! \param pIface  COM interface of the resource; must not be NULL
//! \return The shared handle, or a zero HANDLE if the interface doesn't
//!         expose IDXGIResource or the handle query doesn't return S_OK
static HANDLE getSharedHandle(IUnknown* pIface) {
  // Sanity checks
  assert(pIface != NULL);

  // Ask the object for its DXGI resource interface
  IDXGIResource* dxgiResource = NULL;
  const HRESULT queryStatus =
      pIface->QueryInterface(__uuidof(IDXGIResource), reinterpret_cast<void**>(&dxgiResource));
  if ((queryStatus != S_OK) || (dxgiResource == NULL)) {
    return (HANDLE)0;
  }

  // Fetch the handle and drop the reference QueryInterface handed out
  HANDLE sharedHandle;
  const HRESULT handleStatus = dxgiResource->GetSharedHandle(&sharedHandle);
  dxgiResource->Release();

  return (handleStatus == S_OK) ? sharedHandle : (HANDLE)0;
}
|
2017-04-13 13:56:38 -04:00
|
|
|
#endif //_WIN32
|
|
|
|
|
|
|
|
|
|
//! Creates the underlying resource and derives the memory object flags
//! (HostMemoryDirectAccess, SubMemoryObject) and the parent link from the
//! resulting memory type.
//!
//! \param memType  Requested resource memory type
//! \param params   Type specific creation parameters; read as ViewParams,
//!                 ImageViewParams or ImageBufferParams for those types
//! \return Result of Resource::create(); flags are only updated on success
bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params) {
  // Reset the flag in case we reallocate the heap in local/remote
  flags_ &= ~HostMemoryDirectAccess;

  // Create a resource in CAL
  const bool created = Resource::create(memType, params);
  if (!created) {
    // CAL didn't create a resource, so there is nothing to classify
    return created;
  }

  switch (memoryType()) {
    case Resource::Pinned:
    case Resource::ExternalPhysical:
      // Marks memory object for direct GPU access to the host memory
      flags_ |= HostMemoryDirectAccess;
      break;

    case Resource::Remote:
    case Resource::RemoteUSWC:
      // Only untiled remote memory is marked for direct access
      if (!cal()->tiled_) {
        // Marks memory object for direct GPU access to the host memory
        flags_ |= HostMemoryDirectAccess;
      }
      break;

    case Resource::View: {
      Resource::ViewParams* viewParams = reinterpret_cast<Resource::ViewParams*>(params);
      const Resource::MemoryType parentType = viewParams->resource_->memoryType();

      if (parentType == Resource::Persistent) {
        flags_ |= HostMemoryDirectAccess;
      }

      // Check if parent was allocated in system memory
      const bool parentIsRemote =
          (parentType == Resource::Remote) || (parentType == Resource::RemoteUSWC);
      if ((parentType == Resource::Pinned) ||
          (parentIsRemote &&
           // @todo Enable unconditional optimization for remote memory
           // Check for external allocation, to avoid the optimization
           // for non-VM (double copy) mode
           (owner() != NULL) &&
           ((owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) || dev().settings().remoteAlloc_))) {
        // Marks memory object for direct GPU access to the host memory
        flags_ |= HostMemoryDirectAccess;
      }

      // A view whose owner has a parent is linked to the parent memory
      if ((viewParams->owner_ != NULL) && (viewParams->owner_->parent() != NULL)) {
        parent_ = reinterpret_cast<const Memory*>(viewParams->memory_);
        flags_ |= SubMemoryObject;
      }
      break;
    }

    case Resource::ImageView: {
      Resource::ImageViewParams* imageView = reinterpret_cast<Resource::ImageViewParams*>(params);
      parent_ = reinterpret_cast<const Memory*>(imageView->memory_);
      // The view inherits the direct access property of its parent
      flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
      break;
    }

    case Resource::ImageBuffer: {
      Resource::ImageBufferParams* imageBuffer =
          reinterpret_cast<Resource::ImageBufferParams*>(params);
      parent_ = reinterpret_cast<const Memory*>(imageBuffer->memory_);
      // The image inherits the direct access property of its parent buffer
      flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
      break;
    }

    default:
      break;
  }

  return created;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Dispatches a GL resource operation to the matching GSL call.
//!
//! \param operation  GLDecompressResource maps to gslGLAcquire(),
//!                   GLInvalidateFBO maps to gslGLRelease()
//! \return Result of the GSL call; false (after an assert) for any other
//!         operation value
bool Memory::processGLResource(GLResourceOP operation) {
  switch (operation) {
    case GLDecompressResource:
      return gslGLAcquire();
    case GLInvalidateFBO:
      return gslGLRelease();
    default:
      break;
  }

  assert(false && "unknown GLResourceOP");
  return false;
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Creates the interop resource for the owner's interop object.
//!
//! The owner's interop object is probed as a D3D10, D3D11 or D3D9 object
//! (Windows only, in that order) and finally as a GL object. The matching
//! creation parameters are filled in and the resource is created either
//! directly in this object (InteropDirectAccess) or in a separate
//! interopMemory_ buffer object (any other \a type).
//!
//! \return false if the interop object kind or its dimension/type isn't
//!         supported or the resource creation fails, true otherwise
bool Memory::createInterop(InteropType type) {
  Resource::MemoryType memType = Resource::Empty;
  Resource::OGLInteropParams oglRes;
#ifdef _WIN32
  Resource::D3DInteropParams d3dRes;
#endif  //_WIN32

  // Only external objects support interop
  assert(owner() != NULL);

  Resource::CreateParams* createParams = NULL;

  amd::InteropObject* interop = owner()->getInteropObj();
  assert((interop != NULL) && "An invalid interop object is impossible!");

  amd::GLObject* glObject = interop->asGLObject();

#ifdef _WIN32
  amd::D3D10Object* d3d10Object = interop->asD3D10Object();
  amd::D3D11Object* d3d11Object = interop->asD3D11Object();
  amd::D3D9Object* d3d9Object = interop->asD3D9Object();

  if (d3d10Object != NULL) {
    createParams = &d3dRes;
    d3dRes.owner_ = owner();

    const amd::D3D10ObjDesc_t* objDesc = d3d10Object->getObjDesc();
    memType = Resource::D3D10Interop;

    // Get shared handle
    d3dRes.handle_ = getSharedHandle(d3d10Object->getD3D10Resource());
    if (d3dRes.handle_) {
      d3dRes.iDirect3D_ = static_cast<void*>(d3d10Object->getD3D10Resource());
      d3dRes.type_ = Resource::InteropTypeless;
    }

    d3dRes.misc = 0;

    // Find D3D10 object type
    switch (objDesc->objDim_) {
      case D3D10_RESOURCE_DIMENSION_BUFFER:
        d3dRes.type_ = Resource::InteropVertexBuffer;
        break;
      case D3D10_RESOURCE_DIMENSION_TEXTURE1D:
      case D3D10_RESOURCE_DIMENSION_TEXTURE2D:
      case D3D10_RESOURCE_DIMENSION_TEXTURE3D:
        d3dRes.type_ = Resource::InteropTexture;
        if (objDesc->mipLevels_ > 1) {
          // A mipmapped texture is exposed as a view of a single level
          d3dRes.type_ = Resource::InteropTextureViewLevel;
          if (objDesc->arraySize_ > 1) {
            // Split the subresource index into array layer and mip level
            d3dRes.layer_ = d3d10Object->getSubresource() / objDesc->mipLevels_;
            d3dRes.mipLevel_ = d3d10Object->getSubresource() % objDesc->mipLevels_;
          } else {
            d3dRes.layer_ = 0;
            d3dRes.mipLevel_ = d3d10Object->getSubresource();
          }
        }
        break;
      default:
        return false;
    }
  } else if (d3d11Object != NULL) {
    createParams = &d3dRes;
    d3dRes.owner_ = owner();

    const amd::D3D11ObjDesc_t* objDesc = d3d11Object->getObjDesc();
    memType = Resource::D3D11Interop;

    // Get shared handle
    d3dRes.handle_ = getSharedHandle(d3d11Object->getD3D11Resource());
    if (d3dRes.handle_) {
      d3dRes.iDirect3D_ = static_cast<void*>(d3d11Object->getD3D11Resource());
      d3dRes.type_ = Resource::InteropTypeless;
    }

    d3dRes.misc = 0;

    // Find D3D11 object type
    switch (objDesc->objDim_) {
      case D3D11_RESOURCE_DIMENSION_BUFFER:
        d3dRes.type_ = Resource::InteropVertexBuffer;
        break;
      case D3D11_RESOURCE_DIMENSION_TEXTURE1D:
      case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
      case D3D11_RESOURCE_DIMENSION_TEXTURE3D:
        d3dRes.type_ = Resource::InteropTexture;
        d3dRes.layer_ = d3d11Object->getPlane();
        d3dRes.misc = d3d11Object->getMiscFlag();
        if (objDesc->mipLevels_ > 1) {
          // A mipmapped texture is exposed as a view of a single level
          d3dRes.type_ = Resource::InteropTextureViewLevel;
          if (objDesc->arraySize_ > 1) {
            // Split the subresource index into array layer and mip level
            d3dRes.layer_ = d3d11Object->getSubresource() / objDesc->mipLevels_;
            d3dRes.mipLevel_ = d3d11Object->getSubresource() % objDesc->mipLevels_;
          } else {
            d3dRes.layer_ = 0;
            d3dRes.mipLevel_ = d3d11Object->getSubresource();
          }
        }
        break;
      default:
        return false;
    }
  } else if (d3d9Object != NULL) {
    createParams = &d3dRes;
    d3dRes.owner_ = owner();

    const amd::D3D9ObjDesc_t* objDesc = d3d9Object->getObjDesc();
    memType = Resource::D3D9Interop;

    // Get shared handle
    d3dRes.handle_ = d3d9Object->getD3D9SharedHandle();
    if (d3dRes.handle_) {
      d3dRes.iDirect3D_ = static_cast<void*>(d3d9Object->getD3D9Resource());
      d3dRes.type_ = Resource::InteropSurface;
      d3dRes.mipLevel_ = 0;
      d3dRes.layer_ = d3d9Object->getPlane();
      d3dRes.misc = d3d9Object->getMiscFlag();
    }
  } else
#endif  //_WIN32
  if (glObject != NULL) {
    createParams = &oglRes;
    oglRes.owner_ = owner();
    memType = Resource::OGLInterop;

    // Fill the interop creation parameters
    oglRes.handle_ = static_cast<CALuint>(glObject->getGLName());

    // Find OGL object type
    switch (glObject->getCLGLObjectType()) {
      case CL_GL_OBJECT_BUFFER:
        oglRes.type_ = Resource::InteropVertexBuffer;
        break;
      case CL_GL_OBJECT_TEXTURE_BUFFER:
      case CL_GL_OBJECT_TEXTURE1D:
      case CL_GL_OBJECT_TEXTURE1D_ARRAY:
      case CL_GL_OBJECT_TEXTURE2D:
      case CL_GL_OBJECT_TEXTURE2D_ARRAY:
      case CL_GL_OBJECT_TEXTURE3D:
        oglRes.type_ = Resource::InteropTexture;
        if (GL_TEXTURE_CUBE_MAP == glObject->getGLTarget()) {
          switch (glObject->getCubemapFace()) {
            case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
            case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
            case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
            case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
            case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
            case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
              // The layer is the face index relative to the +X face
              oglRes.type_ = Resource::InteropTextureViewCube;
              oglRes.layer_ = glObject->getCubemapFace() - GL_TEXTURE_CUBE_MAP_POSITIVE_X;
              oglRes.mipLevel_ = glObject->getGLMipLevel();
              break;
            default:
              break;
          }
        } else if (glObject->getGLMipLevel() != 0) {
          // A non-zero mip level is exposed as a view of that level
          oglRes.type_ = Resource::InteropTextureViewLevel;
          oglRes.layer_ = 0;
          oglRes.mipLevel_ = glObject->getGLMipLevel();
        }
        break;
      case CL_GL_OBJECT_RENDERBUFFER:
        oglRes.type_ = Resource::InteropRenderBuffer;
        break;
      default:
        return false;
    }

    oglRes.glPlatformContext_ = owner()->getContext().info().hCtx_;
    oglRes.glDeviceContext_ =
        owner()->getContext().info().hDev_[amd::Context::DeviceFlagIdx::GLDeviceKhrIdx];
    // We don't pass any flags here for the GL Resource.
    oglRes.flags_ = 0;
  } else {
    // Not an interop object kind this function knows about
    return false;
  }

  // Get the interop settings
  if (type == InteropDirectAccess) {
    // Create memory object
    if (!create(memType, createParams)) {
      return false;
    }
  } else {
    // Allocate Resource object for interop as buffer
    interopMemory_ = new Memory(
        dev(), size(), amd::alignUp(size(), Device::Heap::ElementSize) / Device::Heap::ElementSize,
        Device::Heap::ElementType);

    // Create the interop object in CAL
    if (NULL == interopMemory_ || !interopMemory_->create(memType, createParams)) {
      delete interopMemory_;
      interopMemory_ = NULL;
      return false;
    }
  }

  setInteropType(type);

  return true;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Releases everything this memory object holds: its VA cache entry, the
//! interop memory, the indirect map target, the pinned memory object (only
//! if this object allocated it) and a direct access mapping.
Memory::~Memory() {
  // Clean VA cache
  dev().removeVACache(this);

  delete interopMemory_;

  // Release associated map target, if any
  if (NULL != mapMemory_) {
    // The owner can be NULL for internal objects (this destructor checks for
    // that further down), so it must not be dereferenced unconditionally
    if ((owner() != NULL) && (owner()->getSvmPtr() != nullptr)) {
      owner()->uncommitSvmMemory();
    }

    mapMemory()->unmap(NULL);
    mapMemory_->release();
  }

  // Destroy pinned memory
  if (flags_ & PinnedMemoryAlloced) {
    delete pinnedMemory_;
  }

  if ((owner() != NULL) && isHostMemDirectAccess() && !(flags_ & SubMemoryObject) &&
      (memoryType() != Resource::ExternalPhysical)) {
    // Unmap memory if direct access was requested
    unmap(NULL);
  }
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Updates the device copy of this memory object from the owner's host memory.
//!
//! Does nothing beyond an optional multi-GPU cache writeback when the object
//! has direct host access or the owner has no host memory. Otherwise the
//! parent (for a sub-memory object) and all views are synchronized first,
//! then the data is transferred: through the pinned memory object if one was
//! allocated, with a plain host write as fallback.
//!
//! \param gpu        Virtual GPU used for the blit and the final wait
//! \param syncFlags  skipParent_/skipViews_ suppress the recursive syncs,
//!                   skipEntire_ suppresses the data transfer itself
void Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags) {
  // If the last writer was another GPU, then make a writeback
  if (!isHostMemDirectAccess() && (owner()->getLastWriter() != NULL) &&
      (&dev() != owner()->getLastWriter())) {
    mgpuCacheWriteBack();
  }

  // If host memory has direct access (or doesn't exist at all),
  // then there is nothing to synchronize
  if (isHostMemDirectAccess() || (NULL == owner()->getHostMem())) {
    return;
  }

  // Make sure the parent of subbuffer is up to date
  if (!syncFlags.skipParent_ && (flags_ & SubMemoryObject)) {
    gpu::Memory* parentMemory = dev().getGpuMemory(owner()->parent());

    //! \note: Skipping the sync for a view doesn't reflect the parent settings,
    //! since a view is a small portion of parent
    device::Memory::SyncFlags parentFlags;

    // Sync parent from a view, so views have to be skipped
    parentFlags.skipViews_ = true;

    // Make sure the parent sync is an unique operation.
    // If the app uses multiple subbuffers from multiple queues,
    // then the parent sync can be called from multiple threads
    amd::ScopedLock lock(owner()->parent()->lockMemoryOps());
    parentMemory->syncCacheFromHost(gpu, parentFlags);
    //! \note Don't do early exit here, since we still have to sync
    //! this view, if the parent sync operation was a NOP.
    //! If parent was synchronized, then this view sync will be a NOP
  }

  // The sync is a NOP if this copy already has the latest version
  // or this device was the last writer
  const bool hasUpdates =
      (version_ != owner()->getVersion()) && (&dev() != owner()->getLastWriter());

  // Update all available views, since we sync the parent
  if ((owner()->subBuffers().size() != 0) && (hasUpdates || !syncFlags.skipViews_)) {
    device::Memory::SyncFlags viewFlags;

    // Sync views from parent, so parent has to be skipped
    viewFlags.skipParent_ = true;

    // Passthrough the skip entire flag to the views, since
    // any view is a submemory of the parent
    viewFlags.skipEntire_ = syncFlags.skipEntire_;
    if (hasUpdates) {
      // Parent will be synced so update all views with a skip
      viewFlags.skipEntire_ = true;
    }

    //! \note Don't allow subbuffer's allocation in the worker thread.
    //! It may cause a system lock, because possible resource
    //! destruction, heap reallocation or subbuffer allocation
    static const bool AllocSubBuffer = false;

    amd::ScopedLock lock(owner()->lockMemoryOps());
    for (auto& sub : owner()->subBuffers()) {
      device::Memory* devSub = sub->getDeviceMemory(dev(), AllocSubBuffer);
      if (NULL == devSub) {
        continue;
      }
      reinterpret_cast<gpu::Memory*>(devSub)->syncCacheFromHost(gpu, viewFlags);
    }
  }

  // Make sure we didn't have a NOP,
  // because this GPU device was the last writer
  if (&dev() != owner()->getLastWriter()) {
    // Update the latest version
    version_ = owner()->getVersion();
  }

  // Exit if sync is a NOP or sync can be skipped
  if (!hasUpdates || syncFlags.skipEntire_) {
    return;
  }

  static const bool Entire = true;
  amd::Coord3D origin(0, 0, 0);
  bool transferred = false;

  // If host memory was pinned then make a transfer
  if (flags_ & PinnedMemoryAlloced) {
    if (cal()->buffer_) {
      amd::Coord3D region(owner()->getSize());
      transferred =
          gpu.blitMgr().copyBuffer(*pinnedMemory_, *this, origin, origin, region, Entire);
    } else {
      amd::Image& image = *static_cast<amd::Image*>(owner());
      transferred = gpu.blitMgr().copyBufferToImage(*pinnedMemory_, *this, origin, origin,
                                                    image.getRegion(), Entire,
                                                    image.getRowPitch(), image.getSlicePitch());
    }
  }

  // Fall back to a plain write from host memory
  if (!transferred) {
    if (cal()->buffer_) {
      amd::Coord3D region(owner()->getSize());
      transferred =
          gpu.blitMgr().writeBuffer(owner()->getHostMem(), *this, origin, region, Entire);
    } else {
      amd::Image& image = *static_cast<amd::Image*>(owner());
      transferred =
          gpu.blitMgr().writeImage(owner()->getHostMem(), *this, origin, image.getRegion(),
                                   image.getRowPitch(), image.getSlicePitch(), Entire);
    }
  }

  //!@todo A wait isn't really necessary. However
  //! Linux no-VM may have extra random failures.
  wait(gpu);

  // Should never fail
  assert(transferred && "Memory synchronization failed!");
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Updates the owner's host memory from the device copy of this memory object.
//!
//! Does nothing when the object has direct host access. Otherwise the parent
//! (for a sub-memory object) and all views are synchronized first, then the
//! data is transferred: through the pinned memory object if one was
//! allocated, with a plain host read as fallback.
//!
//! \param syncFlags  skipParent_/skipViews_ suppress the recursive syncs,
//!                   skipEntire_ suppresses the data transfer itself
void Memory::syncHostFromCache(device::Memory::SyncFlags syncFlags) {
  // Sanity checks
  assert(owner() != NULL);

  // If host memory has direct access, then there is nothing to synchronize
  if (isHostMemDirectAccess()) {
    return;
  }

  // Make sure the parent of subbuffer is up to date
  if (!syncFlags.skipParent_ && (flags_ & SubMemoryObject)) {
    device::Memory* parentMemory = owner()->parent()->getDeviceMemory(dev());

    //! \note: Skipping the sync for a view doesn't reflect the parent settings,
    //! since a view is a small portion of parent
    device::Memory::SyncFlags parentFlags;

    // Sync parent from a view, so views have to be skipped
    parentFlags.skipViews_ = true;

    // Make sure the parent sync is an unique operation.
    // If the app uses multiple subbuffers from multiple queues,
    // then the parent sync can be called from multiple threads
    amd::ScopedLock lock(owner()->parent()->lockMemoryOps());
    parentMemory->syncHostFromCache(parentFlags);
    //! \note Don't do early exit here, since we still have to sync
    //! this view, if the parent sync operation was a NOP.
    //! If parent was synchronized, then this view sync will be a NOP
  }

  // The sync is a NOP if there is no last writer recorded
  // or this copy already has the latest version
  const bool hasUpdates =
      (NULL != owner()->getLastWriter()) && (version_ != owner()->getVersion());

  // Update all available views, since we sync the parent
  if ((owner()->subBuffers().size() != 0) && (hasUpdates || !syncFlags.skipViews_)) {
    device::Memory::SyncFlags viewFlags;

    // Sync views from parent, so parent has to be skipped
    viewFlags.skipParent_ = true;

    // Passthrough the skip entire flag to the views, since
    // any view is a submemory of the parent
    viewFlags.skipEntire_ = syncFlags.skipEntire_;
    if (hasUpdates) {
      // Parent will be synced so update all views with a skip
      viewFlags.skipEntire_ = true;
    }

    //! \note Don't allow subbuffer's allocation in the worker thread.
    //! It may cause a system lock, because possible resource
    //! destruction, heap reallocation or subbuffer allocation
    static const bool AllocSubBuffer = false;

    amd::ScopedLock lock(owner()->lockMemoryOps());
    for (auto& sub : owner()->subBuffers()) {
      device::Memory* devSub = sub->getDeviceMemory(dev(), AllocSubBuffer);
      if (NULL == devSub) {
        continue;
      }
      reinterpret_cast<gpu::Memory*>(devSub)->syncHostFromCache(viewFlags);
    }
  }

  // Make sure we didn't have a NOP,
  // because CPU was the last writer
  if (NULL != owner()->getLastWriter()) {
    // Mark parent as up to date, set our version accordingly
    version_ = owner()->getVersion();
  }

  // Exit if sync is a NOP or sync can be skipped
  if (!hasUpdates || syncFlags.skipEntire_) {
    return;
  }

  static const bool Entire = true;
  amd::Coord3D origin(0, 0, 0);
  bool transferred = false;

  // If backing store was pinned then make a transfer
  if (flags_ & PinnedMemoryAlloced) {
    if (cal()->buffer_) {
      amd::Coord3D region(owner()->getSize());
      transferred =
          dev().xferMgr().copyBuffer(*this, *pinnedMemory_, origin, origin, region, Entire);
    } else {
      amd::Image& image = *static_cast<amd::Image*>(owner());
      transferred = dev().xferMgr().copyImageToBuffer(*this, *pinnedMemory_, origin, origin,
                                                      image.getRegion(), Entire,
                                                      image.getRowPitch(), image.getSlicePitch());
    }
  }

  // Just do a basic host read
  if (!transferred) {
    if (cal()->buffer_) {
      amd::Coord3D region(owner()->getSize());
      transferred =
          dev().xferMgr().readBuffer(*this, owner()->getHostMem(), origin, region, Entire);
    } else {
      amd::Image& image = *static_cast<amd::Image*>(owner());
      transferred =
          dev().xferMgr().readImage(*this, owner()->getHostMem(), origin, image.getRegion(),
                                    image.getRowPitch(), image.getSlicePitch(), Entire);
    }
  }

  // Should never fail
  assert(transferred && "Memory synchronization failed!");
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Creates a view of this memory object for a sub-buffer.
//!
//! The view covers subBufferOwner.getSize() bytes starting at
//! subBufferOwner.getOrigin(). On success the host memory pointer of
//! \a subBufferOwner is set to the matching location inside this object's
//! owner host memory, or to NULL if the owner has no host memory.
//!
//! \param subBufferOwner  Abstraction-layer object of the sub-buffer
//! \return The new view object, or NULL if the allocation or the view
//!         creation failed
gpu::Memory* Memory::createBufferView(amd::Memory& subBufferOwner) {
  Resource::ViewParams params;

  const size_t offset = subBufferOwner.getOrigin();
  const size_t size = subBufferOwner.getSize();

  // Create a memory object
  gpu::Memory* viewMemory = new gpu::Memory(dev(), subBufferOwner, size);
  if (NULL == viewMemory) {
    return NULL;
  }

  // Describe the view: this object serves as both the resource and the memory
  params.owner_ = &subBufferOwner;
  params.gpu_ = static_cast<VirtualGPU*>(subBufferOwner.getVirtualDevice());
  params.offset_ = offset;
  params.size_ = size;
  params.resource_ = this;
  params.memory_ = this;

  if (!viewMemory->create(Resource::View, &params)) {
    delete viewMemory;
    return NULL;
  }

  // Explicitly set the host memory location,
  // because the parent location could change after reallocation
  if (NULL != owner()->getHostMem()) {
    subBufferOwner.setHostMem(reinterpret_cast<char*>(owner()->getHostMem()) + offset);
  } else {
    subBufferOwner.setHostMem(NULL);
  }

  return viewMemory;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void Memory::decIndMapCount() {
|
|
|
|
|
// Map/unmap must be serialized
|
|
|
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (indirectMapCount_ == 0) {
|
|
|
|
|
if (!mipMapped()) {
|
|
|
|
|
LogError("decIndMapCount() called when indirectMapCount_ already zero");
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
return;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Decrement the counter and release indirect map if it's the last op
|
|
|
|
|
if (--indirectMapCount_ == 0) {
|
|
|
|
|
if (NULL != mapMemory_) {
|
|
|
|
|
amd::Memory* memory = mapMemory_;
|
|
|
|
|
amd::Memory* empty = NULL;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Get GPU memory
|
|
|
|
|
Memory* gpuMemory = mapMemory();
|
|
|
|
|
gpuMemory->unmap(NULL);
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (!dev().addMapTarget(memory)) {
|
|
|
|
|
memory->release();
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Map/unamp is serialized for the same memory object,
|
|
|
|
|
// so it's safe to clear the pointer
|
|
|
|
|
assert((mapMemory_ != NULL) && "Mapped buffer should be valid");
|
|
|
|
|
mapMemory_ = NULL;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Note - must be called by the device under the async lock, so no spinning
|
|
|
|
|
// or long pauses allowed in this function.
|
2017-04-13 13:56:38 -04:00
|
|
|
void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& region, uint mapFlags,
|
|
|
|
|
size_t* rowPitch, size_t* slicePitch) {
|
|
|
|
|
// Sanity checks
|
|
|
|
|
assert(owner() != NULL);
|
|
|
|
|
|
|
|
|
|
// Map/unmap must be serialized
|
|
|
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
|
|
|
|
|
|
address mapAddress = NULL;
|
|
|
|
|
size_t offset = origin[0];
|
|
|
|
|
|
|
|
|
|
// For SVM implementation, we cannot use cached map. if svm space, use the svm host pointer
|
|
|
|
|
void* initHostPtr = owner()->getSvmPtr();
|
|
|
|
|
if (NULL != initHostPtr) {
|
|
|
|
|
owner()->commitSvmMemory();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (owner()->numDevices() > 1) {
|
|
|
|
|
if ((NULL == initHostPtr) && (owner()->getHostMem() == NULL)) {
|
|
|
|
|
static const bool forceAllocHostMem = true;
|
|
|
|
|
if (!owner()->allocHostMemory(NULL, forceAllocHostMem)) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
//! \note Ignore pinning result
|
|
|
|
|
// bool ok = pinSystemMemory(owner()->getHostMem(), owner()->getSize());
|
2015-02-13 17:49:06 -05:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
incIndMapCount();
|
|
|
|
|
// If host memory exists, use it
|
|
|
|
|
if ((owner()->getHostMem() != NULL) && isDirectMap()) {
|
|
|
|
|
mapAddress = reinterpret_cast<address>(owner()->getHostMem());
|
|
|
|
|
}
|
|
|
|
|
// If resource is a persistent allocation, we can use it directly
|
|
|
|
|
else if (isPersistentDirectMap()) {
|
|
|
|
|
if (NULL == map(NULL)) {
|
|
|
|
|
LogError("Could not map target persistent resource");
|
|
|
|
|
decIndMapCount();
|
|
|
|
|
return NULL;
|
2015-02-13 17:49:06 -05:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
mapAddress = data();
|
|
|
|
|
}
|
|
|
|
|
// Otherwise we can use a remote resource:
|
|
|
|
|
else {
|
|
|
|
|
// Are we in range?
|
|
|
|
|
size_t elementCount = cal()->width_;
|
|
|
|
|
size_t rSize = elementCount * elementSize();
|
|
|
|
|
if (offset >= rSize || offset + region[0] > rSize) {
|
|
|
|
|
LogWarning("Memory::allocMapTarget() - offset/size out of bounds");
|
|
|
|
|
return NULL;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Allocate a map resource if there isn't any yet
|
|
|
|
|
if (indirectMapCount_ == 1) {
|
|
|
|
|
const static bool SysMem = true;
|
|
|
|
|
bool failed = false;
|
|
|
|
|
amd::Memory* memory = NULL;
|
|
|
|
|
// Search for a possible indirect resource
|
|
|
|
|
cl_mem_flags flag = 0;
|
|
|
|
|
bool canBeCached = true;
|
|
|
|
|
if (NULL != initHostPtr) {
|
|
|
|
|
// make sure the host memory is committed already, or we have a big problem.
|
|
|
|
|
assert(owner()->isSvmPtrCommited() && "The host svm memory not committed yet!");
|
|
|
|
|
flag = CL_MEM_USE_HOST_PTR;
|
|
|
|
|
canBeCached = false;
|
|
|
|
|
} else {
|
|
|
|
|
memory = dev().findMapTarget(owner()->getSize());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (memory == NULL) {
|
|
|
|
|
// for map target of svm buffer , we need use svm host ptr
|
|
|
|
|
memory = new (dev().context()) amd::Buffer(dev().context(), flag, owner()->getSize());
|
|
|
|
|
Memory* gpuMemory;
|
|
|
|
|
|
|
|
|
|
do {
|
|
|
|
|
if ((memory == NULL) || !memory->create(initHostPtr, SysMem)) {
|
|
|
|
|
failed = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
memory->setCacheStatus(canBeCached);
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Create, Map and get the base pointer for the resource
|
|
|
|
|
if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
|
|
|
|
|
failed = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} while (false);
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (failed) {
|
|
|
|
|
if (memory != NULL) {
|
|
|
|
|
memory->release();
|
|
|
|
|
}
|
|
|
|
|
decIndMapCount();
|
|
|
|
|
LogError("Could not map target resource");
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Map/unamp is serialized for the same memory object,
|
|
|
|
|
// so it's safe to assign the new pointer
|
|
|
|
|
assert((mapMemory_ == NULL) && "Mapped buffer can't be valid");
|
|
|
|
|
mapMemory_ = memory;
|
|
|
|
|
} else {
|
|
|
|
|
// Did the map resource allocation fail?
|
|
|
|
|
if (mapMemory_ == NULL) {
|
|
|
|
|
LogError("Could not map target resource");
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
mapAddress = mapMemory()->data();
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
return mapAddress + offset;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
bool Memory::pinSystemMemory(void* hostPtr, size_t size) {
|
|
|
|
|
bool result = false;
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// If memory has a direct access already, then skip the host memory pinning
|
|
|
|
|
if (isHostMemDirectAccess()) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
// Check if memory is pinned already
|
|
|
|
|
if (flags_ & PinnedMemoryAlloced) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Allocate memory for the pinned object
|
|
|
|
|
pinnedMemory_ = new Memory(dev(), size);
|
|
|
|
|
|
|
|
|
|
if (pinnedMemory_ == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check if it's a view
|
|
|
|
|
if (flags_ & SubMemoryObject) {
|
|
|
|
|
const gpu::Memory* gpuMemory;
|
|
|
|
|
if (owner() != NULL) {
|
|
|
|
|
gpuMemory = dev().getGpuMemory(owner()->parent());
|
|
|
|
|
} else {
|
|
|
|
|
gpuMemory = parent();
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
if (gpuMemory->flags_ & PinnedMemoryAlloced) {
|
|
|
|
|
Resource::ViewParams params;
|
|
|
|
|
params.owner_ = owner();
|
|
|
|
|
params.offset_ = owner()->getOrigin();
|
|
|
|
|
params.size_ = owner()->getSize();
|
|
|
|
|
params.resource_ = gpuMemory->pinnedMemory_;
|
|
|
|
|
params.memory_ = NULL;
|
|
|
|
|
result = pinnedMemory_->create(Resource::View, ¶ms);
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
} else {
|
|
|
|
|
Resource::PinnedParams params;
|
|
|
|
|
// Fill resource creation parameters
|
|
|
|
|
params.owner_ = owner();
|
|
|
|
|
params.hostMemRef_ = owner()->getHostMemRef();
|
|
|
|
|
params.size_ = size;
|
|
|
|
|
|
|
|
|
|
// Create resource
|
|
|
|
|
result = pinnedMemory_->create(Resource::Pinned, ¶ms);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!result) {
|
|
|
|
|
delete pinnedMemory_;
|
|
|
|
|
pinnedMemory_ = NULL;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
flags_ |= PinnedMemoryAlloced;
|
|
|
|
|
return true;
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Maps the resource for CPU access and returns the pointer produced by map().
//! For non-buffer resources the row and slice pitches are written through
//! rowPitch/slicePitch, both unconditionally dereferenced in that case.
void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, uint numLayers,
                     size_t* rowPitch, size_t* slicePitch) {
  // Only an exact read-only or write-only request is translated into a
  // resource flag; any other value maps with no flags.
  const uint resFlags = (flags == Memory::CpuReadOnly)
      ? Resource::ReadOnly
      : ((flags == Memory::CpuWriteOnly) ? Resource::WriteOnly : 0);

  VirtualGPU& gpu = static_cast<VirtualGPU&>(vDev);
  void* const mapped = map(&gpu, resFlags, startLayer, numLayers);

  // Buffers don't report pitches
  if (cal()->buffer_) {
    return mapped;
  }

  *rowPitch = cal()->pitch_ * elementSize();
  *slicePitch = cal()->slice_ * elementSize();
  return mapped;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Unmaps the resource on the virtual GPU that corresponds to vDev.
void Memory::cpuUnmap(device::VirtualDevice& vDev) {
  VirtualGPU& gpu = static_cast<VirtualGPU&>(vDev);
  unmap(&gpu);
}
|
2014-07-04 16:17:05 -04:00
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Returns the device memory object of the attached map buffer (mapMemory_)
//! for this device, or NULL when no map buffer is attached.
Memory* Memory::mapMemory() const {
  if (NULL == mapMemory_) {
    return NULL;
  }
  return reinterpret_cast<Memory*>(mapMemory_->getDeviceMemory(dev()));
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
void Memory::mgpuCacheWriteBack() {
|
|
|
|
|
// Lock memory object, so only one write back can occur
|
|
|
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
|
|
|
|
|
|
// Attempt to allocate a staging buffer if don't have any
|
|
|
|
|
if (owner()->getHostMem() == NULL) {
|
|
|
|
|
if (nullptr != owner()->getSvmPtr()) {
|
|
|
|
|
owner()->commitSvmMemory();
|
|
|
|
|
owner()->setHostMem(owner()->getSvmPtr());
|
|
|
|
|
} else {
|
|
|
|
|
static const bool forceAllocHostMem = true;
|
|
|
|
|
owner()->allocHostMemory(nullptr, forceAllocHostMem);
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
2017-04-13 13:56:38 -04:00
|
|
|
}
|
|
|
|
|
// Make synchronization
|
|
|
|
|
if (owner()->getHostMem() != NULL) {
|
|
|
|
|
//! \note Ignore pinning result
|
|
|
|
|
bool ok = pinSystemMemory(owner()->getHostMem(), owner()->getSize());
|
|
|
|
|
owner()->cacheWriteBack();
|
|
|
|
|
}
|
2014-07-04 16:17:05 -04:00
|
|
|
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Creates a gpu::Buffer object for a sub-buffer and initializes it as a view
//! of this buffer, using the origin and size reported by subBufferOwner.
//!
//! \param subBufferOwner the amd::Memory object that owns the new view
//! \return the new view object, or NULL if the allocation or the view
//!         creation failed
Memory* Buffer::createBufferView(amd::Memory& subBufferOwner) const {
  gpu::Memory* subBuffer;
  Resource::ViewParams params;

  size_t offset = subBufferOwner.getOrigin();
  size_t size = subBufferOwner.getSize();

  // Create a memory object
  subBuffer = new gpu::Buffer(dev(), subBufferOwner, size);
  if (NULL == subBuffer) {
    return NULL;
  }

  // Allocate a view for this buffer object
  params.owner_ = &subBufferOwner;
  params.offset_ = offset;
  params.size_ = size;
  params.resource_ = this;
  params.memory_ = this;

  if (!subBuffer->create(Resource::View, &params)) {
    delete subBuffer;
    return NULL;
  }

  return subBuffer;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
//! Returns a host address that can be used as the target of a map operation
//! on this image, or NULL on failure.
//!
//! \param origin     image origin of the mapped region; converted to a byte
//!                   offset for the host-memory and persistent paths
//! \param region     region[0] and region[1] define the pitches reported for
//!                   the remote (indirect buffer) path
//! \param mapFlags   not referenced by this implementation
//! \param rowPitch   receives the row pitch in bytes; dereferenced
//!                   unconditionally on every successful path
//! \param slicePitch receives the slice pitch in bytes; may be NULL
//!
//! Every failure return calls decIndMapCount(), so the indirect map counter
//! stays balanced when NULL is returned.
void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& region, uint mapFlags,
                            size_t* rowPitch, size_t* slicePitch) {
  // Sanity checks
  assert(owner() != NULL);
  bool useRemoteResource = true;
  size_t slicePitchTmp = 0;
  size_t height = cal()->height_;
  size_t depth = cal()->depth_;

  // Map/unmap must be serialized
  amd::ScopedLock lock(owner()->lockMemoryOps());

  address mapAddress = NULL;
  size_t offset = origin[0];

  incIndMapCount();

  // If host memory exists, use it
  if ((owner()->getHostMem() != NULL) && isDirectMap()) {
    useRemoteResource = false;
    mapAddress = reinterpret_cast<address>(owner()->getHostMem());
    amd::Image* amdImage = owner()->asImage();

    // Calculate the offset in bytes
    offset *= elementSize();

    // Update the row and slice pitches value
    *rowPitch =
        (amdImage->getRowPitch() == 0) ? (cal()->width_ * elementSize()) : amdImage->getRowPitch();
    slicePitchTmp =
        (amdImage->getSlicePitch() == 0) ? (height * (*rowPitch)) : amdImage->getSlicePitch();

    // Adjust the offset in Y and Z dimensions
    offset += origin[1] * (*rowPitch);
    offset += origin[2] * slicePitchTmp;
  }
  // If resource is a persistent allocation, we can use it directly
  //! @note Even if resource is a persistent allocation,
  //! runtime can't use it directly,
  //! because CAL volume map doesn't work properly.
  //! @todo arrays can be added for persistent lock with some CAL changes
  else if (isPersistentDirectMap()) {
    if (NULL == map(NULL)) {
      useRemoteResource = true;
      LogError("Could not map target persistent resource, try remote resource");
    } else {
      useRemoteResource = false;
      mapAddress = data();

      // Calculate the offset in bytes
      offset *= elementSize();

      // Update the row pitch value
      *rowPitch = cal()->pitch_ * elementSize();

      // Adjust the offset in Y dimension
      offset += origin[1] * (*rowPitch);
    }
  }

  // Otherwise we can use a remote resource:
  if (useRemoteResource) {
    // Calculate X offset in bytes
    offset *= elementSize();

    // Allocate a map resource if there isn't any yet
    if (indirectMapCount_ == 1) {
      const static bool SysMem = true;
      bool failed = false;
      amd::Memory* memory;

      // Search for a possible indirect resource
      memory = dev().findMapTarget(owner()->getSize());

      if (memory == NULL) {
        // Allocate a new buffer to use as the map target
        //! @note Allocate a 1D buffer, since CAL issues with 3D
        //! Also HW doesn't support untiled images
        memory = new (dev().context())
            amd::Buffer(dev().context(), 0, cal()->width_ * height * depth * elementSize());

        Memory* gpuMemory;
        // Single-pass loop: 'break' leaves the creation sequence on the first failure
        do {
          // The allocation must be validated before the object is touched
          if (memory == NULL) {
            failed = true;
            break;
          }
          memory->setVirtualDevice(owner()->getVirtualDevice());

          if (!memory->create(NULL, SysMem)) {
            failed = true;
            break;
          }

          gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));

          // Create, Map and get the base pointer for the resource
          if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
            failed = true;
            break;
          }
        } while (false);
      }

      if (failed) {
        if (memory != NULL) {
          memory->release();
        }
        decIndMapCount();
        LogError("Could not map target resource");
        return NULL;
      }

      // Map/unmap is serialized for the same memory object,
      // so it's safe to assign the new pointer
      assert((mapMemory_ == NULL) && "Mapped buffer can't be valid");
      mapMemory_ = memory;
    } else {
      // Did the map resource allocation fail?
      if (mapMemory_ == NULL) {
        // Undo the increment above before reporting the failure
        decIndMapCount();
        LogError("Could not map target resource");
        return NULL;
      }
    }

    mapAddress = mapMemory()->data();

    // Update the row and slice pitches value
    *rowPitch = region[0] * elementSize();
    if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
      slicePitchTmp = *rowPitch;
    } else {
      slicePitchTmp = *rowPitch * region[1];
    }
    // Use start of the indirect buffer
    offset = 0;
  }

  if (slicePitch != NULL) {
    *slicePitch = slicePitchTmp;
  }

  return mapAddress + offset;
}
|
|
|
|
|
|
2017-04-13 13:56:38 -04:00
|
|
|
} // namespace gpu
|