Files
rocm-systems/rocclr/runtime/device/gpu/gpumemory.cpp
T

1124 строки
35 KiB
C++
Исходник Обычный вид История

2014-07-04 16:17:05 -04:00
//! Implementation of GPU device memory management
#include "top.hpp"
#include "thread/thread.hpp"
#include "thread/monitor.hpp"
#include "device/device.hpp"
#include "device/gpu/gpudevice.hpp"
#include "device/gpu/gpublit.hpp"
#ifdef _WIN32
#include <d3d10_1.h>
#include "amdocl/cl_d3d9_amd.hpp"
#include "amdocl/cl_d3d10_amd.hpp"
#include "amdocl/cl_d3d11_amd.hpp"
#endif //_WIN32
2014-07-04 16:17:05 -04:00
#include "amdocl/cl_gl_amd.hpp"
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
//! Turn this on to enable sanity checks before and after every heap operation.
#if DEBUG
#define EXTRA_HEAP_CHECKS 1
#endif // DEBUG
2014-07-04 16:17:05 -04:00
namespace gpu {
//! Constructs a GPU memory object for the runtime object \a owner.
//! The resource is sized as \a size / Device::Heap::ElementSize elements
//! (truncating division) of Device::Heap::ElementType.
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t size)
    : device::Memory(owner),
      Resource(gpuDev, size / Device::Heap::ElementSize, Device::Heap::ElementType) {
  init();

  // An owner that has a parent is flagged as a sub-memory object
  const bool ownerHasParent = (nullptr != owner.parent());
  if (ownerHasParent) {
    flags_ |= SubMemoryObject;
  }
}
//! Constructs a GPU memory object from a byte size instead of an amd::Memory owner.
//! The element count is \a size rounded up to a whole number of
//! Device::Heap::ElementSize units.
Memory::Memory(const Device& gpuDev, size_t size)
    : device::Memory(size),
      Resource(gpuDev,
               amd::alignUp(size, Device::Heap::ElementSize) / Device::Heap::ElementSize,
               Device::Heap::ElementType) {
  init();
}
//! Constructs a GPU memory object for \a owner whose resource is described by
//! a width and a surface format.
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, cmSurfFmt format)
    : device::Memory(owner), Resource(gpuDev, width, format) {
  init();

  // An owner that has a parent is flagged as a sub-memory object
  if (nullptr != owner.parent()) {
    flags_ |= SubMemoryObject;
  }
}
//! Constructs a GPU memory object from a byte size (no amd::Memory owner is passed);
//! the resource is described by a width and a surface format.
Memory::Memory(const Device& gpuDev, size_t size, size_t width, cmSurfFmt format)
    : device::Memory(size),
      Resource(gpuDev, width, format) {
  init();
}
//! Constructs a GPU memory object for \a owner whose resource is described by
//! width/height/depth, surface format, channel order, image type and mip level count.
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, size_t height, size_t depth,
               cmSurfFmt format, gslChannelOrder chOrder, cl_mem_object_type imageType,
               uint mipLevels)
    : device::Memory(owner),
      Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels) {
  init();

  // An owner that has a parent is flagged as a sub-memory object
  const bool ownerHasParent = (nullptr != owner.parent());
  if (ownerHasParent) {
    flags_ |= SubMemoryObject;
  }
}
//! Constructs a GPU memory object from a byte size (no amd::Memory owner is passed);
//! the resource is described by width/height/depth, surface format, channel order,
//! image type and mip level count.
Memory::Memory(const Device& gpuDev, size_t size, size_t width, size_t height, size_t depth,
               cmSurfFmt format, gslChannelOrder chOrder, cl_mem_object_type imageType,
               uint mipLevels)
    : device::Memory(size),
      Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels) {
  init();
}
//! Puts the members common to every constructor into their initial state:
//! no parent, no pinned or interop memory, no interop type and no indirect maps.
void Memory::init() {
  parent_ = nullptr;
  pinnedMemory_ = nullptr;
  interopMemory_ = nullptr;
  interopType_ = InteropNone;
  indirectMapCount_ = 0;
}
#ifdef _WIN32
//! Retrieves the DXGI shared handle of the resource behind \a pIface.
//! \return the shared handle, or 0 if \a pIface does not expose IDXGIResource
//!         or the handle query does not return S_OK
static HANDLE getSharedHandle(IUnknown* pIface) {
  // Sanity checks
  assert(pIface != NULL);

  IDXGIResource* dxgiResource = NULL;
  const HRESULT queryStatus =
      pIface->QueryInterface(__uuidof(IDXGIResource), (void**)&dxgiResource);
  if ((queryStatus != S_OK) || (dxgiResource == NULL)) {
    return (HANDLE)0;
  }

  HANDLE sharedHandle;
  const HRESULT handleStatus = dxgiResource->GetSharedHandle(&sharedHandle);
  // The interface reference is dropped regardless of the query outcome
  dxgiResource->Release();

  return (handleStatus == S_OK) ? sharedHandle : (HANDLE)0;
}
#endif //_WIN32
//! Creates the underlying resource and then derives the HostMemoryDirectAccess
//! and SubMemoryObject flags (plus parent_ for views) from the resulting memory type.
//! \param memType  type of the resource to create
//! \param params   creation parameters; reinterpreted according to the memory type
//! \return the result of Resource::create()
bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params) {
  // Reset the flag in case we reallocate the heap in local/remote
  flags_ &= ~HostMemoryDirectAccess;

  // Create a resource in CAL; without a resource there is nothing to flag
  if (!Resource::create(memType, params)) {
    return false;
  }

  switch (memoryType()) {
    case Resource::Pinned:
    case Resource::ExternalPhysical:
      // Marks memory object for direct GPU access to the host memory
      flags_ |= HostMemoryDirectAccess;
      break;

    case Resource::Remote:
    case Resource::RemoteUSWC:
      // Only untiled remote memory is marked for direct host access
      if (!cal()->tiled_) {
        flags_ |= HostMemoryDirectAccess;
      }
      break;

    case Resource::View: {
      Resource::ViewParams* viewParams = reinterpret_cast<Resource::ViewParams*>(params);
      const auto parentType = viewParams->resource_->memoryType();

      if (parentType == Resource::Persistent) {
        flags_ |= HostMemoryDirectAccess;
      }

      // Check if parent was allocated in system memory
      const bool parentIsRemote =
          (parentType == Resource::Remote) || (parentType == Resource::RemoteUSWC);
      // @todo Enable unconditional optimization for remote memory
      // Check for external allocation, to avoid the optimization
      // for non-VM (double copy) mode
      const bool parentInSysMem =
          (parentType == Resource::Pinned) ||
          (parentIsRemote && (owner() != NULL) &&
           ((owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) || dev().settings().remoteAlloc_));
      if (parentInSysMem) {
        // Marks memory object for direct GPU access to the host memory
        flags_ |= HostMemoryDirectAccess;
      }

      // A view whose owner has a parent object remembers the parent GPU memory
      if ((viewParams->owner_ != NULL) && (viewParams->owner_->parent() != NULL)) {
        parent_ = reinterpret_cast<const Memory*>(viewParams->memory_);
        flags_ |= SubMemoryObject;
      }
      break;
    }

    case Resource::ImageView: {
      Resource::ImageViewParams* imageView = reinterpret_cast<Resource::ImageViewParams*>(params);
      parent_ = reinterpret_cast<const Memory*>(imageView->memory_);
      // Inherit the direct-access state of the parent
      flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
      break;
    }

    case Resource::ImageBuffer: {
      Resource::ImageBufferParams* imageBuffer =
          reinterpret_cast<Resource::ImageBufferParams*>(params);
      parent_ = reinterpret_cast<const Memory*>(imageBuffer->memory_);
      // Inherit the direct-access state of the parent
      flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
      break;
    }

    default:
      break;
  }

  return true;
}
//! Dispatches a GL resource operation to the matching GSL call.
//! GLDecompressResource maps to gslGLAcquire() and GLInvalidateFBO to gslGLRelease().
//! \return the result of the GSL call, or false for an unknown operation
bool Memory::processGLResource(GLResourceOP operation) {
  if (operation == GLDecompressResource) {
    return gslGLAcquire();
  }
  if (operation == GLInvalidateFBO) {
    return gslGLRelease();
  }

  assert(false && "unknown GLResourceOP");
  return false;
}
2014-07-04 16:17:05 -04:00
//! Creates the GPU resource for the interop object attached to owner().
//!
//! The interop object is probed as D3D10, D3D11, D3D9 (Windows builds only)
//! and finally as an OpenGL object; the first match selects the memory type
//! and fills the corresponding creation parameters. With InteropDirectAccess
//! the resource is created in this object, otherwise a separate
//! interopMemory_ object is allocated and the resource is created there.
//!
//! \param type  requested interop type, stored with setInteropType() on success
//! \return true on success; false if the interop object has no supported
//!         type/dimension or if the resource creation fails
bool Memory::createInterop(InteropType type) {
Resource::MemoryType memType = Resource::Empty;
Resource::OGLInteropParams oglRes;
2014-07-04 16:17:05 -04:00
#ifdef _WIN32
Resource::D3DInteropParams d3dRes;
#endif //_WIN32
2014-07-04 16:17:05 -04:00
// Only external objects support interop
assert(owner() != NULL);
2014-07-04 16:17:05 -04:00
// Points at whichever parameter structure (d3dRes or oglRes) gets filled below
Resource::CreateParams* createParams = NULL;
2014-07-04 16:17:05 -04:00
amd::InteropObject* interop = owner()->getInteropObj();
assert((interop != NULL) && "An invalid interop object is impossible!");
2014-07-04 16:17:05 -04:00
amd::GLObject* glObject = interop->asGLObject();
2014-07-04 16:17:05 -04:00
#ifdef _WIN32
amd::D3D10Object* d3d10Object = interop->asD3D10Object();
amd::D3D11Object* d3d11Object = interop->asD3D11Object();
amd::D3D9Object* d3d9Object = interop->asD3D9Object();
2014-07-04 16:17:05 -04:00
if (d3d10Object != NULL) {
createParams = &d3dRes;
2014-07-04 16:17:05 -04:00
d3dRes.owner_ = owner();
2014-07-04 16:17:05 -04:00
const amd::D3D10ObjDesc_t* objDesc = d3d10Object->getObjDesc();
2014-07-04 16:17:05 -04:00
memType = Resource::D3D10Interop;
2014-07-04 16:17:05 -04:00
// Get shared handle
// NOTE(review): a zero handle is not treated as an error here; execution continues
if ((d3dRes.handle_ = getSharedHandle(d3d10Object->getD3D10Resource()))) {
d3dRes.iDirect3D_ = static_cast<void*>(d3d10Object->getD3D10Resource());
d3dRes.type_ = Resource::InteropTypeless;
}
2014-07-04 16:17:05 -04:00
d3dRes.misc = 0;
// Find D3D10 object type
switch (objDesc->objDim_) {
case D3D10_RESOURCE_DIMENSION_BUFFER:
d3dRes.type_ = Resource::InteropVertexBuffer;
break;
case D3D10_RESOURCE_DIMENSION_TEXTURE1D:
case D3D10_RESOURCE_DIMENSION_TEXTURE2D:
case D3D10_RESOURCE_DIMENSION_TEXTURE3D:
d3dRes.type_ = Resource::InteropTexture;
if (objDesc->mipLevels_ > 1) {
d3dRes.type_ = Resource::InteropTextureViewLevel;
if (objDesc->arraySize_ > 1) {
// Split the subresource index into array layer (quotient) and mip level (remainder)
d3dRes.layer_ = d3d10Object->getSubresource() / objDesc->mipLevels_;
d3dRes.mipLevel_ = d3d10Object->getSubresource() % objDesc->mipLevels_;
} else {
// Not an array: the subresource index is used directly as the mip level
d3dRes.layer_ = 0;
d3dRes.mipLevel_ = d3d10Object->getSubresource();
}
2014-07-04 16:17:05 -04:00
}
break;
default:
// Any other resource dimension is rejected
return false;
break;
2014-07-04 16:17:05 -04:00
}
} else if (d3d11Object != NULL) {
createParams = &d3dRes;
2014-07-04 16:17:05 -04:00
d3dRes.owner_ = owner();
2014-07-04 16:17:05 -04:00
const amd::D3D11ObjDesc_t* objDesc = d3d11Object->getObjDesc();
2014-07-04 16:17:05 -04:00
memType = Resource::D3D11Interop;
2014-07-04 16:17:05 -04:00
// Get shared handle
// NOTE(review): a zero handle is not treated as an error here; execution continues
if ((d3dRes.handle_ = getSharedHandle(d3d11Object->getD3D11Resource()))) {
d3dRes.iDirect3D_ = static_cast<void*>(d3d11Object->getD3D11Resource());
d3dRes.type_ = Resource::InteropTypeless;
}
2014-07-04 16:17:05 -04:00
d3dRes.misc = 0;
// Find D3D11 object type
switch (objDesc->objDim_) {
case D3D11_RESOURCE_DIMENSION_BUFFER:
d3dRes.type_ = Resource::InteropVertexBuffer;
break;
case D3D11_RESOURCE_DIMENSION_TEXTURE1D:
case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
case D3D11_RESOURCE_DIMENSION_TEXTURE3D:
d3dRes.type_ = Resource::InteropTexture;
// Unlike the D3D10 path, the plane and misc flag of the object are forwarded
d3dRes.layer_ = d3d11Object->getPlane();
d3dRes.misc = d3d11Object->getMiscFlag();
if (objDesc->mipLevels_ > 1) {
d3dRes.type_ = Resource::InteropTextureViewLevel;
if (objDesc->arraySize_ > 1) {
// Split the subresource index into array layer (quotient) and mip level (remainder)
d3dRes.layer_ = d3d11Object->getSubresource() / objDesc->mipLevels_;
d3dRes.mipLevel_ = d3d11Object->getSubresource() % objDesc->mipLevels_;
} else {
// Not an array: the subresource index is used directly as the mip level
d3dRes.layer_ = 0;
d3dRes.mipLevel_ = d3d11Object->getSubresource();
}
2014-07-04 16:17:05 -04:00
}
break;
default:
// Any other resource dimension is rejected
return false;
break;
2014-07-04 16:17:05 -04:00
}
} else if (d3d9Object != NULL) {
createParams = &d3dRes;
2014-07-04 16:17:05 -04:00
d3dRes.owner_ = owner();
2014-07-04 16:17:05 -04:00
// NOTE(review): objDesc is not read in this branch
const amd::D3D9ObjDesc_t* objDesc = d3d9Object->getObjDesc();
2014-07-04 16:17:05 -04:00
memType = Resource::D3D9Interop;
2014-07-04 16:17:05 -04:00
// Get shared handle
// The remaining D3D9 parameters are only filled when the handle is non-zero
if ((d3dRes.handle_ = d3d9Object->getD3D9SharedHandle())) {
d3dRes.iDirect3D_ = static_cast<void*>(d3d9Object->getD3D9Resource());
d3dRes.type_ = Resource::InteropSurface;
d3dRes.mipLevel_ = 0;
d3dRes.layer_ = d3d9Object->getPlane();
d3dRes.misc = d3d9Object->getMiscFlag();
2014-07-04 16:17:05 -04:00
}
} else
#endif //_WIN32
// On Windows this 'if' is the tail of the else-chain above;
// on other platforms it is the first and only interop check
if (glObject != NULL) {
createParams = &oglRes;
2014-07-04 16:17:05 -04:00
oglRes.owner_ = owner();
2014-07-04 16:17:05 -04:00
memType = Resource::OGLInterop;
2014-07-04 16:17:05 -04:00
// Fill the interop creation parameters
oglRes.handle_ = static_cast<CALuint>(glObject->getGLName());
2014-07-04 16:17:05 -04:00
// Find OGL object type
switch (glObject->getCLGLObjectType()) {
case CL_GL_OBJECT_BUFFER:
oglRes.type_ = Resource::InteropVertexBuffer;
break;
case CL_GL_OBJECT_TEXTURE_BUFFER:
case CL_GL_OBJECT_TEXTURE1D:
case CL_GL_OBJECT_TEXTURE1D_ARRAY:
case CL_GL_OBJECT_TEXTURE2D:
case CL_GL_OBJECT_TEXTURE2D_ARRAY:
case CL_GL_OBJECT_TEXTURE3D:
oglRes.type_ = Resource::InteropTexture;
if (GL_TEXTURE_CUBE_MAP == glObject->getGLTarget()) {
switch (glObject->getCubemapFace()) {
case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
oglRes.type_ = Resource::InteropTextureViewCube;
// The layer is the face index relative to GL_TEXTURE_CUBE_MAP_POSITIVE_X
oglRes.layer_ = glObject->getCubemapFace() - GL_TEXTURE_CUBE_MAP_POSITIVE_X;
oglRes.mipLevel_ = glObject->getGLMipLevel();
break;
default:
// Any other face value keeps the plain InteropTexture type
break;
}
} else if (glObject->getGLMipLevel() != 0) {
// A non-zero mip level of a non-cubemap texture is exposed as a level view
oglRes.type_ = Resource::InteropTextureViewLevel;
oglRes.layer_ = 0;
oglRes.mipLevel_ = glObject->getGLMipLevel();
2014-07-04 16:17:05 -04:00
}
break;
case CL_GL_OBJECT_RENDERBUFFER:
oglRes.type_ = Resource::InteropRenderBuffer;
break;
default:
2014-07-04 16:17:05 -04:00
return false;
break;
2014-07-04 16:17:05 -04:00
}
// GL platform/device contexts are taken from the owner's context info
oglRes.glPlatformContext_ = owner()->getContext().info().hCtx_;
oglRes.glDeviceContext_ =
owner()->getContext().info().hDev_[amd::Context::DeviceFlagIdx::GLDeviceKhrIdx];
// We dont pass any flags here for the GL Resource.
oglRes.flags_ = 0;
} else {
// The interop object is none of the supported kinds
return false;
}
// Get the interop settings
if (type == InteropDirectAccess) {
// Create memory object
if (!create(memType, createParams)) {
return false;
2014-07-04 16:17:05 -04:00
}
} else {
// Allocate Resource object for interop as buffer
interopMemory_ = new Memory(
dev(), size(), amd::alignUp(size(), Device::Heap::ElementSize) / Device::Heap::ElementSize,
Device::Heap::ElementType);
// Create the interop object in CAL
if (NULL == interopMemory_ || !interopMemory_->create(memType, createParams)) {
// Creation failed: drop the helper object and leave interopMemory_ cleared
delete interopMemory_;
interopMemory_ = NULL;
return false;
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
setInteropType(type);
2014-07-04 16:17:05 -04:00
return true;
2014-07-04 16:17:05 -04:00
}
//! Releases everything attached to this memory object: the VA cache entry,
//! the interop memory, the indirect map target, the pinned memory and,
//! for directly accessed host memory, the outstanding mapping.
Memory::~Memory() {
  // Clean VA cache
  dev().removeVACache(this);

  delete interopMemory_;

  // Release associated map target, if any
  if (mapMemory_ != nullptr) {
    const bool hasSvmPtr = (owner()->getSvmPtr() != nullptr);
    if (hasSvmPtr) {
      owner()->uncommitSvmMemory();
    }
    mapMemory()->unmap(NULL);
    mapMemory_->release();
  }

  // Destroy pinned memory
  if (flags_ & PinnedMemoryAlloced) {
    delete pinnedMemory_;
  }

  // Unmap memory if direct access was requested; sub-memory objects and
  // external physical memory are excluded
  const bool needsUnmap = (owner() != nullptr) && isHostMemDirectAccess() &&
      !(flags_ & SubMemoryObject) && (memoryType() != Resource::ExternalPhysical);
  if (needsUnmap) {
    unmap(NULL);
  }
}
//! Updates the device copy of this memory object from the host copy.
//!
//! Nothing is transferred when the device accesses host memory directly or when
//! the owner has no host memory. For a sub-memory object the parent is synced
//! first (unless skipParent_ is set); for an object with subbuffers every
//! already allocated view is visited as well.
//!
//! \param gpu        virtual GPU used for the blit and the final wait
//! \param syncFlags  skipParent_/skipViews_/skipEntire_ controls for the traversal
void Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags) {
  // If the last writer was another GPU, then make a writeback
  if (!isHostMemDirectAccess() && (owner()->getLastWriter() != NULL) &&
      (&dev() != owner()->getLastWriter())) {
    mgpuCacheWriteBack();
  }

  // If host memory has direct access or doesn't exist, then there is nothing to synchronize
  if (isHostMemDirectAccess() || (NULL == owner()->getHostMem())) {
    return;
  }

  // Make sure the parent of subbuffer is up to date
  if (!syncFlags.skipParent_ && (flags_ & SubMemoryObject)) {
    gpu::Memory* parentMemory = dev().getGpuMemory(owner()->parent());

    //! \note: Skipping the sync for a view doesn't reflect the parent settings,
    //! since a view is a small portion of parent
    device::Memory::SyncFlags parentFlags;
    // Sync parent from a view, so views have to be skipped
    parentFlags.skipViews_ = true;

    // Make sure the parent sync is an unique operation.
    // If the app uses multiple subbuffers from multiple queues,
    // then the parent sync can be called from multiple threads
    amd::ScopedLock lock(owner()->parent()->lockMemoryOps());
    parentMemory->syncCacheFromHost(gpu, parentFlags);
    //! \note Don't do early exit here, since we still have to sync
    //! this view, if the parent sync operation was a NOP.
    //! If parent was synchronized, then this view sync will be a NOP
  }

  // The sync is a NOP if the version is current or this device was the last writer
  const bool hasUpdates =
      (version_ != owner()->getVersion()) && (&dev() != owner()->getLastWriter());

  // Update all available views, since we sync the parent
  if ((owner()->subBuffers().size() != 0) && (hasUpdates || !syncFlags.skipViews_)) {
    device::Memory::SyncFlags viewFlags;
    // Sync views from parent, so parent has to be skipped
    viewFlags.skipParent_ = true;
    // If the parent will be synced, all views are updated with a skip.
    // Otherwise pass the skip entire flag through to the views, since
    // any view is a submemory of the parent
    viewFlags.skipEntire_ = hasUpdates ? true : syncFlags.skipEntire_;

    amd::ScopedLock lock(owner()->lockMemoryOps());
    for (auto& sub : owner()->subBuffers()) {
      //! \note Don't allow subbuffer's allocation in the worker thread.
      //! It may cause a system lock, because possible resource
      //! destruction, heap reallocation or subbuffer allocation
      static const bool AllocSubBuffer = false;
      device::Memory* devSub = sub->getDeviceMemory(dev(), AllocSubBuffer);
      if (NULL == devSub) {
        continue;
      }
      reinterpret_cast<gpu::Memory*>(devSub)->syncCacheFromHost(gpu, viewFlags);
    }
  }

  // Make sure we didn't have a NOP,
  // because this GPU device was the last writer
  if (&dev() != owner()->getLastWriter()) {
    // Update the latest version
    version_ = owner()->getVersion();
  }

  // Exit if sync is a NOP or sync can be skipped
  if (!hasUpdates || syncFlags.skipEntire_) {
    return;
  }

  static const bool Entire = true;
  amd::Coord3D origin(0, 0, 0);
  bool synced = false;

  // If host memory was pinned then make a transfer
  if (flags_ & PinnedMemoryAlloced) {
    if (cal()->buffer_) {
      amd::Coord3D region(owner()->getSize());
      synced = gpu.blitMgr().copyBuffer(*pinnedMemory_, *this, origin, origin, region, Entire);
    } else {
      amd::Image& image = *static_cast<amd::Image*>(owner());
      synced = gpu.blitMgr().copyBufferToImage(*pinnedMemory_, *this, origin, origin,
                                               image.getRegion(), Entire, image.getRowPitch(),
                                               image.getSlicePitch());
    }
  }

  // No pinned transfer (or it failed): write straight from the host pointer
  if (!synced) {
    if (cal()->buffer_) {
      amd::Coord3D region(owner()->getSize());
      synced = gpu.blitMgr().writeBuffer(owner()->getHostMem(), *this, origin, region, Entire);
    } else {
      amd::Image& image = *static_cast<amd::Image*>(owner());
      synced = gpu.blitMgr().writeImage(owner()->getHostMem(), *this, origin, image.getRegion(),
                                        image.getRowPitch(), image.getSlicePitch(), Entire);
    }
  }

  //!@todo A wait isn't really necessary. However
  //! Linux no-VM may have extra random failures.
  wait(gpu);

  // Should never fail
  assert(synced && "Memory synchronization failed!");
}
2014-07-04 16:17:05 -04:00
//! Updates the host copy of this memory object from the device copy.
//!
//! Nothing is transferred when the device accesses host memory directly.
//! For a sub-memory object the parent is synced first (unless skipParent_ is
//! set); for an object with subbuffers every already allocated view is
//! visited as well.
//!
//! \param syncFlags  skipParent_/skipViews_/skipEntire_ controls for the traversal
void Memory::syncHostFromCache(device::Memory::SyncFlags syncFlags) {
  // Sanity checks
  assert(owner() != NULL);

  // If host memory has direct access, then there is nothing to synchronize
  if (isHostMemDirectAccess()) {
    return;
  }

  // Make sure the parent of subbuffer is up to date
  if (!syncFlags.skipParent_ && (flags_ & SubMemoryObject)) {
    device::Memory* parentMemory = owner()->parent()->getDeviceMemory(dev());

    //! \note: Skipping the sync for a view doesn't reflect the parent settings,
    //! since a view is a small portion of parent
    device::Memory::SyncFlags parentFlags;
    // Sync parent from a view, so views have to be skipped
    parentFlags.skipViews_ = true;

    // Make sure the parent sync is an unique operation.
    // If the app uses multiple subbuffers from multiple queues,
    // then the parent sync can be called from multiple threads
    amd::ScopedLock lock(owner()->parent()->lockMemoryOps());
    parentMemory->syncHostFromCache(parentFlags);
    //! \note Don't do early exit here, since we still have to sync
    //! this view, if the parent sync operation was a NOP.
    //! If parent was synchronized, then this view sync will be a NOP
  }

  // The sync is a NOP if there is no last writer or the version is current
  const bool hasUpdates =
      (NULL != owner()->getLastWriter()) && (version_ != owner()->getVersion());

  // Update all available views, since we sync the parent
  if ((owner()->subBuffers().size() != 0) && (hasUpdates || !syncFlags.skipViews_)) {
    device::Memory::SyncFlags viewFlags;
    // Sync views from parent, so parent has to be skipped
    viewFlags.skipParent_ = true;
    // If the parent will be synced, all views are updated with a skip.
    // Otherwise pass the skip entire flag through to the views, since
    // any view is a submemory of the parent
    viewFlags.skipEntire_ = hasUpdates ? true : syncFlags.skipEntire_;

    amd::ScopedLock lock(owner()->lockMemoryOps());
    for (auto& sub : owner()->subBuffers()) {
      //! \note Don't allow subbuffer's allocation in the worker thread.
      //! It may cause a system lock, because possible resource
      //! destruction, heap reallocation or subbuffer allocation
      static const bool AllocSubBuffer = false;
      device::Memory* devSub = sub->getDeviceMemory(dev(), AllocSubBuffer);
      if (NULL == devSub) {
        continue;
      }
      reinterpret_cast<gpu::Memory*>(devSub)->syncHostFromCache(viewFlags);
    }
  }

  // Make sure we didn't have a NOP,
  // because CPU was the last writer
  if (NULL != owner()->getLastWriter()) {
    // Mark parent as up to date, set our version accordingly
    version_ = owner()->getVersion();
  }

  // Exit if sync is a NOP or sync can be skipped
  if (!hasUpdates || syncFlags.skipEntire_) {
    return;
  }

  static const bool Entire = true;
  amd::Coord3D origin(0, 0, 0);
  bool synced = false;

  // If backing store was pinned then make a transfer
  if (flags_ & PinnedMemoryAlloced) {
    if (cal()->buffer_) {
      amd::Coord3D region(owner()->getSize());
      synced = dev().xferMgr().copyBuffer(*this, *pinnedMemory_, origin, origin, region, Entire);
    } else {
      amd::Image& image = *static_cast<amd::Image*>(owner());
      synced = dev().xferMgr().copyImageToBuffer(*this, *pinnedMemory_, origin, origin,
                                                 image.getRegion(), Entire, image.getRowPitch(),
                                                 image.getSlicePitch());
    }
  }

  // Just do a basic host read
  if (!synced) {
    if (cal()->buffer_) {
      amd::Coord3D region(owner()->getSize());
      synced = dev().xferMgr().readBuffer(*this, owner()->getHostMem(), origin, region, Entire);
    } else {
      amd::Image& image = *static_cast<amd::Image*>(owner());
      synced = dev().xferMgr().readImage(*this, owner()->getHostMem(), origin, image.getRegion(),
                                         image.getRowPitch(), image.getSlicePitch(), Entire);
    }
  }

  // Should never fail
  assert(synced && "Memory synchronization failed!");
}
2014-07-04 16:17:05 -04:00
//! Creates a view of this memory object for the subbuffer \a subBufferOwner.
//! The view covers [origin, origin + size) of the subbuffer owner, and the
//! owner's host pointer is set to the matching offset in this object's host memory.
//! \return the new view, or NULL if the view resource can't be created
gpu::Memory* Memory::createBufferView(amd::Memory& subBufferOwner) {
  const size_t viewOffset = subBufferOwner.getOrigin();
  const size_t viewSize = subBufferOwner.getSize();

  // Create a memory object
  gpu::Memory* view = new gpu::Memory(dev(), subBufferOwner, viewSize);
  if (NULL == view) {
    return NULL;
  }

  Resource::ViewParams params;
  params.owner_ = &subBufferOwner;
  params.gpu_ = static_cast<VirtualGPU*>(subBufferOwner.getVirtualDevice());
  params.offset_ = viewOffset;
  params.size_ = viewSize;
  params.resource_ = this;
  params.memory_ = this;

  if (!view->create(Resource::View, &params)) {
    delete view;
    return NULL;
  }

  // Explicitly set the host memory location,
  // because the parent location could change after reallocation
  if (NULL == owner()->getHostMem()) {
    subBufferOwner.setHostMem(NULL);
  } else {
    subBufferOwner.setHostMem(reinterpret_cast<char*>(owner()->getHostMem()) + viewOffset);
  }

  return view;
}
void Memory::decIndMapCount() {
// Map/unmap must be serialized
amd::ScopedLock lock(owner()->lockMemoryOps());
2014-07-04 16:17:05 -04:00
if (indirectMapCount_ == 0) {
if (!mipMapped()) {
LogError("decIndMapCount() called when indirectMapCount_ already zero");
2014-07-04 16:17:05 -04:00
}
return;
}
2014-07-04 16:17:05 -04:00
// Decrement the counter and release indirect map if it's the last op
if (--indirectMapCount_ == 0) {
if (NULL != mapMemory_) {
amd::Memory* memory = mapMemory_;
amd::Memory* empty = NULL;
2014-07-04 16:17:05 -04:00
// Get GPU memory
Memory* gpuMemory = mapMemory();
gpuMemory->unmap(NULL);
2014-07-04 16:17:05 -04:00
if (!dev().addMapTarget(memory)) {
memory->release();
}
2014-07-04 16:17:05 -04:00
// Map/unamp is serialized for the same memory object,
// so it's safe to clear the pointer
assert((mapMemory_ != NULL) && "Mapped buffer should be valid");
mapMemory_ = NULL;
2014-07-04 16:17:05 -04:00
}
}
2014-07-04 16:17:05 -04:00
}
// Note - must be called by the device under the async lock, so no spinning
// or long pauses allowed in this function.
//
//! Returns a host address that can be used as the map target for the range
//! starting at origin[0] with length region[0].
//!
//! Three sources are tried in order: the owner's host memory (direct map),
//! this resource itself (persistent direct map) and finally a separate
//! map-target buffer shared by all outstanding indirect maps.
//! Every successful call increments the indirect map counter; every failure
//! after the increment restores it, so a failed map never needs an unmap.
//!
//! \param origin      map origin; only origin[0] is used
//! \param region      map region; only region[0] is used (range check)
//! \param mapFlags    not used by this implementation
//! \param rowPitch    not used by this implementation
//! \param slicePitch  not used by this implementation
//! \return base address + origin[0], or NULL on failure
void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& region, uint mapFlags,
                             size_t* rowPitch, size_t* slicePitch) {
  // Sanity checks
  assert(owner() != NULL);

  // Map/unmap must be serialized
  amd::ScopedLock lock(owner()->lockMemoryOps());

  address mapAddress = NULL;
  size_t offset = origin[0];

  // For SVM implementation, we cannot use cached map. if svm space, use the svm host pointer
  void* initHostPtr = owner()->getSvmPtr();
  if (NULL != initHostPtr) {
    owner()->commitSvmMemory();
  }

  if (owner()->numDevices() > 1) {
    if ((NULL == initHostPtr) && (owner()->getHostMem() == NULL)) {
      static const bool forceAllocHostMem = true;
      if (!owner()->allocHostMemory(NULL, forceAllocHostMem)) {
        return NULL;
      }
      //! \note Ignore pinning result
      // bool ok = pinSystemMemory(owner()->getHostMem(), owner()->getSize());
    }
  }

  incIndMapCount();

  // If host memory exists, use it
  if ((owner()->getHostMem() != NULL) && isDirectMap()) {
    mapAddress = reinterpret_cast<address>(owner()->getHostMem());
  }
  // If resource is a persistent allocation, we can use it directly
  else if (isPersistentDirectMap()) {
    if (NULL == map(NULL)) {
      LogError("Could not map target persistent resource");
      decIndMapCount();
      return NULL;
    }
    mapAddress = data();
  }
  // Otherwise we can use a remote resource:
  else {
    // Are we in range?
    size_t elementCount = cal()->width_;
    size_t rSize = elementCount * elementSize();
    // The second test is written as a subtraction so that a huge region
    // can't wrap the sum around; offset < rSize is guaranteed at that point
    if ((offset >= rSize) || (region[0] > (rSize - offset))) {
      LogWarning("Memory::allocMapTarget() - offset/size out of bounds");
      // Undo the increment above, as every other failure path does
      decIndMapCount();
      return NULL;
    }

    // Allocate a map resource if there isn't any yet
    if (indirectMapCount_ == 1) {
      const static bool SysMem = true;
      bool failed = false;
      amd::Memory* memory = NULL;
      // Search for a possible indirect resource
      cl_mem_flags flag = 0;
      bool canBeCached = true;
      if (NULL != initHostPtr) {
        // make sure the host memory is committed already, or we have a big problem.
        assert(owner()->isSvmPtrCommited() && "The host svm memory not committed yet!");
        flag = CL_MEM_USE_HOST_PTR;
        canBeCached = false;
      } else {
        memory = dev().findMapTarget(owner()->getSize());
      }

      if (memory == NULL) {
        // for map target of svm buffer , we need use svm host ptr
        memory = new (dev().context()) amd::Buffer(dev().context(), flag, owner()->getSize());
        do {
          if ((memory == NULL) || !memory->create(initHostPtr, SysMem)) {
            failed = true;
            break;
          }
          memory->setCacheStatus(canBeCached);

          Memory* gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));

          // Create, Map and get the base pointer for the resource
          if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
            failed = true;
            break;
          }
        } while (false);
      }

      if (failed) {
        if (memory != NULL) {
          memory->release();
        }
        decIndMapCount();
        LogError("Could not map target resource");
        return NULL;
      }

      // Map/unmap is serialized for the same memory object,
      // so it's safe to assign the new pointer
      assert((mapMemory_ == NULL) && "Mapped buffer can't be valid");
      mapMemory_ = memory;
    } else {
      // Did the map resource allocation fail?
      if (mapMemory_ == NULL) {
        LogError("Could not map target resource");
        // This map failed, so it must not stay counted
        decIndMapCount();
        return NULL;
      }
    }
    mapAddress = mapMemory()->data();
  }

  return mapAddress + offset;
}
2014-07-04 16:17:05 -04:00
bool Memory::pinSystemMemory(void* hostPtr, size_t size) {
bool result = false;
2014-07-04 16:17:05 -04:00
// If memory has a direct access already, then skip the host memory pinning
if (isHostMemDirectAccess()) {
return true;
}
2014-07-04 16:17:05 -04:00
// Check if memory is pinned already
if (flags_ & PinnedMemoryAlloced) {
return true;
}
// Allocate memory for the pinned object
pinnedMemory_ = new Memory(dev(), size);
if (pinnedMemory_ == NULL) {
return false;
}
// Check if it's a view
if (flags_ & SubMemoryObject) {
const gpu::Memory* gpuMemory;
if (owner() != NULL) {
gpuMemory = dev().getGpuMemory(owner()->parent());
} else {
gpuMemory = parent();
2014-07-04 16:17:05 -04:00
}
if (gpuMemory->flags_ & PinnedMemoryAlloced) {
Resource::ViewParams params;
params.owner_ = owner();
params.offset_ = owner()->getOrigin();
params.size_ = owner()->getSize();
params.resource_ = gpuMemory->pinnedMemory_;
params.memory_ = NULL;
result = pinnedMemory_->create(Resource::View, &params);
2014-07-04 16:17:05 -04:00
}
} else {
Resource::PinnedParams params;
// Fill resource creation parameters
params.owner_ = owner();
params.hostMemRef_ = owner()->getHostMemRef();
params.size_ = size;
// Create resource
result = pinnedMemory_->create(Resource::Pinned, &params);
}
if (!result) {
delete pinnedMemory_;
pinnedMemory_ = NULL;
return false;
}
2014-07-04 16:17:05 -04:00
flags_ |= PinnedMemoryAlloced;
return true;
2014-07-04 16:17:05 -04:00
}
//! Maps the resource for CPU access.
//! Memory::CpuReadOnly and Memory::CpuWriteOnly are translated to the matching
//! Resource flags; any other value maps with no flags. For a non-buffer
//! resource the row and slice pitches are returned in bytes.
//! \return the pointer returned by map()
void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, uint numLayers,
                     size_t* rowPitch, size_t* slicePitch) {
  uint mapFlags = 0;
  if (flags == Memory::CpuReadOnly) {
    mapFlags = Resource::ReadOnly;
  } else if (flags == Memory::CpuWriteOnly) {
    mapFlags = Resource::WriteOnly;
  }

  void* mapped = map(&static_cast<VirtualGPU&>(vDev), mapFlags, startLayer, numLayers);

  // Pitches are only reported for non-buffer resources
  if (!cal()->buffer_) {
    *rowPitch = cal()->pitch_ * elementSize();
    *slicePitch = cal()->slice_ * elementSize();
  }

  return mapped;
}
//! Releases a mapping created by cpuMap() on the given virtual device.
void Memory::cpuUnmap(device::VirtualDevice& vDev) {
  VirtualGPU& gpu = static_cast<VirtualGPU&>(vDev);
  unmap(&gpu);
}
2014-07-04 16:17:05 -04:00
//! Returns the device memory of the current map target on this device,
//! or NULL if there is no map target.
Memory* Memory::mapMemory() const {
  if (nullptr == mapMemory_) {
    return NULL;
  }
  return reinterpret_cast<Memory*>(mapMemory_->getDeviceMemory(dev()));
}
//! Writes the owner's cache back through host memory.
//! If the owner has no host memory yet, the SVM pointer is committed and used
//! when one exists; otherwise host memory is force-allocated. The write back
//! is skipped when no host memory could be obtained.
void Memory::mgpuCacheWriteBack() {
  // Lock memory object, so only one write back can occur
  amd::ScopedLock lock(owner()->lockMemoryOps());

  // Attempt to allocate a staging buffer if don't have any
  if (owner()->getHostMem() == NULL) {
    if (nullptr != owner()->getSvmPtr()) {
      owner()->commitSvmMemory();
      owner()->setHostMem(owner()->getSvmPtr());
    } else {
      static const bool forceAllocHostMem = true;
      owner()->allocHostMemory(nullptr, forceAllocHostMem);
    }
  }

  // Make synchronization
  if (owner()->getHostMem() != NULL) {
    //! \note Ignore pinning result
    static_cast<void>(pinSystemMemory(owner()->getHostMem(), owner()->getSize()));
    owner()->cacheWriteBack();
  }
}
//! Creates a gpu::Buffer view of this buffer for the subbuffer \a subBufferOwner,
//! covering [origin, origin + size) of the subbuffer owner.
//! \return the new view, or NULL if the view resource can't be created
Memory* Buffer::createBufferView(amd::Memory& subBufferOwner) const {
  const size_t viewOffset = subBufferOwner.getOrigin();
  const size_t viewSize = subBufferOwner.getSize();

  // Create a memory object
  gpu::Memory* view = new gpu::Buffer(dev(), subBufferOwner, viewSize);
  if (NULL == view) {
    return NULL;
  }

  // Allocate a view for this buffer object
  Resource::ViewParams params;
  params.owner_ = &subBufferOwner;
  params.offset_ = viewOffset;
  params.size_ = viewSize;
  params.resource_ = this;
  params.memory_ = this;

  const bool created = view->create(Resource::View, &params);
  if (!created) {
    delete view;
    return NULL;
  }

  return view;
}
//! Returns a host address that can be used as the map target for an image region.
//!
//! Three sources are tried in order: the owner's host memory (direct map),
//! this resource itself (persistent direct map) and finally a separate linear
//! map-target buffer. For the first two the returned address points at
//! \a origin inside the image; for the map-target buffer it points at the
//! start of the buffer and the pitches describe a tightly packed \a region.
//! Every successful call increments the indirect map counter.
//!
//! \param origin      map origin in elements
//! \param region      map region in elements
//! \param mapFlags    not used by this implementation
//! \param rowPitch    receives the row pitch in bytes; written unconditionally
//!                    on success, so it must not be NULL
//! \param slicePitch  receives the slice pitch in bytes; may be NULL
//! \return the map address, or NULL on failure
void* Image::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& region, uint mapFlags,
                            size_t* rowPitch, size_t* slicePitch) {
  // Sanity checks
  assert(owner() != NULL);

  bool useRemoteResource = true;
  size_t slicePitchTmp = 0;
  size_t height = cal()->height_;
  size_t depth = cal()->depth_;

  // Map/unmap must be serialized
  amd::ScopedLock lock(owner()->lockMemoryOps());

  address mapAddress = NULL;
  size_t offset = origin[0];

  incIndMapCount();

  // If host memory exists, use it
  if ((owner()->getHostMem() != NULL) && isDirectMap()) {
    useRemoteResource = false;
    mapAddress = reinterpret_cast<address>(owner()->getHostMem());
    amd::Image* amdImage = owner()->asImage();

    // Calculate the offset in bytes
    offset *= elementSize();

    // Update the row and slice pitches value
    *rowPitch =
        (amdImage->getRowPitch() == 0) ? (cal()->width_ * elementSize()) : amdImage->getRowPitch();
    slicePitchTmp =
        (amdImage->getSlicePitch() == 0) ? (height * (*rowPitch)) : amdImage->getSlicePitch();

    // Adjust the offset in Y and Z dimensions
    offset += origin[1] * (*rowPitch);
    offset += origin[2] * slicePitchTmp;
  }
  // If resource is a persistent allocation, we can use it directly
  //! @note Even if resource is a persistent allocation,
  //! runtime can't use it directly,
  //! because CAL volume map doesn't work properly.
  //! @todo arrays can be added for persistent lock with some CAL changes
  else if (isPersistentDirectMap()) {
    if (NULL == map(NULL)) {
      useRemoteResource = true;
      LogError("Could not map target persistent resource, try remote resource");
    } else {
      useRemoteResource = false;
      mapAddress = data();

      // Calculate the offset in bytes
      offset *= elementSize();

      // Update the row pitch value
      *rowPitch = cal()->pitch_ * elementSize();

      // Adjust the offset in Y dimension
      offset += origin[1] * (*rowPitch);
    }
  }

  // Otherwise we can use a remote resource:
  if (useRemoteResource) {
    // No byte offset is computed here: the map always starts at the
    // beginning of the indirect buffer (see the end of this branch)

    // Allocate a map resource if there isn't any yet
    if (indirectMapCount_ == 1) {
      const static bool SysMem = true;
      bool failed = false;
      amd::Memory* memory;

      // Search for a possible indirect resource
      memory = dev().findMapTarget(owner()->getSize());

      if (memory == NULL) {
        // Allocate a new buffer to use as the map target
        //! @note Allocate a 1D buffer, since CAL issues with 3D
        //! Also HW doesn't support untiled images
        memory = new (dev().context())
            amd::Buffer(dev().context(), 0, cal()->width_ * height * depth * elementSize());
        do {
          // The allocation must be checked before the object is touched
          if (memory == NULL) {
            failed = true;
            break;
          }
          memory->setVirtualDevice(owner()->getVirtualDevice());

          if (!memory->create(NULL, SysMem)) {
            failed = true;
            break;
          }

          Memory* gpuMemory = reinterpret_cast<Memory*>(memory->getDeviceMemory(dev()));

          // Create, Map and get the base pointer for the resource
          if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
            failed = true;
            break;
          }
        } while (false);
      }

      if (failed) {
        if (memory != NULL) {
          memory->release();
        }
        decIndMapCount();
        LogError("Could not map target resource");
        return NULL;
      }

      // Map/unmap is serialized for the same memory object,
      // so it's safe to assign the new pointer
      assert((mapMemory_ == NULL) && "Mapped buffer can't be valid");
      mapMemory_ = memory;
    } else {
      // Did the map resource allocation fail?
      if (mapMemory_ == NULL) {
        LogError("Could not map target resource");
        // This map failed, so it must not stay counted
        decIndMapCount();
        return NULL;
      }
    }

    mapAddress = mapMemory()->data();

    // Update the row and slice pitches value
    *rowPitch = region[0] * elementSize();
    if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
      slicePitchTmp = *rowPitch;
    } else {
      slicePitchTmp = *rowPitch * region[1];
    }

    // Use start of the indirect buffer
    offset = 0;
  }

  if (slicePitch != NULL) {
    *slicePitch = slicePitchTmp;
  }

  return mapAddress + offset;
}
} // namespace gpu