6a5cdbf60c
SWDEV-93075 - [OCL] Access violation in clCreateContext() in amdocl.dll when DX9 and DX11 devices are used. Add support for multiple external devices for context creation to make sure the devices are initialized with proper type. Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#50 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#19 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#95 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#274 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#547 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#160 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#128 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.cpp#62 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsadevice.hpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#41 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#25 edit
1316 lignes
41 KiB
C++
1316 lignes
41 KiB
C++
//! Implementation of GPU device memory management
|
|
|
|
#include "top.hpp"
|
|
#include "thread/thread.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "device/device.hpp"
|
|
#include "device/gpu/gpudevice.hpp"
|
|
#include "device/gpu/gpublit.hpp"
|
|
|
|
#ifdef _WIN32
|
|
#include <d3d10_1.h>
|
|
#include "amdocl/cl_d3d9_amd.hpp"
|
|
#include "amdocl/cl_d3d10_amd.hpp"
|
|
#include "amdocl/cl_d3d11_amd.hpp"
|
|
#endif //_WIN32
|
|
#include "amdocl/cl_gl_amd.hpp"
|
|
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
|
|
//! Turn this on to enable sanity checks before and after every heap operation.
|
|
#if DEBUG
|
|
#define EXTRA_HEAP_CHECKS 1
|
|
#endif // DEBUG
|
|
|
|
namespace gpu {
|
|
|
|
//! Constructs a GPU memory object bound to an amd::Memory owner.
//! The resource width is the byte size divided by the heap element size
//! (plain integer division, no alignment is applied here).
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t size)
    : device::Memory(owner)
    , Resource(gpuDev, size / Device::Heap::ElementSize, Device::Heap::ElementType)
{
    init();

    // An owner with a parent is flagged as a sub-memory object
    if (NULL != owner.parent()) {
        flags_ |= SubMemoryObject;
    }
}
|
|
|
|
//! Constructs a GPU memory object without an amd::Memory owner.
//! The byte size is aligned up to the heap element size before it is
//! converted into the element count of the resource.
Memory::Memory(const Device& gpuDev, size_t size)
    : device::Memory(size)
    , Resource(gpuDev,
               amd::alignUp(size, Device::Heap::ElementSize) / Device::Heap::ElementSize,
               Device::Heap::ElementType)
{
    init();
}
|
|
|
|
//! Constructs a GPU memory object with an explicit width (in elements) and
//! surface format, bound to an amd::Memory owner.
Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, cmSurfFmt format)
    : device::Memory(owner)
    , Resource(gpuDev, width, format)
{
    init();

    // An owner with a parent is flagged as a sub-memory object
    if (NULL != owner.parent()) {
        flags_ |= SubMemoryObject;
    }
}
|
|
|
|
//! Constructs an owner-less GPU memory object with an explicit width
//! (in elements) and surface format; \a size is passed to device::Memory.
Memory::Memory(const Device& gpuDev, size_t size, size_t width, cmSurfFmt format)
    : device::Memory(size)
    , Resource(gpuDev, width, format)
{
    init();
}
|
|
|
|
//! Constructs an image-like GPU memory object bound to an amd::Memory owner.
//! All dimension/format arguments are forwarded unchanged to Resource.
Memory::Memory(
    const Device&       gpuDev,
    amd::Memory&        owner,
    size_t              width,
    size_t              height,
    size_t              depth,
    cmSurfFmt           format,
    gslChannelOrder     chOrder,
    cl_mem_object_type  imageType,
    uint                mipLevels)
    : device::Memory(owner)
    , Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels)
{
    init();

    // An owner with a parent is flagged as a sub-memory object
    if (NULL != owner.parent()) {
        flags_ |= SubMemoryObject;
    }
}
|
|
|
|
//! Constructs an owner-less image-like GPU memory object.
//! \a size is passed to device::Memory; the remaining arguments are
//! forwarded unchanged to Resource.
Memory::Memory(
    const Device&       gpuDev,
    size_t              size,
    size_t              width,
    size_t              height,
    size_t              depth,
    cmSurfFmt           format,
    gslChannelOrder     chOrder,
    cl_mem_object_type  imageType,
    uint                mipLevels)
    : device::Memory(size)
    , Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels)
{
    init();
}
|
|
|
|
void
|
|
Memory::init()
|
|
{
|
|
indirectMapCount_ = 0;
|
|
interopType_ = InteropNone;
|
|
interopMemory_ = NULL;
|
|
pinnedMemory_ = NULL;
|
|
parent_ = NULL;
|
|
}
|
|
|
|
#ifdef _WIN32
|
|
//! Returns the shared handle reported by the IDXGIResource interface of
//! \a pIface. A zero handle is returned if the interface can't be queried
//! or if GetSharedHandle() doesn't report S_OK.
static HANDLE
getSharedHandle(IUnknown* pIface)
{
    // Sanity checks
    assert(pIface != NULL);

    IDXGIResource* dxgiResource = NULL;
    const HRESULT queryStatus = pIface->QueryInterface(
        __uuidof(IDXGIResource), (void**) &dxgiResource);
    if ((queryStatus != S_OK) || (dxgiResource == NULL)) {
        return (HANDLE) 0;
    }

    // The interface reference is only needed for this single query
    HANDLE sharedHandle;
    const HRESULT handleStatus = dxgiResource->GetSharedHandle(&sharedHandle);
    dxgiResource->Release();

    return (handleStatus == S_OK) ? sharedHandle : (HANDLE) 0;
}
|
|
#endif //_WIN32
|
|
|
|
bool
|
|
Memory::create(
|
|
Resource::MemoryType memType,
|
|
Resource::CreateParams* params)
|
|
{
|
|
bool result;
|
|
|
|
// Reset the flag in case we reallocate the heap in local/remote
|
|
flags_ &= ~HostMemoryDirectAccess;
|
|
|
|
// Create a resource in CAL
|
|
result = Resource::create(memType, params);
|
|
|
|
// Check if CAL created a resource
|
|
if (result) {
|
|
switch (memoryType()) {
|
|
case Resource::Persistent:
|
|
case Resource::Pinned:
|
|
case Resource::ExternalPhysical:
|
|
// Marks memory object for direct GPU access to the host memory
|
|
flags_ |= HostMemoryDirectAccess;
|
|
break;
|
|
case Resource::Remote:
|
|
case Resource::RemoteUSWC:
|
|
if (!cal()->tiled_) {
|
|
// Marks memory object for direct GPU access to the host memory
|
|
flags_ |= HostMemoryDirectAccess;
|
|
}
|
|
break;
|
|
case Resource::View: {
|
|
Resource::ViewParams* view =
|
|
reinterpret_cast<Resource::ViewParams*>(params);
|
|
if (view->resource_->memoryType() == Resource::Persistent) {
|
|
flags_ |= HostMemoryDirectAccess;
|
|
}
|
|
// Check if parent was allocated in system memory
|
|
if ((view->resource_->memoryType() == Resource::Pinned) ||
|
|
(((view->resource_->memoryType() == Resource::Remote) ||
|
|
(view->resource_->memoryType() == Resource::RemoteUSWC)) &&
|
|
// @todo Enable unconditional optimization for remote memory
|
|
// Check for external allocation, to avoid the optimization
|
|
// for non-VM (double copy) mode
|
|
(owner() != NULL) &&
|
|
((owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) ||
|
|
dev().settings().remoteAlloc_))) {
|
|
// Marks memory object for direct GPU access to the host memory
|
|
flags_ |= HostMemoryDirectAccess;
|
|
}
|
|
if ((view->owner_ != NULL) && (view->owner_->parent() != NULL)) {
|
|
parent_ = reinterpret_cast<const Memory*>(view->memory_);
|
|
flags_ |= SubMemoryObject;
|
|
}
|
|
break;
|
|
}
|
|
case Resource::ImageView: {
|
|
Resource::ImageViewParams* view =
|
|
reinterpret_cast<Resource::ImageViewParams*>(params);
|
|
parent_ = reinterpret_cast<const Memory*>(view->memory_);
|
|
flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
|
|
break;
|
|
}
|
|
case Resource::ImageBuffer: {
|
|
Resource::ImageBufferParams* view =
|
|
reinterpret_cast<Resource::ImageBufferParams*>(params);
|
|
parent_ = reinterpret_cast<const Memory*>(view->memory_);
|
|
flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
//! Dispatches a GL resource operation: GLDecompressResource calls
//! gslGLAcquire() and GLInvalidateFBO calls gslGLRelease().
//! Any other operation asserts and returns false.
bool Memory::processGLResource(GLResourceOP operation)
{
    if (operation == GLDecompressResource) {
        return gslGLAcquire();
    }
    if (operation == GLInvalidateFBO) {
        return gslGLRelease();
    }

    assert(false && "unknown GLResourceOP");
    return false;
}
|
|
|
|
|
|
|
|
bool
|
|
Memory::createInterop(InteropType type)
|
|
{
|
|
Resource::MemoryType memType = Resource::Empty;
|
|
Resource::OGLInteropParams oglRes;
|
|
#ifdef _WIN32
|
|
Resource::D3DInteropParams d3dRes;
|
|
#endif //_WIN32
|
|
|
|
// Only external objects support interop
|
|
assert(owner() != NULL);
|
|
|
|
Resource::CreateParams* createParams = NULL;
|
|
|
|
amd::InteropObject* interop = owner()->getInteropObj();
|
|
assert((interop != NULL) && "An invalid interop object is impossible!");
|
|
|
|
amd::GLObject* glObject = interop->asGLObject();
|
|
|
|
#ifdef _WIN32
|
|
amd::D3D10Object* d3d10Object = interop->asD3D10Object();
|
|
amd::D3D11Object* d3d11Object = interop->asD3D11Object();
|
|
amd::D3D9Object* d3d9Object = interop->asD3D9Object();
|
|
|
|
if (d3d10Object != NULL) {
|
|
createParams = &d3dRes;
|
|
|
|
d3dRes.owner_ = owner();
|
|
|
|
const amd::D3D10ObjDesc_t* objDesc = d3d10Object->getObjDesc();
|
|
|
|
memType = Resource::D3D10Interop;
|
|
|
|
// Get shared handle
|
|
if ((d3dRes.handle_ =
|
|
getSharedHandle(d3d10Object->getD3D10Resource()))) {
|
|
d3dRes.iDirect3D_ = static_cast<void*>
|
|
(d3d10Object->getD3D10Resource());
|
|
d3dRes.type_ = Resource::InteropTypeless;
|
|
}
|
|
|
|
d3dRes.misc = 0;
|
|
// Find D3D10 object type
|
|
switch (objDesc->objDim_) {
|
|
case D3D10_RESOURCE_DIMENSION_BUFFER:
|
|
d3dRes.type_ = Resource::InteropVertexBuffer;
|
|
break;
|
|
case D3D10_RESOURCE_DIMENSION_TEXTURE1D:
|
|
case D3D10_RESOURCE_DIMENSION_TEXTURE2D:
|
|
case D3D10_RESOURCE_DIMENSION_TEXTURE3D:
|
|
d3dRes.type_ = Resource::InteropTexture;
|
|
if (objDesc->mipLevels_ > 1) {
|
|
d3dRes.type_ = Resource::InteropTextureViewLevel;
|
|
|
|
if (objDesc->arraySize_ > 1) {
|
|
d3dRes.layer_ = d3d10Object->getSubresource() /
|
|
objDesc->mipLevels_;
|
|
d3dRes.mipLevel_ = d3d10Object->getSubresource() %
|
|
objDesc->mipLevels_;
|
|
}
|
|
else {
|
|
d3dRes.layer_ = 0;
|
|
d3dRes.mipLevel_ = d3d10Object->getSubresource();
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
return false;
|
|
break;
|
|
}
|
|
}
|
|
else if (d3d11Object != NULL) {
|
|
createParams = &d3dRes;
|
|
|
|
d3dRes.owner_ = owner();
|
|
|
|
const amd::D3D11ObjDesc_t* objDesc = d3d11Object->getObjDesc();
|
|
|
|
memType = Resource::D3D11Interop;
|
|
|
|
// Get shared handle
|
|
if ((d3dRes.handle_ =
|
|
getSharedHandle(d3d11Object->getD3D11Resource()))) {
|
|
d3dRes.iDirect3D_ = static_cast<void*>
|
|
(d3d11Object->getD3D11Resource());
|
|
d3dRes.type_ = Resource::InteropTypeless;
|
|
}
|
|
|
|
d3dRes.misc = 0;
|
|
// Find D3D11 object type
|
|
switch (objDesc->objDim_) {
|
|
case D3D11_RESOURCE_DIMENSION_BUFFER:
|
|
d3dRes.type_ = Resource::InteropVertexBuffer;
|
|
break;
|
|
case D3D11_RESOURCE_DIMENSION_TEXTURE1D:
|
|
case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
|
|
case D3D11_RESOURCE_DIMENSION_TEXTURE3D:
|
|
d3dRes.type_ = Resource::InteropTexture;
|
|
d3dRes.layer_= d3d11Object->getPlane();
|
|
d3dRes.misc = d3d11Object->getMiscFlag();
|
|
if (objDesc->mipLevels_ > 1) {
|
|
d3dRes.type_ = Resource::InteropTextureViewLevel;
|
|
|
|
if (objDesc->arraySize_ > 1) {
|
|
d3dRes.layer_ = d3d11Object->getSubresource() /
|
|
objDesc->mipLevels_;
|
|
d3dRes.mipLevel_ = d3d11Object->getSubresource() %
|
|
objDesc->mipLevels_;
|
|
}
|
|
else {
|
|
d3dRes.layer_ = 0;
|
|
d3dRes.mipLevel_ = d3d11Object->getSubresource();
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
return false;
|
|
break;
|
|
}
|
|
}
|
|
else if (d3d9Object != NULL) {
|
|
createParams = &d3dRes;
|
|
|
|
d3dRes.owner_ = owner();
|
|
|
|
const amd::D3D9ObjDesc_t* objDesc = d3d9Object->getObjDesc();
|
|
|
|
memType = Resource::D3D9Interop;
|
|
|
|
// Get shared handle
|
|
if ((d3dRes.handle_ = d3d9Object->getD3D9SharedHandle())) {
|
|
d3dRes.iDirect3D_ = static_cast<void*>
|
|
(d3d9Object->getD3D9Resource());
|
|
d3dRes.type_ = Resource::InteropSurface;
|
|
d3dRes.mipLevel_ = 0;
|
|
d3dRes.layer_ = d3d9Object->getPlane();
|
|
d3dRes.misc = d3d9Object->getMiscFlag();
|
|
}
|
|
}
|
|
else
|
|
#endif //_WIN32
|
|
if (glObject != NULL) {
|
|
createParams = &oglRes;
|
|
|
|
oglRes.owner_ = owner();
|
|
|
|
memType = Resource::OGLInterop;
|
|
|
|
// Fill the interop creation parameters
|
|
oglRes.handle_ = static_cast<CALuint>(glObject->getGLName());
|
|
|
|
// Find OGL object type
|
|
switch (glObject->getCLGLObjectType()) {
|
|
case CL_GL_OBJECT_BUFFER:
|
|
oglRes.type_ = Resource::InteropVertexBuffer;
|
|
break;
|
|
case CL_GL_OBJECT_TEXTURE_BUFFER:
|
|
case CL_GL_OBJECT_TEXTURE1D:
|
|
case CL_GL_OBJECT_TEXTURE1D_ARRAY:
|
|
case CL_GL_OBJECT_TEXTURE2D:
|
|
case CL_GL_OBJECT_TEXTURE2D_ARRAY:
|
|
case CL_GL_OBJECT_TEXTURE3D:
|
|
oglRes.type_ = Resource::InteropTexture;
|
|
if (GL_TEXTURE_CUBE_MAP == glObject->getGLTarget()) {
|
|
switch (glObject->getCubemapFace()) {
|
|
case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
|
|
case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
|
|
case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
|
|
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
|
|
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
|
|
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
|
|
oglRes.type_ = Resource::InteropTextureViewCube;
|
|
oglRes.layer_ =
|
|
glObject->getCubemapFace() - GL_TEXTURE_CUBE_MAP_POSITIVE_X;
|
|
oglRes.mipLevel_ = glObject->getGLMipLevel();
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else if (glObject->getGLMipLevel() != 0) {
|
|
oglRes.type_ = Resource::InteropTextureViewLevel;
|
|
oglRes.layer_ = 0;
|
|
oglRes.mipLevel_ = glObject->getGLMipLevel();
|
|
}
|
|
break;
|
|
case CL_GL_OBJECT_RENDERBUFFER:
|
|
oglRes.type_ = Resource::InteropRenderBuffer;
|
|
break;
|
|
default:
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
oglRes.glPlatformContext_ = owner()->getContext().info().hCtx_;
|
|
oglRes.glDeviceContext_ = owner()->getContext().info().hDev_[amd::Context::DeviceFlagIdx::GLDeviceKhrIdx];
|
|
// We dont pass any flags here for the GL Resource.
|
|
oglRes.flags_ = 0;
|
|
}
|
|
else {
|
|
return false;
|
|
}
|
|
|
|
// Get the interop settings
|
|
if (type == InteropDirectAccess) {
|
|
// Create memory object
|
|
if (!create(memType, createParams)) {
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
// Allocate Resource object for interop as buffer
|
|
interopMemory_ = new Memory(dev(), size(),
|
|
amd::alignUp(size(), Device::Heap::ElementSize) / Device::Heap::ElementSize,
|
|
Device::Heap::ElementType);
|
|
|
|
// Create the interop object in CAL
|
|
if (NULL == interopMemory_ || !interopMemory_->create(memType, createParams)) {
|
|
delete interopMemory_;
|
|
interopMemory_ = NULL;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
setInteropType(type);
|
|
|
|
return true;
|
|
}
|
|
|
|
//! Releases everything attached to this memory object: the VA cache entry,
//! the interop companion, the indirect map target, the pinned memory and,
//! where it applies, the direct host mapping.
Memory::~Memory()
{
    // Clean VA cache
    dev().removeVACache(this);

    delete interopMemory_;

    // Release associated map target, if any
    if (mapMemory_ != NULL) {
        mapMemory()->unmap(NULL);
        mapMemory_->release();
    }

    // Destroy pinned memory
    if (flags_ & PinnedMemoryAlloced) {
        delete pinnedMemory_;
    }

    // Unmap memory if direct access was requested. Sub-memory objects and
    // external physical memory are excluded
    const bool unmapDirectAccess =
        (owner() != NULL) &&
        isHostMemDirectAccess() &&
        !(flags_ & SubMemoryObject) &&
        (memoryType() != Resource::ExternalPhysical);
    if (unmapDirectAccess) {
        unmap(NULL);
    }
}
|
|
|
|
void
|
|
Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags)
|
|
{
|
|
// If the last writer was another GPU, then make a writeback
|
|
if (!isHostMemDirectAccess() &&
|
|
(owner()->getLastWriter() != NULL) &&
|
|
(&dev() != owner()->getLastWriter())) {
|
|
mgpuCacheWriteBack();
|
|
}
|
|
|
|
// If host memory doesn't have direct access, then we have to synchronize
|
|
if (!isHostMemDirectAccess() && (NULL != owner()->getHostMem())) {
|
|
bool hasUpdates = true;
|
|
|
|
// Make sure the parent of subbuffer is up to date
|
|
if (!syncFlags.skipParent_ && (flags_ & SubMemoryObject)) {
|
|
gpu::Memory* gpuMemory = dev().getGpuMemory(owner()->parent());
|
|
|
|
//! \note: Skipping the sync for a view doesn't reflect the parent settings,
|
|
//! since a view is a small portion of parent
|
|
device::Memory::SyncFlags syncFlagsTmp;
|
|
|
|
// Sync parent from a view, so views have to be skipped
|
|
syncFlagsTmp.skipViews_ = true;
|
|
|
|
// Make sure the parent sync is an unique operation.
|
|
// If the app uses multiple subbuffers from multiple queues,
|
|
// then the parent sync can be called from multiple threads
|
|
amd::ScopedLock lock(owner()->parent()->lockMemoryOps());
|
|
gpuMemory->syncCacheFromHost(gpu, syncFlagsTmp);
|
|
//! \note Don't do early exit here, since we still have to sync
|
|
//! this view, if the parent sync operation was a NOP.
|
|
//! If parent was synchronized, then this view sync will be a NOP
|
|
}
|
|
|
|
// Is this a NOP?
|
|
if ((version_ == owner()->getVersion()) ||
|
|
(&dev() == owner()->getLastWriter())) {
|
|
hasUpdates = false;
|
|
}
|
|
|
|
// Update all available views, since we sync the parent
|
|
if ((owner()->subBuffers().size() != 0) &&
|
|
(hasUpdates || !syncFlags.skipViews_)) {
|
|
device::Memory::SyncFlags syncFlagsTmp;
|
|
|
|
// Sync views from parent, so parent has to be skipped
|
|
syncFlagsTmp.skipParent_ = true;
|
|
|
|
if (hasUpdates) {
|
|
// Parent will be synced so update all views with a skip
|
|
syncFlagsTmp.skipEntire_ = true;
|
|
}
|
|
else {
|
|
// Passthrough the skip entire flag to the views, since
|
|
// any view is a submemory of the parent
|
|
syncFlagsTmp.skipEntire_ = syncFlags.skipEntire_;
|
|
}
|
|
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
for (auto& sub : owner()->subBuffers()) {
|
|
//! \note Don't allow subbuffer's allocation in the worker thread.
|
|
//! It may cause a system lock, because possible resource
|
|
//! destruction, heap reallocation or subbuffer allocation
|
|
static const bool AllocSubBuffer = false;
|
|
device::Memory* devSub =
|
|
sub->getDeviceMemory(dev(), AllocSubBuffer);
|
|
if (NULL != devSub) {
|
|
gpu::Memory* gpuSub = reinterpret_cast<gpu::Memory*>(devSub);
|
|
gpuSub->syncCacheFromHost(gpu, syncFlagsTmp);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make sure we didn't have a NOP,
|
|
// because this GPU device was the last writer
|
|
if (&dev() != owner()->getLastWriter()) {
|
|
// Update the latest version
|
|
version_ = owner()->getVersion();
|
|
}
|
|
|
|
// Exit if sync is a NOP or sync can be skipped
|
|
if (!hasUpdates || syncFlags.skipEntire_) {
|
|
return;
|
|
}
|
|
|
|
bool result = false;
|
|
static const bool Entire = true;
|
|
amd::Coord3D origin(0, 0, 0);
|
|
|
|
// If host memory was pinned then make a transfer
|
|
if (flags_ & PinnedMemoryAlloced) {
|
|
if (cal()->buffer_) {
|
|
amd::Coord3D region(owner()->getSize());
|
|
result = gpu.blitMgr().copyBuffer(*pinnedMemory_,
|
|
*this, origin, origin, region, Entire);
|
|
}
|
|
else {
|
|
amd::Image& image = *static_cast<amd::Image*>(owner());
|
|
result = gpu.blitMgr().copyBufferToImage(*pinnedMemory_,
|
|
*this, origin, origin, image.getRegion(), Entire,
|
|
image.getRowPitch(), image.getSlicePitch());
|
|
}
|
|
}
|
|
|
|
if (!result) {
|
|
if (cal()->buffer_) {
|
|
amd::Coord3D region(owner()->getSize());
|
|
result = gpu.blitMgr().writeBuffer(owner()->getHostMem(),
|
|
*this, origin, region, Entire);
|
|
}
|
|
else {
|
|
amd::Image& image = *static_cast<amd::Image*>(owner());
|
|
result = gpu.blitMgr().writeImage(owner()->getHostMem(),
|
|
*this, origin, image.getRegion(),
|
|
image.getRowPitch(), image.getSlicePitch(), Entire);
|
|
}
|
|
}
|
|
|
|
//!@todo A wait isn't really necessary. However
|
|
//! Linux no-VM may have extra random failures.
|
|
wait(gpu);
|
|
|
|
// Should never fail
|
|
assert(result && "Memory synchronization failed!");
|
|
}
|
|
}
|
|
|
|
void
|
|
Memory::syncHostFromCache(device::Memory::SyncFlags syncFlags)
|
|
{
|
|
// Sanity checks
|
|
assert(owner() != NULL);
|
|
|
|
// If host memory doesn't have direct access, then we have to synchronize
|
|
if (!isHostMemDirectAccess()) {
|
|
bool hasUpdates = true;
|
|
|
|
// Make sure the parent of subbuffer is up to date
|
|
if (!syncFlags.skipParent_ && (flags_ & SubMemoryObject)) {
|
|
device::Memory* m = owner()->parent()->getDeviceMemory(dev());
|
|
|
|
//! \note: Skipping the sync for a view doesn't reflect the parent settings,
|
|
//! since a view is a small portion of parent
|
|
device::Memory::SyncFlags syncFlagsTmp;
|
|
|
|
// Sync parent from a view, so views have to be skipped
|
|
syncFlagsTmp.skipViews_ = true;
|
|
|
|
// Make sure the parent sync is an unique operation.
|
|
// If the app uses multiple subbuffers from multiple queues,
|
|
// then the parent sync can be called from multiple threads
|
|
amd::ScopedLock lock(owner()->parent()->lockMemoryOps());
|
|
m->syncHostFromCache(syncFlagsTmp);
|
|
//! \note Don't do early exit here, since we still have to sync
|
|
//! this view, if the parent sync operation was a NOP.
|
|
//! If parent was synchronized, then this view sync will be a NOP
|
|
}
|
|
|
|
// Is this a NOP?
|
|
if ((NULL == owner()->getLastWriter()) ||
|
|
(version_ == owner()->getVersion())) {
|
|
hasUpdates = false;
|
|
}
|
|
|
|
// Update all available views, since we sync the parent
|
|
if ((owner()->subBuffers().size() != 0) &&
|
|
(hasUpdates || !syncFlags.skipViews_)) {
|
|
device::Memory::SyncFlags syncFlagsTmp;
|
|
|
|
// Sync views from parent, so parent has to be skipped
|
|
syncFlagsTmp.skipParent_ = true;
|
|
|
|
if (hasUpdates) {
|
|
// Parent will be synced so update all views with a skip
|
|
syncFlagsTmp.skipEntire_ = true;
|
|
}
|
|
else {
|
|
// Passthrough the skip entire flag to the views, since
|
|
// any view is a submemory of the parent
|
|
syncFlagsTmp.skipEntire_ = syncFlags.skipEntire_;
|
|
}
|
|
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
for (auto& sub : owner()->subBuffers()) {
|
|
//! \note Don't allow subbuffer's allocation in the worker thread.
|
|
//! It may cause a system lock, because possible resource
|
|
//! destruction, heap reallocation or subbuffer allocation
|
|
static const bool AllocSubBuffer = false;
|
|
device::Memory* devSub =
|
|
sub->getDeviceMemory(dev(), AllocSubBuffer);
|
|
if (NULL != devSub) {
|
|
gpu::Memory* gpuSub = reinterpret_cast<gpu::Memory*>(devSub);
|
|
gpuSub->syncHostFromCache(syncFlagsTmp);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make sure we didn't have a NOP,
|
|
// because CPU was the last writer
|
|
if (NULL != owner()->getLastWriter()) {
|
|
// Mark parent as up to date, set our version accordingly
|
|
version_ = owner()->getVersion();
|
|
}
|
|
|
|
// Exit if sync is a NOP or sync can be skipped
|
|
if (!hasUpdates || syncFlags.skipEntire_) {
|
|
return;
|
|
}
|
|
|
|
bool result = false;
|
|
static const bool Entire = true;
|
|
amd::Coord3D origin(0, 0, 0);
|
|
|
|
// If backing store was pinned then make a transfer
|
|
if (flags_ & PinnedMemoryAlloced) {
|
|
if (cal()->buffer_) {
|
|
amd::Coord3D region(owner()->getSize());
|
|
result = dev().xferMgr().copyBuffer(*this,
|
|
*pinnedMemory_, origin, origin, region, Entire);
|
|
}
|
|
else {
|
|
amd::Image& image = *static_cast<amd::Image*>(owner());
|
|
result = dev().xferMgr().copyImageToBuffer(*this,
|
|
*pinnedMemory_, origin, origin, image.getRegion(), Entire,
|
|
image.getRowPitch(), image.getSlicePitch());
|
|
}
|
|
}
|
|
|
|
// Just do a basic host read
|
|
if (!result) {
|
|
if (cal()->buffer_) {
|
|
amd::Coord3D region(owner()->getSize());
|
|
result = dev().xferMgr().readBuffer(*this,
|
|
owner()->getHostMem(), origin, region, Entire);
|
|
}
|
|
else {
|
|
amd::Image& image = *static_cast<amd::Image*>(owner());
|
|
result = dev().xferMgr().readImage(*this,
|
|
owner()->getHostMem(), origin, image.getRegion(),
|
|
image.getRowPitch(), image.getSlicePitch(), Entire);
|
|
}
|
|
}
|
|
|
|
// Should never fail
|
|
assert(result && "Memory synchronization failed!");
|
|
}
|
|
}
|
|
|
|
//! Creates a view of this memory object for a sub-buffer.
//! The view covers [origin, origin + size) of \a subBufferOwner and the host
//! pointer of the sub-buffer is updated to the matching location in the host
//! memory of this object's owner (or cleared if there is no host memory).
//! \return the new view, or NULL if the allocation or the view creation failed
gpu::Memory*
Memory::createBufferView(amd::Memory& subBufferOwner)
{
    const size_t offset = subBufferOwner.getOrigin();
    const size_t size   = subBufferOwner.getSize();

    // Create a memory object
    gpu::Memory* viewMemory = new gpu::Memory(dev(), subBufferOwner, size);
    if (NULL == viewMemory) {
        return NULL;
    }

    // This object is both the resource the view is created from and its parent
    Resource::ViewParams params;
    params.owner_       = &subBufferOwner;
    params.gpu_         = static_cast<VirtualGPU*>(subBufferOwner.getVirtualDevice());
    params.offset_      = offset;
    params.size_        = size;
    params.resource_    = this;
    params.memory_      = this;
    if (!viewMemory->create(Resource::View, &params)) {
        delete viewMemory;
        return NULL;
    }

    // Explicitly set the host memory location,
    // because the parent location could change after reallocation
    if (NULL != owner()->getHostMem()) {
        subBufferOwner.setHostMem(
            reinterpret_cast<char*>(owner()->getHostMem()) + offset);
    }
    else {
        subBufferOwner.setHostMem(NULL);
    }

    return viewMemory;
}
|
|
|
|
void
|
|
Memory::decIndMapCount()
|
|
{
|
|
// Map/unmap must be serialized
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
if (indirectMapCount_ == 0) {
|
|
if (!mipMapped()) {
|
|
LogError("decIndMapCount() called when indirectMapCount_ already zero");
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Decrement the counter and release indirect map if it's the last op
|
|
if (--indirectMapCount_ == 0) {
|
|
if (NULL != mapMemory_) {
|
|
amd::Memory* memory = mapMemory_;
|
|
amd::Memory* empty = NULL;
|
|
|
|
// Get GPU memory
|
|
Memory* gpuMemory = mapMemory();
|
|
gpuMemory->unmap(NULL);
|
|
|
|
if (!dev().addMapTarget(memory)) {
|
|
memory->release();
|
|
}
|
|
|
|
// Map/unamp is serialized for the same memory object,
|
|
// so it's safe to clear the pointer
|
|
assert((mapMemory_ != NULL) && "Mapped buffer should be valid");
|
|
mapMemory_ = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Note - must be called by the device under the async lock, so no spinning
|
|
// or long pauses allowed in this function.
|
|
void*
|
|
Memory::allocMapTarget(
|
|
const amd::Coord3D& origin,
|
|
const amd::Coord3D& region,
|
|
uint mapFlags,
|
|
size_t* rowPitch,
|
|
size_t* slicePitch)
|
|
{
|
|
// Sanity checks
|
|
assert(owner() != NULL);
|
|
|
|
// Map/unmap must be serialized
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
address mapAddress = NULL;
|
|
size_t offset = origin[0];
|
|
|
|
//For SVM implementation, we cannot use cached map. if svm space, use the svm host pointer
|
|
void *initHostPtr = owner()->getSvmPtr();
|
|
if (NULL != initHostPtr) {
|
|
owner()->commitSvmMemory();
|
|
}
|
|
|
|
if (owner()->numDevices() > 1) {
|
|
if ((NULL == initHostPtr) && (owner()->getHostMem() == NULL)) {
|
|
static const bool forceAllocHostMem = true;
|
|
if (!owner()->allocHostMemory(NULL, forceAllocHostMem)) {
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
incIndMapCount();
|
|
// If host memory exists, use it
|
|
if ((owner()->getHostMem() != NULL) && isDirectMap()) {
|
|
mapAddress = reinterpret_cast<address>(owner()->getHostMem());
|
|
}
|
|
// If resource is a persistent allocation, we can use it directly
|
|
else if (isPersistentDirectMap()) {
|
|
if (NULL == map(NULL)) {
|
|
LogError("Could not map target persistent resource");
|
|
decIndMapCount();
|
|
return NULL;
|
|
}
|
|
mapAddress = data();
|
|
}
|
|
// Otherwise we can use a remote resource:
|
|
else {
|
|
// Are we in range?
|
|
size_t elementCount = cal()->width_;
|
|
size_t rSize = elementCount * elementSize();
|
|
if (offset >= rSize || offset + region[0] > rSize) {
|
|
LogWarning("Memory::allocMapTarget() - offset/size out of bounds");
|
|
return NULL;
|
|
}
|
|
|
|
// Allocate a map resource if there isn't any yet
|
|
if (indirectMapCount_ == 1) {
|
|
const static bool SysMem = true;
|
|
bool failed = false;
|
|
amd::Memory* memory = NULL;
|
|
// Search for a possible indirect resource
|
|
cl_mem_flags flag = 0;
|
|
bool canBeCached = true;
|
|
if (NULL != initHostPtr) {
|
|
//make sure the host memory is committed already, or we have a big problem.
|
|
assert(owner()->isSvmPtrCommited() && "The host svm memory not committed yet!");
|
|
flag = CL_MEM_USE_HOST_PTR;
|
|
canBeCached = false;
|
|
}
|
|
else {
|
|
memory = dev().findMapTarget(owner()->getSize());
|
|
}
|
|
|
|
if (memory == NULL) {
|
|
// for map target of svm buffer , we need use svm host ptr
|
|
memory = new(dev().context())
|
|
amd::Buffer(dev().context(), flag, owner()->getSize());
|
|
Memory* gpuMemory;
|
|
|
|
do {
|
|
if ((memory == NULL) || !memory->create(initHostPtr, SysMem)) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
memory->setCacheStatus(canBeCached);
|
|
|
|
gpuMemory = reinterpret_cast<Memory*>
|
|
(memory->getDeviceMemory(dev()));
|
|
|
|
// Create, Map and get the base pointer for the resource
|
|
if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
}
|
|
while (false);
|
|
}
|
|
|
|
if (failed) {
|
|
if (memory != NULL) {
|
|
memory->release();
|
|
}
|
|
decIndMapCount();
|
|
LogError("Could not map target resource");
|
|
return NULL;
|
|
}
|
|
|
|
// Map/unamp is serialized for the same memory object,
|
|
// so it's safe to assign the new pointer
|
|
assert((mapMemory_ == NULL) && "Mapped buffer can't be valid");
|
|
mapMemory_ = memory;
|
|
}
|
|
else {
|
|
// Did the map resource allocation fail?
|
|
if (mapMemory_ == NULL) {
|
|
LogError("Could not map target resource");
|
|
return NULL;
|
|
}
|
|
}
|
|
mapAddress = mapMemory()->data();
|
|
}
|
|
|
|
return mapAddress + offset;
|
|
}
|
|
|
|
bool
|
|
Memory::pinSystemMemory(void* hostPtr, size_t size)
|
|
{
|
|
bool result = false;
|
|
|
|
// If memory has a direct access already, then skip the host memory pinning
|
|
if (isHostMemDirectAccess()) {
|
|
return true;
|
|
}
|
|
|
|
// Destroy the old pinned memory if it was already allocated
|
|
if (flags_ & PinnedMemoryAlloced) {
|
|
delete pinnedMemory_;
|
|
flags_ &= ~PinnedMemoryAlloced;
|
|
}
|
|
|
|
// Allocate memory for the pinned object
|
|
pinnedMemory_ = new Memory(dev(), size);
|
|
|
|
if (pinnedMemory_ == NULL) {
|
|
return false;
|
|
}
|
|
|
|
// Check if it's a view
|
|
if (flags_ & SubMemoryObject) {
|
|
const gpu::Memory* gpuMemory;
|
|
if (owner() != NULL) {
|
|
gpuMemory = dev().getGpuMemory(owner()->parent());
|
|
}
|
|
else {
|
|
gpuMemory = parent();
|
|
}
|
|
|
|
if (gpuMemory->flags_ & PinnedMemoryAlloced) {
|
|
Resource::ViewParams params;
|
|
params.owner_ = owner();
|
|
params.offset_ = owner()->getOrigin();
|
|
params.size_ = owner()->getSize();
|
|
params.resource_ = gpuMemory->pinnedMemory_;
|
|
params.memory_ = NULL;
|
|
result = pinnedMemory_->create(Resource::View, ¶ms);
|
|
}
|
|
}
|
|
else {
|
|
Resource::PinnedParams params;
|
|
// Fill resource creation parameters
|
|
params.owner_ = owner();
|
|
params.hostMemRef_ = owner()->getHostMemRef();
|
|
params.size_ = size;
|
|
|
|
// Create resource
|
|
result = pinnedMemory_->create(Resource::Pinned, ¶ms);
|
|
}
|
|
|
|
if (!result) {
|
|
delete pinnedMemory_;
|
|
pinnedMemory_ = NULL;
|
|
return false;
|
|
}
|
|
|
|
flags_ |= PinnedMemoryAlloced;
|
|
return true;
|
|
}
|
|
|
|
void*
|
|
Memory::cpuMap(
|
|
device::VirtualDevice& vDev, uint flags,
|
|
uint startLayer, uint numLayers,
|
|
size_t* rowPitch,
|
|
size_t* slicePitch)
|
|
{
|
|
uint resFlags = 0;
|
|
if (flags == Memory::CpuReadOnly) {
|
|
resFlags = Resource::ReadOnly;
|
|
}
|
|
else if (flags == Memory::CpuWriteOnly) {
|
|
resFlags = Resource::WriteOnly;
|
|
}
|
|
|
|
void* ptr = map(&static_cast<VirtualGPU&>(vDev), resFlags, startLayer, numLayers);
|
|
if (!cal()->buffer_) {
|
|
*rowPitch = cal()->pitch_ * elementSize();
|
|
*slicePitch = cal()->slice_ * elementSize();
|
|
}
|
|
return ptr;
|
|
}
|
|
|
|
void
|
|
Memory::cpuUnmap(device::VirtualDevice& vDev)
|
|
{
|
|
unmap(&static_cast<VirtualGPU&>(vDev));
|
|
}
|
|
|
|
//! \note moveTo() must be called only from outside of
|
|
//! VirtualGPU submit command methods.
|
|
//! Otherwise a deadlock in lockVgpus() is possible.
|
|
//! Also the logic in this function is very specific to
|
|
//! the zero-copy functionality.
|
|
|
|
bool
|
|
Memory::moveTo(Memory& dst)
|
|
{
|
|
bool result = false;
|
|
|
|
// Make sure that all virtual devices don't process any commands
|
|
Device::ScopedLockVgpus lock(dev());
|
|
|
|
// Wait for idle on all virtual GPUs
|
|
//!@note It's enough to wait on the active queue only
|
|
for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
|
|
wait(*(dev().vgpus()[idx]));
|
|
}
|
|
|
|
static const bool Entire = true;
|
|
amd::Coord3D origin(0, 0, 0);
|
|
amd::Coord3D region(size());
|
|
|
|
// Transfer the data from old location to a new one
|
|
if (dev().xferMgr().copyBuffer(
|
|
*this, dst, origin, origin, region, Entire)) {
|
|
// Move all properties to the new object
|
|
dst.mapMemory_ = mapMemory_;
|
|
mapMemory_ = NULL;
|
|
|
|
dst.flags_ |= flags_ & ~HostMemoryDirectAccess;
|
|
flags_ &= HostMemoryDirectAccess;
|
|
|
|
dst.indirectMapCount_ = indirectMapCount_;
|
|
indirectMapCount_ = 0;
|
|
|
|
dst.pinnedMemory_ = pinnedMemory_;
|
|
pinnedMemory_ = NULL;
|
|
|
|
// Replace the device memory object
|
|
//! @note: current object will be destroyed
|
|
owner()->replaceDeviceMemory(&dev(), &dst);
|
|
result = true;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
//! Returns the device memory object that backs the map target on this
//! device, or NULL when no map target (mapMemory_) is attached.
Memory* Memory::mapMemory() const
{
    if (NULL == mapMemory_) {
        return NULL;
    }
    return reinterpret_cast<Memory*>(mapMemory_->getDeviceMemory(dev()));
}
|
|
|
|
void
|
|
Memory::mgpuCacheWriteBack()
|
|
{
|
|
// Lock memory object, so only one write back can occur
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
// Attempt to allocate a staging buffer if don't have any
|
|
if (owner()->getHostMem() == NULL) {
|
|
static const bool forceAllocHostMem = true;
|
|
if (owner()->allocHostMemory(NULL, forceAllocHostMem)) {
|
|
//! \note Ignore pinning result
|
|
bool ok = pinSystemMemory(
|
|
owner()->getHostMem(), owner()->getHostMemRef()->size());
|
|
}
|
|
}
|
|
|
|
// Make synchronization
|
|
if (owner()->getHostMem() != NULL) {
|
|
owner()->cacheWriteBack();
|
|
}
|
|
}
|
|
|
|
//! Creates a GPU buffer object that is a view into this buffer.
//!
//! \param subBufferOwner  Sub-buffer memory object; its origin and size
//!                        define the offset and size of the view
//!
//! \return The new view object, or NULL if the object could not be
//!         allocated or the view resource could not be created
Memory* Buffer::createBufferView(amd::Memory& subBufferOwner) const
{
    const size_t viewOffset = subBufferOwner.getOrigin();
    const size_t viewSize = subBufferOwner.getSize();

    // Create the memory object for the view
    gpu::Memory* view = new gpu::Buffer(dev(), subBufferOwner, viewSize);
    if (view == NULL) {
        return NULL;
    }

    // Describe the view: this buffer is both the parent resource and memory
    Resource::ViewParams viewParams;
    viewParams.owner_ = &subBufferOwner;
    viewParams.offset_ = viewOffset;
    viewParams.size_ = viewSize;
    viewParams.resource_ = this;
    viewParams.memory_ = this;

    if (view->create(Resource::View, &viewParams)) {
        return view;
    }

    // View creation failed, so drop the half-built object
    delete view;
    return NULL;
}
|
|
|
|
void*
|
|
Image::allocMapTarget(
|
|
const amd::Coord3D& origin,
|
|
const amd::Coord3D& region,
|
|
uint mapFlags,
|
|
size_t* rowPitch,
|
|
size_t* slicePitch)
|
|
{
|
|
// Sanity checks
|
|
assert(owner() != NULL);
|
|
bool useRemoteResource = true;
|
|
size_t slicePitchTmp = 0;
|
|
size_t height = cal()->height_;
|
|
size_t depth = cal()->depth_;
|
|
|
|
// Map/unmap must be serialized
|
|
amd::ScopedLock lock(owner()->lockMemoryOps());
|
|
|
|
address mapAddress = NULL;
|
|
size_t offset = origin[0];
|
|
|
|
incIndMapCount();
|
|
|
|
// If host memory exists, use it
|
|
if ((owner()->getHostMem() != NULL) && isDirectMap()) {
|
|
useRemoteResource = false;
|
|
mapAddress = reinterpret_cast<address>(owner()->getHostMem());
|
|
amd::Image* amdImage = owner()->asImage();
|
|
|
|
// Calculate the offset in bytes
|
|
offset *= elementSize();
|
|
|
|
// Update the row and slice pitches value
|
|
*rowPitch = (amdImage->getRowPitch() == 0) ?
|
|
(cal()->width_ * elementSize()) : amdImage->getRowPitch();
|
|
slicePitchTmp = (amdImage->getSlicePitch() == 0) ?
|
|
(height * (*rowPitch)) : amdImage->getSlicePitch();
|
|
|
|
// Adjust the offset in Y and Z dimensions
|
|
offset += origin[1] * (*rowPitch);
|
|
offset += origin[2] * slicePitchTmp;
|
|
}
|
|
// If resource is a persistent allocation, we can use it directly
|
|
//! @note Even if resource is a persistent allocation,
|
|
//! runtime can't use it directly,
|
|
//! because CAL volume map doesn't work properly.
|
|
//! @todo arrays can be added for persistent lock with some CAL changes
|
|
else if (isPersistentDirectMap()) {
|
|
if (NULL == map(NULL)) {
|
|
useRemoteResource = true;
|
|
LogError("Could not map target persistent resource, try remote resource");
|
|
}
|
|
else {
|
|
useRemoteResource = false;
|
|
mapAddress = data();
|
|
|
|
// Calculate the offset in bytes
|
|
offset *= elementSize();
|
|
|
|
// Update the row pitch value
|
|
*rowPitch = cal()->pitch_ * elementSize();
|
|
|
|
// Adjust the offset in Y dimension
|
|
offset += origin[1] * (*rowPitch);
|
|
}
|
|
}
|
|
|
|
// Otherwise we can use a remote resource:
|
|
if (useRemoteResource) {
|
|
// Calculate X offset in bytes
|
|
offset *= elementSize();
|
|
|
|
// Allocate a map resource if there isn't any yet
|
|
if (indirectMapCount_ == 1) {
|
|
const static bool SysMem = true;
|
|
bool failed = false;
|
|
amd::Memory* memory;
|
|
|
|
// Search for a possible indirect resource
|
|
memory = dev().findMapTarget(owner()->getSize());
|
|
|
|
if (memory == NULL) {
|
|
// Allocate a new buffer to use as the map target
|
|
//! @note Allocate a 1D buffer, since CAL issues with 3D
|
|
//! Also HW doesn't support untiled images
|
|
memory = new (dev().context())
|
|
amd::Buffer(dev().context(), 0,
|
|
cal()->width_ * height * depth * elementSize());
|
|
memory->setVirtualDevice(owner()->getVirtualDevice());
|
|
|
|
Memory* gpuMemory;
|
|
do {
|
|
if ((memory == NULL) || !memory->create(NULL, SysMem)) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
|
|
gpuMemory = reinterpret_cast<Memory*>
|
|
(memory->getDeviceMemory(dev()));
|
|
|
|
// Create, Map and get the base pointer for the resource
|
|
if ((gpuMemory == NULL) || (NULL == gpuMemory->map(NULL))) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
}
|
|
while (false);
|
|
}
|
|
|
|
if (failed) {
|
|
if (memory != NULL) {
|
|
memory->release();
|
|
}
|
|
decIndMapCount();
|
|
LogError("Could not map target resource");
|
|
return NULL;
|
|
}
|
|
|
|
// Map/unamp is serialized for the same memory object,
|
|
// so it's safe to assign the new pointer
|
|
assert((mapMemory_ == NULL) && "Mapped buffer can't be valid");
|
|
mapMemory_ = memory;
|
|
}
|
|
else {
|
|
// Did the map resource allocation fail?
|
|
if (mapMemory_ == NULL) {
|
|
LogError("Could not map target resource");
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
mapAddress = mapMemory()->data();
|
|
|
|
// Update the row and slice pitches value
|
|
*rowPitch = region[0] * elementSize();
|
|
if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
|
|
slicePitchTmp = *rowPitch ;
|
|
}
|
|
else {
|
|
slicePitchTmp = *rowPitch * region[1];
|
|
}
|
|
// Use start of the indirect buffer
|
|
offset = 0;
|
|
}
|
|
|
|
if (slicePitch != NULL) {
|
|
*slicePitch = slicePitchTmp;
|
|
}
|
|
|
|
return mapAddress + offset;
|
|
}
|
|
|
|
} // namespace gpu
|