Files
rocm-systems/rocclr/runtime/device/gpu/gpuresource.cpp
T
foreman 5f93384dbc P4 to Git Change 1058915 by rili@rili_opencl_stg on 2014/07/24 12:24:49
EPR #399808 - Fix the value of HSA image channel order for CL_RGB

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#188 edit
2014-07-24 12:45:47 -04:00

2099 lines
61 KiB
C++

// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "platform/program.hpp"
#include "platform/kernel.hpp"
#include "os/os.hpp"
#include "device/device.hpp"
#include "utils/flags.hpp"
#include "thread/monitor.hpp"
#include "device/gpu/gpuresource.hpp"
#include "device/gpu/gpudevice.hpp"
#include "device/gpu/gpublit.hpp"
#include "device/gpu/gputimestamp.hpp"
#include "thread/atomic.hpp"
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
#include <cmath>
namespace gpu {
//! Binds a pair of GSL memory objects (the active object and the original
//! one it may have been created from) to the device that owns them.
//! The reference starts without any CPU mapping.
GslResourceReference::GslResourceReference(const Device&   gpuDev,
                                           gslMemObject    gslResource,
                                           gslMemObject    gslResOriginal)
    : device_(gpuDev),
      resource_(gslResource),
      resOriginal_(gslResOriginal),
      cpuAddress_(NULL)
{
    // Nothing else to do: both handles are released in the destructor
}
//! Releases the CPU mapping (if any) and then both GSL memory objects.
GslResourceReference::~GslResourceReference()
{
    // A non-NULL CPU address means a remote mapping is still active
    if (NULL != cpuAddress_) {
        device_.resUnmapRemote(gslResource());
    }

    // Free the active GSL object
    if (gslResource() != 0) {
        device_.resFree(gslResource());
        resource_ = NULL;
    }

    // Free the original GSL object, when one was kept alongside the active one
    if (gslOriginal() != 0) {
        device_.resFree(gslOriginal());
        resOriginal_ = NULL;
    }
}
//! Builds the descriptor of a buffer resource with the given width and format.
//! Only the descriptor is filled in; the memory type stays Empty and no GSL
//! object exists until create() is called.
Resource::Resource(const Device& gpuDev, size_t width, cmSurfFmt format)
    : elementSize_(0),
      gpuDevice_(gpuDev),
      mapCount_(0),
      address_(NULL),
      offset_(0),
      curRename_(0),
      gslRef_(NULL),
      viewOwner_(NULL),
      hbOffset_(0),
      hbSize_(0),
      pinOffset_(0),
      byteView_(NULL),
      shortView_(NULL),
      glInterop_(0),
      gpu_(NULL)
{
    // Memory type and placement
    cal_.type_          = Empty;
    cal_.cardMemory_    = true;
    cal_.SVMRes_        = false;
    cal_.flags_         = 0;

    // Geometry: a buffer is a single row of 'width'
    cal_.width_         = width;
    cal_.height_        = 1;
    cal_.depth_         = 1;
    cal_.pitch_         = 0;
    cal_.slice_         = 0;
    cal_.dimSize_       = 1;
    cal_.dimension_     = GSL_MOA_BUFFER;

    // Format
    cal_.format_        = format;
    cal_.channelOrder_  = GSL_CHANNEL_ORDER_REPLICATE_R;

    // Object kind: buffer, not an image
    cal_.buffer_        = true;
    cal_.imageArray_    = false;
    cal_.imageType_     = 0;
}
//! Builds the descriptor of an image resource.
//! The GSL dimension, the number of dimensions and the array flag are derived
//! from \a imageType. An unrecognised type is logged and treated as having a
//! single dimension (the GSL dimension field is not assigned in that case).
Resource::Resource(const Device&       gpuDev,
                   size_t              width,
                   size_t              height,
                   size_t              depth,
                   cmSurfFmt           format,
                   gslChannelOrder     chOrder,
                   cl_mem_object_type  imageType)
    : elementSize_(0),
      gpuDevice_(gpuDev),
      mapCount_(0),
      address_(NULL),
      offset_(0),
      curRename_(0),
      gslRef_(NULL),
      viewOwner_(NULL),
      hbOffset_(0),
      hbSize_(0),
      pinOffset_(0),
      byteView_(NULL),
      shortView_(NULL),
      glInterop_(0),
      gpu_(NULL)
{
    // Memory type and placement
    cal_.type_          = Empty;
    cal_.cardMemory_    = true;
    cal_.SVMRes_        = false;
    cal_.flags_         = 0;

    // Geometry as requested by the caller
    cal_.width_         = width;
    cal_.height_        = height;
    cal_.depth_         = depth;
    cal_.pitch_         = 0;
    cal_.slice_         = 0;

    // Format
    cal_.format_        = format;
    cal_.channelOrder_  = chOrder;

    // Object kind: image; array flag is raised below for array types
    cal_.buffer_        = false;
    cal_.imageArray_    = false;
    cal_.imageType_     = imageType;

    // Map the OpenCL image type to the GSL dimension
    switch (imageType) {
    case CL_MEM_OBJECT_IMAGE1D:
        cal_.dimSize_   = 1;
        cal_.dimension_ = GSL_MOA_TEXTURE_1D;
        break;
    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
        cal_.dimSize_   = 1;
        cal_.dimension_ = GSL_MOA_TEXTURE_BUFFER;
        break;
    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
        cal_.dimSize_    = 2;
        cal_.dimension_  = GSL_MOA_TEXTURE_1D_ARRAY;
        cal_.imageArray_ = true;
        break;
    case CL_MEM_OBJECT_IMAGE2D:
        cal_.dimSize_   = 2;
        cal_.dimension_ = GSL_MOA_TEXTURE_2D;
        break;
    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
        cal_.dimSize_    = 3;
        cal_.dimension_  = GSL_MOA_TEXTURE_2D_ARRAY;
        cal_.imageArray_ = true;
        break;
    case CL_MEM_OBJECT_IMAGE3D:
        cal_.dimSize_   = 3;
        cal_.dimension_ = GSL_MOA_TEXTURE_3D;
        break;
    default:
        cal_.dimSize_ = 1;
        LogError("Unknown image type!");
        break;
    }
}
//! Destroys the resource, releasing everything free() releases.
Resource::~Resource()
{
    this->free();
}
//! Translates a CM surface format into the numeric image format value that
//! Resource::create() stores in the image HW state for HSAIL/OpenCL 2.0.
//! Formats that are not listed assert in debug builds and yield 0.
//! Values 4, 5 and 6 are never produced: the corresponding cases
//! (R5G6B5, R5G5B5 and R10G10B10 UNORM) are disabled.
static uint32_t GetHSAILImageFormatType(cmSurfFmt format)
{
    switch (format) {
    case CM_SURF_FMT_sR8:
    case CM_SURF_FMT_sRG8:
    case CM_SURF_FMT_sRGBA8:
        return 0;

    case CM_SURF_FMT_sU16:
    case CM_SURF_FMT_sUV16:
    case CM_SURF_FMT_sUVWQ16:
        return 1;

    case CM_SURF_FMT_INTENSITY8:
    case CM_SURF_FMT_RG8:
    case CM_SURF_FMT_RGBA8:
    case CM_SURF_FMT_RGBX8UI:
    case CM_SURF_FMT_RGBA8_SRGB:
        return 2;

    case CM_SURF_FMT_R16:
    case CM_SURF_FMT_RG16:
    case CM_SURF_FMT_RGBA16:
    case CM_SURF_FMT_DEPTH16:
        return 3;

    case CM_SURF_FMT_BGR10_X2:
        return 7;

    case CM_SURF_FMT_sR8I:
    case CM_SURF_FMT_sRG8I:
    case CM_SURF_FMT_sRGBA8I:
        return 8;

    case CM_SURF_FMT_sR16I:
    case CM_SURF_FMT_sRG16I:
    case CM_SURF_FMT_sRGBA16I:
        return 9;

    case CM_SURF_FMT_sR32I:
    case CM_SURF_FMT_sRG32I:
    case CM_SURF_FMT_sRGBA32I:
        return 10;

    case CM_SURF_FMT_R8I:
    case CM_SURF_FMT_RG8I:
    case CM_SURF_FMT_RGBA8UI:
        return 11;

    case CM_SURF_FMT_R16I:
    case CM_SURF_FMT_RG16I:
    case CM_SURF_FMT_RGBA16UI:
        return 12;

    case CM_SURF_FMT_R32I:
    case CM_SURF_FMT_RG32I:
    case CM_SURF_FMT_RGBA32UI:
        return 13;

    case CM_SURF_FMT_R16F:
    case CM_SURF_FMT_RG16F:
    case CM_SURF_FMT_RGBA16F:
        return 14;

    case CM_SURF_FMT_R32F:
    case CM_SURF_FMT_RG32F:
    case CM_SURF_FMT_RGBA32F:
    case CM_SURF_FMT_DEPTH32F:
        return 15;

    default:
        // Unsupported surface format
        assert(false);
        break;
    }
    return 0;
}
//! Translates a GSL channel order into the numeric image order value that
//! Resource::create() stores in the image HW state for HSAIL/OpenCL 2.0.
//! Channel orders that are not listed assert in debug builds and yield 0.
static uint32_t GetHSAILImageOrderType(gslChannelOrder chOrder)
{
    switch (chOrder) {
    case GSL_CHANNEL_ORDER_A:
        return 0;
    case GSL_CHANNEL_ORDER_R:
        return 1;
    case GSL_CHANNEL_ORDER_RG:
        return 3;
    case GSL_CHANNEL_ORDER_RA:
        return 5;
    case GSL_CHANNEL_ORDER_RGB:
        return 6;
    case GSL_CHANNEL_ORDER_RGBA:
        return 8;
    case GSL_CHANNEL_ORDER_BGRA:
        return 9;
    case GSL_CHANNEL_ORDER_ARGB:
        return 10;
    case GSL_CHANNEL_ORDER_SRGB:
        return 12;
    case GSL_CHANNEL_ORDER_SRGBX:
        return 13;
    case GSL_CHANNEL_ORDER_SRGBA:
        return 14;
    case GSL_CHANNEL_ORDER_SBGRA:
        return 15;
    case GSL_CHANNEL_ORDER_INTENSITY:
        return 16;
    case GSL_CHANNEL_ORDER_LUMINANCE:
        return 17;
    case GSL_CHANNEL_ORDER_REPLICATE_R:
        return 18;
    default:
        // Unsupported channel order
        assert(false);
        break;
    }
    return 0;
}
//! Creates the GSL object that backs this resource.
//!
//! Depending on the memory type the object is taken from the global heap,
//! found in the resource cache, newly allocated, pinned from host memory,
//! created as a view of another resource, or associated with an OGL/D3D
//! interop object. An extra view is created on top when one is required
//! (image views, interop buffers, unaligned pinned memory, NV12/YV12 planes).
//!
//! \param memType  Requested memory type. Scratch is stored as Local and gets
//!                 a fixed VA base. The stored type may also be changed by the
//!                 remoteAlloc_/disablePersistent_ settings and by the
//!                 allocation fallback loop.
//! \param params   Type-specific creation parameters, reinterpreted according
//!                 to the memory type. Only the common code checks it against
//!                 NULL; the Pinned, View, ImageView, ImageBuffer, interop and
//!                 ExternalPhysical paths dereference it unconditionally.
//! \param heap     When true, the forced remote allocation and the
//!                 Persistent -> RemoteUSWC fallback are not applied.
//! \return true on success, false on any allocation/validation failure.
bool
Resource::create(MemoryType memType, CreateParams* params, bool heap)
{
bool calRes = false;
gslMemObject gslResource = 0;
gslMemObject gslResOriginal = 0;
const amd::HostMemoryReference* hostMemRef = NULL;
bool imageCreateView = false;
CALuint hostMemOffset = 0;
bool foundCalRef = false;
bool viewDefined = false;
uint viewLayer = 0;
uint viewLevel = 0;
uint viewFlags = 0;
gslResource3D viewSize = {0};
CALdomain viewOffset = {0};
cmSurfFmt viewSurfFmt;
gslChannelOrder viewChannelOrder = GSL_CHANNEL_ORDER_UNSPECIFIED;
gslMemObjectAttribType viewResType;
CALresourceDesc desc;
// (uint64)-1 is the initial pitch value passed to resAllocView() unless a
// D3D plane view or an image row pitch overrides it below
uint64 bytePitch = (uint64)-1;
bool useRowPitch = false;
desc.vaBase = 0;
desc.section = GSL_SECTION_REGULAR;
if (NULL != params && NULL != params->owner_) { //make sure params not NULL
// Select the SVM section/VA base from the owner's SVM pointer.
// An SVM pointer value of 1 results in a zero VA base.
mcaddr svmPtr = reinterpret_cast<mcaddr>(params->owner_->getSvmPtr());
desc.vaBase = (svmPtr == 1)? 0:svmPtr;
cal_.SVMRes_ = (svmPtr != 0);
desc.section = (svmPtr != 0) ? GSL_SECTION_SVM : GSL_SECTION_REGULAR;
if (params->owner_->getMemFlags() & CL_MEM_SVM_ATOMICS) {
desc.section = GSL_SECTION_SVM_ATOMICS;
}
}
// This is a thread safe operation
const_cast<Device&>(dev()).initializeHeapResources();
// Get the element size
elementSize_ = static_cast<CALuint>(memoryFormatSize(cal()->format_).size_);
cal_.type_ = memType;
// Scratch is stored as Local; the original memType is still checked below
if (memType == Scratch) {
cal_.type_ = Local;
}
// Force remote allocation if it was requested in the settings
if (dev().settings().remoteAlloc_ && !heap &&
((memoryType() == Local) ||
(memoryType() == Persistent))) {
cal_.type_ = RemoteUSWC;
}
if (dev().settings().disablePersistent_ && (memoryType() == Persistent)) {
cal_.type_ = RemoteUSWC;
}
if (cal()->buffer_) {
// Force linear tiling for buffer allocations
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
}
if (params != NULL) {
gpu_ = params->gpu_;
}
switch (memoryType()) {
case Heap:
// Use the device heap object directly; width and pitch come from it
gslResource = dev().resGetHeap(0);
if (gslResource == 0) {
return false;
}
calRes = true;
cal_.width_ = static_cast<size_t>(gslResource->getPitch());
cal_.pitch_ = static_cast<size_t>(gslResource->getPitch());
break;
case Persistent:
if (dev().settings().linearPersistentImage_) {
// Force linear tiling for image allocations in persistent
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
}
// Fall through ...
case RemoteUSWC:
case Remote:
case BusAddressable:
case ExternalPhysical:
// Fall through to process the memory allocation ...
case Local: {
if (cal()->buffer_) {
//! @todo Remove alignment.
//! GSL asserts in mem copy with an unaligned size
cal_.width_ = amd::alignUp(cal_.width_, 64);
}
desc.dimension = cal()->dimension_;
desc.size.width = cal()->width_;
desc.size.height = cal()->height_;
desc.size.depth = cal()->depth_;
desc.format = cal()->format_;
desc.channelOrder = cal()->channelOrder_;
desc.flags = cal()->flags_;
desc.mipLevels = 0;
desc.systemMemory = NULL;
// Allocation loop: each failed attempt either frees the resource cache
// and retries, or downgrades the memory type
// (Local -> Persistent -> RemoteUSWC, Remote -> RemoteUSWC) and retries.
// The loop ends on success or when no further fallback exists.
do {
// Find a type for allocation
if (memoryType() == Persistent) {
desc.type = GSL_MOA_MEMORY_CARD_LOCKABLE;
}
else if (memoryType() == Remote) {
desc.type = GSL_MOA_MEMORY_REMOTE_CACHEABLE;
}
else if (memoryType() == RemoteUSWC) {
desc.type = GSL_MOA_MEMORY_AGP;
}
else if (memoryType() == BusAddressable){
desc.type = GSL_MOA_MEMORY_CARD_BUS_ADDRESSABLE;
}
else if (memoryType() == ExternalPhysical){
desc.type = GSL_MOA_MEMORY_CARD_EXTERNAL_PHYSICAL;
// NOTE(review): params and params->owner_ are dereferenced without a
// NULL check here - confirm callers always provide them for this type
cl_bus_address_amd bus_address =
(reinterpret_cast<amd::Buffer*>(params->owner_))->busAddress();
desc.busAddress[0] = bus_address.surface_bus_address;
desc.busAddress[1] = bus_address.marker_bus_address;
}
else {
desc.type = GSL_MOA_MEMORY_CARD_EXT_NONEXT;
}
// Check resource cache first for an appropriate resource
gslRef_ = dev().resourceCache().findCalResource(&cal_);
if (memType == Scratch) {
desc.vaBase = static_cast<mcaddr>(0x100000000ULL);
}
else if ((gslRef_ != NULL) && (!dev().settings().use64BitPtr_)) {
// Make sure runtime didn't pick a resource with > 4GB address
if ((cal()->dimension_ == GSL_MOA_BUFFER) &&
(static_cast<uint64_t>(gslRef_->gslResource()->getSurfaceAddress() +
gslRef_->gslResource()->getSurfaceSize()) > (uint64_t(4) * Gi))) {
gslRef_->release();
gslRef_ = NULL;
}
}
// Try to allocate memory if we couldn't find a cached resource
if (gslRef_ == NULL) {
// Allocate memory
gslResource = dev().resAlloc(&desc);
if (gslResource != 0) {
calRes = true;
}
}
else {
// Reuse the cached reference; no new GslResourceReference is created later
calRes = true;
gslResource = gslRef_->gslOriginal();
foundCalRef = true;
}
// If GSL fails allocation then try other heaps
if (!calRes) {
// Free cache if we failed allocation
if (dev().resourceCache().free()) {
// We freed something - attempt to allocate memory again
continue;
}
// Local to Persistent
if (memoryType() == Local) {
cal_.type_ = Persistent;
}
else if (!heap && (memoryType() == Persistent)) {
cal_.type_ = RemoteUSWC;
}
// Remote cacheable to uncacheable
else if (memoryType() == Remote) {
cal_.type_ = RemoteUSWC;
}
else {
break;
}
}
}
while (!calRes);
}
break;
case Pinned: {
// Pin existing host memory as a system memory GSL resource
PinnedParams* pinned = reinterpret_cast<PinnedParams*>(params);
CALuint allocSize = static_cast<CALuint>(pinned->size_);
void* pinAddress;
hostMemRef = pinned->hostMemRef_;
pinAddress = address_ = hostMemRef->hostMem();
// Use untiled allocation
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
desc.size.width = cal()->width_;
if (cal()->dimension_ == GSL_MOA_BUFFER) {
// Align offset to 4K boundary (Vista/Win7 limitation)
char* tmpHost = const_cast<char*>(
amd::alignDown(reinterpret_cast<const char*>(address_),
PinnedMemoryAlignment));
// Find the partial size for unaligned copy
hostMemOffset = static_cast<CALuint>(
reinterpret_cast<const char*>(address_) - tmpHost);
// The low 8 bits of the offset are kept in pinOffset_; the remaining
// bits stay in hostMemOffset (masked below) and become the view offset
pinOffset_ = hostMemOffset & 0xff;
//!@note GSL has a problem with the defines for flags and
//! view creation, so check the restriction here
if (!dev().heap()->isVirtual() && (pinOffset_ != 0)) {
return false;
}
pinAddress = tmpHost;
// Align width to avoid GSL useless assert with a view
if (hostMemOffset != 0) {
desc.size.width += hostMemOffset / elementSize();
desc.size.width = amd::alignUp(desc.size.width, 64);
}
hostMemOffset &= ~(0xff);
}
else if (cal()->dimension_ == GSL_MOA_TEXTURE_2D) {
//! @todo: Width has to be aligned for 3D.
//! Need to be replaced with a compute copy
// Width aligned by 8 texels
if (((cal()->width_ % 0x8) != 0) ||
// Pitch aligned by 64 bytes
(((cal()->width_ * elementSize()) % 0x40) != 0)) {
return false;
}
}
else {
//! @todo GSL doesn't support pinning with resAlloc_
return false;
}
// Fill the GSL desc info structure
desc.dimension = cal()->dimension_;
desc.type = GSL_MOA_MEMORY_SYSTEM;
desc.size.height = cal()->height_;
desc.size.depth = cal()->depth_;
desc.format = cal()->format_;
desc.channelOrder = cal()->channelOrder_;
desc.mipLevels = 0;
desc.systemMemory = reinterpret_cast<CALvoid*>(pinAddress);
desc.flags = 0;
// Ensure page alignment
if ((CALuint64)desc.systemMemory & (amd::Os::pageSize() - 1)) {
return false;
}
gslResource = dev().resAlloc(&desc);
if (gslResource != 0) {
calRes = true;
}
else {
pinOffset_ = 0;
}
}
break;
case View: {
// Save the offset in the global heap
ViewParams* view = reinterpret_cast<ViewParams*>(params);
offset_ = view->offset_;
// Make sure parent was provided
if (NULL != view->resource_) {
viewOwner_ = view->resource_;
// NOTE(review): this local shadows the function-level bytePitch; only
// this shadowing variable is passed to resAllocView() in this case
uint64 bytePitch = (view->size_ + viewOwner_->pinOffset());
viewSize.width = bytePitch / elementSize();
viewSize.height = 1;
viewSize.depth = 1;
viewOffset.x = static_cast<CALuint>(offset() / elementSize());
viewOffset.y = 0;
viewOffset.width = 0;
viewOffset.height = 0;
gslResource = dev().resAllocView(
view->resource_->gslResource(), viewSize, viewOffset,
cal()->format_, GSL_CHANNEL_ORDER_REPLICATE_R,
cal()->dimension_, 0, 0, cal()->flags_, bytePitch);
if (gslResource != 0) {
calRes = true;
}
// Check if it's a heap allocation
if (!dev().heap()->isVirtual()) {
if (viewOwner_ == &dev().globalMem()) {
// Allocation directly from the heap
hbOffset_ = static_cast<uint64_t>(view->offset_);
}
else {
// Allocation from another memory object
hbOffset_ = static_cast<uint64_t>(view->offset_) +
viewOwner_->hbOffset();
}
hbSize_ = view->size_;
}
// A view of pinned memory exposes the parent's host address plus offset
if (viewOwner_->isMemoryType(Pinned)) {
address_ = viewOwner_->data() + offset();
}
pinOffset_ = viewOwner_->pinOffset();
}
else {
// No parent resource: the type is reset to Empty and calRes stays false
cal_.type_ = Empty;
}
}
break;
case ImageView: {
// The actual view object is created by the common view code below
ImageViewParams* imageView = reinterpret_cast<ImageViewParams*>(params);
imageCreateView = true;
viewLayer = imageView->layer_;
viewLevel = imageView->level_;
gslResource = imageView->resource_->gslResource();
viewOwner_ = imageView->resource_;
if (viewLayer != 0) {
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
}
calRes = true;
}
break;
case ImageBuffer: {
// Image created on top of a buffer; the owner's row pitch is applied
// by the common view code below
ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
imageCreateView = true;
gslResource = imageBuffer->resource_->gslResource();
viewOwner_ = imageBuffer->resource_;
calRes = true;
useRowPitch = true;
}
break;
case OGLInterop: {
OGLInteropParams* oglRes = reinterpret_cast<OGLInteropParams*>(params);
assert(oglRes->glPlatformContext_ &&
"We don't have OGL context!");
switch (oglRes->type_) {
case InteropVertexBuffer:
glType_ = CAL_RES_GL_BUFFER_TYPE_VERTEXBUFFER;
break;
case InteropRenderBuffer:
glType_ = CAL_RES_GL_BUFFER_TYPE_RENDERBUFFER;
break;
case InteropTexture:
case InteropTextureViewLevel:
case InteropTextureViewCube:
glType_ = CAL_RES_GL_BUFFER_TYPE_TEXTURE;
break;
default:
LogError("Unknown OGL interop type!");
return false;
break;
}
glPlatformContext_ = oglRes->glPlatformContext_;
glDeviceContext_ = oglRes->glDeviceContext_;
CALGSLDevice::GLResAssociate resData = {0};
resData.GLContext = oglRes->glPlatformContext_;
resData.GLdeviceContext = oglRes->glDeviceContext_;
resData.name = oglRes->handle_;
resData.type = glType_;
// We need not pass any flags down to OGL for interop
resData.flags = 0;
if (dev().resGLAssociate(resData)) {
gslResource = resData.memObject;
glInteropMbRes_ = resData.mbResHandle;
glInterop_ = resData.mem_base;
calRes = true;
}
// Check if we have to create a view
if (calRes &&
((oglRes->type_ == InteropTextureViewLevel) ||
(oglRes->type_ == InteropTextureViewCube))) {
imageCreateView = true;
viewLayer = oglRes->layer_;
viewLevel = oglRes->mipLevel_;
// Find the view parameters
if (InteropTextureViewLevel == oglRes->type_) {
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL;
}
else if (InteropTextureViewCube == oglRes->type_) {
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
}
else {
LogError("Unknown Interop View Type");
}
}
}
break;
#ifdef _WIN32
case D3D9Interop:
case D3D10Interop:
case D3D11Interop: {
D3DInteropParams* d3dRes = reinterpret_cast<D3DInteropParams*>(params);
desc.dimension = cal()->dimension_;
desc.size.width = cal()->width_;
desc.size.height = cal()->height_;
desc.size.depth = cal()->depth_;
desc.format = cal()->format_;
desc.channelOrder = cal()->channelOrder_;
desc.flags = cal()->flags_;
desc.mipLevels = 0;
desc.systemMemory = NULL;
switch (d3dRes->misc) {
case 1: // NV12 format
case 2: // YV12 format
// Readjust the size to the original NV12/YV12 size, since runtime
// creates an interop for all planes
switch (d3dRes->layer_) {
case 0:
desc.size.height = 3 * desc.size.height / 2;
break;
case 1:
case 2:
// Force R8 format for the interop allocation by default
if (1 == d3dRes->misc) {
desc.format = CM_SURF_FMT_R8;
desc.channelOrder = GSL_CHANNEL_ORDER_R;
}
desc.size.width = 2 * desc.size.width;
desc.size.height = 3 * desc.size.height;
break;
default:
break;
}
break;
default:
break;
}
// Create an interop GSL object
gslResource = dev().resMapD3DResource(
&desc, (CALuint64)d3dRes->handle_, (memoryType() != D3D9Interop));
if (gslResource != 0) {
calRes = true;
}
else {
return false;
}
// Check if we have to create a view
if (calRes &&
((d3dRes->type_ == InteropTextureViewLevel) ||
(d3dRes->type_ == InteropTextureViewCube))) {
imageCreateView = true;
viewLayer = d3dRes->layer_;
viewLevel = d3dRes->mipLevel_;
// Find the view parameters
if (InteropTextureViewLevel == d3dRes->type_) {
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL;
}
else if (InteropTextureViewCube == d3dRes->type_) {
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
}
else {
LogError("Unknown Interop View Type");
}
}
switch (d3dRes->misc) {
case 0:
break;
case 1: // NV12 format
case 2: // YV12 format
// Create a view for the specified plane
viewDefined = true;
viewSize.width = cal()->width_;
viewSize.height = cal()->height_;
viewSize.depth = 1;
bytePitch = static_cast<size_t>(gslResource->getPitch());
viewOffset.x = 0;
viewSurfFmt = cal()->format_;
viewChannelOrder = cal()->channelOrder_;
switch (d3dRes->layer_) {
case -1:
break;
case 0:
break;
case 1:
// Y - plane size to the offset
viewOffset.x = bytePitch * viewSize.height * 2;
if (d3dRes->misc == 2) {
// YV12 format U is 2 times smaller plane
bytePitch /= 2;
}
break;
case 2:
// Y + U plane sizes to the offset.
// U plane is 4 times smaller than Y => 5/2
viewOffset.x = bytePitch * viewSize.height * 5 / 2;
// V is 2 times smaller plane
bytePitch /= 2;
break;
default:
LogError("Unknown Interop View Type");
calRes = false;
break;
}
break;
default:
LogError("Unknown Interop View Type");
calRes = false;
}
}
break;
#endif // _WIN32
default:
LogWarning("Resource::create() called with unknown memory type");
return false;
break;
}
// Create a view for interop, since the original buffer may have different format
// than the global buffer and GSL mem copy will fail
bool interopBufView = cal()->buffer_ &&
((memoryType() == D3D10Interop) || (memoryType() == OGLInterop) ||
(memoryType() == D3D11Interop));
// For image views and image buffers gslResource is the parent's handle
// (set in the switch above), so it must be neither freed nor stored here
bool ignoreParentHandle =
((memoryType() == ImageView) || (memoryType() == ImageBuffer));
// Create imageview if it was requested
if (calRes &&
(imageCreateView || interopBufView || hostMemOffset || viewDefined)) {
gslResOriginal = gslResource;
// Disable tiling if it's a buffer view
if (interopBufView || hostMemOffset) {
viewFlags = CAL_RESALLOCVIEW_GLOBAL_BUFFER;
}
viewResType = cal()->dimension_;
// Default view geometry covers the whole resource, unless the D3D plane
// code above already defined the view
if (!viewDefined) {
viewSize.width = cal()->width_ + (pinOffset() / elementSize());
viewSize.height = cal()->height_;
viewSize.depth = cal()->depth_;
viewOffset.x = hostMemOffset / static_cast<CALuint>(elementSize());
viewOffset.y = 0;
viewOffset.width = 0;
viewOffset.height = 0;
viewSurfFmt = cal()->format_;
viewChannelOrder = cal()->channelOrder_;
}
// useRowPitch is only set for ImageBuffer: take a non-zero row pitch
// from the owning image object
if (useRowPitch && (params->owner_ != NULL) && params->owner_->asImage() &&
(params->owner_->asImage()->getRowPitch() != 0)) {
bytePitch = params->owner_->asImage()->getRowPitch();
}
// Allocate a view resource object
gslResource = dev().resAllocView(
gslResOriginal, viewSize, viewOffset, viewSurfFmt,
viewChannelOrder, viewResType, viewLevel, viewLayer, viewFlags, bytePitch);
if (gslResource == 0) {
// If we don't have to keep the parent handle,
// then destroy the original resource
if (!ignoreParentHandle) {
dev().resFree(gslResOriginal);
gslResOriginal = 0;
}
LogError("ResAlloc failed!");
return false;
}
if (ignoreParentHandle) {
gslResOriginal = 0;
}
}
if (!calRes) {
if (gslResource != 0) {
dev().resFree(gslResource);
}
// Pinning failures are not logged as errors
if (memoryType() != Pinned) {
LogError("calResAlloc failed!");
}
return false;
}
// Find memory location
switch (gslResource->getAttribs().location) {
case GSL_MOA_MEMORY_CARD:
case GSL_MOA_MEMORY_CARD_EXT:
case GSL_MOA_MEMORY_CARD_LOCKABLE:
case GSL_MOA_MEMORY_CARD_EXT_NONEXT:
case GSL_MOA_MEMORY_CARD_BUS_ADDRESSABLE:
cal_.cardMemory_ = true;
break;
default:
cal_.cardMemory_ = false;
break;
}
// Anything other than the two linear tiling modes is considered tiled
gslMemObjectAttribTiling tiling = gslResource->getAttribs().tiling;
cal_.tiled_ = (GSL_MOA_TILING_LINEAR != tiling) &&
(GSL_MOA_TILING_LINEAR_GENERAL != tiling);
// Get the heap block offset if it's a virtual heap
if (dev().heap()->isVirtual()) {
hbOffset_ = gslResource->getSurfaceAddress() -
dev().heap()->baseAddress();
}
hbSize_ = static_cast<uint64_t>(gslResource->getSurfaceSize());
if (!dev().settings().use64BitPtr_ && (memType != Scratch)) {
// Make sure runtime doesn't go over the address space limit for buffers
if ((memoryType() != Heap) &&
(cal()->dimension_ == GSL_MOA_BUFFER) &&
((hbOffset_ + hbSize_) > (uint64_t(4) * Gi))) {
if (cal_.cardMemory_) {
LogPrintfError(
"Out of 4GB address space. Base: 0x%016llX, size: 0x%016llX!",
hbOffset_, hbSize_);
dev().resFree(gslResource);
//! @note: A workaround for a Windows delay on memory destruction
//! Runtime submits a fake memory fill to force KMD to return
//! the freed memory ranges
if (IS_WINDOWS) {
uint32_t pattern = 0;
Memory* dummy = reinterpret_cast<Memory*>(
dev().dummyPage()->getDeviceMemory(dev()));
dev().xferMgr().fillBuffer(*dummy, &pattern, sizeof(uint32_t),
amd::Coord3D(0), amd::Coord3D(sizeof(uint32_t)));
}
if ((gslResOriginal != 0) && !ignoreParentHandle) {
dev().resFree(gslResOriginal);
gslResOriginal = 0;
}
return false;
}
else {
LogWarning("Out of 4GB address space for AHP/UHP!");
}
}
}
// A resource taken from the cache already has its reference in gslRef_
if (!foundCalRef) {
gslRef_ = new GslResourceReference(dev(), gslResource, gslResOriginal);
if (gslRef_ == NULL) {
LogError("Memory allocation failure!");
// NOTE(review): only gslResource is freed here; gslResOriginal (when a
// view was created) is not - confirm whether that leaks
dev().resFree(gslResource);
return false;
}
}
// Images under HSAIL/OpenCL 2.0 get an SRD slot with the HW state:
// words 0-7 filled by the device, 8 = format type, 9 = channel order type,
// 10 = width, 11 = reserved
if ((dev().settings().hsail_ || (dev().settings().oclVersion_ == OpenCL20)) &&
!cal()->buffer_) {
hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
if (0 == hwSrd_) {
// NOTE(review): gslRef_ stays assigned on this failure path; presumably
// it is released later through free() - verify
return false;
}
dev().fillImageHwState(gslResource, hwState_, 8 * sizeof(uint32_t));
hwState_[8] = GetHSAILImageFormatType(cal()->format_);
hwState_[9] = GetHSAILImageOrderType(cal()->channelOrder_);
hwState_[10] = static_cast<uint32_t>(cal()->width_);
// Workaround for depth view, change tileIndex to 0 for depth view
if ((memoryType() == ImageView) &&
(viewChannelOrder == GSL_CHANNEL_ORDER_REPLICATE_R)) {
if ((hwState_[3] & 0x1f00000) == 0xe00000) {
hwState_[3] = hwState_[3] & 0xfe0fffff ;
}
}
hwState_[11] = 0; // one extra reserved field in the argument
}
// Report the final surface address back to the owner as its SVM pointer.
// desc.section is only changed from REGULAR when params and params->owner_
// are non-NULL, so the dereference below is guarded by that earlier check.
if (desc.section == GSL_SECTION_SVM || desc.section == GSL_SECTION_SVM_ATOMICS)
{
params->owner_->setSvmPtr(reinterpret_cast<void*>(gslResource->getSurfaceAddress()));
}
return true;
}
//! Creates a new backing object of the same memory type, copies the current
//! content into it and releases the previous object.
//! \return false (with the previous object still active) if creation failed
bool
Resource::reallocate(CreateParams* params)
{
    // create() overwrites gslRef_, so remember the current reference first
    GslResourceReference* const previous = gslRef_;

    if (!create(memoryType(), params)) {
        // Keep using the previous object
        gslRef_ = previous;
        return false;
    }

    // The freshly created reference becomes active only after the copy
    GslResourceReference* const fresh = gslRef_;
    gslRef_ = previous;

    // Synchronous copy of the old content into the new object
    dev().resCopy(previous->gslResource(), fresh->gslResource(), CAL_MEMCOPY_SYNC);

    // Release the previous object; renames are not expected at this point
    assert(renames_.size() == 0);
    free();

    gslRef_ = fresh;
    return true;
}
void
Resource::free()
{
if (NULL != byteView_) {
delete byteView_;
byteView_ = NULL;
}
if (NULL != shortView_) {
delete shortView_;
shortView_ = NULL;
}
if (gslRef_ == NULL) {
return;
}
// Sanity check for the map calls
if (mapCount_ != 0) {
LogWarning("Resource wasn't unlocked, but destroyed!");
}
const bool wait = (memoryType() != ImageView) &&
(memoryType() != ImageBuffer);
// Check if resource could be used in any queue(thread)
if (gpu_ == NULL) {
Device::ScopedLockVgpus lock(dev());
if (renames_.size() == 0) {
// Destroy GSL resource
if (gslResource() != 0) {
// Release all virtual memory objects on all virtual GPUs
for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
dev().vgpus()[idx]->releaseMemory(gslResource(), wait);
}
//! @note: This is a workaround for bad applications that
//! don't unmap memory
if (mapCount_ != 0) {
unmap(NULL);
}
// Add resource to the cache
if (!dev().resourceCache().addCalResource(&cal_, gslRef_)) {
gslFree();
}
}
}
else {
renames_[curRename_]->cpuAddress_ = 0;
for (size_t i = 0; i < renames_.size(); ++i) {
gslRef_ = renames_[i];
// Destroy GSL resource
if (gslResource() != 0) {
// Release all virtual memory objects on all virtual GPUs
for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
dev().vgpus()[idx]->releaseMemory(gslResource());
}
gslFree();
}
}
}
}
else {
if (renames_.size() == 0) {
// Destroy GSL resource
if (gslResource() != 0) {
// Release virtual memory object on the specified virtual GPU
gpu_->releaseMemory(gslResource(), wait);
gslFree();
}
}
else for (size_t i = 0; i < renames_.size(); ++i) {
gslRef_ = renames_[i];
// Destroy GSL resource
if (gslResource() != 0) {
// Release virtual memory object on the specified virtual GPUs
gpu_->releaseMemory(gslResource());
gslFree();
}
}
}
// Free SRD for images
if ((dev().settings().hsail_ || (dev().settings().oclVersion_ == OpenCL20)) &&
!cal()->buffer_) {
dev().srds().freeSrdSlot(hwSrd_);
}
}
//! Writes \a size bytes of \a data to the surface with a raw GPU write.
//! \a size must be a multiple of 4 (DWORD aligned).
//! When \a waitForEvent is set the call returns only after the write completed.
void
Resource::writeRawData(VirtualGPU& gpu, size_t size, const void* data, bool waitForEvent) const
{
    // The raw write operates on whole DWORDs
    assert((size & 3) == 0);

    GpuEvent writeEvent;
    gpu.writeSurfRaw(writeEvent, gslResource(), size, data);

    // The resource is busy until the write retires
    setBusy(gpu, writeEvent);

    // Update the global GPU event
    gpu.setGpuEvent(writeEvent, false);

    if (!waitForEvent) {
        return;
    }
    // Block until the write is done
    gpu.waitForEvent(&writeEvent);
}
//! Copies a region of this resource into \a dstResource on the given queue.
//! The copy runs asynchronously on the SDMA engine when the settings allow it
//! and at least one of the two resources isn't in card memory; otherwise it is
//! a synchronous copy on the current engine. The queue's engine selection is
//! restored before returning.
//! \return the result of the underlying partial copy
bool
Resource::partialMemCopyTo(
    VirtualGPU&         gpu,
    const amd::Coord3D& srcOrigin,
    const amd::Coord3D& dstOrigin,
    const amd::Coord3D& size,
    Resource&           dstResource,
    bool                enableCopyRect,
    bool                flushDMA) const
{
    static const bool waitOnBusyEngine = true;

    GpuEvent    copyEvent;
    // Remember the engine, so it can be restored after the copy
    const EngineType savedEngine = gpu.engineID_;

    // Check if runtime can use async memory copy,
    // even if a caller didn't request async
    const bool useAsync = dev().settings().asyncMemCopy_ &&
        // Keep ASYNC if profiling is disabled or sdma profiling is possible
        (!gpu.profiling() || dev().settings().sdmaProfiling_) &&
        (!cal()->cardMemory_ || !dstResource.cal()->cardMemory_);

    // \note timing issues in Linux with sync mode
    bool    flush = true;
    CALuint syncFlags = CAL_MEMCOPY_SYNC;
    if (useAsync) {
        // Switch to SDMA engine
        gpu.engineID_ = SdmaEngine;
        syncFlags = CAL_MEMCOPY_ASYNC;
        flush = false;
    }

    // Wait for the resources, since runtime may use async transfers
    wait(gpu, waitOnBusyEngine);
    dstResource.wait(gpu, waitOnBusyEngine);

    // Translate the coordinates; X origins are shifted by the pin offsets
    size_t calSrcOrigin[3];
    size_t calDstOrigin[3];
    size_t calSize[3];
    for (uint dim = 0; dim < 3; ++dim) {
        calSrcOrigin[dim] = srcOrigin[dim];
        calDstOrigin[dim] = dstOrigin[dim];
        calSize[dim]      = size[dim];
    }
    calSrcOrigin[0] = srcOrigin[0] + pinOffset();
    calDstOrigin[0] = dstOrigin[0] + dstResource.pinOffset();

    const bool copied = gpu.copyPartial(copyEvent,
        gslResource(), calSrcOrigin,
        dstResource.gslResource(), calDstOrigin,
        calSize, static_cast<CALmemcopyflags>(syncFlags), enableCopyRect);

    if (copied) {
        // Mark source and destination as busy
        setBusy(gpu, copyEvent);
        dstResource.setBusy(gpu, copyEvent);
        // Update the global GPU event
        gpu.setGpuEvent(copyEvent, (flush || flushDMA));
    }

    // Restore the original engine
    gpu.engineID_ = savedEngine;
    return copied;
}
//! Associates \a gpuEvent with this resource on the given queue and
//! propagates it up the chain of view owners.
void
Resource::setBusy(VirtualGPU& gpu, GpuEvent gpuEvent) const
{
    gpu.assignGpuEvent(this, gpuEvent);

    // Not a view: nothing else to mark
    if (NULL == viewOwner_) {
        return;
    }
    // A view keeps its parent busy as well
    viewOwner_->setBusy(gpu, gpuEvent);
}
//! Waits for the last GPU event recorded for this resource on the given queue.
//! With \a waitOnBusyEngine set, the wait is skipped when the event was issued
//! on the queue's currently selected engine. Parents of views are waited for
//! as well, except for the global heap.
void
Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const
{
    GpuEvent* lastEvent = gpu.getGpuEvent(this);

    // Wait unconditionally, or only if another engine was used on this resource
    const bool mustWait =
        !waitOnBusyEngine || (lastEvent->engineId_ != gpu.engineID_);
    if (mustWait) {
        gpu.waitForEvent(lastEvent);
    }

    // If current resource is a view and not in the global heap,
    // then wait for the parent event as well
    if (NULL == viewOwner_) {
        return;
    }
    if (viewOwner_ != &dev().globalMem()) {
        viewOwner_->wait(gpu, waitOnBusyEngine);
    }
}
//! Copies host memory into the resource through a CPU mapping.
//!
//! \param gpu        queue used for the map/unmap calls
//! \param hostPtr    source host memory
//! \param origin     destination origin; bytes for buffers, elements for images
//! \param size       region size; bytes for buffers, elements for images
//! \param flags      map flags passed to map()
//! \param rowPitch   source row pitch in bytes; 0 selects a tightly packed row
//! \param slicePitch source slice pitch in bytes; 0 selects tightly packed slices
//! \return false if the resource couldn't be mapped, true otherwise
bool
Resource::hostWrite(
    VirtualGPU* gpu,
    const void* hostPtr,
    const amd::Coord3D& origin,
    const amd::Coord3D& size,
    uint    flags,
    size_t  rowPitch,
    size_t  slicePitch)
{
    // For 1D arrays the layer is the second coordinate
    size_t  startLayer = origin[2];
    size_t  numLayers = size[2];
    if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
        startLayer = origin[1];
        numLayers = size[1];
    }

    // Get physical GPU memory
    void* dst = map(gpu, flags, startLayer, numLayers);
    if (NULL == dst) {
        LogError("Couldn't map GPU memory for host write");
        return false;
    }

    if (1 == cal()->dimSize_) {
        // Buffers are addressed in bytes, 1D images in elements. Both the
        // origin and the size have to be converted with the same unit,
        // otherwise an image write with a non-zero origin lands at the
        // wrong byte offset.
        const size_t unit = cal()->buffer_ ?
            static_cast<size_t>(1) : static_cast<size_t>(elementSize_);

        // Update the pointer
        dst = static_cast<void*>(static_cast<char*>(dst) + origin[0] * unit);

        // Copy memory
        amd::Os::fastMemcpy(dst, hostPtr, size[0] * unit);
    }
    else {
        size_t  srcOffs = 0;
        size_t  dstOffsBase = origin[0] * elementSize_;
        size_t  dstOffs;

        // Make sure we use the right pitch if it's not specified
        if (rowPitch == 0) {
            rowPitch = size[0] * elementSize_;
        }

        // Make sure we use the right slice if it's not specified
        if (slicePitch == 0) {
            slicePitch = size[0] * size[1] * elementSize_;
        }

        // Adjust the destination offset with Y dimension
        dstOffsBase += cal()->pitch_ * origin[1] * elementSize_;

        // Adjust the destination offset with Z dimension
        dstOffsBase += cal()->slice_ * origin[2] * elementSize_;

        // Copy memory slice by slice
        for (size_t slice = 0; slice < size[2]; ++slice) {
            dstOffs = dstOffsBase + slice * cal()->slice_ * elementSize_;
            srcOffs = slice * slicePitch;

            // Copy memory line by line
            for (size_t row = 0; row < size[1]; ++row) {
                // Copy memory
                amd::Os::fastMemcpy(
                    (reinterpret_cast<address>(dst) + dstOffs),
                    (reinterpret_cast<const_address>(hostPtr) + srcOffs),
                    size[0] * elementSize_);

                dstOffs += cal()->pitch_ * elementSize_;
                srcOffs += rowPitch;
            }
        }
    }

    // Unmap GPU memory
    unmap(gpu);

    return true;
}
//! Copies the content of the resource into host memory through a CPU mapping.
//!
//! \param gpu        queue used for the map/unmap calls
//! \param hostPtr    destination host memory
//! \param origin     source origin; bytes for buffers, elements for images
//! \param size       region size; bytes for buffers, elements for images
//! \param rowPitch   destination row pitch in bytes; 0 selects a tightly packed row
//! \param slicePitch destination slice pitch in bytes; 0 selects tightly packed slices
//! \return false if the resource couldn't be mapped, true otherwise
bool
Resource::hostRead(
    VirtualGPU* gpu,
    void* hostPtr,
    const amd::Coord3D& origin,
    const amd::Coord3D& size,
    size_t  rowPitch,
    size_t  slicePitch)
{
    // For 1D arrays the layer is the second coordinate
    size_t  startLayer = origin[2];
    size_t  numLayers = size[2];
    if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
        startLayer = origin[1];
        numLayers = size[1];
    }

    // Get physical GPU memory
    void* src = map(gpu, ReadOnly, startLayer, numLayers);
    if (NULL == src) {
        LogError("Couldn't map GPU memory for host read");
        return false;
    }

    if (1 == cal()->dimSize_) {
        // Buffers are addressed in bytes, 1D images in elements. Both the
        // origin and the size have to be converted with the same unit,
        // otherwise an image read with a non-zero origin starts at the
        // wrong byte offset.
        const size_t unit = cal()->buffer_ ?
            static_cast<size_t>(1) : static_cast<size_t>(elementSize_);

        // Update the pointer
        src = static_cast<void*>(static_cast<char*>(src) + origin[0] * unit);

        // Copy memory
        amd::Os::fastMemcpy(hostPtr, src, size[0] * unit);
    }
    else {
        size_t  srcOffsBase = origin[0] * elementSize_;
        size_t  srcOffs;
        size_t  dstOffs = 0;

        // Make sure we use the right pitch if it's not specified
        if (rowPitch == 0) {
            rowPitch = size[0] * elementSize_;
        }

        // Make sure we use the right slice if it's not specified
        if (slicePitch == 0) {
            slicePitch = size[0] * size[1] * elementSize_;
        }

        // Adjust the source offset with Y dimension
        srcOffsBase += cal()->pitch_ * origin[1] * elementSize_;

        // Adjust the source offset with Z dimension
        srcOffsBase += cal()->slice_ * origin[2] * elementSize_;

        // Copy memory slice by slice
        for (size_t slice = 0; slice < size[2]; ++slice) {
            srcOffs = srcOffsBase + slice * cal()->slice_ * elementSize_;
            dstOffs = slice * slicePitch;

            // Copy memory line by line
            for (size_t row = 0; row < size[1]; ++row) {
                // Copy memory
                amd::Os::fastMemcpy(
                    (reinterpret_cast<address>(hostPtr) + dstOffs),
                    (reinterpret_cast<const_address>(src) + srcOffs),
                    size[0] * elementSize_);

                srcOffs += cal()->pitch_ * elementSize_;
                dstOffs += rowPitch;
            }
        }
    }

    // Unmap GPU memory
    unmap(gpu);

    return true;
}
bool
Resource::gslMap(void** ptr, size_t* pitch, gslMapAccessType flags, gslMemObject resource) const
{
bool result = true;
if (cal_.cardMemory_ || cal_.tiled_) {
// @todo remove const cast
result = const_cast<Device&>(dev()).resMapLocal(*ptr, *pitch, resource, flags);
}
else {
result = dev().resMapRemote(*ptr, *pitch, resource, flags);
}
return result;
}
//! Unmaps a GSL memory object previously mapped with gslMap().
//!
//! The local/remote selection must mirror gslMap(): a resource that was
//! mapped with resMapLocal() (card memory or a tiled surface) has to be
//! released with resUnmapLocal(), everything else with resUnmapRemote().
//!
//! \param resource GSL memory object to unmap
//! \return result of the device unmap call
bool
Resource::gslUnmap(gslMemObject resource) const
{
    bool result = true;

    // Same condition as in gslMap(), so map and unmap always use a matching pair
    if (cal_.cardMemory_ || cal_.tiled_) {
        // @todo remove const cast
        result = const_cast<Device&>(dev()).resUnmapLocal(resource);
    }
    else {
        result = dev().resUnmapRemote(resource);
    }

    return result;
}
//! Acquires an OpenGL interop resource through the device.
//! Only OGLInterop resources with one of the depth formats listed below
//! reach the device call; any other resource reports success right away.
//!
//! \return result of resGLAcquire() for depth interop resources, true otherwise
bool
Resource::gslGLAcquire()
{
    if (OGLInterop != cal()->type_) {
        return true;
    }

    bool acquired = true;

    // Acquire is required only for depth resources
    switch (static_cast<int>(cal()->format_)) {
    case CM_SURF_FMT_DEPTH24_STEN8:
    case CM_SURF_FMT_DEPTH32F_X24_STEN8:
    case CM_SURF_FMT_DEPTH32F:
    case CM_SURF_FMT_DEPTH16:
        acquired = dev().resGLAcquire(glPlatformContext_, glInteropMbRes_, glType_);
        break;
    default:
        break;
    }

    return acquired;
}
//! Releases an OpenGL interop resource through the device.
//! Only OGLInterop resources with one of the depth formats listed below
//! reach the device call; any other resource reports success right away.
//!
//! \return result of resGLRelease() for depth interop resources, true otherwise
bool
Resource::gslGLRelease()
{
    if (OGLInterop != cal()->type_) {
        return true;
    }

    bool released = true;

    // Release is required only for depth resources
    switch (static_cast<int>(cal()->format_)) {
    case CM_SURF_FMT_DEPTH24_STEN8:
    case CM_SURF_FMT_DEPTH32F_X24_STEN8:
    case CM_SURF_FMT_DEPTH32F:
    case CM_SURF_FMT_DEPTH16:
        released = dev().resGLRelease(glPlatformContext_, glInteropMbRes_);
        break;
    default:
        break;
    }

    return released;
}
//! Drops this resource's hold on its GSL reference.
//! For OpenGL interop resources the GL side object is freed explicitly first:
//! when an "original" object exists it is the GL object and resource_ is an
//! ordinary allocation, otherwise resource_ itself is the GL object.
void
Resource::gslFree() const
{
    if (OGLInterop == cal()->type_) {
        if (gslRef_->resOriginal_ != 0) {
            // Plain free for the active allocation ...
            dev().resFree(gslRef_->resource_);
            gslRef_->resource_ = 0;

            // ... and GL free for the original interop object
            dev().resGLFree(glPlatformContext_, glDeviceContext_,
                gslRef_->resOriginal_, glInterop_, glInteropMbRes_, glType_);
            gslRef_->resOriginal_ = 0;
        }
        else {
            // The active allocation is the interop object
            dev().resGLFree(glPlatformContext_, glDeviceContext_,
                gslRef_->resource_, glInterop_, glInteropMbRes_, glType_);
            gslRef_->resource_ = 0;
        }
    }

    gslRef_->release();
}
//! Checks whether this resource is of the requested memory type.
//! A view is resolved through its owner, so it matches whenever the
//! owning resource matches.
//!
//! \param memType Memory type to test for
//! \return true if this resource, or the owner of this view, has that type
bool
Resource::isMemoryType(MemoryType memType) const
{
    return (memoryType() == memType) ||
           ((memoryType() == View) && viewOwner_->isMemoryType(memType));
}
//! Tells whether a persistent resource can be mapped directly.
//! This requires persistent memory, fewer than 3 dimensions and no image array.
//! Tiled resources additionally require Windows with the
//! linearPersistentImage_ setting switched off.
//!
//! \return true if the direct map is possible
bool
Resource::isPersistentDirectMap() const
{
    if (memoryType() != Resource::Persistent) {
        return false;
    }
    if ((cal()->dimSize_ >= 3) || cal()->imageArray_) {
        return false;
    }
    if (!cal()->tiled_) {
        return true;
    }

    //!@note IOL for Linux doesn't support tiling aperture
    // and runtime doesn't force linear images in persistent
    return IS_WINDOWS && !dev().settings().linearPersistentImage_;
}
//! Maps the resource for CPU access and returns the host pointer.
//!
//! Maps are counted in mapCount_: only the first map performs the actual
//! GSL mapping, nested calls return the already mapped address. Pinned
//! resources are never mapped, their host address is returned directly.
//!
//! \param gpu        Virtual GPU used for waiting and renaming. May be NULL,
//!                   in which case neither a wait nor a rename is performed
//! \param flags      Combination of ReadOnly, WriteOnly, Discard and NoWait
//! \param startLayer First layer to map (3D resources and image arrays only)
//! \param numLayers  Number of layers to map (3D resources and image arrays only)
//! \return CPU address of the mapping or NULL on failure
void*
Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
{
    if (isMemoryType(Pinned)) {
        // Check if we have to wait
        if (!(flags & NoWait)) {
            if (gpu != NULL) {
                wait(*gpu);
            }
        }
        return address_;
    }

    // Translate the runtime flags into a GSL access type
    gslMapAccessType mapFlags = GSL_MAP_READ_WRITE;
    if (flags & ReadOnly) {
        assert(!(flags & Discard) && "We can't use lock discard with read only!");
        mapFlags = GSL_MAP_READ_ONLY;
    }
    if (flags & WriteOnly) {
        mapFlags = GSL_MAP_WRITE_ONLY;
    }

    // Check if use map discard
    if (flags & Discard) {
        mapFlags = GSL_MAP_WRITE_ONLY;
        if (gpu != NULL) {
            // If we use a new renamed allocation, then skip the wait
            if (rename(*gpu)) {
                flags |= NoWait;
            }
        }
    }

    // Check if we have to wait
    if (!(flags & NoWait)) {
        if (gpu != NULL) {
            wait(*gpu);
        }
    }

    // Check if memory wasn't mapped yet
    if (++mapCount_ == 1) {
        if ((cal()->dimSize_ == 3) || cal()->imageArray_) {
            // Save map info for multilayer map/unmap
            startLayer_ = startLayer;
            numLayers_  = numLayers;
            mapFlags_   = mapFlags;
            // Map with layers
            address_ = mapLayers(gpu, mapFlags);
            if (address_ == NULL) {
                // Roll back the counter, exactly as for a failed plain map below.
                // Otherwise every later map() would skip the real mapping
                // and keep returning NULL
                LogError("Map layers failed!");
                --mapCount_;
                return NULL;
            }
        }
        else {
            // Map current resource
            if (!gslMap(&address_, &cal_.pitch_, mapFlags, gslResource())) {
                LogError("cal::ResMap failed!");
                --mapCount_;
                return NULL;
            }
        }
    }

    //! \note the atomic operation with counter doesn't
    // guarantee that the address will be valid,
    // since GSL could still process the first map
    if (address_ == NULL) {
        amd::Os::sleep(10);
        assert((address_ != NULL) && "Multiple maps failed!");
    }

    return address_;
}
//! Maps a layered resource (3D image or image array) through a linear
//! staging buffer in system memory.
//!
//! A staging buffer for all layers is allocated and stored in address_.
//! Unless the map is write only, every requested layer is exposed as a
//! slice view, mapped read only and copied row by row into the staging
//! buffer. The layer range comes from startLayer_/numLayers_;
//! numLayers_ == 0 selects everything up to the last layer.
//!
//! \param gpu   Virtual GPU (not used by this method)
//! \param flags GSL map access type of the requested map
//! \return staging buffer address, or NULL on failure. On failure the
//!         staging buffer and any temporary slice view are released
void*
Resource::mapLayers(VirtualGPU* gpu, CALuint flags)
{
    size_t srcOffs = 0;
    size_t dstOffs = 0;
    gslMemObject sliceResource = 0;
    gslMemObjectAttribType gslDim = GSL_MOA_TEXTURE_2D;
    size_t layers = cal()->depth_;
    size_t height = cal()->height_;

    // Use 1D layers
    if (GSL_MOA_TEXTURE_1D_ARRAY == cal()->dimension_) {
        gslDim = GSL_MOA_TEXTURE_1D;
        height = 1;
        layers = cal()->height_;
    }

    // The staging buffer is tightly packed
    cal_.pitch_ = cal()->width_;
    cal_.slice_ = cal()->pitch_ * height;
    address_ = new char [cal()->slice_ * layers * elementSize()];
    if (NULL == address_) {
        return NULL;
    }

    // Check if map is write only
    if (flags == GSL_MAP_WRITE_ONLY) {
        // Nothing to read back, the content is uploaded on unmap
        return address_;
    }

    if (numLayers_ != 0) {
        layers = startLayer_ + numLayers_;
    }
    dstOffs = startLayer_ * cal()->slice_ * elementSize();

    // Loop through all layers
    for (uint i = startLayer_; i < layers; ++i) {
        gslResource3D gslSize;
        CALdomain calOffset;
        void* sliceAddr;
        size_t pitch;

        // Allocate a layer from the image
        gslSize.width = cal()->width_;
        gslSize.height = height;
        gslSize.depth = 1;
        calOffset.x = 0;
        calOffset.y = 0;
        calOffset.width = 0;
        calOffset.height = 0;
        sliceResource = dev().resAllocView(
            gslResource(), gslSize,
            calOffset, cal()->format_, cal()->channelOrder_, gslDim,
            0, i, CAL_RESALLOCSLICEVIEW_LAYER);
        if (0 == sliceResource) {
            LogError("Map layer. resAllocSliceView failed!");
            // Don't leak the staging buffer on failure
            delete [] reinterpret_cast<char*>(address_);
            address_ = NULL;
            return NULL;
        }

        // Map 2D layer
        if (!gslMap(&sliceAddr, &pitch, GSL_MAP_READ_ONLY, sliceResource)) {
            LogError("Map layer. CalResMap failed!");
            // Don't leak the slice view and the staging buffer on failure
            dev().resFree(sliceResource);
            delete [] reinterpret_cast<char*>(address_);
            address_ = NULL;
            return NULL;
        }

        srcOffs = 0;
        // Copy memory line by line
        for (size_t rows = 0; rows < height; ++rows) {
            // Copy memory
            amd::Os::fastMemcpy(
                (reinterpret_cast<address>(address_) + dstOffs),
                (reinterpret_cast<const_address>(sliceAddr) + srcOffs),
                cal()->width_ * elementSize_);

            dstOffs += cal()->pitch_ * elementSize();
            srcOffs += pitch * elementSize();
        }

        // Unmap a layer
        if (!gslUnmap(sliceResource)) {
            LogError("Map layer. CalResUnmap failed!");
        }
        dev().resFree(sliceResource);
    }

    return address_;
}
//! Releases one CPU mapping obtained with map().
//! The real unmap happens only when the map counter drops to zero.
//! Pinned resources are never mapped, so there is nothing to undo for them.
//!
//! \param gpu Virtual GPU forwarded to the layered unmap
void
Resource::unmap(VirtualGPU* gpu)
{
    if (isMemoryType(Pinned)) {
        return;
    }

    // Drop one reference from the map counter
    const int remaining = --mapCount_;

    if (remaining < 0) {
        // Unbalanced unmap: restore the counter and bail out
        LogError("dev().serialCalResUnmap failed!");
        ++mapCount_;
        return;
    }

    if (remaining != 0) {
        // Somebody else still uses the mapping
        return;
    }

    // This was the last unmap
    if ((3 == cal()->dimSize_) || cal()->imageArray_) {
        // Layered resources go through the staging buffer
        unmapLayers(gpu);
    }
    else if (!gslUnmap(gslResource())) {
        LogError("CalResUnmap failed!");
    }

    address_ = NULL;
}
//! Finishes a layered map started with mapLayers().
//!
//! Unless the map was read only, the staging buffer in address_ is uploaded
//! back layer by layer: each layer is exposed as a slice view, mapped write
//! only and filled row by row. The staging buffer is always destroyed at the
//! end, even if the upload had to be aborted.
//!
//! \param gpu Virtual GPU (not used by this method)
void
Resource::unmapLayers(VirtualGPU* gpu)
{
    size_t srcOffs = 0;
    size_t dstOffs = 0;
    gslMemObjectAttribType gslDim = GSL_MOA_TEXTURE_2D;
    gslMemObject sliceResource = NULL;
    CALuint layers = cal()->depth_;
    CALuint height = cal()->height_;

    // Use 1D layers
    if (GSL_MOA_TEXTURE_1D_ARRAY == cal()->dimension_) {
        gslDim = GSL_MOA_TEXTURE_1D;
        height = 1;
        layers = cal()->height_;
    }

    if (numLayers_ != 0) {
        layers = startLayer_ + numLayers_;
    }
    srcOffs = startLayer_ * cal()->slice_ * elementSize();

    // A read only map has nothing to upload
    if (!(mapFlags_ == GSL_MAP_READ_ONLY)) {
        // Loop through all layers
        for (uint i = startLayer_; i < layers; ++i) {
            gslResource3D gslSize;
            CALdomain calOffset;
            void* sliceAddr;
            size_t pitch;

            // Allocate a layer from the image
            gslSize.width = cal()->width_;
            gslSize.height = height;
            gslSize.depth = 1;
            calOffset.x = 0;
            calOffset.y = 0;
            calOffset.width = 0;
            calOffset.height = 0;
            sliceResource = dev().resAllocView(
                gslResource(), gslSize,
                calOffset, cal()->format_, cal()->channelOrder_, gslDim,
                0, i, CAL_RESALLOCSLICEVIEW_LAYER);
            if (0 == sliceResource) {
                LogError("Unmap layer. resAllocSliceView failed!");
                // Abort the upload, but still release the staging buffer below
                break;
            }

            // Map a layer
            if (!gslMap(&sliceAddr, &pitch, GSL_MAP_WRITE_ONLY, sliceResource)) {
                LogError("Unmap layer. CalResMap failed!");
                // Don't leak the slice view; the staging buffer is released below
                dev().resFree(sliceResource);
                break;
            }

            dstOffs = 0;
            // Copy memory line by line
            for (size_t rows = 0; rows < height; ++rows) {
                // Copy memory
                amd::Os::fastMemcpy(
                    (reinterpret_cast<address>(sliceAddr) + dstOffs),
                    (reinterpret_cast<const_address>(address_) + srcOffs),
                    cal()->width_ * elementSize_);

                dstOffs += pitch * elementSize();
                srcOffs += cal()->pitch_ * elementSize();
            }

            // Unmap a layer
            if (!gslUnmap(sliceResource)) {
                LogError("Unmap layer. CalResUnmap failed!");
            }
            dev().resFree(sliceResource);
        }
    }

    // Destroy the mapped memory
    delete [] reinterpret_cast<char*>(address_);
}
//! Makes the given GSL reference the active allocation of this resource.
//!
//! \param gpu    Virtual GPU (not used by this method)
//! \param rename GSL reference that becomes active
void
Resource::setActiveRename(VirtualGPU& gpu, GslResourceReference* rename)
{
    // Take over the allocation specific state
    gslRef_  = rename;
    address_ = rename->cpuAddress_;

    if (!dev().heap()->isVirtual()) {
        return;
    }

    // With a virtual heap, refresh the offset relative to the heap base address
    hbOffset_ = rename->gslResource()->getSurfaceAddress() -
        dev().heap()->baseAddress();
}
//! Reports the GSL reference that is currently active for this resource.
//!
//! \param gpu    Virtual GPU (not used by this method)
//! \param rename Receives the active GSL reference
//! \return always true
bool
Resource::getActiveRename(VirtualGPU& gpu, GslResourceReference** rename)
{
    GslResourceReference* const active = gslRef_;
    *rename = active;
    return true;
}
//! Tries to give the resource a different backing GSL allocation.
//! map() calls this for Discard maps and skips its wait when the result is true.
//!
//! \param gpu   Virtual GPU whose event for this resource is inspected
//! \param force If true, rename even when the GPU event isn't valid
//!              (used by warmUpRenames())
//! \return true when the event isn't valid (and force is off), when a big
//!         resource is reported done by the GPU, or when a new allocation
//!         was created. false when renaming isn't available for this
//!         resource or when an already existing rename had to be reused.
bool
Resource::rename(VirtualGPU& gpu, bool force)
{
GpuEvent* gpuEvent = gpu.getGpuEvent(this);
// Without a valid event there is nothing to rename for, unless forced
if (!gpuEvent->isValid() && !force) {
return true;
}
bool useNext = false;
// Size in bytes of one allocation (a zero height counts as 1)
CALuint resSize = cal()->width_ * ((cal()->height_) ? cal()->height_ : 1) *
elementSize_;
// Rename will work with real GSL resources
// and only if the settings allow at least one rename
if (((memoryType() != Local) &&
(memoryType() != Persistent) &&
(memoryType() != Remote) &&
(memoryType() != RemoteUSWC)) ||
(dev().settings().maxRenames_ == 0)) {
return false;
}
// If the resource for renaming is too big, then lets check the current status first
// at the cost of an extra flush
// (maxRenames_ can't be 0 here because of the check above)
if (resSize >= (dev().settings().maxRenameSize_ / dev().settings().maxRenames_)) {
if (gpu.isDone(gpuEvent)) {
return true;
}
}
// Save the first: on the very first rename the current allocation
// becomes entry 0 of the rename list
if (renames_.size() == 0) {
GslResourceReference* rename;
// Remember the CPU address if the resource is mapped right now
if (mapCount_ > 0) {
gslRef_->cpuAddress_ = address_;
}
if (!getActiveRename(gpu, &rename)) {
return false;
}
curRename_ = renames_.size();
renames_.push_back(rename);
}
// Can we use a new rename?
// Both the number of renames and their total size are limited by the settings
if ((renames_.size() <= dev().settings().maxRenames_) &&
((renames_.size() * resSize) <= dev().settings().maxRenameSize_)) {
GslResourceReference* rename;
// Create a new GSL allocation
// NOTE(review): create() presumably replaces gslRef_ with the new
// allocation, since the code below uses gslRef_ as the new one - confirm
if (create(memoryType())) {
// The resource is mapped at the moment, so map the new allocation as well
// and switch the CPU address over to it
if (mapCount_ > 0) {
assert(!cal()->cardMemory_ && "Unsupported memory type!");
if (!dev().resMapRemote(gslRef_->cpuAddress_, cal_.pitch_,
gslResource(), GSL_MAP_READ_WRITE)) {
LogError("gslMap fails on rename!");
}
address_ = gslRef_->cpuAddress_;
}
// Append the new allocation to the rename list and make it current
if (getActiveRename(gpu, &rename)) {
curRename_ = renames_.size();
renames_.push_back(rename);
}
else {
gslRef_->release();
useNext = true;
}
}
else {
// Allocation failed, fall back to an existing rename
useNext = true;
}
}
else {
// Limits are reached, fall back to an existing rename
useNext = true;
}
if (useNext) {
// Advance to the next entry of the rename list, wrapping around at the end
curRename_++;
if (curRename_ >= renames_.size()) {
curRename_ = 0;
}
setActiveRename(gpu, renames_[curRename_]);
// false: the caller doesn't skip its wait (see map())
return false;
}
return true;
}
//! Issues maxRenames_ forced renames in a row.
//!
//! \param gpu Virtual GPU forwarded to rename()
void
Resource::warmUpRenames(VirtualGPU& gpu)
{
    uint pass = 0;
    while (pass < dev().settings().maxRenames_) {
        // Force the rename regardless of the GPU event state
        rename(gpu, true);
        ++pass;
    }
}
//! Returns a byte or short typed view that aliases this resource.
//! The view is created on first request and then kept in
//! byteView_/shortView_ for later calls.
//!
//! \param newFormat CM_SURF_FMT_R8I or CM_SURF_FMT_R16I, other formats are rejected
//! \return the aliasing view or NULL on failure
Resource*
Resource::getAliasUAVBuffer(cmSurfFmt newFormat)
{
    // Lock device so a view allocation is unique operation
    amd::ScopedLock k(dev().gslDeviceOps());

    // Pick the slot that caches the view and the element size of the format
    Resource** cached = NULL;
    uint bytesPerElement = 0;
    if (CM_SURF_FMT_R8I == newFormat) {
        cached = &byteView_;
        bytesPerElement = 1;
    }
    else if (CM_SURF_FMT_R16I == newFormat) {
        cached = &shortView_;
        bytesPerElement = 2;
    }
    else { // only take byte and short
        assert(false && "Unsupported format for a view");
        return NULL;
    }

    // Reuse the view from an earlier call
    if (*cached != NULL) {
        return *cached;
    }

    // Allocate byte/short view over the whole resource
    Resource* alias = new Resource(
        dev(), (cal()->width_ * elementSize()) / bytesPerElement, newFormat);
    if (NULL == alias) {
        return NULL;
    }

    Resource::ViewParams params;
    params.offset_   = 0;
    params.size_     = cal()->width_ * elementSize();
    params.resource_ = this;
    if (!alias->create(Resource::View, &params)) {
        delete alias;
        return NULL;
    }

    // Save view resource for the next request
    *cached = alias;
    return alias;
}
//! Runs free() with its default argument when the cache goes away
ResourceCache::~ResourceCache()
{
    this->free();
}
//! Puts a GSL resource into the cache instead of destroying it.
//! Only Local, Persistent, Remote and RemoteUSWC resources that are smaller
//! than the cache limit and aren't SVM resources are accepted. Entries at
//! the back of the list are evicted until the new resource fits.
//!
//! \param desc Descriptor of the resource, a copy is stored in the cache
//! \param ref  GSL reference to keep in the cache
//! \return true if the resource was added to the cache
//! \note the cache works in FILO mode
bool
ResourceCache::addCalResource(
    Resource::CalResourceDesc* desc, GslResourceReference* ref)
{
    amd::ScopedLock l(&lockCacheOps_);
    const size_t bytes = getResourceSize(desc);

    const bool cacheableType =
        (Resource::Local == desc->type_) ||
        (Resource::Persistent == desc->type_) ||
        (Resource::Remote == desc->type_) ||
        (Resource::RemoteUSWC == desc->type_);

    // Make sure current allocation isn't bigger than cache
    if (!cacheableType || !(bytes < cacheSizeLimit_) || desc->SVMRes_) {
        return false;
    }

    // Validate the cache size limit. Evict until we have enough space
    while ((cacheSize_ + bytes) > cacheSizeLimit_) {
        removeLast();
    }

    Resource::CalResourceDesc* copy = new Resource::CalResourceDesc;
    if (NULL == copy) {
        return false;
    }

    // The cache keeps its own copy of the descriptor
    memcpy(copy, desc, sizeof(Resource::CalResourceDesc));

    // The newest entry goes to the front of the list
    resCache_.push_front(std::make_pair(copy, ref));
    cacheSize_ += bytes;
    return true;
}
//! Looks for a cached GSL resource that matches the descriptor.
//! A matching entry is removed from the cache and handed over to the caller.
//!
//! \param desc Descriptor of the wanted resource
//! \return GSL reference of the matching entry, or NULL if the resource is
//!         too big for the cache, is an SVM resource or no entry matches
GslResourceReference*
ResourceCache::findCalResource(Resource::CalResourceDesc* desc)
{
    amd::ScopedLock l(&lockCacheOps_);
    GslResourceReference* ref = NULL;
    size_t size = getResourceSize(desc);

    // Early exit if resource is too big
    if (size >= cacheSizeLimit_ || desc->SVMRes_) {
        //! \note we may need to free the cache here to reduce memory pressure
        return ref;
    }

    // Search the right resource through the cache list
    std::list<std::pair<Resource::CalResourceDesc*,
        GslResourceReference*> >::iterator it;
    for (it = resCache_.begin(); it != resCache_.end(); ++it) {
        Resource::CalResourceDesc* entry = it->first;
        // Find if we can reuse this entry
        if ((entry->dimension_ == desc->dimension_) &&
            (entry->type_ == desc->type_) &&
            (entry->width_ == desc->width_) &&
            (entry->height_ == desc->height_) &&
            (entry->depth_ == desc->depth_) &&
            (entry->channelOrder_ == desc->channelOrder_) &&
            (entry->format_ == desc->format_) &&
            (entry->flags_ == desc->flags_)) {
            ref = it->second;
            delete entry;
            // Erase exactly the found node. This avoids a second scan of the
            // list and never passes a reference to the element being removed
            resCache_.erase(it);
            // Dimensions and format match, so the entry has the requested size
            cacheSize_ -= size;
            break;
        }
    }

    return ref;
}
//! Empties the cache if it holds more than minCacheEntries entries.
//!
//! \param minCacheEntries The cache is left untouched unless it has more
//!                        entries than this
//! \return true if there was cached memory to release
bool
ResourceCache::free(size_t minCacheEntries)
{
    amd::ScopedLock l(&lockCacheOps_);

    // Not enough entries to bother
    if (!(minCacheEntries < resCache_.size())) {
        return false;
    }

    const bool released = (static_cast<int>(cacheSize_) > 0);

    // Clear the cache
    while (static_cast<int>(cacheSize_) > 0) {
        removeLast();
    }
    CondLog((cacheSize_ != 0), "Incorrect size for cache release!");

    return released;
}
//! Calculates the size of a resource from its descriptor.
//! Height and depth equal to 0 are treated as 1.
//!
//! \param desc Descriptor of the resource
//! \return element count multiplied by the element size reported by
//!         memoryFormatSize() for the descriptor format
size_t
ResourceCache::getResourceSize(Resource::CalResourceDesc* desc)
{
    // Total amount of elements in all dimensions
    size_t total =
        desc->width_ *
        ((desc->height_) ? desc->height_ : 1) *
        ((desc->depth_) ? desc->depth_ : 1);

    // Scale by the element size of the format
    return total * static_cast<size_t>(memoryFormatSize(desc->format_).size_);
}
void
ResourceCache::removeLast()
{
std::pair<Resource::CalResourceDesc*, GslResourceReference*> entry;
entry = resCache_.back();
resCache_.pop_back();
size_t size = getResourceSize(entry.first);
// Delete CalResourceDesc
delete entry.first;
// Destroy GSL resource
entry.second->release();
cacheSize_ -= size;
}
} // namespace gpu