268ff9830e
EPR #402950 - Properly align the scratch buffer to 64K. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#452 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#189 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/include/cal/calcl.h#29 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp#95 edit
2105 líneas
62 KiB
C++
2105 líneas
62 KiB
C++
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "platform/program.hpp"
|
|
#include "platform/kernel.hpp"
|
|
#include "os/os.hpp"
|
|
#include "device/device.hpp"
|
|
#include "utils/flags.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "device/gpu/gpuresource.hpp"
|
|
#include "device/gpu/gpudevice.hpp"
|
|
#include "device/gpu/gpublit.hpp"
|
|
#include "device/gpu/gputimestamp.hpp"
|
|
#include "thread/atomic.hpp"
|
|
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
#include <cmath>
|
|
|
|
namespace gpu {
|
|
|
|
//! Binds a pair of GSL memory object handles to the device that manages them.
//!
//! \param gpuDev          device used later to unmap/free the handles
//! \param gslResource     handle stored as the active resource
//! \param gslResOriginal  handle stored as the original resource (may be 0)
//!
//! The CPU mapping address starts out as NULL.
GslResourceReference::GslResourceReference(const Device& gpuDev,
                                           gslMemObject gslResource,
                                           gslMemObject gslResOriginal)
    : device_(gpuDev),
      resource_(gslResource),
      resOriginal_(gslResOriginal),
      cpuAddress_(NULL)
{
}
|
|
|
|
//! Releases everything this reference still holds, in a fixed order:
//! the remote CPU mapping (if one is recorded), then the active GSL resource,
//! then the original GSL resource.
GslResourceReference::~GslResourceReference()
{
    // A non-NULL CPU address means the resource is still mapped
    if (NULL != cpuAddress_) {
        device_.resUnmapRemote(gslResource());
    }

    // Active resource goes first; the handle is cleared once it is freed
    if (gslResource() != 0) {
        device_.resFree(gslResource());
        resource_ = NULL;
    }

    // The original handle is queried only after the active one was cleared
    if (gslOriginal() != 0) {
        device_.resFree(gslOriginal());
        resOriginal_ = NULL;
    }
}
|
|
|
|
//! Constructs the descriptor of a buffer resource. No GSL memory is allocated
//! here; the descriptor type stays Empty until create() is called.
//!
//! \param gpuDev  device the resource belongs to
//! \param width   buffer width, stored in the descriptor as-is
//! \param format  surface format of a single element
Resource::Resource(const Device& gpuDev, size_t width, cmSurfFmt format)
    : elementSize_(0),
      gpuDevice_(gpuDev),
      mapCount_(0),
      address_(NULL),
      offset_(0),
      curRename_(0),
      gslRef_(NULL),
      viewOwner_(NULL),
      hbOffset_(0),
      hbSize_(0),
      pinOffset_(0),
      byteView_(NULL),
      shortView_(NULL),
      glInterop_(0),
      gpu_(NULL)
{
    // State: nothing allocated yet, no flags, card memory assumed by default
    cal_.type_          = Empty;
    cal_.flags_         = 0;
    cal_.cardMemory_    = true;
    cal_.SVMRes_        = false;

    // Geometry: a one-dimensional surface of the requested width
    cal_.width_         = width;
    cal_.height_        = 1;
    cal_.depth_         = 1;
    cal_.pitch_         = 0;
    cal_.slice_         = 0;
    cal_.dimSize_       = 1;
    cal_.dimension_     = GSL_MOA_BUFFER;

    // Format: buffers always replicate the R channel
    cal_.format_        = format;
    cal_.channelOrder_  = GSL_CHANNEL_ORDER_REPLICATE_R;

    // Classification: a buffer, not an image
    cal_.buffer_        = true;
    cal_.imageArray_    = false;
    cal_.imageType_     = 0;
}
|
|
|
|
//! Constructs the descriptor of an image resource. No GSL memory is allocated
//! here; the descriptor type stays Empty until create() is called.
//!
//! \param gpuDev     device the resource belongs to
//! \param width      image width
//! \param height     image height
//! \param depth      image depth
//! \param format     surface format of a single texel
//! \param chOrder    GSL channel order
//! \param imageType  OpenCL image object type; selects the GSL dimension,
//!                   the number of dimensions and the image-array flag
Resource::Resource(const Device& gpuDev,
                   size_t width,
                   size_t height,
                   size_t depth,
                   cmSurfFmt format,
                   gslChannelOrder chOrder,
                   cl_mem_object_type imageType)
    : elementSize_(0),
      gpuDevice_(gpuDev),
      mapCount_(0),
      address_(NULL),
      offset_(0),
      curRename_(0),
      gslRef_(NULL),
      viewOwner_(NULL),
      hbOffset_(0),
      hbSize_(0),
      pinOffset_(0),
      byteView_(NULL),
      shortView_(NULL),
      glInterop_(0),
      gpu_(NULL)
{
    // State: nothing allocated yet, no flags, card memory assumed by default
    cal_.type_          = Empty;
    cal_.flags_         = 0;
    cal_.cardMemory_    = true;
    cal_.SVMRes_        = false;

    // Geometry as requested by the caller
    cal_.width_         = width;
    cal_.height_        = height;
    cal_.depth_         = depth;
    cal_.pitch_         = 0;
    cal_.slice_         = 0;

    // Format
    cal_.format_        = format;
    cal_.channelOrder_  = chOrder;

    // Classification: an image; the array flag is raised per type below
    cal_.buffer_        = false;
    cal_.imageArray_    = false;
    cal_.imageType_     = imageType;

    switch (imageType) {
    case CL_MEM_OBJECT_IMAGE1D:
        cal_.dimension_ = GSL_MOA_TEXTURE_1D;
        cal_.dimSize_   = 1;
        break;
    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
        cal_.dimension_ = GSL_MOA_TEXTURE_1D_ARRAY;
        cal_.dimSize_   = 2;
        cal_.imageArray_ = true;
        break;
    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
        cal_.dimension_ = GSL_MOA_TEXTURE_BUFFER;
        cal_.dimSize_   = 1;
        break;
    case CL_MEM_OBJECT_IMAGE2D:
        cal_.dimension_ = GSL_MOA_TEXTURE_2D;
        cal_.dimSize_   = 2;
        break;
    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
        cal_.dimension_ = GSL_MOA_TEXTURE_2D_ARRAY;
        cal_.dimSize_   = 3;
        cal_.imageArray_ = true;
        break;
    case CL_MEM_OBJECT_IMAGE3D:
        cal_.dimension_ = GSL_MOA_TEXTURE_3D;
        cal_.dimSize_   = 3;
        break;
    default:
        // NOTE(review): cal_.dimension_ is not assigned on this path
        cal_.dimSize_   = 1;
        LogError("Unknown image type!");
        break;
    }
}
|
|
|
|
//! Destroys the resource; all of the cleanup is delegated to free().
Resource::~Resource()
{
    free();
}
|
|
|
|
//! Translates a CM surface format into the numeric image format type that
//! create() stores in hwState_[8].
//!
//! \param format  CM surface format of the image
//! \return the format type code. Unknown formats trigger an assert and, when
//!         asserts are compiled out, yield 0.
//!
//! Codes 4, 5 and 6 are never produced: the cases that would return them are
//! commented out in the original table.
//! NOTE(review): the codes look like HSAIL/BRIG channel type values - confirm.
static uint32_t GetHSAILImageFormatType(cmSurfFmt format)
{
    switch (format) {
    case CM_SURF_FMT_sR8:
    case CM_SURF_FMT_sRG8:
    case CM_SURF_FMT_sRGBA8:
        return 0;

    case CM_SURF_FMT_sU16:
    case CM_SURF_FMT_sUV16:
    case CM_SURF_FMT_sUVWQ16:
        return 1;

    case CM_SURF_FMT_INTENSITY8:
    case CM_SURF_FMT_RG8:
    case CM_SURF_FMT_RGBA8:
    case CM_SURF_FMT_RGBX8UI:
    case CM_SURF_FMT_RGBA8_SRGB:
        return 2;

    case CM_SURF_FMT_R16:
    case CM_SURF_FMT_RG16:
    case CM_SURF_FMT_RGBA16:
    case CM_SURF_FMT_DEPTH16:
        return 3;

    case CM_SURF_FMT_BGR10_X2:
        return 7;

    case CM_SURF_FMT_sR8I:
    case CM_SURF_FMT_sRG8I:
    case CM_SURF_FMT_sRGBA8I:
        return 8;

    case CM_SURF_FMT_sR16I:
    case CM_SURF_FMT_sRG16I:
    case CM_SURF_FMT_sRGBA16I:
        return 9;

    case CM_SURF_FMT_sR32I:
    case CM_SURF_FMT_sRG32I:
    case CM_SURF_FMT_sRGBA32I:
        return 10;

    case CM_SURF_FMT_R8I:
    case CM_SURF_FMT_RG8I:
    case CM_SURF_FMT_RGBA8UI:
        return 11;

    case CM_SURF_FMT_R16I:
    case CM_SURF_FMT_RG16I:
    case CM_SURF_FMT_RGBA16UI:
        return 12;

    case CM_SURF_FMT_R32I:
    case CM_SURF_FMT_RG32I:
    case CM_SURF_FMT_RGBA32UI:
        return 13;

    case CM_SURF_FMT_R16F:
    case CM_SURF_FMT_RG16F:
    case CM_SURF_FMT_RGBA16F:
        return 14;

    case CM_SURF_FMT_R32F:
    case CM_SURF_FMT_RG32F:
    case CM_SURF_FMT_RGBA32F:
    case CM_SURF_FMT_DEPTH32F:
        return 15;

    default:
        break;
    }

    // Unsupported surface format
    assert(false);
    return 0;
}
|
|
|
|
//! Translates a GSL channel order into the numeric image order type that
//! create() stores in hwState_[9].
//!
//! \param chOrder  GSL channel order of the image
//! \return the order type code. Unknown orders trigger an assert and, when
//!         asserts are compiled out, yield 0.
//!
//! NOTE(review): the codes look like HSAIL/BRIG channel order values - confirm.
static uint32_t GetHSAILImageOrderType(gslChannelOrder chOrder)
{
    switch (chOrder) {
    case GSL_CHANNEL_ORDER_A:           return 0;
    case GSL_CHANNEL_ORDER_R:           return 1;
    case GSL_CHANNEL_ORDER_RG:          return 3;
    case GSL_CHANNEL_ORDER_RA:          return 5;
    case GSL_CHANNEL_ORDER_RGB:         return 6;
    case GSL_CHANNEL_ORDER_RGBA:        return 8;
    case GSL_CHANNEL_ORDER_BGRA:        return 9;
    case GSL_CHANNEL_ORDER_ARGB:        return 10;
    case GSL_CHANNEL_ORDER_SRGB:        return 12;
    case GSL_CHANNEL_ORDER_SRGBX:       return 13;
    case GSL_CHANNEL_ORDER_SRGBA:       return 14;
    case GSL_CHANNEL_ORDER_SBGRA:       return 15;
    case GSL_CHANNEL_ORDER_INTENSITY:   return 16;
    case GSL_CHANNEL_ORDER_LUMINANCE:   return 17;
    case GSL_CHANNEL_ORDER_REPLICATE_R: return 18;
    default:
        break;
    }

    // Unsupported channel order
    assert(false);
    return 0;
}
|
|
|
|
bool
|
|
Resource::create(MemoryType memType, CreateParams* params, bool heap)
|
|
{
|
|
bool calRes = false;
|
|
gslMemObject gslResource = 0;
|
|
gslMemObject gslResOriginal = 0;
|
|
const amd::HostMemoryReference* hostMemRef = NULL;
|
|
bool imageCreateView = false;
|
|
CALuint hostMemOffset = 0;
|
|
bool foundCalRef = false;
|
|
bool viewDefined = false;
|
|
uint viewLayer = 0;
|
|
uint viewLevel = 0;
|
|
uint viewFlags = 0;
|
|
gslResource3D viewSize = {0};
|
|
CALdomain viewOffset = {0};
|
|
cmSurfFmt viewSurfFmt;
|
|
gslChannelOrder viewChannelOrder = GSL_CHANNEL_ORDER_UNSPECIFIED;
|
|
gslMemObjectAttribType viewResType;
|
|
CALresourceDesc desc;
|
|
uint64 bytePitch = (uint64)-1;
|
|
bool useRowPitch = false;
|
|
|
|
desc.vaBase = 0;
|
|
desc.minAlignment = 0;
|
|
desc.section = GSL_SECTION_REGULAR;
|
|
if (NULL != params && NULL != params->owner_) { //make sure params not NULL
|
|
mcaddr svmPtr = reinterpret_cast<mcaddr>(params->owner_->getSvmPtr());
|
|
desc.vaBase = (svmPtr == 1)? 0:svmPtr;
|
|
cal_.SVMRes_ = (svmPtr != 0);
|
|
desc.section = (svmPtr != 0) ? GSL_SECTION_SVM : GSL_SECTION_REGULAR;
|
|
|
|
if (params->owner_->getMemFlags() & CL_MEM_SVM_ATOMICS) {
|
|
desc.section = GSL_SECTION_SVM_ATOMICS;
|
|
}
|
|
}
|
|
// This is a thread safe operation
|
|
const_cast<Device&>(dev()).initializeHeapResources();
|
|
|
|
// Get the element size
|
|
elementSize_ = static_cast<CALuint>(memoryFormatSize(cal()->format_).size_);
|
|
cal_.type_ = memType;
|
|
if (memType == Scratch) {
|
|
cal_.type_ = Local;
|
|
}
|
|
|
|
// Force remote allocation if it was requested in the settings
|
|
if (dev().settings().remoteAlloc_ && !heap &&
|
|
((memoryType() == Local) ||
|
|
(memoryType() == Persistent))) {
|
|
cal_.type_ = RemoteUSWC;
|
|
}
|
|
|
|
if (dev().settings().disablePersistent_ && (memoryType() == Persistent)) {
|
|
cal_.type_ = RemoteUSWC;
|
|
}
|
|
|
|
if (cal()->buffer_) {
|
|
// Force linear tiling for buffer alloctions
|
|
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
|
|
}
|
|
|
|
if (params != NULL) {
|
|
gpu_ = params->gpu_;
|
|
}
|
|
|
|
switch (memoryType()) {
|
|
case Heap:
|
|
gslResource = dev().resGetHeap(0);
|
|
if (gslResource == 0) {
|
|
return false;
|
|
}
|
|
calRes = true;
|
|
cal_.width_ = static_cast<size_t>(gslResource->getPitch());
|
|
cal_.pitch_ = static_cast<size_t>(gslResource->getPitch());
|
|
break;
|
|
case Persistent:
|
|
if (dev().settings().linearPersistentImage_) {
|
|
// Force linear tiling for image allocations in persistent
|
|
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
|
|
}
|
|
// Fall through ...
|
|
case RemoteUSWC:
|
|
case Remote:
|
|
case BusAddressable:
|
|
case ExternalPhysical:
|
|
// Fall through to process the memory allocation ...
|
|
case Local: {
|
|
if (cal()->buffer_) {
|
|
//! @todo Remove alignment.
|
|
//! GSL asserts in mem copy with an unaligned size
|
|
cal_.width_ = amd::alignUp(cal_.width_, 64);
|
|
}
|
|
|
|
desc.dimension = cal()->dimension_;
|
|
desc.size.width = cal()->width_;
|
|
desc.size.height = cal()->height_;
|
|
desc.size.depth = cal()->depth_;
|
|
desc.format = cal()->format_;
|
|
desc.channelOrder = cal()->channelOrder_;
|
|
desc.flags = cal()->flags_;
|
|
desc.mipLevels = 0;
|
|
desc.systemMemory = NULL;
|
|
|
|
do {
|
|
// Find a type for allocation
|
|
if (memoryType() == Persistent) {
|
|
desc.type = GSL_MOA_MEMORY_CARD_LOCKABLE;
|
|
}
|
|
else if (memoryType() == Remote) {
|
|
desc.type = GSL_MOA_MEMORY_REMOTE_CACHEABLE;
|
|
}
|
|
else if (memoryType() == RemoteUSWC) {
|
|
desc.type = GSL_MOA_MEMORY_AGP;
|
|
}
|
|
else if (memoryType() == BusAddressable){
|
|
desc.type = GSL_MOA_MEMORY_CARD_BUS_ADDRESSABLE;
|
|
}
|
|
else if (memoryType() == ExternalPhysical){
|
|
desc.type = GSL_MOA_MEMORY_CARD_EXTERNAL_PHYSICAL;
|
|
cl_bus_address_amd bus_address =
|
|
(reinterpret_cast<amd::Buffer*>(params->owner_))->busAddress();
|
|
desc.busAddress[0] = bus_address.surface_bus_address;
|
|
desc.busAddress[1] = bus_address.marker_bus_address;
|
|
}
|
|
else {
|
|
desc.type = GSL_MOA_MEMORY_CARD_EXT_NONEXT;
|
|
}
|
|
|
|
// Check resource cache first for an appropriate resource
|
|
gslRef_ = dev().resourceCache().findCalResource(&cal_);
|
|
if (memType == Scratch) {
|
|
if ((dev().settings().hsail_) || (dev().settings().oclVersion_ >= OpenCL20)) {
|
|
desc.minAlignment = 64 * Ki;
|
|
}
|
|
else {
|
|
desc.vaBase = static_cast<mcaddr>(0x100000000ULL);
|
|
}
|
|
}
|
|
else if ((gslRef_ != NULL) && (!dev().settings().use64BitPtr_)) {
|
|
// Make sure runtime didn't pick a resource with > 4GB address
|
|
if ((cal()->dimension_ == GSL_MOA_BUFFER) &&
|
|
(static_cast<uint64_t>(gslRef_->gslResource()->getSurfaceAddress() +
|
|
gslRef_->gslResource()->getSurfaceSize()) > (uint64_t(4) * Gi))) {
|
|
gslRef_->release();
|
|
gslRef_ = NULL;
|
|
}
|
|
}
|
|
// Try to allocate memory if we couldn't find a cached resource
|
|
if (gslRef_ == NULL) {
|
|
// Allocate memory
|
|
gslResource = dev().resAlloc(&desc);
|
|
if (gslResource != 0) {
|
|
calRes = true;
|
|
}
|
|
}
|
|
else {
|
|
calRes = true;
|
|
gslResource = gslRef_->gslOriginal();
|
|
foundCalRef = true;
|
|
}
|
|
|
|
// If GSL fails allocation then try other heaps
|
|
if (!calRes) {
|
|
// Free cache if we failed allocation
|
|
if (dev().resourceCache().free()) {
|
|
// We freed something - attempt to allocate memory again
|
|
continue;
|
|
}
|
|
|
|
// Local to Persistent
|
|
if (memoryType() == Local) {
|
|
cal_.type_ = Persistent;
|
|
}
|
|
else if (!heap && (memoryType() == Persistent)) {
|
|
cal_.type_ = RemoteUSWC;
|
|
}
|
|
// Remote cacheable to uncacheable
|
|
else if (memoryType() == Remote) {
|
|
cal_.type_ = RemoteUSWC;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
while (!calRes);
|
|
}
|
|
break;
|
|
case Pinned: {
|
|
PinnedParams* pinned = reinterpret_cast<PinnedParams*>(params);
|
|
CALuint allocSize = static_cast<CALuint>(pinned->size_);
|
|
void* pinAddress;
|
|
hostMemRef = pinned->hostMemRef_;
|
|
pinAddress = address_ = hostMemRef->hostMem();
|
|
|
|
// Use untiled allocation
|
|
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
|
|
|
|
desc.size.width = cal()->width_;
|
|
|
|
if (cal()->dimension_ == GSL_MOA_BUFFER) {
|
|
// Allign offset to 4K boundary (Vista/Win7 limitation)
|
|
char* tmpHost = const_cast<char*>(
|
|
amd::alignDown(reinterpret_cast<const char*>(address_),
|
|
PinnedMemoryAlignment));
|
|
|
|
// Find the partial size for unaligned copy
|
|
hostMemOffset = static_cast<CALuint>(
|
|
reinterpret_cast<const char*>(address_) - tmpHost);
|
|
|
|
pinOffset_ = hostMemOffset & 0xff;
|
|
//!@note GSL has a problem with the defines for flags and
|
|
//! view creation, so check the restriction here
|
|
if (!dev().heap()->isVirtual() && (pinOffset_ != 0)) {
|
|
return false;
|
|
}
|
|
|
|
pinAddress = tmpHost;
|
|
// Align width to avoid GSL useless assert with a view
|
|
if (hostMemOffset != 0) {
|
|
desc.size.width += hostMemOffset / elementSize();
|
|
desc.size.width = amd::alignUp(desc.size.width, 64);
|
|
}
|
|
hostMemOffset &= ~(0xff);
|
|
}
|
|
else if (cal()->dimension_ == GSL_MOA_TEXTURE_2D) {
|
|
//! @todo: Width has to be aligned for 3D.
|
|
//! Need to be replaced with a compute copy
|
|
// Width aligned by 8 texels
|
|
if (((cal()->width_ % 0x8) != 0) ||
|
|
// Pitch aligned by 64 bytes
|
|
(((cal()->width_ * elementSize()) % 0x40) != 0)) {
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
//! @todo GSL doesn't support pinning with resAlloc_
|
|
return false;
|
|
}
|
|
|
|
// Fill the GSL desc info structure
|
|
desc.dimension = cal()->dimension_;
|
|
desc.type = GSL_MOA_MEMORY_SYSTEM;
|
|
desc.size.height = cal()->height_;
|
|
desc.size.depth = cal()->depth_;
|
|
desc.format = cal()->format_;
|
|
desc.channelOrder = cal()->channelOrder_;
|
|
desc.mipLevels = 0;
|
|
desc.systemMemory = reinterpret_cast<CALvoid*>(pinAddress);
|
|
desc.flags = 0;
|
|
|
|
// Ensure page alignment
|
|
if ((CALuint64)desc.systemMemory & (amd::Os::pageSize() - 1)) {
|
|
return false;
|
|
}
|
|
|
|
gslResource = dev().resAlloc(&desc);
|
|
if (gslResource != 0) {
|
|
calRes = true;
|
|
}
|
|
else {
|
|
pinOffset_ = 0;
|
|
}
|
|
}
|
|
break;
|
|
case View: {
|
|
// Save the offset in the global heap
|
|
ViewParams* view = reinterpret_cast<ViewParams*>(params);
|
|
offset_ = view->offset_;
|
|
|
|
// Make sure parent was provided
|
|
if (NULL != view->resource_) {
|
|
viewOwner_ = view->resource_;
|
|
uint64 bytePitch = (view->size_ + viewOwner_->pinOffset());
|
|
viewSize.width = bytePitch / elementSize();
|
|
viewSize.height = 1;
|
|
viewSize.depth = 1;
|
|
viewOffset.x = static_cast<CALuint>(offset() / elementSize());
|
|
viewOffset.y = 0;
|
|
viewOffset.width = 0;
|
|
viewOffset.height = 0;
|
|
|
|
gslResource = dev().resAllocView(
|
|
view->resource_->gslResource(), viewSize, viewOffset,
|
|
cal()->format_, GSL_CHANNEL_ORDER_REPLICATE_R,
|
|
cal()->dimension_, 0, 0, cal()->flags_, bytePitch);
|
|
if (gslResource != 0) {
|
|
calRes = true;
|
|
}
|
|
|
|
// Check if it's a heap allocation
|
|
if (!dev().heap()->isVirtual()) {
|
|
if (viewOwner_ == &dev().globalMem()) {
|
|
// Allocation directly from the heap
|
|
hbOffset_ = static_cast<uint64_t>(view->offset_);
|
|
}
|
|
else {
|
|
// Allocation from another memory object
|
|
hbOffset_ = static_cast<uint64_t>(view->offset_) +
|
|
viewOwner_->hbOffset();
|
|
}
|
|
hbSize_ = view->size_;
|
|
}
|
|
|
|
if (viewOwner_->isMemoryType(Pinned)) {
|
|
address_ = viewOwner_->data() + offset();
|
|
}
|
|
pinOffset_ = viewOwner_->pinOffset();
|
|
}
|
|
else {
|
|
cal_.type_ = Empty;
|
|
}
|
|
}
|
|
break;
|
|
case ImageView: {
|
|
ImageViewParams* imageView = reinterpret_cast<ImageViewParams*>(params);
|
|
imageCreateView = true;
|
|
viewLayer = imageView->layer_;
|
|
viewLevel = imageView->level_;
|
|
gslResource = imageView->resource_->gslResource();
|
|
viewOwner_ = imageView->resource_;
|
|
if (viewLayer != 0) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
|
|
}
|
|
calRes = true;
|
|
}
|
|
break;
|
|
case ImageBuffer: {
|
|
ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
|
|
imageCreateView = true;
|
|
gslResource = imageBuffer->resource_->gslResource();
|
|
viewOwner_ = imageBuffer->resource_;
|
|
calRes = true;
|
|
useRowPitch = true;
|
|
}
|
|
break;
|
|
case OGLInterop: {
|
|
OGLInteropParams* oglRes = reinterpret_cast<OGLInteropParams*>(params);
|
|
assert(oglRes->glPlatformContext_ &&
|
|
"We don't have OGL context!");
|
|
switch (oglRes->type_) {
|
|
case InteropVertexBuffer:
|
|
glType_ = CAL_RES_GL_BUFFER_TYPE_VERTEXBUFFER;
|
|
break;
|
|
case InteropRenderBuffer:
|
|
glType_ = CAL_RES_GL_BUFFER_TYPE_RENDERBUFFER;
|
|
break;
|
|
case InteropTexture:
|
|
case InteropTextureViewLevel:
|
|
case InteropTextureViewCube:
|
|
glType_ = CAL_RES_GL_BUFFER_TYPE_TEXTURE;
|
|
break;
|
|
default:
|
|
LogError("Unknown OGL interop type!");
|
|
return false;
|
|
break;
|
|
}
|
|
glPlatformContext_ = oglRes->glPlatformContext_;
|
|
glDeviceContext_ = oglRes->glDeviceContext_;
|
|
CALGSLDevice::GLResAssociate resData = {0};
|
|
resData.GLContext = oglRes->glPlatformContext_;
|
|
resData.GLdeviceContext = oglRes->glDeviceContext_;
|
|
resData.name = oglRes->handle_;
|
|
resData.type = glType_;
|
|
// We need not pass any flags down to OGL for interop
|
|
resData.flags = 0;
|
|
|
|
if (dev().resGLAssociate(resData)) {
|
|
gslResource = resData.memObject;
|
|
glInteropMbRes_ = resData.mbResHandle;
|
|
glInterop_ = resData.mem_base;
|
|
calRes = true;
|
|
}
|
|
|
|
// Check if we have to create a view
|
|
if (calRes &&
|
|
((oglRes->type_ == InteropTextureViewLevel) ||
|
|
(oglRes->type_ == InteropTextureViewCube))) {
|
|
imageCreateView = true;
|
|
viewLayer = oglRes->layer_;
|
|
viewLevel = oglRes->mipLevel_;
|
|
|
|
// Find the view parameters
|
|
if (InteropTextureViewLevel == oglRes->type_) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL;
|
|
}
|
|
else if (InteropTextureViewCube == oglRes->type_) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
|
|
}
|
|
else {
|
|
LogError("Unknown Interop View Type");
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
#ifdef _WIN32
|
|
case D3D9Interop:
|
|
case D3D10Interop:
|
|
case D3D11Interop: {
|
|
D3DInteropParams* d3dRes = reinterpret_cast<D3DInteropParams*>(params);
|
|
desc.dimension = cal()->dimension_;
|
|
desc.size.width = cal()->width_;
|
|
desc.size.height = cal()->height_;
|
|
desc.size.depth = cal()->depth_;
|
|
desc.format = cal()->format_;
|
|
desc.channelOrder = cal()->channelOrder_;
|
|
desc.flags = cal()->flags_;
|
|
desc.mipLevels = 0;
|
|
desc.systemMemory = NULL;
|
|
switch (d3dRes->misc) {
|
|
case 1: // NV12 format
|
|
case 2: // YV12 format
|
|
// Readjust the size to the original NV12/YV12 size, since runtime
|
|
// creates an interop for all planes
|
|
switch (d3dRes->layer_) {
|
|
case 0:
|
|
desc.size.height = 3 * desc.size.height / 2;
|
|
break;
|
|
case 1:
|
|
case 2:
|
|
// Force R8 format for the interop allocation by default
|
|
if (1 == d3dRes->misc) {
|
|
desc.format = CM_SURF_FMT_R8;
|
|
desc.channelOrder = GSL_CHANNEL_ORDER_R;
|
|
}
|
|
desc.size.width = 2 * desc.size.width;
|
|
desc.size.height = 3 * desc.size.height;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
// Create an interop GSL object
|
|
gslResource = dev().resMapD3DResource(
|
|
&desc, (CALuint64)d3dRes->handle_, (memoryType() != D3D9Interop));
|
|
if (gslResource != 0) {
|
|
calRes = true;
|
|
}
|
|
else {
|
|
return false;
|
|
}
|
|
|
|
|
|
// Check if we have to create a view
|
|
if (calRes &&
|
|
((d3dRes->type_ == InteropTextureViewLevel) ||
|
|
(d3dRes->type_ == InteropTextureViewCube))) {
|
|
imageCreateView = true;
|
|
viewLayer = d3dRes->layer_;
|
|
viewLevel = d3dRes->mipLevel_;
|
|
|
|
// Find the view parameters
|
|
if (InteropTextureViewLevel == d3dRes->type_) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL;
|
|
}
|
|
else if (InteropTextureViewCube == d3dRes->type_) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
|
|
}
|
|
else {
|
|
LogError("Unknown Interop View Type");
|
|
}
|
|
}
|
|
|
|
switch (d3dRes->misc) {
|
|
case 0:
|
|
break;
|
|
case 1: // NV12 format
|
|
case 2: // YV12 format
|
|
// Create a view for the specified plane
|
|
viewDefined = true;
|
|
viewSize.width = cal()->width_;
|
|
viewSize.height = cal()->height_;
|
|
viewSize.depth = 1;
|
|
bytePitch = static_cast<size_t>(gslResource->getPitch());
|
|
viewOffset.x = 0;
|
|
viewSurfFmt = cal()->format_;
|
|
viewChannelOrder = cal()->channelOrder_;
|
|
switch (d3dRes->layer_) {
|
|
case -1:
|
|
break;
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
// Y - plane size to the offset
|
|
viewOffset.x = bytePitch * viewSize.height * 2;
|
|
if (d3dRes->misc == 2) {
|
|
// YV12 format U is 2 times smaller plane
|
|
bytePitch /= 2;
|
|
}
|
|
break;
|
|
case 2:
|
|
// Y + U plane sizes to the offest.
|
|
// U plane is 4 times smaller than Y => 5/2
|
|
viewOffset.x = bytePitch * viewSize.height * 5 / 2;
|
|
// V is 2 times smaller plane
|
|
bytePitch /= 2;
|
|
break;
|
|
default:
|
|
LogError("Unknown Interop View Type");
|
|
calRes = false;
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
LogError("Unknown Interop View Type");
|
|
calRes = false;
|
|
}
|
|
}
|
|
break;
|
|
#endif // _WIN32
|
|
default:
|
|
LogWarning("Resource::create() called with unknown memory type");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
// Create a view for interop, since the original buffer may have different format
|
|
// than the global buffer and GSL mem copy will fail
|
|
bool interopBufView = cal()->buffer_ &&
|
|
((memoryType() == D3D10Interop) || (memoryType() == OGLInterop) ||
|
|
(memoryType() == D3D11Interop));
|
|
|
|
bool ignoreParentHandle =
|
|
((memoryType() == ImageView) || (memoryType() == ImageBuffer));
|
|
|
|
// Create imageview if it was requested
|
|
if (calRes &&
|
|
(imageCreateView || interopBufView || hostMemOffset || viewDefined)) {
|
|
|
|
gslResOriginal = gslResource;
|
|
|
|
// Disable tiling if it's a buffer view
|
|
if (interopBufView || hostMemOffset) {
|
|
viewFlags = CAL_RESALLOCVIEW_GLOBAL_BUFFER;
|
|
}
|
|
|
|
viewResType = cal()->dimension_;
|
|
if (!viewDefined) {
|
|
viewSize.width = cal()->width_ + (pinOffset() / elementSize());
|
|
viewSize.height = cal()->height_;
|
|
viewSize.depth = cal()->depth_;
|
|
viewOffset.x = hostMemOffset / static_cast<CALuint>(elementSize());
|
|
viewOffset.y = 0;
|
|
viewOffset.width = 0;
|
|
viewOffset.height = 0;
|
|
viewSurfFmt = cal()->format_;
|
|
viewChannelOrder = cal()->channelOrder_;
|
|
}
|
|
|
|
if (useRowPitch && (params->owner_ != NULL) && params->owner_->asImage() &&
|
|
(params->owner_->asImage()->getRowPitch() != 0)) {
|
|
bytePitch = params->owner_->asImage()->getRowPitch();
|
|
}
|
|
|
|
// Allocate a view resource object
|
|
gslResource = dev().resAllocView(
|
|
gslResOriginal, viewSize, viewOffset, viewSurfFmt,
|
|
viewChannelOrder, viewResType, viewLevel, viewLayer, viewFlags, bytePitch);
|
|
|
|
if (gslResource == 0) {
|
|
// If we don't have to keep the parent handle,
|
|
// then destroy the original resource
|
|
if (!ignoreParentHandle) {
|
|
dev().resFree(gslResOriginal);
|
|
gslResOriginal = 0;
|
|
}
|
|
LogError("ResAlloc failed!");
|
|
return false;
|
|
}
|
|
|
|
if (ignoreParentHandle) {
|
|
gslResOriginal = 0;
|
|
}
|
|
}
|
|
|
|
if (!calRes) {
|
|
if (gslResource != 0) {
|
|
dev().resFree(gslResource);
|
|
}
|
|
if (memoryType() != Pinned) {
|
|
LogError("calResAlloc failed!");
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Find memory location
|
|
switch (gslResource->getAttribs().location) {
|
|
case GSL_MOA_MEMORY_CARD:
|
|
case GSL_MOA_MEMORY_CARD_EXT:
|
|
case GSL_MOA_MEMORY_CARD_LOCKABLE:
|
|
case GSL_MOA_MEMORY_CARD_EXT_NONEXT:
|
|
case GSL_MOA_MEMORY_CARD_BUS_ADDRESSABLE:
|
|
cal_.cardMemory_ = true;
|
|
break;
|
|
default:
|
|
cal_.cardMemory_ = false;
|
|
break;
|
|
}
|
|
|
|
gslMemObjectAttribTiling tiling = gslResource->getAttribs().tiling;
|
|
cal_.tiled_ = (GSL_MOA_TILING_LINEAR != tiling) &&
|
|
(GSL_MOA_TILING_LINEAR_GENERAL != tiling);
|
|
|
|
// Get the heap block offset if it's a virtual heap
|
|
if (dev().heap()->isVirtual()) {
|
|
hbOffset_ = gslResource->getSurfaceAddress() -
|
|
dev().heap()->baseAddress();
|
|
}
|
|
hbSize_ = static_cast<uint64_t>(gslResource->getSurfaceSize());
|
|
|
|
if (!dev().settings().use64BitPtr_ && (memType != Scratch)) {
|
|
// Make sure runtime doesn't go over the address space limit for buffers
|
|
if ((memoryType() != Heap) &&
|
|
(cal()->dimension_ == GSL_MOA_BUFFER) &&
|
|
((hbOffset_ + hbSize_) > (uint64_t(4) * Gi))) {
|
|
if (cal_.cardMemory_) {
|
|
LogPrintfError(
|
|
"Out of 4GB address space. Base: 0x%016llX, size: 0x%016llX!",
|
|
hbOffset_, hbSize_);
|
|
|
|
dev().resFree(gslResource);
|
|
//! @note: A workaround for a Windows delay on memory destruction
|
|
//! Runtime submits a fake memory fill to force KMD to return
|
|
//! the freed memory ranges
|
|
if (IS_WINDOWS) {
|
|
uint32_t pattern = 0;
|
|
Memory* dummy = reinterpret_cast<Memory*>(
|
|
dev().dummyPage()->getDeviceMemory(dev()));
|
|
dev().xferMgr().fillBuffer(*dummy, &pattern, sizeof(uint32_t),
|
|
amd::Coord3D(0), amd::Coord3D(sizeof(uint32_t)));
|
|
}
|
|
if ((gslResOriginal != 0) && !ignoreParentHandle) {
|
|
dev().resFree(gslResOriginal);
|
|
gslResOriginal = 0;
|
|
}
|
|
return false;
|
|
}
|
|
else {
|
|
LogWarning("Out of 4GB address space for AHP/UHP!");
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!foundCalRef) {
|
|
gslRef_ = new GslResourceReference(dev(), gslResource, gslResOriginal);
|
|
if (gslRef_ == NULL) {
|
|
LogError("Memory allocation failure!");
|
|
dev().resFree(gslResource);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if ((dev().settings().hsail_ || (dev().settings().oclVersion_ == OpenCL20)) &&
|
|
!cal()->buffer_) {
|
|
hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
|
|
if (0 == hwSrd_) {
|
|
return false;
|
|
}
|
|
dev().fillImageHwState(gslResource, hwState_, 8 * sizeof(uint32_t));
|
|
hwState_[8] = GetHSAILImageFormatType(cal()->format_);
|
|
hwState_[9] = GetHSAILImageOrderType(cal()->channelOrder_);
|
|
hwState_[10] = static_cast<uint32_t>(cal()->width_);
|
|
// Workaround for depth view, change tileIndex to 0 for depth view
|
|
if ((memoryType() == ImageView) &&
|
|
(viewChannelOrder == GSL_CHANNEL_ORDER_REPLICATE_R)) {
|
|
if ((hwState_[3] & 0x1f00000) == 0xe00000) {
|
|
hwState_[3] = hwState_[3] & 0xfe0fffff ;
|
|
}
|
|
}
|
|
hwState_[11] = 0; // one extra reserved field in the argument
|
|
}
|
|
|
|
if (desc.section == GSL_SECTION_SVM || desc.section == GSL_SECTION_SVM_ATOMICS)
|
|
{
|
|
params->owner_->setSvmPtr(reinterpret_cast<void*>(gslResource->getSurfaceAddress()));
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Resource::reallocate(CreateParams* params)
|
|
{
|
|
GslResourceReference* old;
|
|
GslResourceReference* active;
|
|
|
|
old = gslRef_;
|
|
if (!create(memoryType(), params)) {
|
|
gslRef_ = old;
|
|
return false;
|
|
}
|
|
// Get the new active resource
|
|
active = gslRef_;
|
|
gslRef_ = old;
|
|
|
|
dev().resCopy(old->gslResource(),
|
|
active->gslResource(), CAL_MEMCOPY_SYNC);
|
|
|
|
// Free all old resources
|
|
assert(renames_.size() == 0);
|
|
free();
|
|
|
|
gslRef_ = active;
|
|
return true;
|
|
}
|
|
|
|
void
|
|
Resource::free()
|
|
{
|
|
if (NULL != byteView_) {
|
|
delete byteView_;
|
|
byteView_ = NULL;
|
|
}
|
|
if (NULL != shortView_) {
|
|
delete shortView_;
|
|
shortView_ = NULL;
|
|
}
|
|
|
|
if (gslRef_ == NULL) {
|
|
return;
|
|
}
|
|
|
|
// Sanity check for the map calls
|
|
if (mapCount_ != 0) {
|
|
LogWarning("Resource wasn't unlocked, but destroyed!");
|
|
}
|
|
const bool wait = (memoryType() != ImageView) &&
|
|
(memoryType() != ImageBuffer);
|
|
|
|
// Check if resource could be used in any queue(thread)
|
|
if (gpu_ == NULL) {
|
|
Device::ScopedLockVgpus lock(dev());
|
|
|
|
if (renames_.size() == 0) {
|
|
// Destroy GSL resource
|
|
if (gslResource() != 0) {
|
|
// Release all virtual memory objects on all virtual GPUs
|
|
for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
|
|
dev().vgpus()[idx]->releaseMemory(gslResource(), wait);
|
|
}
|
|
|
|
//! @note: This is a workaround for bad applications that
|
|
//! don't unmap memory
|
|
if (mapCount_ != 0) {
|
|
unmap(NULL);
|
|
}
|
|
|
|
// Add resource to the cache
|
|
if (!dev().resourceCache().addCalResource(&cal_, gslRef_)) {
|
|
gslFree();
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
renames_[curRename_]->cpuAddress_ = 0;
|
|
for (size_t i = 0; i < renames_.size(); ++i) {
|
|
gslRef_ = renames_[i];
|
|
// Destroy GSL resource
|
|
if (gslResource() != 0) {
|
|
// Release all virtual memory objects on all virtual GPUs
|
|
for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
|
|
dev().vgpus()[idx]->releaseMemory(gslResource());
|
|
}
|
|
gslFree();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (renames_.size() == 0) {
|
|
// Destroy GSL resource
|
|
if (gslResource() != 0) {
|
|
// Release virtual memory object on the specified virtual GPU
|
|
gpu_->releaseMemory(gslResource(), wait);
|
|
gslFree();
|
|
}
|
|
}
|
|
else for (size_t i = 0; i < renames_.size(); ++i) {
|
|
gslRef_ = renames_[i];
|
|
// Destroy GSL resource
|
|
if (gslResource() != 0) {
|
|
// Release virtual memory object on the specified virtual GPUs
|
|
gpu_->releaseMemory(gslResource());
|
|
gslFree();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Free SRD for images
|
|
if ((dev().settings().hsail_ || (dev().settings().oclVersion_ == OpenCL20)) &&
|
|
!cal()->buffer_) {
|
|
dev().srds().freeSrdSlot(hwSrd_);
|
|
}
|
|
}
|
|
|
|
void
|
|
Resource::writeRawData(
|
|
VirtualGPU& gpu,
|
|
size_t size,
|
|
const void* data,
|
|
bool waitForEvent) const
|
|
{
|
|
GpuEvent event;
|
|
|
|
// Write data size bytes to surface
|
|
// size needs to be DWORD aligned
|
|
assert((size & 3) == 0);
|
|
gpu.writeSurfRaw(event, gslResource(), size, data);
|
|
|
|
setBusy(gpu, event);
|
|
// Update the global GPU event
|
|
gpu.setGpuEvent(event, false);
|
|
|
|
if (waitForEvent) {
|
|
// Wait for event to complete
|
|
gpu.waitForEvent(&event);
|
|
}
|
|
}
|
|
|
|
bool
|
|
Resource::partialMemCopyTo(
|
|
VirtualGPU& gpu,
|
|
const amd::Coord3D& srcOrigin,
|
|
const amd::Coord3D& dstOrigin,
|
|
const amd::Coord3D& size,
|
|
Resource& dstResource,
|
|
bool enableCopyRect,
|
|
bool flushDMA) const
|
|
{
|
|
GpuEvent event;
|
|
bool result;
|
|
CALuint syncFlags = CAL_MEMCOPY_SYNC;
|
|
EngineType activeEngineID = gpu.engineID_;
|
|
static const bool waitOnBusyEngine = true;
|
|
// \note timing issues in Linux with sync mode
|
|
bool flush = true;
|
|
|
|
// Check if runtime can use async memory copy,
|
|
// even if a caller didn't request async
|
|
if (dev().settings().asyncMemCopy_ &&
|
|
// Keep ASYNC if profiling is disabled or sdma profiling is possible
|
|
(!gpu.profiling() || dev().settings().sdmaProfiling_) &&
|
|
(!cal()->cardMemory_ || !dstResource.cal()->cardMemory_)) {
|
|
// Switch to SDMA engine
|
|
gpu.engineID_ = SdmaEngine;
|
|
syncFlags = CAL_MEMCOPY_ASYNC;
|
|
flush = false;
|
|
}
|
|
|
|
// Wait for the resources, since runtime may use async transfers
|
|
wait(gpu, waitOnBusyEngine);
|
|
dstResource.wait(gpu, waitOnBusyEngine);
|
|
|
|
size_t calSrcOrigin[3], calDstOrigin[3], calSize[3];
|
|
calSrcOrigin[0] = srcOrigin[0] + pinOffset();
|
|
calSrcOrigin[1] = srcOrigin[1];
|
|
calSrcOrigin[2] = srcOrigin[2];
|
|
calDstOrigin[0] = dstOrigin[0] + dstResource.pinOffset();
|
|
calDstOrigin[1] = dstOrigin[1];
|
|
calDstOrigin[2] = dstOrigin[2];
|
|
calSize[0] = size[0];
|
|
calSize[1] = size[1];
|
|
calSize[2] = size[2];
|
|
|
|
result = gpu.copyPartial(event,
|
|
gslResource(), calSrcOrigin,
|
|
dstResource.gslResource(), calDstOrigin,
|
|
calSize, static_cast<CALmemcopyflags>(syncFlags), enableCopyRect);
|
|
|
|
if (result) {
|
|
// Mark source and destination as busy
|
|
setBusy(gpu, event);
|
|
dstResource.setBusy(gpu, event);
|
|
|
|
// Update the global GPU event
|
|
gpu.setGpuEvent(event, (flush | flushDMA));
|
|
}
|
|
|
|
// Restore the original engine
|
|
gpu.engineID_ = activeEngineID;
|
|
|
|
return result;
|
|
}
|
|
|
|
void
|
|
Resource::setBusy(
|
|
VirtualGPU& gpu,
|
|
GpuEvent gpuEvent
|
|
) const
|
|
{
|
|
gpu.assignGpuEvent(this, gpuEvent);
|
|
|
|
// If current resource is a view, then update the parent event as well
|
|
if (viewOwner_ != NULL) {
|
|
viewOwner_->setBusy(gpu, gpuEvent);
|
|
}
|
|
}
|
|
|
|
void
|
|
Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const
|
|
{
|
|
GpuEvent* gpuEvent = gpu.getGpuEvent(this);
|
|
|
|
// Check if we have to wait unconditionally
|
|
if (!waitOnBusyEngine ||
|
|
// or we have to wait only if another engine was used on this resource
|
|
(waitOnBusyEngine && (gpuEvent->engineId_ != gpu.engineID_))) {
|
|
gpu.waitForEvent(gpuEvent);
|
|
}
|
|
|
|
// If current resource is a view and not in the global heap,
|
|
// then wait for the parent event as well
|
|
if ((viewOwner_ != NULL) && (viewOwner_ != &dev().globalMem())) {
|
|
viewOwner_->wait(gpu, waitOnBusyEngine);
|
|
}
|
|
}
|
|
|
|
bool
|
|
Resource::hostWrite(
|
|
VirtualGPU* gpu,
|
|
const void* hostPtr,
|
|
const amd::Coord3D& origin,
|
|
const amd::Coord3D& size,
|
|
uint flags,
|
|
size_t rowPitch,
|
|
size_t slicePitch)
|
|
{
|
|
void* dst;
|
|
|
|
size_t startLayer = origin[2];
|
|
size_t numLayers = size[2];
|
|
if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
|
|
startLayer = origin[1];
|
|
numLayers = size[1];
|
|
}
|
|
|
|
// Get physical GPU memmory
|
|
dst = map(gpu, flags, startLayer, numLayers);
|
|
if (NULL == dst) {
|
|
LogError("Couldn't map GPU memory for host write");
|
|
return false;
|
|
}
|
|
|
|
if (1 == cal()->dimSize_) {
|
|
size_t copySize = (cal()->buffer_) ? size[0] : size[0] * elementSize_;
|
|
|
|
// Update the pointer
|
|
dst = static_cast<void*>(static_cast<char*>(dst) + origin[0]);
|
|
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(dst, hostPtr, copySize);
|
|
}
|
|
else {
|
|
size_t srcOffs = 0;
|
|
size_t dstOffsBase = origin[0] * elementSize_;
|
|
size_t dstOffs;
|
|
|
|
// Make sure we use the right pitch if it's not specified
|
|
if (rowPitch == 0) {
|
|
rowPitch = size[0] * elementSize_;
|
|
}
|
|
|
|
// Make sure we use the right slice if it's not specified
|
|
if (slicePitch == 0) {
|
|
slicePitch = size[0] * size[1] * elementSize_;
|
|
}
|
|
|
|
// Adjust the destination offset with Y dimension
|
|
dstOffsBase += cal()->pitch_ * origin[1] * elementSize_;
|
|
|
|
// Adjust the destination offset with Z dimension
|
|
dstOffsBase += cal()->slice_ * origin[2] * elementSize_;
|
|
|
|
// Copy memory slice by slice
|
|
for (size_t slice = 0; slice < size[2]; ++slice) {
|
|
dstOffs = dstOffsBase + slice * cal()->slice_ * elementSize_;
|
|
srcOffs = slice * slicePitch;
|
|
|
|
// Copy memory line by line
|
|
for (size_t row = 0; row < size[1]; ++row) {
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(
|
|
(reinterpret_cast<address>(dst) + dstOffs),
|
|
(reinterpret_cast<const_address>(hostPtr) + srcOffs),
|
|
size[0] * elementSize_);
|
|
|
|
dstOffs += cal()->pitch_ * elementSize_;
|
|
srcOffs += rowPitch;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Unmap GPU memory
|
|
unmap(gpu);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Resource::hostRead(
|
|
VirtualGPU* gpu,
|
|
void* hostPtr,
|
|
const amd::Coord3D& origin,
|
|
const amd::Coord3D& size,
|
|
size_t rowPitch,
|
|
size_t slicePitch)
|
|
{
|
|
void* src;
|
|
|
|
size_t startLayer = origin[2];
|
|
size_t numLayers = size[2];
|
|
if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
|
|
startLayer = origin[1];
|
|
numLayers = size[1];
|
|
}
|
|
|
|
// Get physical GPU memmory
|
|
src = map(gpu, ReadOnly, startLayer, numLayers);
|
|
if (NULL == src) {
|
|
LogError("Couldn't map GPU memory for host read");
|
|
return false;
|
|
}
|
|
|
|
if (1 == cal()->dimSize_) {
|
|
size_t copySize = (cal()->buffer_) ? size[0] : size[0] * elementSize_;
|
|
|
|
// Update the pointer
|
|
src = static_cast<void*>(static_cast<char*>(src) + origin[0]);
|
|
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(hostPtr, src, copySize);
|
|
}
|
|
else {
|
|
size_t srcOffsBase = origin[0] * elementSize_;
|
|
size_t srcOffs;
|
|
size_t dstOffs = 0;
|
|
|
|
// Make sure we use the right pitch if it's not specified
|
|
if (rowPitch == 0) {
|
|
rowPitch = size[0] * elementSize_;
|
|
}
|
|
|
|
// Make sure we use the right slice if it's not specified
|
|
if (slicePitch == 0) {
|
|
slicePitch = size[0] * size[1] * elementSize_;
|
|
}
|
|
|
|
// Adjust destination offset with Y dimension
|
|
srcOffsBase += cal()->pitch_ * origin[1] * elementSize_;
|
|
|
|
// Adjust the destination offset with Z dimension
|
|
srcOffsBase += cal()->slice_ * origin[2] * elementSize_;
|
|
|
|
// Copy memory line by line
|
|
for (size_t slice = 0; slice < size[2]; ++slice) {
|
|
srcOffs = srcOffsBase + slice * cal()->slice_ * elementSize_;
|
|
dstOffs = slice * slicePitch;
|
|
|
|
// Copy memory line by line
|
|
for (size_t row = 0; row < size[1]; ++row) {
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(
|
|
(reinterpret_cast<address>(hostPtr) + dstOffs),
|
|
(reinterpret_cast<const_address>(src) + srcOffs),
|
|
size[0] * elementSize_);
|
|
|
|
srcOffs += cal()->pitch_ * elementSize_;
|
|
dstOffs += rowPitch;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Unmap GPU memory
|
|
unmap(gpu);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Resource::gslMap(void** ptr, size_t* pitch, gslMapAccessType flags, gslMemObject resource) const
|
|
{
|
|
bool result = true;
|
|
|
|
if (cal_.cardMemory_ || cal_.tiled_) {
|
|
// @todo remove const cast
|
|
result = const_cast<Device&>(dev()).resMapLocal(*ptr, *pitch, resource, flags);
|
|
}
|
|
else {
|
|
result = dev().resMapRemote(*ptr, *pitch, resource, flags);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool
|
|
Resource::gslUnmap(gslMemObject resource) const
|
|
{
|
|
bool result = true;
|
|
|
|
if (cal_.cardMemory_) {
|
|
// @todo remove const cast
|
|
result = const_cast<Device&>(dev()).resUnmapLocal(resource);
|
|
}
|
|
else {
|
|
result = dev().resUnmapRemote(resource);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool
|
|
Resource::gslGLAcquire()
|
|
{
|
|
bool retVal = true;
|
|
|
|
if (cal()->type_ == OGLInterop) {
|
|
//release is required only for depth resources
|
|
switch ((int)cal()->format_) {
|
|
case CM_SURF_FMT_DEPTH24_STEN8:
|
|
case CM_SURF_FMT_DEPTH32F_X24_STEN8:
|
|
case CM_SURF_FMT_DEPTH32F:
|
|
case CM_SURF_FMT_DEPTH16:
|
|
retVal = dev().resGLAcquire(glPlatformContext_,glInteropMbRes_, glType_);
|
|
break;
|
|
}
|
|
}
|
|
return retVal;
|
|
}
|
|
|
|
bool
|
|
Resource::gslGLRelease()
|
|
{
|
|
bool retVal = true;
|
|
|
|
if (cal()->type_ == OGLInterop) {
|
|
//release is required only for depth resources
|
|
switch ((int)cal()->format_) {
|
|
case CM_SURF_FMT_DEPTH24_STEN8:
|
|
case CM_SURF_FMT_DEPTH32F_X24_STEN8:
|
|
case CM_SURF_FMT_DEPTH32F:
|
|
case CM_SURF_FMT_DEPTH16:
|
|
retVal = dev().resGLRelease(glPlatformContext_,glInteropMbRes_);
|
|
break;
|
|
}
|
|
}
|
|
return retVal;
|
|
}
|
|
void
|
|
Resource::gslFree() const
|
|
{
|
|
if (cal()->type_ == OGLInterop) {
|
|
if (0 == gslRef_->resOriginal_) {
|
|
dev().resGLFree(glPlatformContext_, glDeviceContext_,
|
|
gslRef_->resource_, glInterop_, glInteropMbRes_, glType_);
|
|
gslRef_->resource_ = 0;
|
|
}
|
|
else {
|
|
dev().resFree(gslRef_->resource_);
|
|
gslRef_->resource_ = 0;
|
|
dev().resGLFree(glPlatformContext_, glDeviceContext_,
|
|
gslRef_->resOriginal_, glInterop_, glInteropMbRes_, glType_);
|
|
gslRef_->resOriginal_ = 0;
|
|
}
|
|
}
|
|
gslRef_->release();
|
|
}
|
|
|
|
bool
|
|
Resource::isMemoryType(MemoryType memType) const
|
|
{
|
|
if (memoryType() == memType) {
|
|
return true;
|
|
}
|
|
else if (memoryType() == View) {
|
|
return viewOwner_->isMemoryType(memType);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
Resource::isPersistentDirectMap() const
|
|
{
|
|
bool directMap = ((memoryType() == Resource::Persistent) &&
|
|
(cal()->dimSize_ < 3) && !cal()->imageArray_);
|
|
|
|
// If direct map is possible, then validate it with the current tiling
|
|
if (directMap && cal()->tiled_) {
|
|
//!@note IOL for Linux doesn't support tiling aperture
|
|
// and runtime doesn't force linear images in persistent
|
|
directMap = IS_WINDOWS && !dev().settings().linearPersistentImage_;
|
|
}
|
|
|
|
return directMap;
|
|
}
|
|
|
|
void*
|
|
Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
|
|
{
|
|
if (isMemoryType(Pinned)) {
|
|
// Check if we have to wait
|
|
if (!(flags & NoWait)) {
|
|
if (gpu != NULL) {
|
|
wait(*gpu);
|
|
}
|
|
}
|
|
return address_;
|
|
}
|
|
|
|
gslMapAccessType mapFlags = GSL_MAP_READ_WRITE;
|
|
|
|
if (flags & ReadOnly) {
|
|
assert(!(flags & Discard) && "We can't use lock discard with read only!");
|
|
mapFlags = GSL_MAP_READ_ONLY;
|
|
}
|
|
|
|
if (flags & WriteOnly) {
|
|
mapFlags = GSL_MAP_WRITE_ONLY;
|
|
}
|
|
|
|
// Check if use map discard
|
|
if (flags & Discard) {
|
|
mapFlags = GSL_MAP_WRITE_ONLY;
|
|
if (gpu != NULL) {
|
|
// If we use a new renamed allocation, then skip the wait
|
|
if (rename(*gpu)) {
|
|
flags |= NoWait;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if we have to wait
|
|
if (!(flags & NoWait)) {
|
|
if (gpu != NULL) {
|
|
wait(*gpu);
|
|
}
|
|
}
|
|
|
|
// Check if memory wasn't mapped yet
|
|
if (++mapCount_ == 1) {
|
|
if ((cal()->dimSize_ == 3) || cal()->imageArray_) {
|
|
// Save map info for multilayer map/unmap
|
|
startLayer_ = startLayer;
|
|
numLayers_ = numLayers;
|
|
mapFlags_ = mapFlags;
|
|
// Map with layers
|
|
address_ = mapLayers(gpu, mapFlags);
|
|
}
|
|
else {
|
|
// Map current resource
|
|
if (!gslMap(&address_, &cal_.pitch_, mapFlags, gslResource())) {
|
|
LogError("cal::ResMap failed!");
|
|
--mapCount_;
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
//! \note the atomic operation with counter doesn't
|
|
// guarantee that the address will be valid,
|
|
// since GSL could still process the first map
|
|
if (address_ == NULL) {
|
|
amd::Os::sleep(10);
|
|
assert((address_ != NULL) && "Multiple maps failed!");
|
|
}
|
|
|
|
return address_;
|
|
}
|
|
|
|
void*
|
|
Resource::mapLayers(VirtualGPU* gpu, CALuint flags)
|
|
{
|
|
size_t srcOffs = 0;
|
|
size_t dstOffs = 0;
|
|
gslMemObject sliceResource = 0;
|
|
gslMemObjectAttribType gslDim = GSL_MOA_TEXTURE_2D;
|
|
size_t layers = cal()->depth_;
|
|
size_t height = cal()->height_;
|
|
|
|
// Use 1D layers
|
|
if (GSL_MOA_TEXTURE_1D_ARRAY == cal()->dimension_) {
|
|
gslDim = GSL_MOA_TEXTURE_1D;
|
|
height = 1;
|
|
layers = cal()->height_;
|
|
}
|
|
|
|
cal_.pitch_ = cal()->width_;
|
|
cal_.slice_ = cal()->pitch_ * height;
|
|
address_ = new char [cal()->slice_ * layers * elementSize()];
|
|
if (NULL == address_) {
|
|
return NULL;
|
|
}
|
|
|
|
// Check if map is write only
|
|
if (flags == GSL_MAP_WRITE_ONLY) {
|
|
return address_;
|
|
}
|
|
|
|
if (numLayers_ != 0) {
|
|
layers = startLayer_ + numLayers_;
|
|
}
|
|
|
|
dstOffs = startLayer_ * cal()->slice_ * elementSize();
|
|
|
|
// Loop through all layers
|
|
for (uint i = startLayer_; i < layers; ++i) {
|
|
gslResource3D gslSize;
|
|
CALdomain calOffset;
|
|
void* sliceAddr;
|
|
size_t pitch;
|
|
|
|
// Allocate a layer from the image
|
|
gslSize.width = cal()->width_;
|
|
gslSize.height = height;
|
|
gslSize.depth = 1;
|
|
calOffset.x = 0;
|
|
calOffset.y = 0;
|
|
calOffset.width = 0;
|
|
calOffset.height = 0;
|
|
|
|
sliceResource = dev().resAllocView(
|
|
gslResource(), gslSize,
|
|
calOffset, cal()->format_, cal()->channelOrder_, gslDim,
|
|
0, i, CAL_RESALLOCSLICEVIEW_LAYER);
|
|
if (0 == sliceResource) {
|
|
LogError("Map layer. resAllocSliceView failed!");
|
|
return NULL;
|
|
}
|
|
|
|
// Map 2D layer
|
|
if (!gslMap(&sliceAddr, &pitch, GSL_MAP_READ_ONLY, sliceResource)) {
|
|
LogError("Map layer. CalResMap failed!");
|
|
return NULL;
|
|
}
|
|
|
|
srcOffs = 0;
|
|
// Copy memory line by line
|
|
for (size_t rows = 0; rows < height; ++rows) {
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(
|
|
(reinterpret_cast<address>(address_) + dstOffs),
|
|
(reinterpret_cast<const_address>(sliceAddr) + srcOffs),
|
|
cal()->width_ * elementSize_);
|
|
|
|
dstOffs += cal()->pitch_ * elementSize();
|
|
srcOffs += pitch * elementSize();
|
|
}
|
|
|
|
// Unmap a layer
|
|
if (!gslUnmap(sliceResource)) {
|
|
LogError("Map layer. CalResUnmap failed!");
|
|
}
|
|
dev().resFree(sliceResource);
|
|
}
|
|
|
|
return address_;
|
|
}
|
|
|
|
void
|
|
Resource::unmap(VirtualGPU* gpu)
|
|
{
|
|
if (isMemoryType(Pinned)) {
|
|
return;
|
|
}
|
|
|
|
// Decrement map counter
|
|
int count = --mapCount_;
|
|
|
|
// Check if it's the last unmap
|
|
if (count == 0) {
|
|
if ((cal()->dimSize_ == 3) || cal()->imageArray_) {
|
|
// Unmap layers
|
|
unmapLayers(gpu);
|
|
}
|
|
else {
|
|
// Unmap current resource
|
|
if (!gslUnmap(gslResource())) {
|
|
LogError("CalResUnmap failed!");
|
|
}
|
|
}
|
|
address_ = NULL;
|
|
}
|
|
else if (count < 0) {
|
|
LogError("dev().serialCalResUnmap failed!");
|
|
++mapCount_;
|
|
return;
|
|
}
|
|
}
|
|
|
|
void
|
|
Resource::unmapLayers(VirtualGPU* gpu)
|
|
{
|
|
size_t srcOffs = 0;
|
|
size_t dstOffs = 0;
|
|
gslMemObjectAttribType gslDim = GSL_MOA_TEXTURE_2D;
|
|
gslMemObject sliceResource = NULL;
|
|
CALuint layers = cal()->depth_;
|
|
CALuint height = cal()->height_;
|
|
|
|
// Use 1D layers
|
|
if (GSL_MOA_TEXTURE_1D_ARRAY == cal()->dimension_) {
|
|
gslDim = GSL_MOA_TEXTURE_1D;
|
|
height = 1;
|
|
layers = cal()->height_;
|
|
}
|
|
|
|
if (numLayers_ != 0) {
|
|
layers = startLayer_ + numLayers_;
|
|
}
|
|
|
|
srcOffs = startLayer_ * cal()->slice_ * elementSize();
|
|
|
|
// Check if map is write only
|
|
if (!(mapFlags_ == GSL_MAP_READ_ONLY)) {
|
|
// Loop through all layers
|
|
for (uint i = startLayer_; i < layers; ++i) {
|
|
gslResource3D gslSize;
|
|
CALdomain calOffset;
|
|
void* sliceAddr;
|
|
size_t pitch;
|
|
|
|
// Allocate a layer from the image
|
|
gslSize.width = cal()->width_;
|
|
gslSize.height = height;
|
|
gslSize.depth = 1;
|
|
calOffset.x = 0;
|
|
calOffset.y = 0;
|
|
calOffset.width = 0;
|
|
calOffset.height = 0;
|
|
|
|
sliceResource = dev().resAllocView(
|
|
gslResource(), gslSize,
|
|
calOffset, cal()->format_, cal()->channelOrder_, gslDim,
|
|
0, i, CAL_RESALLOCSLICEVIEW_LAYER);
|
|
if (0 == sliceResource) {
|
|
LogError("Unmap layer. resAllocSliceView failed!");
|
|
return;
|
|
}
|
|
|
|
// Map a layer
|
|
if (!gslMap(&sliceAddr, &pitch, GSL_MAP_WRITE_ONLY, sliceResource)) {
|
|
LogError("Unmap layer. CalResMap failed!");
|
|
return;
|
|
}
|
|
|
|
dstOffs = 0;
|
|
// Copy memory line by line
|
|
for (size_t rows = 0; rows < height; ++rows) {
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(
|
|
(reinterpret_cast<address>(sliceAddr) + dstOffs),
|
|
(reinterpret_cast<const_address>(address_) + srcOffs),
|
|
cal()->width_ * elementSize_);
|
|
|
|
dstOffs += pitch * elementSize();
|
|
srcOffs += cal()->pitch_ * elementSize();
|
|
}
|
|
|
|
// Unmap a layer
|
|
if (!gslUnmap(sliceResource)) {
|
|
LogError("Unmap layer. CalResUnmap failed!");
|
|
}
|
|
dev().resFree(sliceResource);
|
|
}
|
|
}
|
|
|
|
// Destroy the mapped memory
|
|
delete [] reinterpret_cast<char*>(address_);
|
|
}
|
|
|
|
void
|
|
Resource::setActiveRename(VirtualGPU& gpu, GslResourceReference* rename)
|
|
{
|
|
// Copy the unique GSL data
|
|
gslRef_ = rename;
|
|
address_ = rename->cpuAddress_;
|
|
|
|
if (dev().heap()->isVirtual()) {
|
|
hbOffset_ = rename->gslResource()->getSurfaceAddress() -
|
|
dev().heap()->baseAddress();
|
|
}
|
|
}
|
|
|
|
bool
|
|
Resource::getActiveRename(VirtualGPU& gpu, GslResourceReference** rename)
|
|
{
|
|
// Copy the old data to the rename descriptor
|
|
*rename = gslRef_;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Resource::rename(VirtualGPU& gpu, bool force)
|
|
{
|
|
GpuEvent* gpuEvent = gpu.getGpuEvent(this);
|
|
if (!gpuEvent->isValid() && !force) {
|
|
return true;
|
|
}
|
|
|
|
bool useNext = false;
|
|
CALuint resSize = cal()->width_ * ((cal()->height_) ? cal()->height_ : 1) *
|
|
elementSize_;
|
|
|
|
// Rename will work with real GSL resources
|
|
if (((memoryType() != Local) &&
|
|
(memoryType() != Persistent) &&
|
|
(memoryType() != Remote) &&
|
|
(memoryType() != RemoteUSWC)) ||
|
|
(dev().settings().maxRenames_ == 0)) {
|
|
return false;
|
|
}
|
|
|
|
// If the resource for renaming is too big, then lets check the current status first
|
|
// at the cost of an extra flush
|
|
if (resSize >= (dev().settings().maxRenameSize_ / dev().settings().maxRenames_)) {
|
|
if (gpu.isDone(gpuEvent)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Save the first
|
|
if (renames_.size() == 0) {
|
|
GslResourceReference* rename;
|
|
if (mapCount_ > 0) {
|
|
gslRef_->cpuAddress_ = address_;
|
|
}
|
|
if (!getActiveRename(gpu, &rename)) {
|
|
return false;
|
|
}
|
|
|
|
curRename_ = renames_.size();
|
|
renames_.push_back(rename);
|
|
}
|
|
|
|
// Can we use a new rename?
|
|
if ((renames_.size() <= dev().settings().maxRenames_) &&
|
|
((renames_.size() * resSize) <= dev().settings().maxRenameSize_)) {
|
|
GslResourceReference* rename;
|
|
|
|
// Create a new GSL allocation
|
|
if (create(memoryType())) {
|
|
if (mapCount_ > 0) {
|
|
assert(!cal()->cardMemory_ && "Unsupported memory type!");
|
|
if (!dev().resMapRemote(gslRef_->cpuAddress_, cal_.pitch_,
|
|
gslResource(), GSL_MAP_READ_WRITE)) {
|
|
LogError("gslMap fails on rename!");
|
|
}
|
|
address_ = gslRef_->cpuAddress_;
|
|
}
|
|
if (getActiveRename(gpu, &rename)) {
|
|
curRename_ = renames_.size();
|
|
renames_.push_back(rename);
|
|
}
|
|
else {
|
|
gslRef_->release();
|
|
useNext = true;
|
|
}
|
|
}
|
|
else {
|
|
useNext = true;
|
|
}
|
|
}
|
|
else {
|
|
useNext = true;
|
|
}
|
|
|
|
if (useNext) {
|
|
// Get the last submitted
|
|
curRename_++;
|
|
if (curRename_ >= renames_.size()) {
|
|
curRename_ = 0;
|
|
}
|
|
setActiveRename(gpu, renames_[curRename_]);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
Resource::warmUpRenames(VirtualGPU& gpu)
|
|
{
|
|
for (uint i = 0; i < dev().settings().maxRenames_; ++i) {
|
|
const bool force = true;
|
|
rename(gpu, force);
|
|
}
|
|
}
|
|
|
|
//! Returns a byte or short view of this buffer, creating and caching the
//! view on the first request.
//! \param newFormat Format of the view; only CM_SURF_FMT_R8I (byte) and
//!                  CM_SURF_FMT_R16I (short) are supported
//! \return The cached or newly created view; NULL for an unsupported format
//!         or if the view couldn't be created
Resource*
Resource::getAliasUAVBuffer(cmSurfFmt newFormat)
{
    Resource*   view = NULL;
    uint        byteSize;

    // Lock device so a view allocation is unique operation
    amd::ScopedLock k(dev().gslDeviceOps());

    if (newFormat == CM_SURF_FMT_R8I) {
        view = byteView_;
        byteSize = 1;
    }
    else if (newFormat == CM_SURF_FMT_R16I) {
        view = shortView_;
        byteSize = 2;
    }
    else { // only take byte and short
        assert(false && "Unsupported format for a view");
        return NULL;
    }

    // allocate byte/short view
    if (NULL == view) {
        // The view covers the same bytes, expressed in elements of the new format
        view = new Resource(dev(), (cal()->width_ * elementSize()) / byteSize, newFormat);
        if (view == NULL) {
            return NULL;
        }

        Resource::ViewParams params;
        params.offset_ = 0;
        params.size_ = cal()->width_ * elementSize();
        params.resource_ = this;

        // Pass the address of the view parameters to create()
        if (!view->create(Resource::View, &params)) {
            delete view;
            return NULL;
        }

        // save view resource
        if (newFormat == CM_SURF_FMT_R8I) {
            byteView_ = view;
        }
        else if (newFormat == CM_SURF_FMT_R16I) {
            shortView_ = view;
        }
    }

    return view;
}
|
|
|
|
//! Destroys the cache and releases the resources that are still cached
ResourceCache::~ResourceCache()
{
    // Release the cached resources with the default arguments of free()
    this->free();
}
|
|
|
|
//! \note the cache works in FILO mode
|
|
bool
|
|
ResourceCache::addCalResource(
|
|
Resource::CalResourceDesc* desc, GslResourceReference* ref)
|
|
{
|
|
amd::ScopedLock l(&lockCacheOps_);
|
|
bool result = false;
|
|
size_t size = getResourceSize(desc);
|
|
|
|
// Make sure current allocation isn't bigger than cache
|
|
if (((desc->type_ == Resource::Local) ||
|
|
(desc->type_ == Resource::Persistent) ||
|
|
(desc->type_ == Resource::Remote) ||
|
|
(desc->type_ == Resource::RemoteUSWC)) &&
|
|
(size < cacheSizeLimit_) &&
|
|
!desc->SVMRes_) {
|
|
// Validate the cache size limit. Loop until we have enough space
|
|
while ((cacheSize_ + size) > cacheSizeLimit_) {
|
|
removeLast();
|
|
}
|
|
Resource::CalResourceDesc* descCached = new Resource::CalResourceDesc;
|
|
if (descCached != NULL) {
|
|
// Copy the original desc to the cached version
|
|
memcpy(descCached, desc, sizeof(Resource::CalResourceDesc));
|
|
|
|
// Add the current resource to the cache
|
|
resCache_.push_front(std::make_pair(descCached, ref));
|
|
cacheSize_ += size;
|
|
result = true;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
//! Searches the cache for a resource that matches \a desc and removes the
//! match from the cache. Entries match if dimension, type, width, height,
//! depth, channel order, format and flags are equal.
//! \param desc Description of the requested resource
//! \return GSL reference of the matching entry, which isn't cached anymore;
//!         NULL if the resource is too big for the cache, is an SVM
//!         resource, or no entry matches
GslResourceReference*
ResourceCache::findCalResource(Resource::CalResourceDesc* desc)
{
    amd::ScopedLock l(&lockCacheOps_);
    size_t  size = getResourceSize(desc);

    // Early exit if resource is too big
    if (size >= cacheSizeLimit_ || desc->SVMRes_) {
        //! \note we may need to free the cache here to reduce memory pressure
        return NULL;
    }

    // Search the right resource through the cache list
    std::list<std::pair<Resource::CalResourceDesc*,
        GslResourceReference*> >::iterator it;
    for (it = resCache_.begin(); it != resCache_.end(); ++it) {
        Resource::CalResourceDesc* entry = it->first;
        // Find if we can reuse this entry
        if ((entry->dimension_ == desc->dimension_) &&
            (entry->type_ == desc->type_) &&
            (entry->width_ == desc->width_) &&
            (entry->height_ == desc->height_) &&
            (entry->depth_ == desc->depth_) &&
            (entry->channelOrder_ == desc->channelOrder_) &&
            (entry->format_ == desc->format_) &&
            (entry->flags_ == desc->flags_)) {
            GslResourceReference* ref = it->second;

            // Destroy the cached description and unlink exactly this node.
            // Erasing by iterator avoids a second scan of the list and avoids
            // comparing against an element that is being destroyed
            delete entry;
            resCache_.erase(it);

            // The matching entry has the same size as the request
            cacheSize_ -= size;
            return ref;
        }
    }

    return NULL;
}
|
|
|
|
bool
|
|
ResourceCache::free(size_t minCacheEntries)
|
|
{
|
|
amd::ScopedLock l(&lockCacheOps_);
|
|
bool result = false;
|
|
|
|
if (minCacheEntries < resCache_.size()) {
|
|
if (static_cast<int>(cacheSize_) > 0) {
|
|
result = true;
|
|
}
|
|
// Clear the cache
|
|
while (static_cast<int>(cacheSize_) > 0) {
|
|
removeLast();
|
|
}
|
|
CondLog((cacheSize_ != 0), "Incorrect size for cache release!");
|
|
}
|
|
return result;
|
|
}
|
|
|
|
size_t
|
|
ResourceCache::getResourceSize(Resource::CalResourceDesc* desc)
|
|
{
|
|
// Find the total amount of elements
|
|
size_t size =
|
|
desc->width_ *
|
|
((desc->height_) ? desc->height_ : 1) *
|
|
((desc->depth_) ? desc->depth_: 1);
|
|
|
|
// Find total size in bytes
|
|
size *= static_cast<size_t>(memoryFormatSize(desc->format_).size_);
|
|
|
|
return size;
|
|
}
|
|
|
|
void
|
|
ResourceCache::removeLast()
|
|
{
|
|
std::pair<Resource::CalResourceDesc*, GslResourceReference*> entry;
|
|
entry = resCache_.back();
|
|
resCache_.pop_back();
|
|
|
|
size_t size = getResourceSize(entry.first);
|
|
|
|
// Delete CalResourceDesc
|
|
delete entry.first;
|
|
|
|
// Destroy GSL resource
|
|
entry.second->release();
|
|
cacheSize_ -= size;
|
|
}
|
|
|
|
} // namespace gpu
|