5efe63df44
ECR #304775 - Optimization for rectangular copies (Part 2). Due to the HW restriction of 14 bits for the src and dst pitch, it's advantageous to choose the optimal bpp. The higher the bpp, the larger the byte pitch. This indirectly helps to reduce the number of packets for a buffer copy (line by line vs. a single sub_win raw packet). ReviewBoardURL = http://ocltc.amd.com/reviews/r/5605/diff/ Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#109 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#191 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#76 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLContext.cpp#64 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLContext.h#38 edit
2110 lines
62 KiB
C++
2110 lines
62 KiB
C++
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "platform/program.hpp"
|
|
#include "platform/kernel.hpp"
|
|
#include "os/os.hpp"
|
|
#include "device/device.hpp"
|
|
#include "utils/flags.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "device/gpu/gpuresource.hpp"
|
|
#include "device/gpu/gpudevice.hpp"
|
|
#include "device/gpu/gpublit.hpp"
|
|
#include "device/gpu/gputimestamp.hpp"
|
|
#include "thread/atomic.hpp"
|
|
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
#include <cmath>
|
|
|
|
namespace gpu {
|
|
|
|
//! Wraps a pair of GSL memory object handles together with the device
//! that is later used to release them.
//!
//! \param gpuDev          device used for the unmap/free calls in the destructor
//! \param gslResource     handle stored as the active resource
//! \param gslResOriginal  handle stored as the original resource
//!
//! The CPU address starts out as NULL.
GslResourceReference::GslResourceReference(const Device& gpuDev,
                                           gslMemObject  gslResource,
                                           gslMemObject  gslResOriginal)
    : device_(gpuDev),
      resource_(gslResource),
      resOriginal_(gslResOriginal),
      cpuAddress_(NULL)
{
    // Nothing else to do: all state is set in the initializer list
}
|
|
|
|
//! Releases everything the reference still holds: the remote mapping
//! (when a CPU address is recorded), the active GSL resource and finally
//! the original GSL resource.
GslResourceReference::~GslResourceReference()
{
    // A recorded CPU address means the resource is still mapped
    if (NULL != cpuAddress_) {
        device_.resUnmapRemote(gslResource());
    }

    // Free the active handle and forget it
    const gslMemObject activeHandle = gslResource();
    if (activeHandle != 0) {
        device_.resFree(activeHandle);
        resource_ = NULL;
    }

    // The original handle is queried only after resource_ was cleared,
    // exactly as the accessor was called in this position before
    const gslMemObject originalHandle = gslOriginal();
    if (originalHandle != 0) {
        device_.resFree(originalHandle);
        resOriginal_ = NULL;
    }
}
|
|
|
|
//! Constructs the descriptor for a one-dimensional buffer resource.
//! No GSL memory is allocated here; the descriptor type stays Empty
//! until create() is called.
//!
//! \param gpuDev  device the resource belongs to
//! \param width   buffer width stored in the descriptor
//!                (presumably in elements of \a format - confirm with callers)
//! \param format  surface format of a single element
Resource::Resource(const Device& gpuDev, size_t width, cmSurfFmt format)
    : elementSize_(0),
      gpuDevice_(gpuDev),
      mapCount_(0),
      address_(NULL),
      offset_(0),
      curRename_(0),
      gslRef_(NULL),
      viewOwner_(NULL),
      hbOffset_(0),
      hbSize_(0),
      pinOffset_(0),
      byteView_(NULL),
      shortView_(NULL),
      glInterop_(0),
      gpu_(NULL)
{
    // Geometry: a buffer is a single row of "width" elements
    cal_.width_         = width;
    cal_.height_        = 1;
    cal_.depth_         = 1;
    cal_.dimSize_       = 1;
    cal_.pitch_         = 0;
    cal_.slice_         = 0;

    // Format description
    cal_.format_        = format;
    cal_.channelOrder_  = GSL_CHANNEL_ORDER_REPLICATE_R;
    cal_.dimension_     = GSL_MOA_BUFFER;

    // Classification flags
    cal_.type_          = Empty;
    cal_.flags_         = 0;
    cal_.buffer_        = true;
    cal_.cardMemory_    = true;
    cal_.imageArray_    = false;
    cal_.imageType_     = 0;
    cal_.SVMRes_        = false;
    cal_.scratch_       = false;
}
|
|
|
|
//! Constructs the descriptor for an image resource.
//! No GSL memory is allocated here; the descriptor type stays Empty
//! until create() is called.
//!
//! \param gpuDev     device the resource belongs to
//! \param width      image width stored in the descriptor
//! \param height     image height stored in the descriptor
//! \param depth      image depth stored in the descriptor
//! \param format     surface format of a texel
//! \param chOrder    channel order of a texel
//! \param imageType  OpenCL image object type; selects the GSL dimension,
//!                   the dimension count and the array flag
Resource::Resource(const Device&      gpuDev,
                   size_t             width,
                   size_t             height,
                   size_t             depth,
                   cmSurfFmt          format,
                   gslChannelOrder    chOrder,
                   cl_mem_object_type imageType)
    : elementSize_(0),
      gpuDevice_(gpuDev),
      mapCount_(0),
      address_(NULL),
      offset_(0),
      curRename_(0),
      gslRef_(NULL),
      viewOwner_(NULL),
      hbOffset_(0),
      hbSize_(0),
      pinOffset_(0),
      byteView_(NULL),
      shortView_(NULL),
      glInterop_(0),
      gpu_(NULL)
{
    // Geometry
    cal_.width_         = width;
    cal_.height_        = height;
    cal_.depth_         = depth;
    cal_.pitch_         = 0;
    cal_.slice_         = 0;

    // Format description
    cal_.format_        = format;
    cal_.channelOrder_  = chOrder;
    cal_.imageType_     = imageType;

    // Classification flags
    cal_.type_          = Empty;
    cal_.flags_         = 0;
    cal_.buffer_        = false;
    cal_.cardMemory_    = true;
    cal_.imageArray_    = false;
    cal_.SVMRes_        = false;
    cal_.scratch_       = false;

    // Translate the OpenCL image type into the GSL dimension
    switch (imageType) {
    case CL_MEM_OBJECT_IMAGE1D:
        cal_.dimension_ = GSL_MOA_TEXTURE_1D;
        cal_.dimSize_ = 1;
        break;
    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
        cal_.dimension_ = GSL_MOA_TEXTURE_BUFFER;
        cal_.dimSize_ = 1;
        break;
    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
        cal_.dimension_ = GSL_MOA_TEXTURE_1D_ARRAY;
        cal_.dimSize_ = 2;
        cal_.imageArray_ = true;
        break;
    case CL_MEM_OBJECT_IMAGE2D:
        cal_.dimension_ = GSL_MOA_TEXTURE_2D;
        cal_.dimSize_ = 2;
        break;
    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
        cal_.dimension_ = GSL_MOA_TEXTURE_2D_ARRAY;
        cal_.dimSize_ = 3;
        cal_.imageArray_ = true;
        break;
    case CL_MEM_OBJECT_IMAGE3D:
        cal_.dimension_ = GSL_MOA_TEXTURE_3D;
        cal_.dimSize_ = 3;
        break;
    default:
        // The GSL dimension is left untouched for an unknown type
        cal_.dimSize_ = 1;
        LogError("Unknown image type!");
        break;
    }
}
|
|
|
|
//! Destroys the resource by releasing everything through free().
Resource::~Resource()
{
    this->free();
}
|
|
|
|
//! Maps a surface format to the numeric image format type written into
//! the image hardware state.
//!
//! NOTE(review): the numeric codes presumably follow the HSAIL image
//! channel-type enumeration - confirm against the HSAIL headers.
//!
//! \param format  surface format to translate
//! \return the format type code; 0 for an unhandled format (after the
//!         assert, which is only active in debug builds)
static uint32_t GetHSAILImageFormatType(cmSurfFmt format)
{
    switch (format) {
    case CM_SURF_FMT_sR8:
    case CM_SURF_FMT_sRG8:
    case CM_SURF_FMT_sRGBA8:
        return 0;

    case CM_SURF_FMT_sU16:
    case CM_SURF_FMT_sUV16:
    case CM_SURF_FMT_sUVWQ16:
        return 1;

    case CM_SURF_FMT_INTENSITY8:
    case CM_SURF_FMT_RG8:
    case CM_SURF_FMT_RGBA8:
    case CM_SURF_FMT_RGBX8UI:
    case CM_SURF_FMT_RGBA8_SRGB:
        return 2;

    case CM_SURF_FMT_R16:
    case CM_SURF_FMT_RG16:
    case CM_SURF_FMT_RGBA16:
    case CM_SURF_FMT_DEPTH16:
        return 3;

    // Codes 4, 5 and 6 are not mapped. The disabled code they replace
    // referred to HSA_IMAGE_FMT_R5G6B5_UNORM (4),
    // HSA_IMAGE_FMT_R5G5B5_UNORM (5) and HSA_IMAGE_FMT_R10G10B10_UNORM (6).

    case CM_SURF_FMT_BGR10_X2:
        return 7;

    case CM_SURF_FMT_sR8I:
    case CM_SURF_FMT_sRG8I:
    case CM_SURF_FMT_sRGBA8I:
        return 8;

    case CM_SURF_FMT_sR16I:
    case CM_SURF_FMT_sRG16I:
    case CM_SURF_FMT_sRGBA16I:
        return 9;

    case CM_SURF_FMT_sR32I:
    case CM_SURF_FMT_sRG32I:
    case CM_SURF_FMT_sRGBA32I:
        return 10;

    case CM_SURF_FMT_R8I:
    case CM_SURF_FMT_RG8I:
    case CM_SURF_FMT_RGBA8UI:
        return 11;

    case CM_SURF_FMT_R16I:
    case CM_SURF_FMT_RG16I:
    case CM_SURF_FMT_RGBA16UI:
        return 12;

    case CM_SURF_FMT_R32I:
    case CM_SURF_FMT_RG32I:
    case CM_SURF_FMT_RGBA32UI:
        return 13;

    case CM_SURF_FMT_R16F:
    case CM_SURF_FMT_RG16F:
    case CM_SURF_FMT_RGBA16F:
        return 14;

    case CM_SURF_FMT_R32F:
    case CM_SURF_FMT_RG32F:
    case CM_SURF_FMT_RGBA32F:
    case CM_SURF_FMT_DEPTH32F:
        return 15;

    default:
        assert(false);
        break;
    }

    // Unhandled format in a release build
    return 0;
}
|
|
|
|
//! Maps a GSL channel order to the numeric image order type written into
//! the image hardware state.
//!
//! NOTE(review): the numeric codes presumably follow the HSAIL image
//! channel-order enumeration - confirm against the HSAIL headers.
//!
//! \param chOrder  channel order to translate
//! \return the order type code; 0 for an unhandled order (after the
//!         assert, which is only active in debug builds)
static uint32_t GetHSAILImageOrderType(gslChannelOrder chOrder)
{
    switch (chOrder) {
    case GSL_CHANNEL_ORDER_A:           return 0;
    case GSL_CHANNEL_ORDER_R:           return 1;
    case GSL_CHANNEL_ORDER_RG:          return 3;
    case GSL_CHANNEL_ORDER_RA:          return 5;
    // The disabled code this replaces also mapped
    // HSA_IMAGE_FMT_R5G6B5_UNORM, HSA_IMAGE_FMT_R5G5B5_UNORM and
    // HSA_IMAGE_FMT_R10G10B10_UNORM to 6.
    case GSL_CHANNEL_ORDER_RGB:         return 6;
    case GSL_CHANNEL_ORDER_RGBA:        return 8;
    case GSL_CHANNEL_ORDER_BGRA:        return 9;
    case GSL_CHANNEL_ORDER_ARGB:        return 10;
    case GSL_CHANNEL_ORDER_SRGB:        return 12;
    case GSL_CHANNEL_ORDER_SRGBX:       return 13;
    case GSL_CHANNEL_ORDER_SRGBA:       return 14;
    case GSL_CHANNEL_ORDER_SBGRA:       return 15;
    case GSL_CHANNEL_ORDER_INTENSITY:   return 16;
    case GSL_CHANNEL_ORDER_LUMINANCE:   return 17;
    case GSL_CHANNEL_ORDER_REPLICATE_R: return 18;
    default:
        assert(false);
        break;
    }

    // Unhandled channel order in a release build
    return 0;
}
|
|
|
|
bool
|
|
Resource::create(MemoryType memType, CreateParams* params, bool heap)
|
|
{
|
|
bool calRes = false;
|
|
gslMemObject gslResource = 0;
|
|
gslMemObject gslResOriginal = 0;
|
|
const amd::HostMemoryReference* hostMemRef = NULL;
|
|
bool imageCreateView = false;
|
|
CALuint hostMemOffset = 0;
|
|
bool foundCalRef = false;
|
|
bool viewDefined = false;
|
|
uint viewLayer = 0;
|
|
uint viewLevel = 0;
|
|
uint viewFlags = 0;
|
|
gslResource3D viewSize = {0};
|
|
CALdomain viewOffset = {0};
|
|
cmSurfFmt viewSurfFmt;
|
|
gslChannelOrder viewChannelOrder = GSL_CHANNEL_ORDER_UNSPECIFIED;
|
|
gslMemObjectAttribType viewResType;
|
|
CALresourceDesc desc;
|
|
uint64 bytePitch = (uint64)-1;
|
|
bool useRowPitch = false;
|
|
|
|
desc.vaBase = 0;
|
|
desc.minAlignment = 0;
|
|
desc.section = GSL_SECTION_REGULAR;
|
|
if (NULL != params && NULL != params->owner_) { //make sure params not NULL
|
|
mcaddr svmPtr = reinterpret_cast<mcaddr>(params->owner_->getSvmPtr());
|
|
desc.vaBase = (svmPtr == 1)? 0:svmPtr;
|
|
cal_.SVMRes_ = (svmPtr != 0);
|
|
desc.section = (svmPtr != 0) ? GSL_SECTION_SVM : GSL_SECTION_REGULAR;
|
|
|
|
if (params->owner_->getMemFlags() & CL_MEM_SVM_ATOMICS) {
|
|
desc.section = GSL_SECTION_SVM_ATOMICS;
|
|
}
|
|
}
|
|
// This is a thread safe operation
|
|
const_cast<Device&>(dev()).initializeHeapResources();
|
|
|
|
// Get the element size
|
|
elementSize_ = static_cast<CALuint>(memoryFormatSize(cal()->format_).size_);
|
|
cal_.type_ = memType;
|
|
if (memType == Scratch) {
|
|
cal_.type_ = Local;
|
|
cal_.scratch_ = true;
|
|
}
|
|
|
|
// Force remote allocation if it was requested in the settings
|
|
if (dev().settings().remoteAlloc_ && !heap &&
|
|
((memoryType() == Local) ||
|
|
(memoryType() == Persistent))) {
|
|
cal_.type_ = RemoteUSWC;
|
|
}
|
|
|
|
if (dev().settings().disablePersistent_ && (memoryType() == Persistent)) {
|
|
cal_.type_ = RemoteUSWC;
|
|
}
|
|
|
|
if (cal()->buffer_) {
|
|
// Force linear tiling for buffer alloctions
|
|
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
|
|
}
|
|
|
|
if (params != NULL) {
|
|
gpu_ = params->gpu_;
|
|
}
|
|
|
|
switch (memoryType()) {
|
|
case Heap:
|
|
gslResource = dev().resGetHeap(0);
|
|
if (gslResource == 0) {
|
|
return false;
|
|
}
|
|
calRes = true;
|
|
cal_.width_ = static_cast<size_t>(gslResource->getPitch());
|
|
cal_.pitch_ = static_cast<size_t>(gslResource->getPitch());
|
|
break;
|
|
case Persistent:
|
|
if (dev().settings().linearPersistentImage_) {
|
|
// Force linear tiling for image allocations in persistent
|
|
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
|
|
}
|
|
// Fall through ...
|
|
case RemoteUSWC:
|
|
case Remote:
|
|
case BusAddressable:
|
|
case ExternalPhysical:
|
|
// Fall through to process the memory allocation ...
|
|
case Local: {
|
|
if (cal()->buffer_) {
|
|
//! @todo Remove alignment.
|
|
//! GSL asserts in mem copy with an unaligned size
|
|
cal_.width_ = amd::alignUp(cal_.width_, 64);
|
|
}
|
|
|
|
desc.dimension = cal()->dimension_;
|
|
desc.size.width = cal()->width_;
|
|
desc.size.height = cal()->height_;
|
|
desc.size.depth = cal()->depth_;
|
|
desc.format = cal()->format_;
|
|
desc.channelOrder = cal()->channelOrder_;
|
|
desc.flags = cal()->flags_;
|
|
desc.mipLevels = 0;
|
|
desc.systemMemory = NULL;
|
|
|
|
do {
|
|
// Find a type for allocation
|
|
if (memoryType() == Persistent) {
|
|
desc.type = GSL_MOA_MEMORY_CARD_LOCKABLE;
|
|
}
|
|
else if (memoryType() == Remote) {
|
|
desc.type = GSL_MOA_MEMORY_REMOTE_CACHEABLE;
|
|
}
|
|
else if (memoryType() == RemoteUSWC) {
|
|
desc.type = GSL_MOA_MEMORY_AGP;
|
|
}
|
|
else if (memoryType() == BusAddressable){
|
|
desc.type = GSL_MOA_MEMORY_CARD_BUS_ADDRESSABLE;
|
|
}
|
|
else if (memoryType() == ExternalPhysical){
|
|
desc.type = GSL_MOA_MEMORY_CARD_EXTERNAL_PHYSICAL;
|
|
cl_bus_address_amd bus_address =
|
|
(reinterpret_cast<amd::Buffer*>(params->owner_))->busAddress();
|
|
desc.busAddress[0] = bus_address.surface_bus_address;
|
|
desc.busAddress[1] = bus_address.marker_bus_address;
|
|
}
|
|
else {
|
|
desc.type = GSL_MOA_MEMORY_CARD_EXT_NONEXT;
|
|
}
|
|
|
|
// Check resource cache first for an appropriate resource
|
|
gslRef_ = dev().resourceCache().findCalResource(&cal_);
|
|
if (memType == Scratch) {
|
|
if ((dev().settings().hsail_) || (dev().settings().oclVersion_ >= OpenCL20)) {
|
|
desc.minAlignment = 64 * Ki;
|
|
}
|
|
else {
|
|
desc.vaBase = static_cast<mcaddr>(0x100000000ULL);
|
|
}
|
|
}
|
|
else if ((gslRef_ != NULL) && (!dev().settings().use64BitPtr_)) {
|
|
// Make sure runtime didn't pick a resource with > 4GB address
|
|
if ((cal()->dimension_ == GSL_MOA_BUFFER) &&
|
|
(static_cast<uint64_t>(gslRef_->gslResource()->getSurfaceAddress() +
|
|
gslRef_->gslResource()->getSurfaceSize()) > (uint64_t(4) * Gi))) {
|
|
gslRef_->release();
|
|
gslRef_ = NULL;
|
|
}
|
|
}
|
|
// Try to allocate memory if we couldn't find a cached resource
|
|
if (gslRef_ == NULL) {
|
|
// Allocate memory
|
|
gslResource = dev().resAlloc(&desc);
|
|
if (gslResource != 0) {
|
|
calRes = true;
|
|
}
|
|
}
|
|
else {
|
|
calRes = true;
|
|
gslResource = gslRef_->gslOriginal();
|
|
foundCalRef = true;
|
|
}
|
|
|
|
// If GSL fails allocation then try other heaps
|
|
if (!calRes) {
|
|
// Free cache if we failed allocation
|
|
if (dev().resourceCache().free()) {
|
|
// We freed something - attempt to allocate memory again
|
|
continue;
|
|
}
|
|
|
|
// Local to Persistent
|
|
if (memoryType() == Local) {
|
|
cal_.type_ = Persistent;
|
|
}
|
|
else if (!heap && (memoryType() == Persistent)) {
|
|
cal_.type_ = RemoteUSWC;
|
|
}
|
|
// Remote cacheable to uncacheable
|
|
else if (memoryType() == Remote) {
|
|
cal_.type_ = RemoteUSWC;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
while (!calRes);
|
|
}
|
|
break;
|
|
case Pinned: {
|
|
PinnedParams* pinned = reinterpret_cast<PinnedParams*>(params);
|
|
CALuint allocSize = static_cast<CALuint>(pinned->size_);
|
|
void* pinAddress;
|
|
hostMemRef = pinned->hostMemRef_;
|
|
pinAddress = address_ = hostMemRef->hostMem();
|
|
|
|
// Use untiled allocation
|
|
cal_.flags_ |= CAL_RESALLOC_GLOBAL_BUFFER;
|
|
|
|
desc.size.width = cal()->width_;
|
|
|
|
if (cal()->dimension_ == GSL_MOA_BUFFER) {
|
|
// Allign offset to 4K boundary (Vista/Win7 limitation)
|
|
char* tmpHost = const_cast<char*>(
|
|
amd::alignDown(reinterpret_cast<const char*>(address_),
|
|
PinnedMemoryAlignment));
|
|
|
|
// Find the partial size for unaligned copy
|
|
hostMemOffset = static_cast<CALuint>(
|
|
reinterpret_cast<const char*>(address_) - tmpHost);
|
|
|
|
pinOffset_ = hostMemOffset & 0xff;
|
|
//!@note GSL has a problem with the defines for flags and
|
|
//! view creation, so check the restriction here
|
|
if (!dev().heap()->isVirtual() && (pinOffset_ != 0)) {
|
|
return false;
|
|
}
|
|
|
|
pinAddress = tmpHost;
|
|
// Align width to avoid GSL useless assert with a view
|
|
if (hostMemOffset != 0) {
|
|
desc.size.width += hostMemOffset / elementSize();
|
|
desc.size.width = amd::alignUp(desc.size.width, 64);
|
|
}
|
|
hostMemOffset &= ~(0xff);
|
|
}
|
|
else if (cal()->dimension_ == GSL_MOA_TEXTURE_2D) {
|
|
//! @todo: Width has to be aligned for 3D.
|
|
//! Need to be replaced with a compute copy
|
|
// Width aligned by 8 texels
|
|
if (((cal()->width_ % 0x8) != 0) ||
|
|
// Pitch aligned by 64 bytes
|
|
(((cal()->width_ * elementSize()) % 0x40) != 0)) {
|
|
return false;
|
|
}
|
|
}
|
|
else {
|
|
//! @todo GSL doesn't support pinning with resAlloc_
|
|
return false;
|
|
}
|
|
|
|
// Fill the GSL desc info structure
|
|
desc.dimension = cal()->dimension_;
|
|
desc.type = GSL_MOA_MEMORY_SYSTEM;
|
|
desc.size.height = cal()->height_;
|
|
desc.size.depth = cal()->depth_;
|
|
desc.format = cal()->format_;
|
|
desc.channelOrder = cal()->channelOrder_;
|
|
desc.mipLevels = 0;
|
|
desc.systemMemory = reinterpret_cast<CALvoid*>(pinAddress);
|
|
desc.flags = 0;
|
|
|
|
// Ensure page alignment
|
|
if ((CALuint64)desc.systemMemory & (amd::Os::pageSize() - 1)) {
|
|
return false;
|
|
}
|
|
|
|
gslResource = dev().resAlloc(&desc);
|
|
if (gslResource != 0) {
|
|
calRes = true;
|
|
}
|
|
else {
|
|
pinOffset_ = 0;
|
|
}
|
|
}
|
|
break;
|
|
case View: {
|
|
// Save the offset in the global heap
|
|
ViewParams* view = reinterpret_cast<ViewParams*>(params);
|
|
offset_ = view->offset_;
|
|
|
|
// Make sure parent was provided
|
|
if (NULL != view->resource_) {
|
|
viewOwner_ = view->resource_;
|
|
uint64 bytePitch = (view->size_ + viewOwner_->pinOffset());
|
|
viewSize.width = bytePitch / elementSize();
|
|
viewSize.height = 1;
|
|
viewSize.depth = 1;
|
|
viewOffset.x = static_cast<CALuint>(offset() / elementSize());
|
|
viewOffset.y = 0;
|
|
viewOffset.width = 0;
|
|
viewOffset.height = 0;
|
|
|
|
gslResource = dev().resAllocView(
|
|
view->resource_->gslResource(), viewSize, viewOffset,
|
|
cal()->format_, GSL_CHANNEL_ORDER_REPLICATE_R,
|
|
cal()->dimension_, 0, 0, cal()->flags_, bytePitch);
|
|
if (gslResource != 0) {
|
|
calRes = true;
|
|
}
|
|
|
|
// Check if it's a heap allocation
|
|
if (!dev().heap()->isVirtual()) {
|
|
if (viewOwner_ == &dev().globalMem()) {
|
|
// Allocation directly from the heap
|
|
hbOffset_ = static_cast<uint64_t>(view->offset_);
|
|
}
|
|
else {
|
|
// Allocation from another memory object
|
|
hbOffset_ = static_cast<uint64_t>(view->offset_) +
|
|
viewOwner_->hbOffset();
|
|
}
|
|
hbSize_ = view->size_;
|
|
}
|
|
|
|
if (viewOwner_->isMemoryType(Pinned)) {
|
|
address_ = viewOwner_->data() + offset();
|
|
}
|
|
pinOffset_ = viewOwner_->pinOffset();
|
|
}
|
|
else {
|
|
cal_.type_ = Empty;
|
|
}
|
|
}
|
|
break;
|
|
case ImageView: {
|
|
ImageViewParams* imageView = reinterpret_cast<ImageViewParams*>(params);
|
|
imageCreateView = true;
|
|
viewLayer = imageView->layer_;
|
|
viewLevel = imageView->level_;
|
|
gslResource = imageView->resource_->gslResource();
|
|
viewOwner_ = imageView->resource_;
|
|
if (viewLayer != 0) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
|
|
}
|
|
calRes = true;
|
|
}
|
|
break;
|
|
case ImageBuffer: {
|
|
ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
|
|
imageCreateView = true;
|
|
gslResource = imageBuffer->resource_->gslResource();
|
|
viewOwner_ = imageBuffer->resource_;
|
|
calRes = true;
|
|
useRowPitch = true;
|
|
}
|
|
break;
|
|
case OGLInterop: {
|
|
OGLInteropParams* oglRes = reinterpret_cast<OGLInteropParams*>(params);
|
|
assert(oglRes->glPlatformContext_ &&
|
|
"We don't have OGL context!");
|
|
switch (oglRes->type_) {
|
|
case InteropVertexBuffer:
|
|
glType_ = CAL_RES_GL_BUFFER_TYPE_VERTEXBUFFER;
|
|
break;
|
|
case InteropRenderBuffer:
|
|
glType_ = CAL_RES_GL_BUFFER_TYPE_RENDERBUFFER;
|
|
break;
|
|
case InteropTexture:
|
|
case InteropTextureViewLevel:
|
|
case InteropTextureViewCube:
|
|
glType_ = CAL_RES_GL_BUFFER_TYPE_TEXTURE;
|
|
break;
|
|
default:
|
|
LogError("Unknown OGL interop type!");
|
|
return false;
|
|
break;
|
|
}
|
|
glPlatformContext_ = oglRes->glPlatformContext_;
|
|
glDeviceContext_ = oglRes->glDeviceContext_;
|
|
CALGSLDevice::GLResAssociate resData = {0};
|
|
resData.GLContext = oglRes->glPlatformContext_;
|
|
resData.GLdeviceContext = oglRes->glDeviceContext_;
|
|
resData.name = oglRes->handle_;
|
|
resData.type = glType_;
|
|
// We need not pass any flags down to OGL for interop
|
|
resData.flags = 0;
|
|
|
|
if (dev().resGLAssociate(resData)) {
|
|
gslResource = resData.memObject;
|
|
glInteropMbRes_ = resData.mbResHandle;
|
|
glInterop_ = resData.mem_base;
|
|
calRes = true;
|
|
}
|
|
|
|
// Check if we have to create a view
|
|
if (calRes &&
|
|
((oglRes->type_ == InteropTextureViewLevel) ||
|
|
(oglRes->type_ == InteropTextureViewCube))) {
|
|
imageCreateView = true;
|
|
viewLayer = oglRes->layer_;
|
|
viewLevel = oglRes->mipLevel_;
|
|
|
|
// Find the view parameters
|
|
if (InteropTextureViewLevel == oglRes->type_) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL;
|
|
}
|
|
else if (InteropTextureViewCube == oglRes->type_) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
|
|
}
|
|
else {
|
|
LogError("Unknown Interop View Type");
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
#ifdef _WIN32
|
|
case D3D9Interop:
|
|
case D3D10Interop:
|
|
case D3D11Interop: {
|
|
D3DInteropParams* d3dRes = reinterpret_cast<D3DInteropParams*>(params);
|
|
desc.dimension = cal()->dimension_;
|
|
desc.size.width = cal()->width_;
|
|
desc.size.height = cal()->height_;
|
|
desc.size.depth = cal()->depth_;
|
|
desc.format = cal()->format_;
|
|
desc.channelOrder = cal()->channelOrder_;
|
|
desc.flags = cal()->flags_;
|
|
desc.mipLevels = 0;
|
|
desc.systemMemory = NULL;
|
|
switch (d3dRes->misc) {
|
|
case 1: // NV12 format
|
|
case 2: // YV12 format
|
|
// Readjust the size to the original NV12/YV12 size, since runtime
|
|
// creates an interop for all planes
|
|
switch (d3dRes->layer_) {
|
|
case 0:
|
|
desc.size.height = 3 * desc.size.height / 2;
|
|
break;
|
|
case 1:
|
|
case 2:
|
|
// Force R8 format for the interop allocation by default
|
|
if (1 == d3dRes->misc) {
|
|
desc.format = CM_SURF_FMT_R8;
|
|
desc.channelOrder = GSL_CHANNEL_ORDER_R;
|
|
}
|
|
desc.size.width = 2 * desc.size.width;
|
|
desc.size.height = 3 * desc.size.height;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
// Create an interop GSL object
|
|
gslResource = dev().resMapD3DResource(
|
|
&desc, (CALuint64)d3dRes->handle_, (memoryType() != D3D9Interop));
|
|
if (gslResource != 0) {
|
|
calRes = true;
|
|
}
|
|
else {
|
|
return false;
|
|
}
|
|
|
|
|
|
// Check if we have to create a view
|
|
if (calRes &&
|
|
((d3dRes->type_ == InteropTextureViewLevel) ||
|
|
(d3dRes->type_ == InteropTextureViewCube))) {
|
|
imageCreateView = true;
|
|
viewLayer = d3dRes->layer_;
|
|
viewLevel = d3dRes->mipLevel_;
|
|
|
|
// Find the view parameters
|
|
if (InteropTextureViewLevel == d3dRes->type_) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL;
|
|
}
|
|
else if (InteropTextureViewCube == d3dRes->type_) {
|
|
viewFlags |= CAL_RESALLOCSLICEVIEW_LEVEL_AND_LAYER;
|
|
}
|
|
else {
|
|
LogError("Unknown Interop View Type");
|
|
}
|
|
}
|
|
|
|
switch (d3dRes->misc) {
|
|
case 0:
|
|
break;
|
|
case 1: // NV12 format
|
|
case 2: // YV12 format
|
|
// Create a view for the specified plane
|
|
viewDefined = true;
|
|
viewSize.width = cal()->width_;
|
|
viewSize.height = cal()->height_;
|
|
viewSize.depth = 1;
|
|
bytePitch = static_cast<size_t>(gslResource->getPitch());
|
|
viewOffset.x = 0;
|
|
viewSurfFmt = cal()->format_;
|
|
viewChannelOrder = cal()->channelOrder_;
|
|
switch (d3dRes->layer_) {
|
|
case -1:
|
|
break;
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
// Y - plane size to the offset
|
|
viewOffset.x = bytePitch * viewSize.height * 2;
|
|
if (d3dRes->misc == 2) {
|
|
// YV12 format U is 2 times smaller plane
|
|
bytePitch /= 2;
|
|
}
|
|
break;
|
|
case 2:
|
|
// Y + U plane sizes to the offest.
|
|
// U plane is 4 times smaller than Y => 5/2
|
|
viewOffset.x = bytePitch * viewSize.height * 5 / 2;
|
|
// V is 2 times smaller plane
|
|
bytePitch /= 2;
|
|
break;
|
|
default:
|
|
LogError("Unknown Interop View Type");
|
|
calRes = false;
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
LogError("Unknown Interop View Type");
|
|
calRes = false;
|
|
}
|
|
}
|
|
break;
|
|
#endif // _WIN32
|
|
default:
|
|
LogWarning("Resource::create() called with unknown memory type");
|
|
return false;
|
|
break;
|
|
}
|
|
|
|
// Create a view for interop, since the original buffer may have different format
|
|
// than the global buffer and GSL mem copy will fail
|
|
bool interopBufView = cal()->buffer_ &&
|
|
((memoryType() == D3D10Interop) || (memoryType() == OGLInterop) ||
|
|
(memoryType() == D3D11Interop));
|
|
|
|
bool ignoreParentHandle =
|
|
((memoryType() == ImageView) || (memoryType() == ImageBuffer));
|
|
|
|
// Create imageview if it was requested
|
|
if (calRes &&
|
|
(imageCreateView || interopBufView || hostMemOffset || viewDefined)) {
|
|
|
|
gslResOriginal = gslResource;
|
|
|
|
// Disable tiling if it's a buffer view
|
|
if (interopBufView || hostMemOffset) {
|
|
viewFlags = CAL_RESALLOCVIEW_GLOBAL_BUFFER;
|
|
}
|
|
|
|
viewResType = cal()->dimension_;
|
|
if (!viewDefined) {
|
|
viewSize.width = cal()->width_ + (pinOffset() / elementSize());
|
|
viewSize.height = cal()->height_;
|
|
viewSize.depth = cal()->depth_;
|
|
viewOffset.x = hostMemOffset / static_cast<CALuint>(elementSize());
|
|
viewOffset.y = 0;
|
|
viewOffset.width = 0;
|
|
viewOffset.height = 0;
|
|
viewSurfFmt = cal()->format_;
|
|
viewChannelOrder = cal()->channelOrder_;
|
|
}
|
|
|
|
if (useRowPitch && (params->owner_ != NULL) && params->owner_->asImage() &&
|
|
(params->owner_->asImage()->getRowPitch() != 0)) {
|
|
bytePitch = params->owner_->asImage()->getRowPitch();
|
|
}
|
|
|
|
// Allocate a view resource object
|
|
gslResource = dev().resAllocView(
|
|
gslResOriginal, viewSize, viewOffset, viewSurfFmt,
|
|
viewChannelOrder, viewResType, viewLevel, viewLayer, viewFlags, bytePitch);
|
|
|
|
if (gslResource == 0) {
|
|
// If we don't have to keep the parent handle,
|
|
// then destroy the original resource
|
|
if (!ignoreParentHandle) {
|
|
dev().resFree(gslResOriginal);
|
|
gslResOriginal = 0;
|
|
}
|
|
LogError("ResAlloc failed!");
|
|
return false;
|
|
}
|
|
|
|
if (ignoreParentHandle) {
|
|
gslResOriginal = 0;
|
|
}
|
|
}
|
|
|
|
if (!calRes) {
|
|
if (gslResource != 0) {
|
|
dev().resFree(gslResource);
|
|
}
|
|
if (memoryType() != Pinned) {
|
|
LogError("calResAlloc failed!");
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Find memory location
|
|
switch (gslResource->getAttribs().location) {
|
|
case GSL_MOA_MEMORY_CARD:
|
|
case GSL_MOA_MEMORY_CARD_EXT:
|
|
case GSL_MOA_MEMORY_CARD_LOCKABLE:
|
|
case GSL_MOA_MEMORY_CARD_EXT_NONEXT:
|
|
case GSL_MOA_MEMORY_CARD_BUS_ADDRESSABLE:
|
|
cal_.cardMemory_ = true;
|
|
break;
|
|
default:
|
|
cal_.cardMemory_ = false;
|
|
break;
|
|
}
|
|
|
|
gslMemObjectAttribTiling tiling = gslResource->getAttribs().tiling;
|
|
cal_.tiled_ = (GSL_MOA_TILING_LINEAR != tiling) &&
|
|
(GSL_MOA_TILING_LINEAR_GENERAL != tiling);
|
|
|
|
// Get the heap block offset if it's a virtual heap
|
|
if (dev().heap()->isVirtual()) {
|
|
hbOffset_ = gslResource->getSurfaceAddress() -
|
|
dev().heap()->baseAddress();
|
|
}
|
|
hbSize_ = static_cast<uint64_t>(gslResource->getSurfaceSize());
|
|
|
|
if (!dev().settings().use64BitPtr_ &&
|
|
!((memType == Scratch) || ((memType == View) && viewOwner_->cal()->scratch_))) {
|
|
// Make sure runtime doesn't go over the address space limit for buffers
|
|
if ((memoryType() != Heap) &&
|
|
(cal()->dimension_ == GSL_MOA_BUFFER) &&
|
|
((hbOffset_ + hbSize_) > (uint64_t(4) * Gi))) {
|
|
if (cal_.cardMemory_) {
|
|
LogPrintfError(
|
|
"Out of 4GB address space. Base: 0x%016llX, size: 0x%016llX!",
|
|
hbOffset_, hbSize_);
|
|
|
|
dev().resFree(gslResource);
|
|
//! @note: A workaround for a Windows delay on memory destruction
|
|
//! Runtime submits a fake memory fill to force KMD to return
|
|
//! the freed memory ranges
|
|
if (IS_WINDOWS) {
|
|
uint32_t pattern = 0;
|
|
Memory* dummy = reinterpret_cast<Memory*>(
|
|
dev().dummyPage()->getDeviceMemory(dev()));
|
|
dev().xferMgr().fillBuffer(*dummy, &pattern, sizeof(uint32_t),
|
|
amd::Coord3D(0), amd::Coord3D(sizeof(uint32_t)));
|
|
}
|
|
if ((gslResOriginal != 0) && !ignoreParentHandle) {
|
|
dev().resFree(gslResOriginal);
|
|
gslResOriginal = 0;
|
|
}
|
|
return false;
|
|
}
|
|
else {
|
|
LogWarning("Out of 4GB address space for AHP/UHP!");
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!foundCalRef) {
|
|
gslRef_ = new GslResourceReference(dev(), gslResource, gslResOriginal);
|
|
if (gslRef_ == NULL) {
|
|
LogError("Memory allocation failure!");
|
|
dev().resFree(gslResource);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if ((dev().settings().hsail_ || (dev().settings().oclVersion_ == OpenCL20)) &&
|
|
!cal()->buffer_) {
|
|
hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
|
|
if (0 == hwSrd_) {
|
|
return false;
|
|
}
|
|
dev().fillImageHwState(gslResource, hwState_, 8 * sizeof(uint32_t));
|
|
hwState_[8] = GetHSAILImageFormatType(cal()->format_);
|
|
hwState_[9] = GetHSAILImageOrderType(cal()->channelOrder_);
|
|
hwState_[10] = static_cast<uint32_t>(cal()->width_);
|
|
// Workaround for depth view, change tileIndex to 0 for depth view
|
|
if ((memoryType() == ImageView) &&
|
|
(viewChannelOrder == GSL_CHANNEL_ORDER_REPLICATE_R)) {
|
|
if ((hwState_[3] & 0x1f00000) == 0xe00000) {
|
|
hwState_[3] = hwState_[3] & 0xfe0fffff ;
|
|
}
|
|
}
|
|
hwState_[11] = 0; // one extra reserved field in the argument
|
|
}
|
|
|
|
if (desc.section == GSL_SECTION_SVM || desc.section == GSL_SECTION_SVM_ATOMICS)
|
|
{
|
|
params->owner_->setSvmPtr(reinterpret_cast<void*>(gslResource->getSurfaceAddress()));
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Resource::reallocate(CreateParams* params)
|
|
{
|
|
GslResourceReference* old;
|
|
GslResourceReference* active;
|
|
|
|
old = gslRef_;
|
|
if (!create(memoryType(), params)) {
|
|
gslRef_ = old;
|
|
return false;
|
|
}
|
|
// Get the new active resource
|
|
active = gslRef_;
|
|
gslRef_ = old;
|
|
|
|
dev().resCopy(old->gslResource(),
|
|
active->gslResource(), CAL_MEMCOPY_SYNC);
|
|
|
|
// Free all old resources
|
|
assert(renames_.size() == 0);
|
|
free();
|
|
|
|
gslRef_ = active;
|
|
return true;
|
|
}
|
|
|
|
void
|
|
Resource::free()
|
|
{
|
|
if (NULL != byteView_) {
|
|
delete byteView_;
|
|
byteView_ = NULL;
|
|
}
|
|
if (NULL != shortView_) {
|
|
delete shortView_;
|
|
shortView_ = NULL;
|
|
}
|
|
|
|
if (gslRef_ == NULL) {
|
|
return;
|
|
}
|
|
|
|
// Sanity check for the map calls
|
|
if (mapCount_ != 0) {
|
|
LogWarning("Resource wasn't unlocked, but destroyed!");
|
|
}
|
|
const bool wait = (memoryType() != ImageView) &&
|
|
(memoryType() != ImageBuffer);
|
|
|
|
// Check if resource could be used in any queue(thread)
|
|
if (gpu_ == NULL) {
|
|
Device::ScopedLockVgpus lock(dev());
|
|
|
|
if (renames_.size() == 0) {
|
|
// Destroy GSL resource
|
|
if (gslResource() != 0) {
|
|
// Release all virtual memory objects on all virtual GPUs
|
|
for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
|
|
dev().vgpus()[idx]->releaseMemory(gslResource(), wait);
|
|
}
|
|
|
|
//! @note: This is a workaround for bad applications that
|
|
//! don't unmap memory
|
|
if (mapCount_ != 0) {
|
|
unmap(NULL);
|
|
}
|
|
|
|
// Add resource to the cache
|
|
if (!dev().resourceCache().addCalResource(&cal_, gslRef_)) {
|
|
gslFree();
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
renames_[curRename_]->cpuAddress_ = 0;
|
|
for (size_t i = 0; i < renames_.size(); ++i) {
|
|
gslRef_ = renames_[i];
|
|
// Destroy GSL resource
|
|
if (gslResource() != 0) {
|
|
// Release all virtual memory objects on all virtual GPUs
|
|
for (uint idx = 0; idx < dev().vgpus().size(); ++idx) {
|
|
dev().vgpus()[idx]->releaseMemory(gslResource());
|
|
}
|
|
gslFree();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (renames_.size() == 0) {
|
|
// Destroy GSL resource
|
|
if (gslResource() != 0) {
|
|
// Release virtual memory object on the specified virtual GPU
|
|
gpu_->releaseMemory(gslResource(), wait);
|
|
gslFree();
|
|
}
|
|
}
|
|
else for (size_t i = 0; i < renames_.size(); ++i) {
|
|
gslRef_ = renames_[i];
|
|
// Destroy GSL resource
|
|
if (gslResource() != 0) {
|
|
// Release virtual memory object on the specified virtual GPUs
|
|
gpu_->releaseMemory(gslResource());
|
|
gslFree();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Free SRD for images
|
|
if ((dev().settings().hsail_ || (dev().settings().oclVersion_ == OpenCL20)) &&
|
|
!cal()->buffer_) {
|
|
dev().srds().freeSrdSlot(hwSrd_);
|
|
}
|
|
}
|
|
|
|
void
|
|
Resource::writeRawData(
|
|
VirtualGPU& gpu,
|
|
size_t size,
|
|
const void* data,
|
|
bool waitForEvent) const
|
|
{
|
|
GpuEvent event;
|
|
|
|
// Write data size bytes to surface
|
|
// size needs to be DWORD aligned
|
|
assert((size & 3) == 0);
|
|
gpu.writeSurfRaw(event, gslResource(), size, data);
|
|
|
|
setBusy(gpu, event);
|
|
// Update the global GPU event
|
|
gpu.setGpuEvent(event, false);
|
|
|
|
if (waitForEvent) {
|
|
// Wait for event to complete
|
|
gpu.waitForEvent(&event);
|
|
}
|
|
}
|
|
|
|
bool
|
|
Resource::partialMemCopyTo(
|
|
VirtualGPU& gpu,
|
|
const amd::Coord3D& srcOrigin,
|
|
const amd::Coord3D& dstOrigin,
|
|
const amd::Coord3D& size,
|
|
Resource& dstResource,
|
|
bool enableCopyRect,
|
|
bool flushDMA,
|
|
uint bytesPerElement) const
|
|
{
|
|
GpuEvent event;
|
|
bool result;
|
|
CALuint syncFlags = CAL_MEMCOPY_SYNC;
|
|
EngineType activeEngineID = gpu.engineID_;
|
|
static const bool waitOnBusyEngine = true;
|
|
// \note timing issues in Linux with sync mode
|
|
bool flush = true;
|
|
|
|
// Check if runtime can use async memory copy,
|
|
// even if a caller didn't request async
|
|
if (dev().settings().asyncMemCopy_ &&
|
|
// Keep ASYNC if profiling is disabled or sdma profiling is possible
|
|
(!gpu.profiling() || dev().settings().sdmaProfiling_) &&
|
|
(!cal()->cardMemory_ || !dstResource.cal()->cardMemory_)) {
|
|
// Switch to SDMA engine
|
|
gpu.engineID_ = SdmaEngine;
|
|
syncFlags = CAL_MEMCOPY_ASYNC;
|
|
flush = false;
|
|
}
|
|
|
|
// Wait for the resources, since runtime may use async transfers
|
|
wait(gpu, waitOnBusyEngine);
|
|
dstResource.wait(gpu, waitOnBusyEngine);
|
|
|
|
size_t calSrcOrigin[3], calDstOrigin[3], calSize[3];
|
|
calSrcOrigin[0] = srcOrigin[0] + pinOffset();
|
|
calSrcOrigin[1] = srcOrigin[1];
|
|
calSrcOrigin[2] = srcOrigin[2];
|
|
calDstOrigin[0] = dstOrigin[0] + dstResource.pinOffset();
|
|
calDstOrigin[1] = dstOrigin[1];
|
|
calDstOrigin[2] = dstOrigin[2];
|
|
calSize[0] = size[0];
|
|
calSize[1] = size[1];
|
|
calSize[2] = size[2];
|
|
|
|
result = gpu.copyPartial(event,
|
|
gslResource(), calSrcOrigin,
|
|
dstResource.gslResource(), calDstOrigin,
|
|
calSize, static_cast<CALmemcopyflags>(syncFlags), enableCopyRect, bytesPerElement);
|
|
|
|
if (result) {
|
|
// Mark source and destination as busy
|
|
setBusy(gpu, event);
|
|
dstResource.setBusy(gpu, event);
|
|
|
|
// Update the global GPU event
|
|
gpu.setGpuEvent(event, (flush | flushDMA));
|
|
}
|
|
|
|
// Restore the original engine
|
|
gpu.engineID_ = activeEngineID;
|
|
|
|
return result;
|
|
}
|
|
|
|
void
|
|
Resource::setBusy(
|
|
VirtualGPU& gpu,
|
|
GpuEvent gpuEvent
|
|
) const
|
|
{
|
|
gpu.assignGpuEvent(this, gpuEvent);
|
|
|
|
// If current resource is a view, then update the parent event as well
|
|
if (viewOwner_ != NULL) {
|
|
viewOwner_->setBusy(gpu, gpuEvent);
|
|
}
|
|
}
|
|
|
|
void
|
|
Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const
|
|
{
|
|
GpuEvent* gpuEvent = gpu.getGpuEvent(this);
|
|
|
|
// Check if we have to wait unconditionally
|
|
if (!waitOnBusyEngine ||
|
|
// or we have to wait only if another engine was used on this resource
|
|
(waitOnBusyEngine && (gpuEvent->engineId_ != gpu.engineID_))) {
|
|
gpu.waitForEvent(gpuEvent);
|
|
}
|
|
|
|
// If current resource is a view and not in the global heap,
|
|
// then wait for the parent event as well
|
|
if ((viewOwner_ != NULL) && (viewOwner_ != &dev().globalMem())) {
|
|
viewOwner_->wait(gpu, waitOnBusyEngine);
|
|
}
|
|
}
|
|
|
|
bool
|
|
Resource::hostWrite(
|
|
VirtualGPU* gpu,
|
|
const void* hostPtr,
|
|
const amd::Coord3D& origin,
|
|
const amd::Coord3D& size,
|
|
uint flags,
|
|
size_t rowPitch,
|
|
size_t slicePitch)
|
|
{
|
|
void* dst;
|
|
|
|
size_t startLayer = origin[2];
|
|
size_t numLayers = size[2];
|
|
if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
|
|
startLayer = origin[1];
|
|
numLayers = size[1];
|
|
}
|
|
|
|
// Get physical GPU memmory
|
|
dst = map(gpu, flags, startLayer, numLayers);
|
|
if (NULL == dst) {
|
|
LogError("Couldn't map GPU memory for host write");
|
|
return false;
|
|
}
|
|
|
|
if (1 == cal()->dimSize_) {
|
|
size_t copySize = (cal()->buffer_) ? size[0] : size[0] * elementSize_;
|
|
|
|
// Update the pointer
|
|
dst = static_cast<void*>(static_cast<char*>(dst) + origin[0]);
|
|
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(dst, hostPtr, copySize);
|
|
}
|
|
else {
|
|
size_t srcOffs = 0;
|
|
size_t dstOffsBase = origin[0] * elementSize_;
|
|
size_t dstOffs;
|
|
|
|
// Make sure we use the right pitch if it's not specified
|
|
if (rowPitch == 0) {
|
|
rowPitch = size[0] * elementSize_;
|
|
}
|
|
|
|
// Make sure we use the right slice if it's not specified
|
|
if (slicePitch == 0) {
|
|
slicePitch = size[0] * size[1] * elementSize_;
|
|
}
|
|
|
|
// Adjust the destination offset with Y dimension
|
|
dstOffsBase += cal()->pitch_ * origin[1] * elementSize_;
|
|
|
|
// Adjust the destination offset with Z dimension
|
|
dstOffsBase += cal()->slice_ * origin[2] * elementSize_;
|
|
|
|
// Copy memory slice by slice
|
|
for (size_t slice = 0; slice < size[2]; ++slice) {
|
|
dstOffs = dstOffsBase + slice * cal()->slice_ * elementSize_;
|
|
srcOffs = slice * slicePitch;
|
|
|
|
// Copy memory line by line
|
|
for (size_t row = 0; row < size[1]; ++row) {
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(
|
|
(reinterpret_cast<address>(dst) + dstOffs),
|
|
(reinterpret_cast<const_address>(hostPtr) + srcOffs),
|
|
size[0] * elementSize_);
|
|
|
|
dstOffs += cal()->pitch_ * elementSize_;
|
|
srcOffs += rowPitch;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Unmap GPU memory
|
|
unmap(gpu);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Resource::hostRead(
|
|
VirtualGPU* gpu,
|
|
void* hostPtr,
|
|
const amd::Coord3D& origin,
|
|
const amd::Coord3D& size,
|
|
size_t rowPitch,
|
|
size_t slicePitch)
|
|
{
|
|
void* src;
|
|
|
|
size_t startLayer = origin[2];
|
|
size_t numLayers = size[2];
|
|
if (cal()->dimension_ == GSL_MOA_TEXTURE_1D_ARRAY) {
|
|
startLayer = origin[1];
|
|
numLayers = size[1];
|
|
}
|
|
|
|
// Get physical GPU memmory
|
|
src = map(gpu, ReadOnly, startLayer, numLayers);
|
|
if (NULL == src) {
|
|
LogError("Couldn't map GPU memory for host read");
|
|
return false;
|
|
}
|
|
|
|
if (1 == cal()->dimSize_) {
|
|
size_t copySize = (cal()->buffer_) ? size[0] : size[0] * elementSize_;
|
|
|
|
// Update the pointer
|
|
src = static_cast<void*>(static_cast<char*>(src) + origin[0]);
|
|
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(hostPtr, src, copySize);
|
|
}
|
|
else {
|
|
size_t srcOffsBase = origin[0] * elementSize_;
|
|
size_t srcOffs;
|
|
size_t dstOffs = 0;
|
|
|
|
// Make sure we use the right pitch if it's not specified
|
|
if (rowPitch == 0) {
|
|
rowPitch = size[0] * elementSize_;
|
|
}
|
|
|
|
// Make sure we use the right slice if it's not specified
|
|
if (slicePitch == 0) {
|
|
slicePitch = size[0] * size[1] * elementSize_;
|
|
}
|
|
|
|
// Adjust destination offset with Y dimension
|
|
srcOffsBase += cal()->pitch_ * origin[1] * elementSize_;
|
|
|
|
// Adjust the destination offset with Z dimension
|
|
srcOffsBase += cal()->slice_ * origin[2] * elementSize_;
|
|
|
|
// Copy memory line by line
|
|
for (size_t slice = 0; slice < size[2]; ++slice) {
|
|
srcOffs = srcOffsBase + slice * cal()->slice_ * elementSize_;
|
|
dstOffs = slice * slicePitch;
|
|
|
|
// Copy memory line by line
|
|
for (size_t row = 0; row < size[1]; ++row) {
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(
|
|
(reinterpret_cast<address>(hostPtr) + dstOffs),
|
|
(reinterpret_cast<const_address>(src) + srcOffs),
|
|
size[0] * elementSize_);
|
|
|
|
srcOffs += cal()->pitch_ * elementSize_;
|
|
dstOffs += rowPitch;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Unmap GPU memory
|
|
unmap(gpu);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Resource::gslMap(void** ptr, size_t* pitch, gslMapAccessType flags, gslMemObject resource) const
|
|
{
|
|
bool result = true;
|
|
|
|
if (cal_.cardMemory_ || cal_.tiled_) {
|
|
// @todo remove const cast
|
|
result = const_cast<Device&>(dev()).resMapLocal(*ptr, *pitch, resource, flags);
|
|
}
|
|
else {
|
|
result = dev().resMapRemote(*ptr, *pitch, resource, flags);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool
|
|
Resource::gslUnmap(gslMemObject resource) const
|
|
{
|
|
bool result = true;
|
|
|
|
if (cal_.cardMemory_) {
|
|
// @todo remove const cast
|
|
result = const_cast<Device&>(dev()).resUnmapLocal(resource);
|
|
}
|
|
else {
|
|
result = dev().resUnmapRemote(resource);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool
|
|
Resource::gslGLAcquire()
|
|
{
|
|
bool retVal = true;
|
|
|
|
if (cal()->type_ == OGLInterop) {
|
|
//release is required only for depth resources
|
|
switch ((int)cal()->format_) {
|
|
case CM_SURF_FMT_DEPTH24_STEN8:
|
|
case CM_SURF_FMT_DEPTH32F_X24_STEN8:
|
|
case CM_SURF_FMT_DEPTH32F:
|
|
case CM_SURF_FMT_DEPTH16:
|
|
retVal = dev().resGLAcquire(glPlatformContext_,glInteropMbRes_, glType_);
|
|
break;
|
|
}
|
|
}
|
|
return retVal;
|
|
}
|
|
|
|
bool
|
|
Resource::gslGLRelease()
|
|
{
|
|
bool retVal = true;
|
|
|
|
if (cal()->type_ == OGLInterop) {
|
|
//release is required only for depth resources
|
|
switch ((int)cal()->format_) {
|
|
case CM_SURF_FMT_DEPTH24_STEN8:
|
|
case CM_SURF_FMT_DEPTH32F_X24_STEN8:
|
|
case CM_SURF_FMT_DEPTH32F:
|
|
case CM_SURF_FMT_DEPTH16:
|
|
retVal = dev().resGLRelease(glPlatformContext_,glInteropMbRes_);
|
|
break;
|
|
}
|
|
}
|
|
return retVal;
|
|
}
|
|
void
|
|
Resource::gslFree() const
|
|
{
|
|
if (cal()->type_ == OGLInterop) {
|
|
if (0 == gslRef_->resOriginal_) {
|
|
dev().resGLFree(glPlatformContext_, glDeviceContext_,
|
|
gslRef_->resource_, glInterop_, glInteropMbRes_, glType_);
|
|
gslRef_->resource_ = 0;
|
|
}
|
|
else {
|
|
dev().resFree(gslRef_->resource_);
|
|
gslRef_->resource_ = 0;
|
|
dev().resGLFree(glPlatformContext_, glDeviceContext_,
|
|
gslRef_->resOriginal_, glInterop_, glInteropMbRes_, glType_);
|
|
gslRef_->resOriginal_ = 0;
|
|
}
|
|
}
|
|
gslRef_->release();
|
|
}
|
|
|
|
bool
|
|
Resource::isMemoryType(MemoryType memType) const
|
|
{
|
|
if (memoryType() == memType) {
|
|
return true;
|
|
}
|
|
else if (memoryType() == View) {
|
|
return viewOwner_->isMemoryType(memType);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
Resource::isPersistentDirectMap() const
|
|
{
|
|
bool directMap = ((memoryType() == Resource::Persistent) &&
|
|
(cal()->dimSize_ < 3) && !cal()->imageArray_);
|
|
|
|
// If direct map is possible, then validate it with the current tiling
|
|
if (directMap && cal()->tiled_) {
|
|
//!@note IOL for Linux doesn't support tiling aperture
|
|
// and runtime doesn't force linear images in persistent
|
|
directMap = IS_WINDOWS && !dev().settings().linearPersistentImage_;
|
|
}
|
|
|
|
return directMap;
|
|
}
|
|
|
|
void*
|
|
Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers)
|
|
{
|
|
if (isMemoryType(Pinned)) {
|
|
// Check if we have to wait
|
|
if (!(flags & NoWait)) {
|
|
if (gpu != NULL) {
|
|
wait(*gpu);
|
|
}
|
|
}
|
|
return address_;
|
|
}
|
|
|
|
gslMapAccessType mapFlags = GSL_MAP_READ_WRITE;
|
|
|
|
if (flags & ReadOnly) {
|
|
assert(!(flags & Discard) && "We can't use lock discard with read only!");
|
|
mapFlags = GSL_MAP_READ_ONLY;
|
|
}
|
|
|
|
if (flags & WriteOnly) {
|
|
mapFlags = GSL_MAP_WRITE_ONLY;
|
|
}
|
|
|
|
// Check if use map discard
|
|
if (flags & Discard) {
|
|
mapFlags = GSL_MAP_WRITE_ONLY;
|
|
if (gpu != NULL) {
|
|
// If we use a new renamed allocation, then skip the wait
|
|
if (rename(*gpu)) {
|
|
flags |= NoWait;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if we have to wait
|
|
if (!(flags & NoWait)) {
|
|
if (gpu != NULL) {
|
|
wait(*gpu);
|
|
}
|
|
}
|
|
|
|
// Check if memory wasn't mapped yet
|
|
if (++mapCount_ == 1) {
|
|
if ((cal()->dimSize_ == 3) || cal()->imageArray_) {
|
|
// Save map info for multilayer map/unmap
|
|
startLayer_ = startLayer;
|
|
numLayers_ = numLayers;
|
|
mapFlags_ = mapFlags;
|
|
// Map with layers
|
|
address_ = mapLayers(gpu, mapFlags);
|
|
}
|
|
else {
|
|
// Map current resource
|
|
if (!gslMap(&address_, &cal_.pitch_, mapFlags, gslResource())) {
|
|
LogError("cal::ResMap failed!");
|
|
--mapCount_;
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
//! \note the atomic operation with counter doesn't
|
|
// guarantee that the address will be valid,
|
|
// since GSL could still process the first map
|
|
if (address_ == NULL) {
|
|
amd::Os::sleep(10);
|
|
assert((address_ != NULL) && "Multiple maps failed!");
|
|
}
|
|
|
|
return address_;
|
|
}
|
|
|
|
void*
|
|
Resource::mapLayers(VirtualGPU* gpu, CALuint flags)
|
|
{
|
|
size_t srcOffs = 0;
|
|
size_t dstOffs = 0;
|
|
gslMemObject sliceResource = 0;
|
|
gslMemObjectAttribType gslDim = GSL_MOA_TEXTURE_2D;
|
|
size_t layers = cal()->depth_;
|
|
size_t height = cal()->height_;
|
|
|
|
// Use 1D layers
|
|
if (GSL_MOA_TEXTURE_1D_ARRAY == cal()->dimension_) {
|
|
gslDim = GSL_MOA_TEXTURE_1D;
|
|
height = 1;
|
|
layers = cal()->height_;
|
|
}
|
|
|
|
cal_.pitch_ = cal()->width_;
|
|
cal_.slice_ = cal()->pitch_ * height;
|
|
address_ = new char [cal()->slice_ * layers * elementSize()];
|
|
if (NULL == address_) {
|
|
return NULL;
|
|
}
|
|
|
|
// Check if map is write only
|
|
if (flags == GSL_MAP_WRITE_ONLY) {
|
|
return address_;
|
|
}
|
|
|
|
if (numLayers_ != 0) {
|
|
layers = startLayer_ + numLayers_;
|
|
}
|
|
|
|
dstOffs = startLayer_ * cal()->slice_ * elementSize();
|
|
|
|
// Loop through all layers
|
|
for (uint i = startLayer_; i < layers; ++i) {
|
|
gslResource3D gslSize;
|
|
CALdomain calOffset;
|
|
void* sliceAddr;
|
|
size_t pitch;
|
|
|
|
// Allocate a layer from the image
|
|
gslSize.width = cal()->width_;
|
|
gslSize.height = height;
|
|
gslSize.depth = 1;
|
|
calOffset.x = 0;
|
|
calOffset.y = 0;
|
|
calOffset.width = 0;
|
|
calOffset.height = 0;
|
|
|
|
sliceResource = dev().resAllocView(
|
|
gslResource(), gslSize,
|
|
calOffset, cal()->format_, cal()->channelOrder_, gslDim,
|
|
0, i, CAL_RESALLOCSLICEVIEW_LAYER);
|
|
if (0 == sliceResource) {
|
|
LogError("Map layer. resAllocSliceView failed!");
|
|
return NULL;
|
|
}
|
|
|
|
// Map 2D layer
|
|
if (!gslMap(&sliceAddr, &pitch, GSL_MAP_READ_ONLY, sliceResource)) {
|
|
LogError("Map layer. CalResMap failed!");
|
|
return NULL;
|
|
}
|
|
|
|
srcOffs = 0;
|
|
// Copy memory line by line
|
|
for (size_t rows = 0; rows < height; ++rows) {
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(
|
|
(reinterpret_cast<address>(address_) + dstOffs),
|
|
(reinterpret_cast<const_address>(sliceAddr) + srcOffs),
|
|
cal()->width_ * elementSize_);
|
|
|
|
dstOffs += cal()->pitch_ * elementSize();
|
|
srcOffs += pitch * elementSize();
|
|
}
|
|
|
|
// Unmap a layer
|
|
if (!gslUnmap(sliceResource)) {
|
|
LogError("Map layer. CalResUnmap failed!");
|
|
}
|
|
dev().resFree(sliceResource);
|
|
}
|
|
|
|
return address_;
|
|
}
|
|
|
|
void
|
|
Resource::unmap(VirtualGPU* gpu)
|
|
{
|
|
if (isMemoryType(Pinned)) {
|
|
return;
|
|
}
|
|
|
|
// Decrement map counter
|
|
int count = --mapCount_;
|
|
|
|
// Check if it's the last unmap
|
|
if (count == 0) {
|
|
if ((cal()->dimSize_ == 3) || cal()->imageArray_) {
|
|
// Unmap layers
|
|
unmapLayers(gpu);
|
|
}
|
|
else {
|
|
// Unmap current resource
|
|
if (!gslUnmap(gslResource())) {
|
|
LogError("CalResUnmap failed!");
|
|
}
|
|
}
|
|
address_ = NULL;
|
|
}
|
|
else if (count < 0) {
|
|
LogError("dev().serialCalResUnmap failed!");
|
|
++mapCount_;
|
|
return;
|
|
}
|
|
}
|
|
|
|
void
|
|
Resource::unmapLayers(VirtualGPU* gpu)
|
|
{
|
|
size_t srcOffs = 0;
|
|
size_t dstOffs = 0;
|
|
gslMemObjectAttribType gslDim = GSL_MOA_TEXTURE_2D;
|
|
gslMemObject sliceResource = NULL;
|
|
CALuint layers = cal()->depth_;
|
|
CALuint height = cal()->height_;
|
|
|
|
// Use 1D layers
|
|
if (GSL_MOA_TEXTURE_1D_ARRAY == cal()->dimension_) {
|
|
gslDim = GSL_MOA_TEXTURE_1D;
|
|
height = 1;
|
|
layers = cal()->height_;
|
|
}
|
|
|
|
if (numLayers_ != 0) {
|
|
layers = startLayer_ + numLayers_;
|
|
}
|
|
|
|
srcOffs = startLayer_ * cal()->slice_ * elementSize();
|
|
|
|
// Check if map is write only
|
|
if (!(mapFlags_ == GSL_MAP_READ_ONLY)) {
|
|
// Loop through all layers
|
|
for (uint i = startLayer_; i < layers; ++i) {
|
|
gslResource3D gslSize;
|
|
CALdomain calOffset;
|
|
void* sliceAddr;
|
|
size_t pitch;
|
|
|
|
// Allocate a layer from the image
|
|
gslSize.width = cal()->width_;
|
|
gslSize.height = height;
|
|
gslSize.depth = 1;
|
|
calOffset.x = 0;
|
|
calOffset.y = 0;
|
|
calOffset.width = 0;
|
|
calOffset.height = 0;
|
|
|
|
sliceResource = dev().resAllocView(
|
|
gslResource(), gslSize,
|
|
calOffset, cal()->format_, cal()->channelOrder_, gslDim,
|
|
0, i, CAL_RESALLOCSLICEVIEW_LAYER);
|
|
if (0 == sliceResource) {
|
|
LogError("Unmap layer. resAllocSliceView failed!");
|
|
return;
|
|
}
|
|
|
|
// Map a layer
|
|
if (!gslMap(&sliceAddr, &pitch, GSL_MAP_WRITE_ONLY, sliceResource)) {
|
|
LogError("Unmap layer. CalResMap failed!");
|
|
return;
|
|
}
|
|
|
|
dstOffs = 0;
|
|
// Copy memory line by line
|
|
for (size_t rows = 0; rows < height; ++rows) {
|
|
// Copy memory
|
|
amd::Os::fastMemcpy(
|
|
(reinterpret_cast<address>(sliceAddr) + dstOffs),
|
|
(reinterpret_cast<const_address>(address_) + srcOffs),
|
|
cal()->width_ * elementSize_);
|
|
|
|
dstOffs += pitch * elementSize();
|
|
srcOffs += cal()->pitch_ * elementSize();
|
|
}
|
|
|
|
// Unmap a layer
|
|
if (!gslUnmap(sliceResource)) {
|
|
LogError("Unmap layer. CalResUnmap failed!");
|
|
}
|
|
dev().resFree(sliceResource);
|
|
}
|
|
}
|
|
|
|
// Destroy the mapped memory
|
|
delete [] reinterpret_cast<char*>(address_);
|
|
}
|
|
|
|
void
|
|
Resource::setActiveRename(VirtualGPU& gpu, GslResourceReference* rename)
|
|
{
|
|
// Copy the unique GSL data
|
|
gslRef_ = rename;
|
|
address_ = rename->cpuAddress_;
|
|
|
|
if (dev().heap()->isVirtual()) {
|
|
hbOffset_ = rename->gslResource()->getSurfaceAddress() -
|
|
dev().heap()->baseAddress();
|
|
}
|
|
}
|
|
|
|
bool
|
|
Resource::getActiveRename(VirtualGPU& gpu, GslResourceReference** rename)
|
|
{
|
|
// Copy the old data to the rename descriptor
|
|
*rename = gslRef_;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
Resource::rename(VirtualGPU& gpu, bool force)
|
|
{
|
|
GpuEvent* gpuEvent = gpu.getGpuEvent(this);
|
|
if (!gpuEvent->isValid() && !force) {
|
|
return true;
|
|
}
|
|
|
|
bool useNext = false;
|
|
CALuint resSize = cal()->width_ * ((cal()->height_) ? cal()->height_ : 1) *
|
|
elementSize_;
|
|
|
|
// Rename will work with real GSL resources
|
|
if (((memoryType() != Local) &&
|
|
(memoryType() != Persistent) &&
|
|
(memoryType() != Remote) &&
|
|
(memoryType() != RemoteUSWC)) ||
|
|
(dev().settings().maxRenames_ == 0)) {
|
|
return false;
|
|
}
|
|
|
|
// If the resource for renaming is too big, then lets check the current status first
|
|
// at the cost of an extra flush
|
|
if (resSize >= (dev().settings().maxRenameSize_ / dev().settings().maxRenames_)) {
|
|
if (gpu.isDone(gpuEvent)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Save the first
|
|
if (renames_.size() == 0) {
|
|
GslResourceReference* rename;
|
|
if (mapCount_ > 0) {
|
|
gslRef_->cpuAddress_ = address_;
|
|
}
|
|
if (!getActiveRename(gpu, &rename)) {
|
|
return false;
|
|
}
|
|
|
|
curRename_ = renames_.size();
|
|
renames_.push_back(rename);
|
|
}
|
|
|
|
// Can we use a new rename?
|
|
if ((renames_.size() <= dev().settings().maxRenames_) &&
|
|
((renames_.size() * resSize) <= dev().settings().maxRenameSize_)) {
|
|
GslResourceReference* rename;
|
|
|
|
// Create a new GSL allocation
|
|
if (create(memoryType())) {
|
|
if (mapCount_ > 0) {
|
|
assert(!cal()->cardMemory_ && "Unsupported memory type!");
|
|
if (!dev().resMapRemote(gslRef_->cpuAddress_, cal_.pitch_,
|
|
gslResource(), GSL_MAP_READ_WRITE)) {
|
|
LogError("gslMap fails on rename!");
|
|
}
|
|
address_ = gslRef_->cpuAddress_;
|
|
}
|
|
if (getActiveRename(gpu, &rename)) {
|
|
curRename_ = renames_.size();
|
|
renames_.push_back(rename);
|
|
}
|
|
else {
|
|
gslRef_->release();
|
|
useNext = true;
|
|
}
|
|
}
|
|
else {
|
|
useNext = true;
|
|
}
|
|
}
|
|
else {
|
|
useNext = true;
|
|
}
|
|
|
|
if (useNext) {
|
|
// Get the last submitted
|
|
curRename_++;
|
|
if (curRename_ >= renames_.size()) {
|
|
curRename_ = 0;
|
|
}
|
|
setActiveRename(gpu, renames_[curRename_]);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
Resource::warmUpRenames(VirtualGPU& gpu)
|
|
{
|
|
for (uint i = 0; i < dev().settings().maxRenames_; ++i) {
|
|
const bool force = true;
|
|
rename(gpu, force);
|
|
}
|
|
}
|
|
|
|
//! Returns a byte or short typed view that aliases the whole buffer.
//!
//! The view is created on the first request for a format and cached in
//! byteView_ or shortView_; later requests return the cached view. The device
//! ops lock is held for the whole call, so the allocation happens only once.
//!
//! \param newFormat CM_SURF_FMT_R8I or CM_SURF_FMT_R16I
//! \return the view, or NULL for an unsupported format or a failed allocation
Resource*
Resource::getAliasUAVBuffer(cmSurfFmt newFormat)
{
    Resource*   view = NULL;
    uint        byteSize;

    // Lock device so a view allocation is unique operation
    amd::ScopedLock k(dev().gslDeviceOps());

    if (newFormat == CM_SURF_FMT_R8I) {
        view = byteView_;
        byteSize = 1;
    }
    else if (newFormat == CM_SURF_FMT_R16I) {
        view = shortView_;
        byteSize = 2;
    }
    else { // only take byte and short
        assert(false && "Unsupported format for a view");
        return NULL;
    }

    // allocate byte/short view
    if (NULL == view) {
        // Same byte size as the original buffer, expressed in new elements
        view = new Resource(dev(), (cal()->width_ * elementSize()) / byteSize, newFormat);
        if (view == NULL) {
            return NULL;
        }

        Resource::ViewParams params;
        params.offset_      = 0;
        params.size_        = cal()->width_ * elementSize();
        params.resource_    = this;

        if (!view->create(Resource::View, &params)) {
            delete view;
            return NULL;
        }

        // save view resource
        if (newFormat == CM_SURF_FMT_R8I) {
            byteView_ = view;
        }
        else if (newFormat == CM_SURF_FMT_R16I) {
            shortView_ = view;
        }
    }

    return view;
}
|
|
|
|
//! Empties the cache on destruction.
ResourceCache::~ResourceCache()
{
    // The result of free() isn't interesting during the teardown
    (void)free();
}
|
|
|
|
//! \note the cache works in FILO mode
|
|
bool
|
|
ResourceCache::addCalResource(
|
|
Resource::CalResourceDesc* desc, GslResourceReference* ref)
|
|
{
|
|
amd::ScopedLock l(&lockCacheOps_);
|
|
bool result = false;
|
|
size_t size = getResourceSize(desc);
|
|
|
|
// Make sure current allocation isn't bigger than cache
|
|
if (((desc->type_ == Resource::Local) ||
|
|
(desc->type_ == Resource::Persistent) ||
|
|
(desc->type_ == Resource::Remote) ||
|
|
(desc->type_ == Resource::RemoteUSWC)) &&
|
|
(size < cacheSizeLimit_) &&
|
|
!desc->SVMRes_) {
|
|
// Validate the cache size limit. Loop until we have enough space
|
|
while ((cacheSize_ + size) > cacheSizeLimit_) {
|
|
removeLast();
|
|
}
|
|
Resource::CalResourceDesc* descCached = new Resource::CalResourceDesc;
|
|
if (descCached != NULL) {
|
|
// Copy the original desc to the cached version
|
|
memcpy(descCached, desc, sizeof(Resource::CalResourceDesc));
|
|
|
|
// Add the current resource to the cache
|
|
resCache_.push_front(std::make_pair(descCached, ref));
|
|
cacheSize_ += size;
|
|
result = true;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
//! Looks for a cached GSL reference that matches the descriptor and removes
//! it from the cache.
//!
//! An entry matches if dimension, type, width, height, depth, channel order,
//! format and flags are all equal. SVM resources and resources that don't fit
//! into the cache are never looked up.
//!
//! \param desc descriptor of the requested resource
//! \return the cached reference (ownership goes to the caller), or NULL
GslResourceReference*
ResourceCache::findCalResource(Resource::CalResourceDesc* desc)
{
    amd::ScopedLock l(&lockCacheOps_);
    GslResourceReference* ref = NULL;
    size_t  size = getResourceSize(desc);

    // Early exit if resource is too big
    if (size >= cacheSizeLimit_ || desc->SVMRes_) {
        //! \note we may need to free the cache here to reduce memory pressure
        return ref;
    }

    // Search the right resource through the cache list
    std::list<std::pair<Resource::CalResourceDesc*,
        GslResourceReference*> >::iterator it;
    for (it = resCache_.begin(); it != resCache_.end(); ++it) {
        Resource::CalResourceDesc*  entry = it->first;
        // Find if we can reuse this entry
        if ((entry->dimension_ == desc->dimension_) &&
            (entry->type_ == desc->type_) &&
            (entry->width_ == desc->width_) &&
            (entry->height_ == desc->height_) &&
            (entry->depth_ == desc->depth_) &&
            (entry->channelOrder_ == desc->channelOrder_) &&
            (entry->format_ == desc->format_) &&
            (entry->flags_ == desc->flags_)) {
            ref = it->second;
            delete entry;

            // Remove the found entry from the cache. Erasing by iterator
            // avoids a second scan of the list and never compares against
            // the descriptor pointer that was just deleted.
            resCache_.erase(it);
            // The matched entry has the same dimensions and format as desc,
            // so it has the same size
            cacheSize_ -= size;
            break;
        }
    }

    return ref;
}
|
|
|
|
bool
|
|
ResourceCache::free(size_t minCacheEntries)
|
|
{
|
|
amd::ScopedLock l(&lockCacheOps_);
|
|
bool result = false;
|
|
|
|
if (minCacheEntries < resCache_.size()) {
|
|
if (static_cast<int>(cacheSize_) > 0) {
|
|
result = true;
|
|
}
|
|
// Clear the cache
|
|
while (static_cast<int>(cacheSize_) > 0) {
|
|
removeLast();
|
|
}
|
|
CondLog((cacheSize_ != 0), "Incorrect size for cache release!");
|
|
}
|
|
return result;
|
|
}
|
|
|
|
size_t
|
|
ResourceCache::getResourceSize(Resource::CalResourceDesc* desc)
|
|
{
|
|
// Find the total amount of elements
|
|
size_t size =
|
|
desc->width_ *
|
|
((desc->height_) ? desc->height_ : 1) *
|
|
((desc->depth_) ? desc->depth_: 1);
|
|
|
|
// Find total size in bytes
|
|
size *= static_cast<size_t>(memoryFormatSize(desc->format_).size_);
|
|
|
|
return size;
|
|
}
|
|
|
|
void
|
|
ResourceCache::removeLast()
|
|
{
|
|
std::pair<Resource::CalResourceDesc*, GslResourceReference*> entry;
|
|
entry = resCache_.back();
|
|
resCache_.pop_back();
|
|
|
|
size_t size = getResourceSize(entry.first);
|
|
|
|
// Delete CalResourceDesc
|
|
delete entry.first;
|
|
|
|
// Destroy GSL resource
|
|
entry.second->release();
|
|
cacheSize_ -= size;
|
|
}
|
|
|
|
} // namespace gpu
|