58e4bca449
SWDEV-79445 - OCL generic changes and code clean-up - Make sure transfer doesn't exceed CP dma limit Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#81 edit
2213 lines
80 KiB
C++
2213 lines
80 KiB
C++
// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
#include "platform/program.hpp"
|
|
#include "platform/kernel.hpp"
|
|
#include "os/os.hpp"
|
|
#include "device/device.hpp"
|
|
#include "utils/flags.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "device/pal/palresource.hpp"
|
|
#include "device/pal/paldevice.hpp"
|
|
#include "device/pal/palblit.hpp"
|
|
#include "device/pal/paltimestamp.hpp"
|
|
#include "thread/atomic.hpp"
|
|
#include "hsa_ext_image.h"
|
|
#ifdef _WIN32
|
|
#include <d3d10_1.h>
|
|
#include "CL/cl_d3d10.h"
|
|
#include "CL/cl_d3d11.h"
|
|
#endif // _WIN32
|
|
#include <GL/gl.h>
|
|
#include "GL/glATIInternal.h"
|
|
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <iostream>
|
|
#include <cmath>
|
|
|
|
namespace pal {
|
|
|
|
// ================================================================================================
|
|
// Registers the wrapped PAL memory object with the device through
// AddGpuMemoryReferences() (flag GpuMemoryRefCantTrim) when the device setting
// alwaysResident_ is enabled. With the setting disabled nothing is done and
// Success is returned.
Pal::Result GpuMemoryReference::MakeResident() const {
  if (!device_.settings().alwaysResident_) {
    return Pal::Result::Success;
  }

  Pal::GpuMemoryRef residentRef = {};
  residentRef.pGpuMemory = gpuMem_;
  return device_.iDev()->AddGpuMemoryReferences(1, &residentRef, nullptr,
                                                Pal::GpuMemoryRefCantTrim);
}
|
|
|
|
// ================================================================================================
|
|
// Creates a reference wrapper around a newly allocated PAL GPU memory object.
//
// The PAL object is constructed at &memRef[1], i.e. directly behind the wrapper
// (presumably the class-specific operator new reserves gpuMemSize extra bytes
// for it -- confirm against the header).
// If PAL fails the allocation, the resource cache is freed and, when that
// released something, the allocation is retried once.
//
// Returns the new reference, or nullptr if the size query, the wrapper
// allocation, the PAL allocation or MakeResident() failed. The device memory
// counters are updated only after a successful allocation that isn't marked
// sdiExternal.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::GpuMemoryCreateInfo& createInfo) {
  Pal::Result result;
  size_t gpuMemSize = dev.iDev()->GetGpuMemorySize(createInfo, &result);
  if (result != Pal::Result::Success) {
    return nullptr;
  }

  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Nothing was allocated, so the memory counters must stay untouched
    return nullptr;
  }

  result = dev.iDev()->CreateGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
  if ((result != Pal::Result::Success) &&
      // Free cache if PAL failed allocation
      dev.resourceCache().free()) {
    // If cache was freed, then try to allocate again
    result = dev.iDev()->CreateGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
  }
  if (result == Pal::Result::Success) {
    result = memRef->MakeResident();
  }
  if (result != Pal::Result::Success) {
    memRef->release();
    return nullptr;
  }

  if (!createInfo.flags.sdiExternal) {
    // Update free memory size counters
    dev.updateAllocedMemory(createInfo.heaps[0], createInfo.size, false);
  }
  return memRef;
}
|
|
|
|
// ================================================================================================
|
|
// Creates a reference wrapper around a PAL pinned (host memory backed) GPU
// memory object. The PAL object is constructed at &memRef[1], directly behind
// the wrapper.
//
// Returns the new reference, or nullptr if the size query, the wrapper
// allocation, the PAL call or MakeResident() failed. On success the allocation
// is accounted against the GpuHeapGartCacheable heap counters.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::PinnedGpuMemoryCreateInfo& createInfo) {
  Pal::Result result;
  size_t gpuMemSize = dev.iDev()->GetPinnedGpuMemorySize(createInfo, &result);
  if (result != Pal::Result::Success) {
    return nullptr;
  }

  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Nothing was pinned, so the memory counters must stay untouched
    return nullptr;
  }

  result = dev.iDev()->CreatePinnedGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
  if (result == Pal::Result::Success) {
    result = memRef->MakeResident();
  }
  if (result != Pal::Result::Success) {
    memRef->release();
    return nullptr;
  }

  // Update free memory size counters
  dev.updateAllocedMemory(Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size, false);
  return memRef;
}
|
|
|
|
// ================================================================================================
|
|
// Creates a reference wrapper around a PAL SVM GPU memory object. The PAL
// object is constructed at &memRef[1], directly behind the wrapper.
//
// Returns the new reference, or nullptr if the size query, the wrapper
// allocation, the PAL call or MakeResident() failed. On success the allocation
// is accounted against the GpuHeapGartCacheable heap counters.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::SvmGpuMemoryCreateInfo& createInfo) {
  Pal::Result result;
  size_t gpuMemSize = dev.iDev()->GetSvmGpuMemorySize(createInfo, &result);
  if (result != Pal::Result::Success) {
    return nullptr;
  }

  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Nothing was allocated, so the memory counters must stay untouched
    return nullptr;
  }

  result = dev.iDev()->CreateSvmGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
  if (result == Pal::Result::Success) {
    result = memRef->MakeResident();
  }
  if (result != Pal::Result::Success) {
    memRef->release();
    return nullptr;
  }

  // Update free memory size counters
  dev.updateAllocedMemory(Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size, false);
  return memRef;
}
|
|
|
|
// ================================================================================================
|
|
// Opens an externally shared GPU memory object and wraps it into a reference.
// The PAL object is constructed at &memRef[1], directly behind the wrapper.
// The creation info reported by PAL for the opened memory is not used here.
//
// Returns the new reference, or nullptr if the size query, the wrapper
// allocation, the PAL open call or MakeResident() failed.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::ExternalGpuMemoryOpenInfo& openInfo) {
  Pal::Result status;
  const size_t palObjectSize = dev.iDev()->GetExternalSharedGpuMemorySize(&status);
  if (status != Pal::Result::Success) {
    return nullptr;
  }

  GpuMemoryReference* sharedRef = new (palObjectSize) GpuMemoryReference(dev);
  if (sharedRef == nullptr) {
    return nullptr;
  }

  Pal::GpuMemoryCreateInfo reportedInfo = {};
  status = dev.iDev()->OpenExternalSharedGpuMemory(openInfo, &sharedRef[1], &reportedInfo,
                                                   &sharedRef->gpuMem_);
  if (status == Pal::Result::Success) {
    status = sharedRef->MakeResident();
  }
  if (status == Pal::Result::Success) {
    return sharedRef;
  }

  sharedRef->release();
  return nullptr;
}
|
|
|
|
// ================================================================================================
|
|
// Opens an externally shared image together with its GPU memory.
//
// dev           - device that owns the PAL device interface
// openInfo      - description of the external image to open
// imgCreateInfo - receives the image creation info reported by PAL
// image         - receives the PAL image; its storage is a char[] block
//                 allocated here, which the owner releases with delete[]
//                 after Destroy()
//
// The PAL memory object is constructed at &memRef[1], directly behind the
// wrapper. Returns the new reference, or nullptr on failure. On every failure
// path the image storage allocated here is released, and *image is reset to
// nullptr if the image had already been created.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::ExternalImageOpenInfo& openInfo,
                                               Pal::ImageCreateInfo* imgCreateInfo,
                                               Pal::IImage** image) {
  size_t gpuMemSize = 0;
  size_t imageSize = 0;
  if (Pal::Result::Success !=
      dev.iDev()->GetExternalSharedImageSizes(openInfo, &imageSize, &gpuMemSize, imgCreateInfo)) {
    return nullptr;
  }

  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Bail out before the image storage is allocated, so nothing can leak
    return nullptr;
  }

  Pal::GpuMemoryCreateInfo createInfo = {};
  char* imgMem = new char[imageSize];
  Pal::Result result = dev.iDev()->OpenExternalSharedImage(openInfo, imgMem, &memRef[1],
                                                           &createInfo, image, &memRef->gpuMem_);
  if (result != Pal::Result::Success) {
    // No image object was created, only the raw storage has to be returned
    delete[] imgMem;
    memRef->release();
    return nullptr;
  }

  result = memRef->MakeResident();
  if (result != Pal::Result::Success) {
    // The image lives in imgMem: destroy it before the storage and the
    // memory it is bound to go away, and don't leave a dangling pointer
    (*image)->Destroy();
    *image = nullptr;
    delete[] imgMem;
    memRef->release();
    return nullptr;
  }
  return memRef;
}
|
|
|
|
// ================================================================================================
|
|
// Opens a GPU memory object that belongs to a peer device and wraps it into a
// reference. The PAL object is constructed at &memRef[1], directly behind the
// wrapper.
//
// Returns the new reference, or nullptr if the size query, the wrapper
// allocation, the PAL open call or MakeResident() failed.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::PeerGpuMemoryOpenInfo& openInfo) {
  Pal::Result status;
  const size_t palObjectSize = dev.iDev()->GetPeerGpuMemorySize(openInfo, &status);
  if (status != Pal::Result::Success) {
    return nullptr;
  }

  GpuMemoryReference* peerRef = new (palObjectSize) GpuMemoryReference(dev);
  if (peerRef == nullptr) {
    return nullptr;
  }

  status = dev.iDev()->OpenPeerGpuMemory(openInfo, &peerRef[1], &peerRef->gpuMem_);
  if (status == Pal::Result::Success) {
    status = peerRef->MakeResident();
  }
  if (status == Pal::Result::Success) {
    return peerRef;
  }

  peerRef->release();
  return nullptr;
}
|
|
|
|
// ================================================================================================
|
|
// Builds an empty reference bound to the given device: no PAL memory object,
// no CPU mapping and no owning virtual GPU yet.
GpuMemoryReference::GpuMemoryReference(const Device& dev)
    : gpuMem_(nullptr),
      cpuAddress_(nullptr),
      device_(dev),
      gpu_(nullptr) {
}
|
|
|
|
// ================================================================================================
|
|
// Releases the wrapped PAL memory object.
//
// The memory is first detached from the virtual GPUs: from the single owning
// queue when gpu_ is set, otherwise from every queue with index >= 1 under the
// device-wide vgpu lock. Queue 0 (asserted to be the transfer queue) is
// handled separately under the transfer manager lock. Afterwards the CPU
// mapping is dropped, the device memory counters are updated for memory that
// isn't shared/external, and the PAL object is destroyed.
GpuMemoryReference::~GpuMemoryReference() {
  // Nothing to do if PAL memory was never created
  if (iMem() == nullptr) {
    return;
  }

  if (gpu_ != nullptr) {
    // The memory belongs to a single queue
    amd::ScopedLock queueLock(gpu_->execution());
    gpu_->releaseMemory(this);
  } else {
    Device::ScopedLockVgpus allQueuesLock(device_);
    // Release all memory objects on all virtual GPUs
    for (uint queueIdx = 1; queueIdx < device_.vgpus().size(); ++queueIdx) {
      device_.vgpus()[queueIdx]->releaseMemory(this);
    }
  }

  if (device_.vgpus().size() != 0) {
    assert(device_.vgpus()[0] == device_.xferQueue() && "Wrong transfer queue!");
    // Lock the transfer queue, since it's not handled by ScopedLockVgpus
    amd::ScopedLock xferLock(device_.xferMgr().lockXfer());
    device_.vgpus()[0]->releaseMemory(this);
  }

  // Drop the CPU mapping before the PAL object goes away
  if (cpuAddress_ != nullptr) {
    iMem()->Unmap();
  }

  const bool isForeignMemory = iMem()->Desc().flags.isShared ||
      iMem()->Desc().flags.isExternal || iMem()->Desc().flags.isExternPhys;
  if (!isForeignMemory) {
    // Update free memory size counters
    device_.updateAllocedMemory(iMem()->Desc().preferredHeap, iMem()->Desc().size, true);
  }

  iMem()->Destroy();
  gpuMem_ = nullptr;
}
|
|
|
|
// ================================================================================================
|
|
// Constructs a buffer resource descriptor for `size` bytes and registers the
// resource with the device. No GPU memory is allocated here.
// The descriptor width is the byte size rounded up to, and expressed in,
// units of the X32_Uint pixel size.
Resource::Resource(const Device& gpuDev, size_t size)
    : elementSize_(0),
      gpuDevice_(gpuDev),
      mapCount_(0),
      address_(nullptr),
      offset_(0),
      memRef_(nullptr),
      subOffset_(0),
      viewOwner_(nullptr),
      image_(nullptr),
      hwSrd_(0),
      events_(gpuDev.numOfVgpus()) {
  const auto elementBytes = Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint);

  // Reset the whole state first, then describe a plain 1D buffer
  desc_.state_ = 0;
  desc_.type_ = Empty;

  // Dimensions
  desc_.width_ = amd::alignUp(size, elementBytes) / elementBytes;
  desc_.height_ = 1;
  desc_.depth_ = 1;
  desc_.mipLevels_ = 1;

  // Default format
  desc_.format_.image_channel_order = CL_R;
  desc_.format_.image_channel_data_type = CL_FLOAT;

  // Layout and properties
  desc_.flags_ = 0;
  desc_.pitch_ = 0;
  desc_.slice_ = 0;
  desc_.cardMemory_ = true;
  desc_.dimSize_ = 1;
  desc_.buffer_ = true;
  desc_.imageArray_ = false;
  desc_.topology_ = CL_MEM_OBJECT_BUFFER;
  desc_.SVMRes_ = false;
  desc_.scratch_ = false;
  desc_.isAllocExecute_ = false;
  desc_.baseLevel_ = 0;
  desc_.gl2CacheDisabled_ = false;

  // Add the new resource to the device resource list
  gpuDev.addResource(this);
}
|
|
|
|
// ================================================================================================
|
|
// Constructs an image resource descriptor with the given dimensions, format,
// OpenCL image type and mip level count, and registers the resource with the
// device. No GPU memory is allocated here.
// The image type determines the dimension count and whether the resource is
// an image array; an unknown type is logged and treated as one-dimensional.
Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t depth,
                   cl_image_format format, cl_mem_object_type imageType, uint mipLevels)
    : elementSize_(0),
      gpuDevice_(gpuDev),
      mapCount_(0),
      address_(nullptr),
      offset_(0),
      memRef_(nullptr),
      subOffset_(0),
      viewOwner_(nullptr),
      image_(nullptr),
      hwSrd_(0),
      events_(gpuDev.numOfVgpus()) {
  // Reset the whole state first, then describe the image
  desc_.state_ = 0;
  desc_.type_ = Empty;

  // Dimensions and format come straight from the caller
  desc_.width_ = width;
  desc_.height_ = height;
  desc_.depth_ = depth;
  desc_.mipLevels_ = mipLevels;
  desc_.format_ = format;

  // Layout and properties
  desc_.flags_ = 0;
  desc_.pitch_ = 0;
  desc_.slice_ = 0;
  desc_.cardMemory_ = true;
  desc_.buffer_ = false;
  desc_.imageArray_ = false;
  desc_.topology_ = imageType;
  desc_.SVMRes_ = false;
  desc_.scratch_ = false;
  desc_.isAllocExecute_ = false;
  desc_.baseLevel_ = 0;
  desc_.gl2CacheDisabled_ = false;

  // Derive the dimension count (and the array flag) from the image type
  switch (imageType) {
    case CL_MEM_OBJECT_IMAGE1D:
    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
      desc_.dimSize_ = 1;
      break;
    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
      desc_.dimSize_ = 2;
      desc_.imageArray_ = true;
      break;
    case CL_MEM_OBJECT_IMAGE2D:
      desc_.dimSize_ = 2;
      break;
    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
      desc_.dimSize_ = 3;
      desc_.imageArray_ = true;
      break;
    case CL_MEM_OBJECT_IMAGE3D:
      desc_.dimSize_ = 3;
      break;
    default:
      desc_.dimSize_ = 1;
      LogError("Unknown image type!");
      break;
  }

  // Add the new resource to the device resource list
  gpuDev.addResource(this);
}
|
|
|
|
// ================================================================================================
|
|
// Frees the resource, destroys the PAL image if this resource created it and
// removes the resource from the device resource list.
// An image view keeps using the owner's image unless its element size differs
// from the owner's; only in that case (or for any non-view resource) the image
// object and its char[] storage are released here.
Resource::~Resource() {
  free();

  if (image_ != nullptr) {
    //! @todo PAL doesn't allow an SRD view creation with different pixel size
    const bool hasOwnImage =
        (memoryType() != ImageView) || (elementSize() != viewOwner_->elementSize());
    if (hasOwnImage) {
      image_->Destroy();
      delete[] reinterpret_cast<char*>(image_);
    }
  }

  // Remove the current resource from the global resource list
  gpuDevice_.removeResource(this);
}
|
|
|
|
// ================================================================================================
|
|
static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
|
|
static const uint32_t FormatType[] = {HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT,
|
|
HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24};
|
|
|
|
uint idx = format.image_channel_data_type - CL_SNORM_INT8;
|
|
assert((idx <= (CL_UNORM_INT24 - CL_SNORM_INT8)) && "Out of range format channel!");
|
|
return FormatType[idx];
|
|
}
|
|
|
|
// ================================================================================================
|
|
static uint32_t GetHSAILImageOrderType(const cl_image_format& format) {
|
|
static const uint32_t OrderType[] = {HSA_EXT_IMAGE_CHANNEL_ORDER_R,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_A,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_RG,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_RA,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_RGB,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_RX,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_RGX,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA,
|
|
HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR};
|
|
|
|
uint idx = format.image_channel_order - CL_R;
|
|
assert((idx <= (CL_ABGR - CL_R)) && "Out of range format order!");
|
|
return OrderType[idx];
|
|
}
|
|
|
|
// ================================================================================================
|
|
// Fills the heap selection of a PAL memory create info according to the memory
// type of this resource. Besides the heap list it may set the peerWritable
// and (Linux only) busAddressable flags, and it clears desc_.cardMemory_ for
// the RemoteUSWC, Remote and ExternalPhysical types.
// In non-mainline builds the MALL policy is selected as well.
void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
  const auto memType = memoryType();

  // A single heap unless the memory type asks for more
  createInfo->heapCount = 1;
  switch (memType) {
    case ExternalPhysical:
      desc_.cardMemory_ = false;
      // Intentional fall through: the allocation is processed like local memory
    case Shader:
    case Local:
      createInfo->heapCount = 3;
      createInfo->heaps[0] = Pal::GpuHeapInvisible;
      createInfo->heaps[1] = Pal::GpuHeapLocal;
      createInfo->heaps[2] = Pal::GpuHeapGartUswc;
      createInfo->flags.peerWritable = dev().P2PAccessAllowed();
      break;
    case Persistent:
      createInfo->heapCount = 2;
      createInfo->heaps[0] = Pal::GpuHeapLocal;
      createInfo->heaps[1] = Pal::GpuHeapGartUswc;
      createInfo->flags.peerWritable = dev().P2PAccessAllowed();
#ifdef ATI_OS_LINUX
      // Note: SSG in Linux requires DGMA heap
      if (dev().properties().gpuMemoryProperties.busAddressableMemSize > 0) {
        createInfo->flags.busAddressable = true;
      }
#endif
      break;
    case Remote:
      createInfo->heaps[0] = Pal::GpuHeapGartCacheable;
      desc_.cardMemory_ = false;
      break;
    case RemoteUSWC:
      createInfo->heaps[0] = Pal::GpuHeapGartUswc;
      desc_.cardMemory_ = false;
      break;
    default:
      createInfo->heaps[0] = Pal::GpuHeapLocal;
      break;
  }

#if !IS_MAINLINE
  // Pick the appropriate mall policy based on the mem type
  if ((memType == Local) || (memType == Scratch)) {
    createInfo->mallPolicy = static_cast<Pal::GpuMemMallPolicy>(dev().settings().mallPolicy_);
  } else {
    createInfo->mallPolicy = Pal::GpuMemMallPolicy::Never;
  }
#endif
}
|
|
|
|
// ================================================================================================
|
|
// Creates the GPU backing and the hardware descriptor (SRD) for an image.
//
// params      - creation parameters; read as ImageBufferParams when the
//               memory type is ImageBuffer and as ImageViewParams when it is
//               ImageView
// forceLinear - requests linear tiling instead of optimal tiling for a newly
//               created PAL image
//
// CL_MEM_OBJECT_IMAGE1D_BUFFER images are exposed through a typed buffer view
// and never get a PAL image object. All other topologies use a PAL image:
// an image view with the same element size reuses the owner's image, every
// other case creates a new one. Memory comes from the view owner (views and
// image buffers) or from the resource cache / a fresh PAL allocation.
//
// Returns true on success and false if a PAL call, the memory allocation or
// the SRD slot allocation failed.
bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
  Pal::Result result;
  const Pal::SubresId firstSubres = {Pal::ImageAspect::Color, 0, 0};
  Pal::SubresRange subresRange = {firstSubres, 1, 1};
  Pal::ChannelMapping swizzle;
  const Pal::ChNumFormat palFormat = dev().getPalFormat(desc().format_, &swizzle);

  // The HSAIL specific fields follow the hardware SRD in both descriptor kinds
  const auto writeHsailFields = [this]() {
    hwState_[8] = GetHSAILImageFormatType(desc().format_);
    hwState_[9] = GetHSAILImageOrderType(desc().format_);
    hwState_[10] = static_cast<uint32_t>(desc().width_);
    hwState_[11] = 0;  // one extra reserved field in the argument
  };

  // ---------------------------------------------------------------------------------------------
  // 1D image buffers are described with a typed buffer view
  // ---------------------------------------------------------------------------------------------
  if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
    if (memoryType() != ImageBuffer) {
      Pal::GpuMemoryCreateInfo memInfo = {};
      memInfo.size = desc().width_ * elementSize();
      memInfo.size = amd::alignUp(memInfo.size, MaxGpuAlignment);
      memInfo.alignment = MaxGpuAlignment;
      memInfo.vaRange = Pal::VaRange::Default;
      memInfo.priority = Pal::GpuMemPriority::Normal;
      memTypeToHeap(&memInfo);

      // Try the resource cache first and allocate new memory only on a miss
      memRef_ = dev().resourceCache().findGpuMemory(&desc_, memInfo.size, memInfo.alignment,
                                                    nullptr, &subOffset_);
      if (memRef_ == nullptr) {
        memRef_ = GpuMemoryReference::Create(dev(), memInfo);
        if (memRef_ == nullptr) {
          LogError("Failed PAL memory allocation!");
          return false;
        }
      }
      offset_ += static_cast<size_t>(subOffset_);
    } else {
      // Share the memory of the buffer this image was created from
      ImageBufferParams* bufferParams = reinterpret_cast<ImageBufferParams*>(params);
      viewOwner_ = bufferParams->resource_;
      memRef_ = viewOwner_->memRef_;
      memRef_->retain();
      desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
      offset_ += viewOwner_->offset_;
    }

    // Check if memory is locked already and restore CPU pointer
    if (memRef_->cpuAddress_ != nullptr) {
      address_ = memRef_->cpuAddress_;
      memRef_->cpuAddress_ = nullptr;
      mapCount_++;
    }

    Pal::BufferViewInfo bufferView = {};
    bufferView.gpuAddr = vmAddress();
    bufferView.range = memRef_->iMem()->Desc().size;
    bufferView.stride = elementSize();
    bufferView.swizzledFormat.format = palFormat;
    bufferView.swizzledFormat.swizzle = swizzle;

    hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
    if ((hwSrd_ == 0) && (memoryType() != ImageView)) {
      return false;
    }

    dev().iDev()->CreateTypedBufferViewSrds(1, &bufferView, hwState_);
    writeHsailFields();
    return true;
  }

  // ---------------------------------------------------------------------------------------------
  // All other topologies use a PAL image object
  // ---------------------------------------------------------------------------------------------
  Pal::ImageViewInfo imageView = {};
  Pal::ImageCreateInfo imageInfo = {};
  Pal::GpuMemoryRequirements memReq = {};

  // 2D is the default, 1D and 3D topologies override it below
  imageInfo.imageType = Pal::ImageType::Tex2d;
  imageView.viewType = Pal::ImageViewType::Tex2d;
  imageView.possibleLayouts.engines = Pal::LayoutComputeEngine | Pal::LayoutDmaEngine;
  imageView.possibleLayouts.usages = Pal::LayoutShaderWrite;
  imageInfo.extent.width = desc_.width_;
  imageInfo.extent.height = desc_.height_;
  imageInfo.extent.depth = desc_.depth_;
  imageInfo.arraySize = 1;

  if (desc_.topology_ == CL_MEM_OBJECT_IMAGE3D) {
    imageInfo.imageType = Pal::ImageType::Tex3d;
    imageView.viewType = Pal::ImageViewType::Tex3d;
  } else if ((desc_.topology_ == CL_MEM_OBJECT_IMAGE1D) ||
             (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) ||
             (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
    imageInfo.imageType = Pal::ImageType::Tex1d;
    imageView.viewType = Pal::ImageViewType::Tex1d;
  }

  if (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
    // A 1D array keeps its slice count in the height field
    imageInfo.arraySize = desc_.height_;
    subresRange.numSlices = imageInfo.arraySize;
    imageInfo.extent.depth = desc_.height_;
    imageInfo.extent.height = 1;
  }
  if (desc_.topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
    // A 2D array keeps its slice count in the depth field
    imageInfo.arraySize = desc_.depth_;
    subresRange.numSlices = imageInfo.arraySize;
  }

  // Pick up the owner resource for views and image buffers
  if (memoryType() == ImageView) {
    ImageViewParams* viewParams = reinterpret_cast<ImageViewParams*>(params);
    subresRange.startSubres.mipLevel = viewParams->level_;
    desc_.baseLevel_ = viewParams->level_;
    subresRange.startSubres.arraySlice = viewParams->layer_;
    viewOwner_ = viewParams->resource_;
    image_ = viewOwner_->image_;
  } else if (memoryType() == ImageBuffer) {
    ImageBufferParams* bufferParams = reinterpret_cast<ImageBufferParams*>(params);
    viewOwner_ = bufferParams->resource_;
  }
  if (viewOwner_ != nullptr) {
    offset_ = viewOwner_->offset();
  }
  subresRange.numMips = desc().mipLevels_;

  // Create a new PAL image, unless this is a view with the owner's pixel size
  if ((memoryType() != ImageView) ||
      //! @todo PAL doesn't allow an SRD view creation with different pixel size
      (elementSize() != viewOwner_->elementSize())) {
    imageInfo.usageFlags.shaderRead = true;
    imageInfo.usageFlags.shaderWrite = (palFormat != Pal::ChNumFormat::X8Y8Z8W8_Srgb);
    imageInfo.swizzledFormat.format = palFormat;
    imageInfo.swizzledFormat.swizzle = swizzle;
    if (desc_.mipLevels_) {
      imageInfo.mipLevels = desc_.mipLevels_;
    } else {
      imageInfo.mipLevels = 1;
    }
    imageInfo.samples = 1;
    imageInfo.fragments = 1;

    Pal::ImageTiling tilingMode = Pal::ImageTiling::Optimal;
    if (forceLinear) {
      tilingMode = Pal::ImageTiling::Linear;
    }
    uint32_t pitchInPixels = 0;

    if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) ||
        (memoryType() == ImageBuffer)) {
      tilingMode = Pal::ImageTiling::Linear;
    } else if (memoryType() == ImageView) {
      tilingMode = viewOwner_->image_->GetImageCreateInfo().tiling;
      // Find the new pitch in pixels for the new format
      pitchInPixels = viewOwner_->desc().pitch_ * viewOwner_->elementSize() / elementSize();
    }

    if (memoryType() == ImageBuffer) {
      // Prefer the row pitch requested by the owner image, if there is one
      if ((params->owner_ != nullptr) && params->owner_->asImage() &&
          (params->owner_->asImage()->getRowPitch() != 0)) {
        pitchInPixels = params->owner_->asImage()->getRowPitch() / elementSize();
      } else {
        pitchInPixels = desc().width_;
      }
    }
    desc_.pitch_ = pitchInPixels;

    // Make sure the row pitch is aligned to pixels
    imageInfo.rowPitch =
        amd::alignUp(elementSize() * pitchInPixels, dev().info().imagePitchAlignment_);
    imageInfo.depthPitch = imageInfo.rowPitch * desc().height_;
    imageInfo.tiling = tilingMode;

    size_t imageObjectSize = dev().iDev()->GetImageSize(imageInfo, &result);
    if (result != Pal::Result::Success) {
      return false;
    }

    // The PAL image object is constructed inside this storage
    char* imageStorage = new char[imageObjectSize];
    if (imageStorage != nullptr) {
      result = dev().iDev()->CreateImage(imageInfo, imageStorage, &image_);
      if (result != Pal::Result::Success) {
        delete[] imageStorage;
        return false;
      }
    }
    image_->GetGpuMemoryRequirements(&memReq);
  }

  // Views and image buffers share the owner's memory, everything else allocates
  if ((memoryType() == ImageView) || (memoryType() == ImageBuffer)) {
    memRef_ = viewOwner_->memRef_;
    memRef_->retain();
    desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
    if (memReq.size > viewOwner_->iMem()->Desc().size) {
      LogWarning("Image is bigger than the original mem object!");
    }
  } else {
    Pal::GpuMemoryCreateInfo memInfo = {};
    memInfo.size = amd::alignUp(memReq.size, MaxGpuAlignment);
    memInfo.alignment = std::max(memReq.alignment, MaxGpuAlignment);
    memInfo.vaRange = Pal::VaRange::Default;
    memInfo.priority = Pal::GpuMemPriority::Normal;
    memTypeToHeap(&memInfo);

    // Try the resource cache first and allocate new memory only on a miss
    memRef_ = dev().resourceCache().findGpuMemory(&desc_, memInfo.size, memInfo.alignment,
                                                  nullptr, &subOffset_);
    if (memRef_ == nullptr) {
      memRef_ = GpuMemoryReference::Create(dev(), memInfo);
      if (memRef_ == nullptr) {
        LogError("Failed PAL memory allocation!");
        return false;
      }
    }
    offset_ += static_cast<size_t>(subOffset_);
  }

  // Check if memory is locked already and restore CPU pointer
  if (memRef_->cpuAddress_ != nullptr) {
    address_ = memRef_->cpuAddress_;
    memRef_->cpuAddress_ = nullptr;
    mapCount_++;
  }

  result = image_->BindGpuMemory(memRef_->gpuMem_, offset_);
  if (result != Pal::Result::Success) {
    return false;
  }

  hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
  if ((hwSrd_ == 0) && (memoryType() != ImageView)) {
    return false;
  }

  imageView.pImage = image_;
  imageView.swizzledFormat.format = palFormat;
  imageView.swizzledFormat.swizzle = swizzle;
  imageView.subresRange = subresRange;
  dev().iDev()->CreateImageViewSrds(1, &imageView, hwState_);

  writeHsailFields();
  return true;
}
|
|
|
|
// ================================================================================================
|
|
bool Resource::CreateInterop(CreateParams* params) {
|
|
Pal::Result result;
|
|
Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, 0, 0};
|
|
Pal::SubresRange ImgSubresRange = {ImgSubresId, 1, 1};
|
|
Pal::ChannelMapping channels;
|
|
Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
|
|
Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {};
|
|
Pal::ExternalResourceOpenInfo& openInfo = gpuMemOpenInfo.resourceInfo;
|
|
uint misc = 0;
|
|
uint layer = 0;
|
|
uint mipLevel = 0;
|
|
InteropType type = InteropTypeless;
|
|
|
|
if (memoryType() == OGLInterop) {
|
|
OGLInteropParams* oglRes = reinterpret_cast<OGLInteropParams*>(params);
|
|
assert(oglRes->glPlatformContext_ && "We don't have OGL context!");
|
|
switch (oglRes->type_) {
|
|
case InteropVertexBuffer:
|
|
glType_ = GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD;
|
|
break;
|
|
case InteropRenderBuffer:
|
|
glType_ = GL_RESOURCE_ATTACH_RENDERBUFFER_AMD;
|
|
break;
|
|
case InteropTexture:
|
|
case InteropTextureViewLevel:
|
|
case InteropTextureViewCube:
|
|
glType_ = GL_RESOURCE_ATTACH_TEXTURE_AMD;
|
|
break;
|
|
default:
|
|
LogError("Unknown OGL interop type!");
|
|
return false;
|
|
break;
|
|
}
|
|
glPlatformContext_ = oglRes->glPlatformContext_;
|
|
layer = oglRes->layer_;
|
|
type = oglRes->type_;
|
|
mipLevel = oglRes->mipLevel_;
|
|
|
|
if (!dev().resGLAssociate(oglRes->glPlatformContext_, oglRes->handle_, glType_,
|
|
&openInfo.hExternalResource, &glInteropMbRes_, &offset_, desc_.format_
|
|
#ifdef ATI_OS_WIN
|
|
,
|
|
openInfo.doppDesktopInfo
|
|
#endif
|
|
)) {
|
|
return false;
|
|
}
|
|
desc_.isDoppTexture_ = (openInfo.doppDesktopInfo.gpuVirtAddr != 0);
|
|
format = dev().getPalFormat(desc().format_, &channels);
|
|
}
|
|
#ifdef ATI_OS_WIN
|
|
else {
|
|
D3DInteropParams* d3dRes = reinterpret_cast<D3DInteropParams*>(params);
|
|
openInfo.hExternalResource = d3dRes->handle_;
|
|
misc = d3dRes->misc;
|
|
layer = d3dRes->layer_;
|
|
type = d3dRes->type_;
|
|
mipLevel = d3dRes->mipLevel_;
|
|
}
|
|
#endif
|
|
//! @todo PAL query for image/buffer object doesn't work properly!
|
|
#if 0
|
|
bool isImage = false;
|
|
if (Pal::Result::Success !=
|
|
dev().iDev()->DetermineExternalSharedResourceType(openInfo, &isImage)) {
|
|
return false;
|
|
}
|
|
#endif // 0
|
|
if (desc().buffer_ || misc) {
|
|
memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
|
|
if (nullptr == memRef_) {
|
|
return false;
|
|
}
|
|
|
|
if (misc) {
|
|
Pal::ImageCreateInfo imgCreateInfo = {};
|
|
Pal::ExternalImageOpenInfo imgOpenInfo = {};
|
|
imgOpenInfo.resourceInfo = openInfo;
|
|
imgOpenInfo.swizzledFormat.format = format;
|
|
imgOpenInfo.swizzledFormat.swizzle = channels;
|
|
imgOpenInfo.usage.shaderRead = true;
|
|
imgOpenInfo.usage.shaderWrite = true;
|
|
size_t imageSize;
|
|
size_t gpuMemSize;
|
|
|
|
if (Pal::Result::Success !=
|
|
dev().iDev()->GetExternalSharedImageSizes(imgOpenInfo, &imageSize, &gpuMemSize,
|
|
&imgCreateInfo)) {
|
|
return false;
|
|
}
|
|
|
|
Pal::gpusize viewOffset = 0;
|
|
imgCreateInfo.flags.shareable = false;
|
|
imgCreateInfo.imageType = Pal::ImageType::Tex2d;
|
|
imgCreateInfo.extent.width = desc().width_;
|
|
imgCreateInfo.extent.height = desc().height_;
|
|
imgCreateInfo.extent.depth = desc().depth_;
|
|
imgCreateInfo.arraySize = 1;
|
|
imgCreateInfo.usageFlags.shaderRead = true;
|
|
imgCreateInfo.usageFlags.shaderWrite = true;
|
|
imgCreateInfo.swizzledFormat.format = format;
|
|
imgCreateInfo.swizzledFormat.swizzle = channels;
|
|
imgCreateInfo.mipLevels = 1;
|
|
imgCreateInfo.samples = 1;
|
|
imgCreateInfo.fragments = 1;
|
|
imgCreateInfo.tiling = Pal::ImageTiling::Linear;
|
|
imgCreateInfo.depthPitch = desc().height_ * imgCreateInfo.rowPitch;
|
|
|
|
switch (misc) {
|
|
case 1: // NV12 or P010 formats
|
|
switch (layer) {
|
|
case -1:
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
// Y - plane size to the offset
|
|
// NV12 format. UV is 2 times smaller plane Y
|
|
viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
|
|
imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
|
|
break;
|
|
default:
|
|
LogError("Unknown Interop View Type");
|
|
return false;
|
|
}
|
|
break;
|
|
case 2: // YV12 format
|
|
switch (layer) {
|
|
case -1:
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
// Y - plane size to the offset
|
|
// YV12 format. U is 4 times smaller plane than Y
|
|
viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
|
|
imgCreateInfo.rowPitch >>= 1;
|
|
break;
|
|
case 2:
|
|
// Y + U plane sizes to the offest.
|
|
// U plane is 4 times smaller than Y and U == V
|
|
viewOffset = 5 * imgCreateInfo.rowPitch * desc().height_ / 2;
|
|
imgCreateInfo.rowPitch >>= 1;
|
|
break;
|
|
default:
|
|
LogError("Unknown Interop View Type");
|
|
return false;
|
|
}
|
|
imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
|
|
break;
|
|
case 3: // YUY2 format
|
|
imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
|
|
break;
|
|
default:
|
|
LogError("Unknown Interop View Type");
|
|
return false;
|
|
}
|
|
|
|
imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
|
|
if (result != Pal::Result::Success) {
|
|
return false;
|
|
}
|
|
|
|
char* memImg = new char[imageSize];
|
|
if (memImg != nullptr) {
|
|
result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
|
|
if (result != Pal::Result::Success) {
|
|
delete[] memImg;
|
|
return false;
|
|
}
|
|
}
|
|
offset_ += static_cast<size_t>(viewOffset);
|
|
result = image_->BindGpuMemory(iMem(), offset_);
|
|
if (result != Pal::Result::Success) {
|
|
return false;
|
|
}
|
|
hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
|
|
if ((0 == hwSrd_) && (memoryType() != ImageView)) {
|
|
return false;
|
|
}
|
|
Pal::ImageViewInfo viewInfo = {};
|
|
viewInfo.viewType = Pal::ImageViewType::Tex2d;
|
|
viewInfo.pImage = image_;
|
|
viewInfo.swizzledFormat.format = format;
|
|
viewInfo.swizzledFormat.swizzle = channels;
|
|
viewInfo.subresRange = ImgSubresRange;
|
|
viewInfo.possibleLayouts.engines = Pal::LayoutComputeEngine | Pal::LayoutDmaEngine;
|
|
viewInfo.possibleLayouts.usages = Pal::LayoutShaderWrite;
|
|
dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
|
|
|
|
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
|
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
|
hwState_[10] = static_cast<uint32_t>(desc().width_);
|
|
hwState_[11] = 0; // one extra reserved field in the argument
|
|
}
|
|
} else if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
|
|
memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
|
|
if (nullptr == memRef_) {
|
|
return false;
|
|
}
|
|
Pal::BufferViewInfo viewInfo = {};
|
|
viewInfo.gpuAddr = vmAddress();
|
|
viewInfo.range = memRef_->iMem()->Desc().size;
|
|
viewInfo.stride = elementSize();
|
|
viewInfo.swizzledFormat.format = format;
|
|
viewInfo.swizzledFormat.swizzle = channels;
|
|
hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
|
|
if ((0 == hwSrd_) && (memoryType() != ImageView)) {
|
|
return false;
|
|
}
|
|
|
|
dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
|
|
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
|
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
|
hwState_[10] = static_cast<uint32_t>(desc().width_);
|
|
hwState_[11] = 0; // one extra reserved field in the argument
|
|
} else {
|
|
Pal::ExternalImageOpenInfo imgOpenInfo = {};
|
|
Pal::ImageCreateInfo imgCreateInfo = {};
|
|
imgOpenInfo.resourceInfo = openInfo;
|
|
imgOpenInfo.swizzledFormat.format = format;
|
|
imgOpenInfo.swizzledFormat.swizzle = channels;
|
|
imgOpenInfo.usage.shaderRead = true;
|
|
imgOpenInfo.usage.shaderWrite = true;
|
|
memRef_ = GpuMemoryReference::Create(dev(), imgOpenInfo, &imgCreateInfo, &image_);
|
|
if (nullptr == memRef_) {
|
|
return false;
|
|
}
|
|
|
|
hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
|
|
if ((0 == hwSrd_) && (memoryType() != ImageView)) {
|
|
return false;
|
|
}
|
|
Pal::ImageViewInfo viewInfo = {};
|
|
viewInfo.possibleLayouts.engines = Pal::LayoutComputeEngine | Pal::LayoutDmaEngine;
|
|
viewInfo.possibleLayouts.usages = Pal::LayoutShaderWrite;
|
|
viewInfo.viewType = Pal::ImageViewType::Tex2d;
|
|
switch (imgCreateInfo.imageType) {
|
|
case Pal::ImageType::Tex3d:
|
|
viewInfo.viewType = Pal::ImageViewType::Tex3d;
|
|
break;
|
|
case Pal::ImageType::Tex1d:
|
|
viewInfo.viewType = Pal::ImageViewType::Tex1d;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
viewInfo.pImage = image_;
|
|
viewInfo.swizzledFormat.format = format;
|
|
viewInfo.swizzledFormat.swizzle = channels;
|
|
if ((type == InteropTextureViewLevel) || (type == InteropTextureViewCube)) {
|
|
ImgSubresRange.startSubres.mipLevel = mipLevel;
|
|
if (type == InteropTextureViewCube) {
|
|
ImgSubresRange.startSubres.arraySlice = layer;
|
|
viewInfo.viewType = Pal::ImageViewType::Tex2d;
|
|
}
|
|
}
|
|
if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
|
|
ImgSubresRange.numSlices = desc_.height_;
|
|
}
|
|
if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
|
|
ImgSubresRange.numSlices = desc_.depth_;
|
|
}
|
|
ImgSubresRange.numMips = desc().mipLevels_;
|
|
viewInfo.subresRange = ImgSubresRange;
|
|
|
|
dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
|
|
//! It's a workaround for D24S8 format, since PAL doesn't support this format
|
|
//! and GSL decompresses 24bit DEPTH into D24S8 for OGL compatibility
|
|
if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
|
|
(desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
|
|
if (dev().settings().gfx10Plus_) {
|
|
hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
|
|
} else {
|
|
hwState_[1] &= ~0x3c000000;
|
|
hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
|
|
}
|
|
}
|
|
hwState_[8] = GetHSAILImageFormatType(desc().format_);
|
|
hwState_[9] = GetHSAILImageOrderType(desc().format_);
|
|
hwState_[10] = static_cast<uint32_t>(desc().width_);
|
|
hwState_[11] = 0; // one extra reserved field in the argument
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// ================================================================================================
|
|
// Creates this resource as a peer view of the memory owned by params->svmBase_.
//
// The PAL memory object of params->svmBase_ is passed as the original memory of a
// Pal::PeerGpuMemoryOpenInfo, and the resulting reference becomes memRef_.
// Returns false if the memory reference could not be created.
bool Resource::CreateP2PAccess(CreateParams* params) {
  auto* peerSource = params->svmBase_;

  Pal::PeerGpuMemoryOpenInfo peerInfo = {};
  peerInfo.pOriginalMem = peerSource->iMem();

  memRef_ = GpuMemoryReference::Create(dev(), peerInfo);
  if (memRef_ == nullptr) {
    return false;
  }

  desc_.cardMemory_ = false;
  // Inherit the offset of the resource that is being opened
  offset_ = peerSource->offset();
  return true;
}
|
|
|
|
// ================================================================================================
|
|
// Pins host memory described by params (interpreted as PinnedParams) for GPU access.
//
// Buffers are pinned from the enclosing PinnedMemoryAlignment boundary; the distance from
// that boundary to the real host pointer is stored in offset_. 2D images are accepted only
// if their width is a multiple of 8 texels and their row size a multiple of 64 bytes.
// Any other topology is rejected.
//
// Returns true when the pinned PAL allocation was created, false otherwise.
bool Resource::CreatePinned(CreateParams* params) {
  const PinnedParams* pinnedParams = reinterpret_cast<PinnedParams*>(params);
  size_t pinSize = pinnedParams->size_;
  address_ = pinnedParams->hostMemRef_->hostMem();
  void* pinBase = address_;

  // assert((allocSize == (desc().width_ * elementSize())) && "Sizes don't match");
  switch (desc().topology_) {
    case CL_MEM_OBJECT_BUFFER: {
      // Align the pinned range down to a 4K boundary (Vista/Win7 limitation)
      char* alignedHost = const_cast<char*>(
          amd::alignDown(reinterpret_cast<const char*>(address_), PinnedMemoryAlignment));

      // Number of bytes between the aligned base and the real host pointer
      const uint headBytes =
          static_cast<uint>(reinterpret_cast<const char*>(address_) - alignedHost);

      offset_ = headBytes;
      pinBase = alignedHost;

      // The pinned range has to cover the unaligned head as well
      pinSize += headBytes;
      pinSize = amd::alignUp(pinSize, PinnedMemoryAlignment);
      break;
    }
    case CL_MEM_OBJECT_IMAGE2D: {
      //! @todo: Width has to be aligned for 3D.
      //! Need to be replaced with a compute copy
      // Width must be aligned by 8 texels and the pitch by 64 bytes
      const bool pinnable = ((desc().width_ % 0x8) == 0) &&
          (((desc().width_ * elementSize()) % 0x40) == 0);
      if (!pinnable) {
        return false;
      }
      break;
    }
    default:
      //! @todo GSL doesn't support pinning with resAlloc_
      return false;
  }

  if (dev().settings().svmFineGrainSystem_) {
    desc_.SVMRes_ = true;
  }

  // The address handed to PAL must be page aligned
  const uint64_t pageMask = amd::Os::pageSize() - 1;
  if ((reinterpret_cast<uint64_t>(pinBase) & pageMask) != 0) {
    return false;
  }

  Pal::PinnedGpuMemoryCreateInfo pinInfo = {};
  pinInfo.pSysMem = pinBase;
  pinInfo.size = pinSize;
  pinInfo.vaRange = Pal::VaRange::Default;

  memRef_ = GpuMemoryReference::Create(dev(), pinInfo);
  if (memRef_ == nullptr) {
    LogError("Failed PAL memory allocation!");
    return false;
  }

  desc_.cardMemory_ = false;
  return true;
}
|
|
|
|
// ================================================================================================
|
|
// Allocates the backing memory for an SVM resource.
//
// params - creation parameters; params->svmBase_ supplies the owner of the reserved GPU VA
//          when svmPtr is non-zero, and params->owner_ receives the final SVM pointer
// svmPtr - non-zero when the allocation has to reuse an already reserved GPU VA range
//
// Remote and RemoteUSWC memory types are created through Pal::SvmGpuMemoryCreateInfo,
// all other types through Pal::GpuMemoryCreateInfo in the Svm VA range. The resource cache
// is consulted before a new PAL allocation is made.
// Returns false if no memory reference could be obtained.
bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) {
  const bool fineGrain = (memoryType() == RemoteUSWC) || (memoryType() == Remote);
  const bool useReservedVa = (svmPtr != 0);
  const size_t svmSize = amd::alignUp(desc().width_ * elementSize_,
                                      dev().properties().gpuMemoryProperties.fragmentSize);

  if (fineGrain) {
    Pal::SvmGpuMemoryCreateInfo svmInfo = {};
    svmInfo.isUsedForKernel = desc_.isAllocExecute_;
    svmInfo.size = svmSize;
    svmInfo.alignment = MaxGpuAlignment;
    svmInfo.flags.useReservedGpuVa = useReservedVa;
    svmInfo.pReservedGpuVaOwner = useReservedVa ? params->svmBase_->iMem() : nullptr;

    // The cache is bypassed on fine grain system SVM devices
    if (!dev().settings().svmFineGrainSystem_) {
      memRef_ = dev().resourceCache().findGpuMemory(&desc_, svmInfo.size, svmInfo.alignment,
                                                    svmInfo.pReservedGpuVaOwner, &subOffset_);
    }
    if (memRef_ == nullptr) {
      memRef_ = GpuMemoryReference::Create(dev(), svmInfo);
    }
  } else {
    Pal::GpuMemoryCreateInfo memInfo = {};
    memInfo.size = svmSize;
    memInfo.alignment = MaxGpuAlignment;
    memInfo.vaRange = Pal::VaRange::Svm;
    memInfo.priority = Pal::GpuMemPriority::Normal;
    if (useReservedVa) {
      memInfo.flags.useReservedGpuVa = true;
      memInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
    }
    memTypeToHeap(&memInfo);

    memRef_ = dev().resourceCache().findGpuMemory(&desc_, memInfo.size, memInfo.alignment,
                                                  memInfo.pReservedGpuVaOwner, &subOffset_);
    if (memRef_ == nullptr) {
      // A fresh allocation only requests fragment size alignment
      memInfo.alignment = dev().properties().gpuMemoryProperties.fragmentSize;
      memRef_ = GpuMemoryReference::Create(dev(), memInfo);
    }
  }

  if (memRef_ == nullptr) {
    LogError("Failed PAL memory allocation!");
    return false;
  }
  desc_.cardMemory_ = false;

  // Publish the final GPU virtual address to the owner object, if it tracks an SVM pointer
  const bool ownerTracksSvm = (params != nullptr) && (params->owner_ != nullptr) &&
      (params->owner_->getSvmPtr() != nullptr);
  if (ownerTracksSvm) {
    params->owner_->setSvmPtr(
        reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr + subOffset_));
    offset_ += static_cast<size_t>(subOffset_);
  }
  return true;
}
|
|
|
|
// ================================================================================================
|
|
// Allocates or attaches the GPU memory that backs this resource.
//
// memType     - requested memory type; it can be remapped below depending on device settings
// params      - type specific creation parameters (read as ViewParams for View resources and
//               forwarded to the specialized Create*() helpers for the other types)
// forceLinear - forwarded to CreateImage() for non-buffer resources
//
// Returns true on success, false if the underlying allocation failed.
bool Resource::create(MemoryType memType, CreateParams* params, bool forceLinear) {
  Pal::ChannelMapping channels;
  const Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);

  // Set the initial offset value for any resource to 0.
  // Note: Runtime can call create() more than once, if the initial memory type failed
  offset_ = 0;

  // This is a thread safe operation
  const_cast<Device&>(dev()).initializeHeapResources();

  if (memType == Shader) {
    if (dev().settings().svmFineGrainSystem_) {
      desc_.isAllocExecute_ = true;
      desc_.SVMRes_ = true;
      memType = RemoteUSWC;
    } else {
      memType = Local;
    }
    // force to use remote memory for HW DEBUG or use
    // local memory once we determine if FGS is supported
    // memType = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC;
  }

  // Get the element size
  elementSize_ = Pal::Formats::BytesPerPixel(format);
  desc_.type_ = memType;
  if (memType == Scratch) {
    // use local memory for scratch buffer unless it is using HW DEBUG
    desc_.type_ = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC;
    desc_.scratch_ = true;
  }

  // Force remote allocation if it was requested in the settings
  if (dev().settings().remoteAlloc_ && ((memoryType() == Local) || (memoryType() == Persistent))) {
    if (dev().settings().apuSystem_ && dev().settings().viPlus_) {
      desc_.type_ = Remote;
    } else {
      desc_.type_ = RemoteUSWC;
    }
  }

  if (dev().settings().disablePersistent_ && (memoryType() == Persistent)) {
    desc_.type_ = RemoteUSWC;
  }

  // Memory types that have a dedicated creation path
  switch (memoryType()) {
    case OGLInterop:
    case D3D9Interop:
    case D3D10Interop:
    case D3D11Interop:
      return CreateInterop(params);
    case P2PAccess:
      return CreateP2PAccess(params);
    case Pinned:
      return CreatePinned(params);
    case View: {
      // Save the offset in the global heap
      ViewParams* view = reinterpret_cast<ViewParams*>(params);
      offset_ = view->offset_;

      // Make sure parent was provided
      if (nullptr != view->resource_) {
        viewOwner_ = view->resource_;
        offset_ += viewOwner_->offset();
        if (viewOwner_->data() != nullptr) {
          // The parent is mapped already, so the view shares its CPU pointer
          address_ = viewOwner_->data() + view->offset_;
          mapCount_++;
        }
        // The view shares the parent's memory reference
        memRef_ = viewOwner_->memRef_;
        memRef_->retain();
        desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
      } else {
        desc_.type_ = Empty;
      }
      return true;
    }
    default:
      break;
  }

  if (!desc_.buffer_) {
    return CreateImage(params, forceLinear);
  }

  Pal::gpusize svmPtr = 0;
  if ((nullptr != params) && (nullptr != params->owner_) &&
      (nullptr != params->owner_->getSvmPtr())) {
    svmPtr = reinterpret_cast<Pal::gpusize>(params->owner_->getSvmPtr());
    desc_.SVMRes_ = true;
    // An SVM pointer value of 1 is mapped to 0, i.e. no reserved VA is passed to CreateSvm()
    svmPtr = (svmPtr == 1) ? 0 : svmPtr;
    if (params->owner_->getMemFlags() & CL_MEM_SVM_ATOMICS) {
      desc_.gl2CacheDisabled_ = true;
    }
  }
  if (desc_.SVMRes_) {
    return CreateSvm(params, svmPtr);
  }

  // Generic buffer allocation
  Pal::GpuMemoryCreateInfo createInfo = {};
  createInfo.size = desc().width_ * elementSize_;
  createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
  createInfo.alignment = desc().scratch_ ? 64 * Ki : MaxGpuAlignment;
  createInfo.vaRange = Pal::VaRange::Default;
  createInfo.priority = Pal::GpuMemPriority::Normal;

  if (memoryType() == ExternalPhysical) {
    cl_bus_address_amd bus_address = (reinterpret_cast<amd::Buffer*>(params->owner_))->busAddress();
    createInfo.surfaceBusAddr = bus_address.surface_bus_address;
    createInfo.markerBusAddr = bus_address.marker_bus_address;
    createInfo.flags.sdiExternal = true;
  } else if (memoryType() == BusAddressable) {
    createInfo.flags.busAddressable = true;
  }

  memTypeToHeap(&createInfo);

  // Try to reuse a cached allocation before asking PAL for a new one
  memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment,
                                                nullptr, &subOffset_);
  if (nullptr == memRef_) {
    memRef_ = GpuMemoryReference::Create(dev(), createInfo);
    if (nullptr == memRef_) {
      LogError("Failed PAL memory allocation!");
      return false;
    }
  }
  offset_ += static_cast<size_t>(subOffset_);

  // Check if memory is locked already and restore CPU pointer
  if (memRef_->cpuAddress_ != nullptr) {
    address_ = memRef_->cpuAddress_;
    memRef_->cpuAddress_ = nullptr;
    mapCount_++;
  }
  return true;
}
|
|
|
|
// ================================================================================================
|
|
void Resource::free() {
|
|
if (memRef_ == nullptr) {
|
|
return;
|
|
}
|
|
|
|
const bool wait =
|
|
(memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);
|
|
|
|
// OCL has to wait, even if resource is placed in the cache, since reallocation can occur
|
|
// and resource can be reused on another async queue without a wait on a busy operation
|
|
if (wait) {
|
|
if (memRef_->gpu_ == nullptr) {
|
|
Device::ScopedLockVgpus lock(dev());
|
|
// Release all memory objects on all virtual GPUs
|
|
for (uint idx = 1; idx < dev().vgpus().size(); ++idx) {
|
|
dev().vgpus()[idx]->waitForEvent(&events_[idx]);
|
|
}
|
|
} else {
|
|
amd::ScopedLock l(memRef_->gpu_->execution());
|
|
memRef_->gpu_->waitForEvent(&events_[memRef_->gpu_->index()]);
|
|
}
|
|
} else {
|
|
// After a view destruction the original object is no longer can be associated with a vgpu
|
|
memRef_->gpu_ = nullptr;
|
|
}
|
|
|
|
// Destroy PAL resource
|
|
if (iMem() != 0) {
|
|
if (mapCount_ != 0 && wait) {
|
|
if ((memoryType() != Remote) && (memoryType() != RemoteUSWC)) {
|
|
//! @note: This is a workaround for bad applications that don't unmap memory
|
|
unmap(nullptr);
|
|
} else {
|
|
// Delay CPU address unmap until memRef_ destruction
|
|
if (!desc_.SVMRes_) {
|
|
assert(memRef_->cpuAddress_ == nullptr && "Memref shouldn't have a valid CPU address");
|
|
memRef_->cpuAddress_ = address_;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Add resource to the cache
|
|
if (!dev().resourceCache().addGpuMemory(&desc_, memRef_, subOffset_)) {
|
|
// Free PAL resource
|
|
palFree();
|
|
}
|
|
}
|
|
|
|
// Free SRD for images
|
|
if (!desc().buffer_) {
|
|
dev().srds().freeSrdSlot(hwSrd_);
|
|
}
|
|
|
|
memRef_ = nullptr;
|
|
}
|
|
|
|
// ================================================================================================
|
|
// Writes size bytes from data into this resource at the given offset (relative to the
// resource's own offset_) with a CmdUpdateMemory command on the main engine.
//
// size must be a multiple of 4 bytes (asserted). When waitForEvent is true the call blocks
// until the transfer is complete; otherwise the resource is marked busy with the transfer
// event and the event becomes the global event of the virtual GPU.
void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const void* data,
                            bool waitForEvent) const {
  // size needs to be DWORD aligned
  assert((size & 3) == 0);

  GpuEvent transferEvent;
  const uint32_t* dwords = reinterpret_cast<const uint32_t*>(data);

  gpu.eventBegin(MainEngine);
  gpu.queue(MainEngine).addCmdMemRef(memRef());
  gpu.iCmd()->CmdUpdateMemory(*iMem(), offset_ + offset, size, dwords);
  gpu.eventEnd(MainEngine, transferEvent);

  if (!waitForEvent) {
    setBusy(gpu, transferEvent);
    // Update the global GPU event
    gpu.setGpuEvent(transferEvent, false);
    return;
  }

  //! @note: We don't really have to mark the allocations as busy
  //! if we are waiting for a transfer
  gpu.waitForEvent(&transferEvent);
}
|
|
|
|
// ================================================================================================
|
|
// Selects an unsigned integer PAL format whose size matches bytesPerElement.
// 16, 8, 4 and 2 bytes map to 4x32, 2x32, 1x32 and 1x16 bit formats; any other
// value falls back to the 8 bit format.
static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement) {
  switch (bytesPerElement) {
    case 16:
      return Pal::ChNumFormat::X32Y32Z32W32_Uint;
    case 8:
      return Pal::ChNumFormat::X32Y32_Uint;
    case 4:
      return Pal::ChNumFormat::X32_Uint;
    case 2:
      return Pal::ChNumFormat::X16_Uint;
    default:
      return Pal::ChNumFormat::X8_Uint;
  }
}
|
|
|
|
// ================================================================================================
|
|
// Copies a region of this resource into dstResource with a DMA transfer.
//
// gpu             - virtual GPU that records the copy
// srcOrigin       - source origin; for linear memory [0] is the byte offset and [1]/[2] are
//                   the row/depth pitch (0 selects a tightly packed pitch)
// dstOrigin       - destination origin, same convention as srcOrigin
// size            - extent of the copy; for buffer to buffer copies size[0] is in bytes
// dstResource     - destination of the copy
// enableCopyRect  - buffer to buffer only: record a typed (rectangular) buffer copy
// flushDMA        - passed to VirtualGPU::setGpuEvent() for the resulting event
// bytesPerElement - element size used by the typed buffer copy
//
// The SDMA engine is used unless the disableSdma_ setting is on, in which case the copy is
// recorded on the current engine between two barriers and split into chunks that do not
// exceed the CP DMA size limit.
//
// Returns false, without recording anything, if a buffer/image transfer doesn't satisfy the
// alignment restrictions of the DMA engine; true otherwise.
bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
                                const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
                                Resource& dstResource, bool enableCopyRect, bool flushDMA,
                                uint bytesPerElement) const {
  GpuEvent event;
  const EngineType activeEngineID = gpu.engineID_;
  static const bool waitOnBusyEngine = true;
  assert(!(desc().cardMemory_ && dstResource.desc().cardMemory_) && "Unsupported configuraiton!");

  const bool bufferToImage = desc().buffer_ && !dstResource.desc().buffer_;
  const bool imageToBuffer = !desc().buffer_ && dstResource.desc().buffer_;
  const bool imageTransfer = bufferToImage || imageToBuffer;

  // For buffer <-> image transfers pick which side is the image and which is linear memory.
  // The aliases are not used for buffer to buffer copies.
  const Resource& imageRes = bufferToImage ? static_cast<const Resource&>(dstResource) : *this;
  const Resource& linearRes = bufferToImage ? *this : static_cast<const Resource&>(dstResource);
  const amd::Coord3D& imageOrigin = bufferToImage ? dstOrigin : srcOrigin;
  const amd::Coord3D& linearOrigin = bufferToImage ? srcOrigin : dstOrigin;

  uint64_t gpuMemoryOffset = 0;
  uint64_t gpuMemoryRowPitch = 0;
  bool img1Darray = false;
  bool img2Darray = false;

  if (imageTransfer) {
    const uint64_t imageOffsetx = imageOrigin[0] % imageRes.elementSize();
    gpuMemoryOffset = linearOrigin[0] + linearRes.offset();
    gpuMemoryRowPitch =
        (linearOrigin[1]) ? linearOrigin[1] : size[0] * imageRes.elementSize();
    img1Darray = (imageRes.desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
    img2Darray = (imageRes.desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);

    // sDMA cannot be used for the below conditions
    // Make sure linear pitch in bytes is 4 bytes aligned
    if (((gpuMemoryRowPitch % 4) != 0) ||
        // another DRM restriction... SI has 4 pixels
        (gpuMemoryOffset % 4 != 0) ||
        (dev().settings().sdamPageFaultWar_ && (imageOffsetx != 0))) {
      return false;
    }
  }

  if (dev().settings().disableSdma_) {
    // Make sure compute is done before CP DMA start
    gpu.addBarrier(RgpSqqtBarrierReason::MemDependency);
  } else {
    gpu.engineID_ = SdmaEngine;
  }

  // Wait for the resources, since runtime may use async transfers
  wait(gpu, waitOnBusyEngine);
  dstResource.wait(gpu, waitOnBusyEngine);

  if (gpu.validateSdmaOverlap(*this, dstResource)) {
    // Note: PAL should insert a NOP into the command buffer for synchronization
    gpu.addBarrier();
  }

  Pal::ImageLayout imgLayout = {};
  gpu.eventBegin(gpu.engineID_);
  gpu.queue(gpu.engineID_).addCmdMemRef(memRef());
  gpu.queue(gpu.engineID_).addCmdMemRef(dstResource.memRef());

  if (imageTransfer) {
    Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, imageRes.desc().baseLevel_, 0};
    Pal::MemoryImageCopyRegion copyRegion = {};
    copyRegion.imageSubres = ImgSubresId;
    copyRegion.imageOffset.x = imageOrigin[0];
    copyRegion.imageOffset.y = imageOrigin[1];
    copyRegion.imageOffset.z = imageOrigin[2];
    copyRegion.imageExtent.width = size[0];
    copyRegion.imageExtent.height = size[1];
    copyRegion.imageExtent.depth = size[2];
    copyRegion.numSlices = 1;
    // Image arrays express their layers as slices instead of a height/depth extent
    if (img1Darray) {
      copyRegion.numSlices = copyRegion.imageExtent.height;
      copyRegion.imageExtent.height = 1;
    } else if (img2Darray) {
      copyRegion.numSlices = copyRegion.imageExtent.depth;
      copyRegion.imageExtent.depth = 1;
    }
    copyRegion.gpuMemoryOffset = gpuMemoryOffset;
    copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
    copyRegion.gpuMemoryDepthPitch = (linearOrigin[2])
        ? linearOrigin[2]
        : copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;

    if (bufferToImage) {
      gpu.iCmd()->CmdCopyMemoryToImage(*iMem(), *dstResource.image_, imgLayout, 1, &copyRegion);
    } else {
      gpu.iCmd()->CmdCopyImageToMemory(*image_, imgLayout, *dstResource.iMem(), 1, &copyRegion);
    }
  } else if (enableCopyRect) {
    Pal::TypedBufferCopyRegion copyRegion = {};
    Pal::ChannelMapping channels = {Pal::ChannelSwizzle::X, Pal::ChannelSwizzle::Y,
                                    Pal::ChannelSwizzle::Z, Pal::ChannelSwizzle::W};
    copyRegion.srcBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
    copyRegion.srcBuffer.swizzledFormat.swizzle = channels;
    copyRegion.srcBuffer.offset = srcOrigin[0] + offset();
    copyRegion.srcBuffer.rowPitch = srcOrigin[1];
    copyRegion.srcBuffer.depthPitch = srcOrigin[2];
    // The typed copy counts its width in elements, not in bytes
    copyRegion.extent.width = size[0] / bytesPerElement;
    copyRegion.extent.height = size[1];
    copyRegion.extent.depth = size[2];
    copyRegion.dstBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
    copyRegion.dstBuffer.swizzledFormat.swizzle = channels;
    copyRegion.dstBuffer.offset = dstOrigin[0] + dstResource.offset();
    copyRegion.dstBuffer.rowPitch = dstOrigin[1];
    copyRegion.dstBuffer.depthPitch = dstOrigin[2];
    gpu.iCmd()->CmdCopyTypedBuffer(*iMem(), *dstResource.iMem(), 1, &copyRegion);
  } else {
    Pal::MemoryCopyRegion copyRegion = {};
    copyRegion.srcOffset = srcOrigin[0] + offset();
    copyRegion.dstOffset = dstOrigin[0] + dstResource.offset();
    copyRegion.copySize = size[0];

    constexpr size_t CpCopySizeLimit = (1 << 26) - sizeof(uint64_t);
    if (dev().settings().disableSdma_ && (size[0] > CpCopySizeLimit)) {
      // Make sure a single transfer doesn't exceed the CP DMA limit:
      // issue full sized chunks followed by the remainder
      size_t remaining = size[0];
      while (remaining > 0) {
        const size_t chunk = (remaining > CpCopySizeLimit) ? CpCopySizeLimit : remaining;
        copyRegion.copySize = chunk;
        gpu.iCmd()->CmdCopyMemory(*iMem(), *dstResource.iMem(), 1, &copyRegion);
        copyRegion.srcOffset += chunk;
        copyRegion.dstOffset += chunk;
        remaining -= chunk;
      }
    } else {
      gpu.iCmd()->CmdCopyMemory(*iMem(), *dstResource.iMem(), 1, &copyRegion);
    }
  }

  if (dev().settings().disableSdma_) {
    // Make sure CP dma is done
    gpu.addBarrier(RgpSqqtBarrierReason::MemDependency);
  }

  gpu.eventEnd(gpu.engineID_, event);

  // Mark source and destination as busy
  setBusy(gpu, event);
  dstResource.setBusy(gpu, event);

  // Update the global GPU event
  gpu.setGpuEvent(event, flushDMA);

  // Restore the original engine
  gpu.engineID_ = activeEngineID;

  return true;
}
|
|
|
|
// ================================================================================================
|
|
// Records gpuEvent as the latest event of this resource on the given virtual GPU and
// propagates it to the owner chain when this resource is a view.
void Resource::setBusy(VirtualGPU& gpu, GpuEvent gpuEvent) const {
  addGpuEvent(gpu, gpuEvent);

  if (viewOwner_ == nullptr) {
    return;
  }
  // A busy view makes its parent busy as well
  viewOwner_->setBusy(gpu, gpuEvent);
}
|
|
|
|
// ================================================================================================
|
|
// Waits for the event recorded for this resource on the given virtual GPU.
//
// With waitOnBusyEngine == false the wait is unconditional. Otherwise the wait is issued only
// if the recorded event belongs to an engine other than the GPU's current one.
// The same rule is then applied to the owner if this resource is a view.
void Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const {
  GpuEvent* lastEvent = getGpuEvent(gpu);

  const bool needsWait = !waitOnBusyEngine || (lastEvent->engineId_ != gpu.engineID_);
  if (needsWait) {
    gpu.waitForEvent(lastEvent);
  }

  // A view has to wait for the parent event as well
  if (viewOwner_ == nullptr) {
    return;
  }
  viewOwner_->wait(gpu, waitOnBusyEngine);
}
|
|
|
|
// ================================================================================================
|
|
// Copies data from host memory into this resource through a CPU mapping.
//
// gpu        - virtual GPU passed to map()/unmap()
// hostPtr    - source data in host memory
// origin     - destination origin inside the resource
// size       - extent of the region to write
// flags      - map flags forwarded to map()
// rowPitch   - host row pitch in bytes, 0 for tightly packed rows
// slicePitch - host slice pitch in bytes, 0 for tightly packed slices
//
// Returns false if the resource couldn't be mapped.
bool Resource::hostWrite(VirtualGPU* gpu, const void* hostPtr, const amd::Coord3D& origin,
                         const amd::Coord3D& size, uint flags, size_t rowPitch, size_t slicePitch) {
  // 1D image arrays describe their layers with the Y component, everything else with Z
  const bool layersInY = (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
  const size_t firstLayer = layersInY ? origin[1] : origin[2];
  const size_t layerCount = layersInY ? size[1] : size[2];

  // Get physical GPU memory
  void* const mapped = map(gpu, flags, firstLayer, layerCount);
  if (mapped == nullptr) {
    LogError("Couldn't map GPU memory for host write");
    return false;
  }

  if (desc().dimSize_ == 1) {
    // Buffers pass the size in bytes, other resources in elements
    const size_t copySize = desc().buffer_ ? size[0] : size[0] * elementSize_;

    // NOTE(review): origin[0] is applied as a byte offset even when the resource is not a
    // buffer - confirm callers never pass a non-zero element origin here
    amd::Os::fastMemcpy(static_cast<char*>(mapped) + origin[0], hostPtr, copySize);
  } else {
    const size_t lineBytes = size[0] * elementSize_;

    // Fall back to tightly packed host pitches if they were not specified
    if (rowPitch == 0) {
      rowPitch = lineBytes;
    }
    if (slicePitch == 0) {
      slicePitch = size[0] * size[1] * elementSize_;
    }

    const address dstBytes = reinterpret_cast<address>(mapped);
    const const_address srcBytes = reinterpret_cast<const_address>(hostPtr);

    // Byte offset of the first destination texel: X, then Y and Z contributions
    size_t dstBase = origin[0] * elementSize_;
    dstBase += desc().pitch_ * origin[1] * elementSize_;
    dstBase += desc().slice_ * origin[2] * elementSize_;

    // Copy slice by slice, line by line
    for (size_t z = 0; z < size[2]; ++z) {
      size_t dstOffs = dstBase + z * desc().slice_ * elementSize_;
      size_t srcOffs = z * slicePitch;

      for (size_t y = 0; y < size[1]; ++y) {
        amd::Os::fastMemcpy(dstBytes + dstOffs, srcBytes + srcOffs, lineBytes);
        dstOffs += desc().pitch_ * elementSize_;
        srcOffs += rowPitch;
      }
    }
  }

  // Unmap GPU memory
  unmap(gpu);

  return true;
}
|
|
|
|
// ================================================================================================
|
|
// Copies data from this resource into host memory through a read-only CPU mapping.
//
// gpu        - virtual GPU passed to map()/unmap()
// hostPtr    - destination in host memory
// origin     - source origin inside the resource
// size       - extent of the region to read
// rowPitch   - host row pitch in bytes, 0 for tightly packed rows
// slicePitch - host slice pitch in bytes, 0 for tightly packed slices
//
// Returns false if the resource couldn't be mapped.
bool Resource::hostRead(VirtualGPU* gpu, void* hostPtr, const amd::Coord3D& origin,
                        const amd::Coord3D& size, size_t rowPitch, size_t slicePitch) {
  // 1D image arrays describe their layers with the Y component, everything else with Z
  const bool layersInY = (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
  const size_t firstLayer = layersInY ? origin[1] : origin[2];
  const size_t layerCount = layersInY ? size[1] : size[2];

  // Get physical GPU memory
  void* const mapped = map(gpu, ReadOnly, firstLayer, layerCount);
  if (mapped == nullptr) {
    LogError("Couldn't map GPU memory for host read");
    return false;
  }

  if (desc().dimSize_ == 1) {
    // Buffers pass the size in bytes, other resources in elements
    const size_t copySize = desc().buffer_ ? size[0] : size[0] * elementSize_;

    // NOTE(review): origin[0] is applied as a byte offset even when the resource is not a
    // buffer - confirm callers never pass a non-zero element origin here
    amd::Os::fastMemcpy(hostPtr, static_cast<char*>(mapped) + origin[0], copySize);
  } else {
    const size_t lineBytes = size[0] * elementSize_;

    // Fall back to tightly packed host pitches if they were not specified
    if (rowPitch == 0) {
      rowPitch = lineBytes;
    }
    if (slicePitch == 0) {
      slicePitch = size[0] * size[1] * elementSize_;
    }

    const address dstBytes = reinterpret_cast<address>(hostPtr);
    const const_address srcBytes = reinterpret_cast<const_address>(mapped);

    // Byte offset of the first source texel: X, then Y and Z contributions
    size_t srcBase = origin[0] * elementSize_;
    srcBase += desc().pitch_ * origin[1] * elementSize_;
    srcBase += desc().slice_ * origin[2] * elementSize_;

    // Copy slice by slice, line by line
    for (size_t z = 0; z < size[2]; ++z) {
      size_t srcOffs = srcBase + z * desc().slice_ * elementSize_;
      size_t dstOffs = z * slicePitch;

      for (size_t y = 0; y < size[1]; ++y) {
        amd::Os::fastMemcpy(dstBytes + dstOffs, srcBytes + srcOffs, lineBytes);
        srcOffs += desc().pitch_ * elementSize_;
        dstOffs += rowPitch;
      }
    }
  }

  // Unmap GPU memory
  unmap(gpu);

  return true;
}
|
|
|
|
// ================================================================================================
|
|
void* Resource::gpuMemoryMap(size_t* pitch, uint flags, Pal::IGpuMemory* resource) const {
|
|
if (desc_.cardMemory_ && !isPersistentDirectMap()) {
|
|
// @todo remove const cast
|
|
Unimplemented();
|
|
return nullptr;
|
|
// return const_cast<Device&>(dev()).resMapLocal(*pitch, resource, flags);
|
|
} else {
|
|
amd::ScopedLock lk(dev().lockPAL());
|
|
void* address;
|
|
if (image_ != nullptr) {
|
|
constexpr Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, 0, 0};
|
|
Pal::SubresLayout layout;
|
|
image_->GetSubresourceLayout(ImgSubresId, &layout);
|
|
*pitch = layout.rowPitch / elementSize();
|
|
}
|
|
*pitch = desc().width_;
|
|
if (Pal::Result::Success == resource->Map(&address)) {
|
|
return address;
|
|
} else {
|
|
LogError("PAL GpuMemory->Map() failed!");
|
|
return nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
// ================================================================================================
|
|
//! Releases the CPU mapping of a PAL GPU memory object.
//! Card memory that isn't a persistent direct map has no implementation yet.
//! A failed PAL unmap is only logged.
void Resource::gpuMemoryUnmap(Pal::IGpuMemory* resource) const {
  const bool localCardMemory = desc_.cardMemory_ && !isPersistentDirectMap();
  if (localCardMemory) {
    // @todo remove const cast
    Unimplemented();
    // const_cast<Device&>(dev()).resUnmapLocal(resource);
    return;
  }

  if (resource->Unmap() != Pal::Result::Success) {
    LogError("PAL GpuMemory->Unmap() failed!");
  }
}
|
|
|
|
// ================================================================================================
|
|
bool Resource::glAcquire() {
|
|
bool retVal = true;
|
|
if (desc().type_ == OGLInterop) {
|
|
retVal = dev().resGLAcquire(glPlatformContext_, glInteropMbRes_, glType_);
|
|
}
|
|
return retVal;
|
|
}
|
|
|
|
// ================================================================================================
|
|
bool Resource::glRelease() {
|
|
bool retVal = true;
|
|
if (desc().type_ == OGLInterop) {
|
|
retVal = dev().resGLRelease(glPlatformContext_, glInteropMbRes_, glType_);
|
|
}
|
|
return retVal;
|
|
}
|
|
|
|
// ================================================================================================
|
|
void Resource::addGpuEvent(const VirtualGPU& gpu, GpuEvent event) const {
|
|
uint idx = gpu.index();
|
|
assert(idx < events_.size());
|
|
events_[idx] = event;
|
|
}
|
|
|
|
// ================================================================================================
|
|
//! Returns a pointer to the GPU event slot that belongs to the given virtual GPU.
//! The slot is selected with gpu.index() and must be inside events_.
GpuEvent* Resource::getGpuEvent(const VirtualGPU& gpu) const {
  const uint queueIdx = gpu.index();
  assert((queueIdx < events_.size()) && "Undeclared queue access!");
  return &events_[queueIdx];
}
|
|
|
|
// ================================================================================================
|
|
//! Updates the "modified" state of this resource for the given virtual GPU.
//! When the resource is a view (viewOwner_ is set) the same state is pushed to the owner.
void Resource::setModified(VirtualGPU& gpu, bool modified) const {
  const uint queueIdx = gpu.index();
  assert(queueIdx < events_.size());
  events_[queueIdx].modified_ = modified;

  // Nothing else to do unless this resource is a view of another one
  if (viewOwner_ == nullptr) {
    return;
  }
  viewOwner_->setModified(gpu, modified);
}
|
|
|
|
// ================================================================================================
|
|
//! Reports the "modified" state of this resource for the given virtual GPU.
//! For a view (viewOwner_ is set) the owner's state is combined in as well.
bool Resource::isModified(VirtualGPU& gpu) const {
  const uint queueIdx = gpu.index();
  assert(queueIdx < events_.size());

  const bool ownState = events_[queueIdx].modified_;
  if (viewOwner_ == nullptr) {
    return ownState;
  }

  // The owner is always queried, even if this view is already marked as modified
  const bool ownerState = viewOwner_->isModified(gpu);
  return ownState || ownerState;
}
|
|
|
|
// ================================================================================================
|
|
void Resource::palFree() const {
|
|
if (desc().type_ == OGLInterop) {
|
|
amd::ScopedLock lk(dev().lockPAL());
|
|
dev().resGLFree(glPlatformContext_, glInteropMbRes_, glType_);
|
|
}
|
|
memRef_->release();
|
|
}
|
|
|
|
// ================================================================================================
|
|
//! Checks if this resource has the requested memory type.
//! Views forward the question to their owner when their own type doesn't match.
bool Resource::isMemoryType(MemoryType memType) const {
  // Direct match
  if (memoryType() == memType) {
    return true;
  }

  // Only views can inherit the type from another resource
  if (memoryType() != View) {
    return false;
  }

  return viewOwner_->isMemoryType(memType);
}
|
|
|
|
// ================================================================================================
|
|
bool Resource::isPersistentDirectMap() const {
|
|
bool directMap =
|
|
((memoryType() == Resource::Persistent) && (desc().dimSize_ < 3) && !desc().imageArray_);
|
|
|
|
// If direct map is possible, then validate it with the current tiling
|
|
if (directMap && desc().tiled_) {
|
|
//!@note IOL for Linux doesn't support tiling aperture
|
|
// and runtime doesn't force linear images in persistent
|
|
directMap = IS_WINDOWS && !dev().settings().linearPersistentImage_;
|
|
}
|
|
|
|
return directMap;
|
|
}
|
|
|
|
// ================================================================================================
|
|
//! Maps the resource for CPU access and returns the CPU address.
//!
//! \param gpu        virtual GPU to wait on before the access (may be nullptr: no wait)
//! \param flags      map flags; NoWait skips the wait. ReadOnly/WriteOnly currently have no effect.
//! \param startLayer first layer for a multilayer map
//! \param numLayers  number of layers for a multilayer map
//! \return CPU address of the mapping or nullptr if the first map failed
//!
//! Pinned memory returns address_ directly and doesn't take part in map counting.
//! Otherwise the real map is done only when the map counter goes from 0 to 1.
void* Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers) {
  const bool mustWait = !(flags & NoWait) && (gpu != nullptr);

  if (isMemoryType(Pinned)) {
    // Check if we have to wait
    if (mustWait) {
      wait(*gpu);
    }
    return address_;
  }

  // Check if we have to wait
  if (mustWait) {
    wait(*gpu);
  }

  // Check if memory wasn't mapped yet
  if (++mapCount_ == 1) {
    if ((desc().dimSize_ == 3) || desc().imageArray_ ||
        ((desc().type_ == ImageView) && viewOwner_->mipMapped())) {
      // Save map info for multilayer map/unmap
      startLayer_ = startLayer;
      numLayers_ = numLayers;
      mapFlags_ = flags;
      // Map with layers
      address_ = mapLayers(gpu, flags);
    } else {
      // Map current resource
      if (memRef_->cpuAddress_ != nullptr) {
        // Suballocations are mapped by the memory suballocator
        address_ = reinterpret_cast<uint8_t*>(memRef_->cpuAddress_) + subOffset_;
      } else {
        address_ = gpuMemoryMap(&desc_.pitch_, flags, iMem());
        // Apply the offset only to a valid mapping. Adding it to nullptr would hide
        // the failure from the check below whenever offset_ isn't zero.
        if (address_ != nullptr) {
          address_ = reinterpret_cast<address>(address_) + offset_;
        }
      }
      if (address_ == nullptr) {
        LogError("cal::ResMap failed!");
        --mapCount_;
        return nullptr;
      }
    }
  }

  //! \note the atomic operation with counter doesn't
  // guarantee that the address will be valid,
  // since PAL could still process the first map
  if (address_ == nullptr) {
    for (uint i = 0; address_ == nullptr && i < 10; ++i) {
      amd::Os::sleep(1);
    }
    assert((address_ != nullptr) && "Multiple maps failed!");
  }

  return address_;
}
|
|
|
|
// ================================================================================================
|
|
//! Multilayer map. There is no implementation in this backend yet,
//! so the call reports Unimplemented() and hands back a null address.
void* Resource::mapLayers(VirtualGPU* gpu, uint flags) {
  Unimplemented();
  return nullptr;
}
|
|
|
|
// ================================================================================================
|
|
//! Unmaps the resource after CPU access.
//!
//! \param gpu virtual GPU, forwarded to the multilayer unmap
//!
//! Pinned memory is never unmapped. Otherwise the map counter is decremented and the real
//! unmap happens only when it reaches zero. An unbalanced unmap (negative counter) is logged
//! and the counter is restored.
void Resource::unmap(VirtualGPU* gpu) {
  if (isMemoryType(Pinned)) {
    return;
  }

  // Decrement map counter
  const int count = --mapCount_;

  if (count < 0) {
    LogError("dev().serialCalResUnmap failed!");
    ++mapCount_;
    return;
  }

  // Check if it's the last unmap
  if (count != 0) {
    return;
  }

  if ((desc().dimSize_ == 3) || desc().imageArray_ ||
      ((desc().type_ == ImageView) && viewOwner_->mipMapped())) {
    // Unmap layers
    unmapLayers(gpu);
  } else if (memRef_->cpuAddress_ == nullptr) {
    // Unmap current resource.
    // map() issues a PAL map only when the memory reference has no CPU address of its own.
    // Memory with cpuAddress_ set was mapped by the suballocator, so it must stay mapped here.
    gpuMemoryUnmap(iMem());
  }
  address_ = nullptr;
}
|
|
|
|
// ================================================================================================
|
|
//! Multilayer unmap. There is no implementation in this backend yet.
void Resource::unmapLayers(VirtualGPU* gpu) {
  Unimplemented();
}
|
|
|
|
// ================================================================================================
|
|
//! Creates a buddy allocator for a freshly created memory chunk and registers the pair in heaps_.
//!
//! \param mem_ref memory chunk that will back the suballocations
//! \return true on success. On failure the chunk reference is released, the allocator
//!         is destroyed and false is returned.
bool MemorySubAllocator::InitAllocator(GpuMemoryReference* mem_ref) {
  MemBuddyAllocator* buddy =
      new MemBuddyAllocator(device_, device_->settings().subAllocationChunkSize_,
                            device_->settings().subAllocationMinSize_);

  // Each step runs only if the previous one succeeded
  const bool registered = (buddy != nullptr) && (buddy->Init() == Pal::Result::Success) &&
      heaps_.insert({mem_ref, buddy}).second;
  if (registered) {
    return true;
  }

  mem_ref->release();
  delete buddy;
  return false;
}
|
|
|
|
// ================================================================================================
|
|
//! Allocates a new memory chunk for suballocations and attaches a buddy allocator to it.
//! The chunk has the size of subAllocationChunkSize_, is aligned to the GPU fragment size and
//! is placed in the invisible heap with the default VA range.
//!
//! \param reserved_va not used by this allocator
//! \return true if the chunk was created and registered
bool MemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
  Pal::GpuMemoryCreateInfo createInfo = {};
  createInfo.size = device_->settings().subAllocationChunkSize_;
  createInfo.alignment = device_->properties().gpuMemoryProperties.fragmentSize;
  createInfo.vaRange = Pal::VaRange::Default;
  createInfo.priority = Pal::GpuMemPriority::Normal;
  createInfo.heapCount = 1;
  createInfo.heaps[0] = Pal::GpuHeapInvisible;
  createInfo.flags.peerWritable = device_->P2PAccessAllowed();
  // NOTE(review): the original line ended with unparseable residue mentioning `mallPolicy_`.
  // If a MALL policy assignment was intended here, restore it from upstream - TODO confirm.

  GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
  if (mem_ref == nullptr) {
    return false;
  }
  return InitAllocator(mem_ref);
}
|
|
|
|
// ================================================================================================
|
|
//! Allocates a new memory chunk in the SVM VA range for suballocations and attaches a buddy
//! allocator to it. The chunk prefers the invisible heap and falls back to the local heap.
//!
//! \param reserved_va optional owner of a reserved GPU VA; when set, the chunk is created
//!                    with the useReservedGpuVa flag and this owner
//! \return true if the chunk was created and registered
bool CoarseMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
  Pal::GpuMemoryCreateInfo createInfo = {};
  createInfo.size = device_->settings().subAllocationChunkSize_;
  createInfo.alignment = device_->properties().gpuMemoryProperties.fragmentSize;
  createInfo.vaRange = Pal::VaRange::Svm;
  createInfo.priority = Pal::GpuMemPriority::Normal;
  createInfo.flags.useReservedGpuVa = (reserved_va != nullptr);
  createInfo.pReservedGpuVaOwner = reserved_va;
  createInfo.heapCount = 2;
  createInfo.heaps[0] = Pal::GpuHeapInvisible;
  createInfo.heaps[1] = Pal::GpuHeapLocal;
  // NOTE(review): the original line ended with unparseable residue mentioning `mallPolicy_`.
  // If a MALL policy assignment was intended here, restore it from upstream - TODO confirm.

  GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
  if (mem_ref == nullptr) {
    return false;
  }
  return InitAllocator(mem_ref);
}
|
|
|
|
// ================================================================================================
|
|
//! Allocates a new SVM memory chunk for suballocations, attaches a buddy allocator to it
//! and maps the whole chunk for CPU access.
//!
//! \param reserved_va optional owner of a reserved GPU VA
//! \return true only if the chunk was created, registered and has a CPU address after the map
bool FineMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
  Pal::SvmGpuMemoryCreateInfo createInfo = {};
  createInfo.size = device_->settings().subAllocationChunkSize_;
  createInfo.alignment = MaxGpuAlignment;
  createInfo.isUsedForKernel = false;
  createInfo.pReservedGpuVaOwner = reserved_va;
  createInfo.flags.useReservedGpuVa = (reserved_va != nullptr);

  GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
  if (mem_ref == nullptr) {
    return false;
  }
  if (!InitAllocator(mem_ref)) {
    return false;
  }

  // The result of the map is judged by the CPU address it produces
  mem_ref->iMem()->Map(&mem_ref->cpuAddress_);
  return mem_ref->cpuAddress_ != nullptr;
}
|
|
|
|
// ================================================================================================
|
|
//! Allocates a new SVM memory chunk with the gl2Uncached flag for suballocations, attaches
//! a buddy allocator to it and maps the whole chunk for CPU access.
//!
//! \param reserved_va optional owner of a reserved GPU VA
//! \return true only if the chunk was created, registered and has a CPU address after the map
bool FineUncachedMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
  Pal::SvmGpuMemoryCreateInfo createInfo = {};
  createInfo.size = device_->settings().subAllocationChunkSize_;
  createInfo.alignment = MaxGpuAlignment;
  createInfo.isUsedForKernel = false;
  createInfo.pReservedGpuVaOwner = reserved_va;
  createInfo.flags.useReservedGpuVa = (reserved_va != nullptr);
  createInfo.flags.gl2Uncached = true;

  GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
  if (mem_ref == nullptr) {
    return false;
  }
  if (!InitAllocator(mem_ref)) {
    return false;
  }

  // The result of the map is judged by the CPU address it produces
  mem_ref->iMem()->Map(&mem_ref->cpuAddress_);
  return mem_ref->cpuAddress_ != nullptr;
}
|
|
|
|
// ================================================================================================
|
|
//! Releases every remaining memory chunk and destroys its buddy allocator.
MemorySubAllocator::~MemorySubAllocator() {
  for (const auto& heap : heaps_) {
    GpuMemoryReference* chunk = heap.first;
    chunk->release();
    delete heap.second;
  }
}
|
|
|
|
// ================================================================================================
|
|
//! Suballocates memory from one of the existing chunks, creating a new chunk if none fits.
//!
//! \param size        requested size; rounded up to subAllocationMinSize_
//! \param alignment   requested alignment; must not exceed the GPU fragment size
//! \param reserved_va optional reserved VA; when set, only the chunk with the same GPU
//!                    virtual address is considered
//! \param offset      [out] offset of the suballocation inside the returned chunk
//! \return the chunk that holds the suballocation, or nullptr if the request isn't eligible
//!         for suballocation or couldn't be satisfied
GpuMemoryReference* MemorySubAllocator::Allocate(Pal::gpusize size, Pal::gpusize alignment,
                                                 const Pal::IGpuMemory* reserved_va,
                                                 Pal::gpusize* offset) {
  // Check if the resource size and alignment are allowed for suballocation
  const bool eligible = (size < device_->settings().subAllocationMaxSize_) &&
      (alignment <= device_->properties().gpuMemoryProperties.fragmentSize);
  if (!eligible) {
    return nullptr;
  }

  size = amd::alignUp(size, device_->settings().subAllocationMinSize_);

  // Two rounds: each one scans the known chunks and then adds a fresh chunk
  for (uint attempt = 0; attempt < 2; ++attempt) {
    for (const auto& heap : heaps_) {
      GpuMemoryReference* chunk = heap.first;
      MemBuddyAllocator* buddy = heap.second;

      // SVM allocations may require a fixed VA, so skip chunks that live at another VA
      const bool wrongVa = reserved_va &&
          (reserved_va->Desc().gpuVirtAddr != chunk->iMem()->Desc().gpuVirtAddr);
      if (wrongVa) {
        continue;
      }

      // Take the first chunk that has enough free space
      if (buddy->Allocate(size, alignment, offset) == Pal::Result::Success) {
        return chunk;
      }
    }

    // No chunk could serve the request, so create a new one
    if (!CreateChunk(reserved_va)) {
      return nullptr;
    }
  }

  return nullptr;
}
|
|
|
|
// ================================================================================================
|
|
//! Returns a suballocation to its chunk.
//!
//! \param monitor lock that protects the chunk list
//! \param ref     memory reference the allocation belongs to
//! \param offset  offset of the suballocation inside the chunk
//! \return false if ref isn't one of the chunks of this suballocator, true otherwise
//!
//! A chunk that becomes empty is removed from the list under the lock;
//! its memory reference is released after the lock is dropped.
bool MemorySubAllocator::Free(amd::Monitor* monitor, GpuMemoryReference* ref, Pal::gpusize offset) {
  bool chunkDrained = false;
  {
    amd::ScopedLock l(monitor);

    // Find if current memory reference is a chunk allocation
    auto heap = heaps_.find(ref);
    if (heap == heaps_.end()) {
      return false;
    }

    auto* buddy = heap->second;
    buddy->Free(offset);

    // Retire the chunk when nothing is allocated from it anymore
    if (buddy->IsEmpty()) {
      delete buddy;
      heaps_.erase(heap);
      chunkDrained = true;
    }
  }

  if (chunkDrained) {
    ref->release();
  }
  return true;
}
|
|
|
|
// ================================================================================================
|
|
//! Empties the cache on destruction.
ResourceCache::~ResourceCache() {
  free();
}
|
|
|
|
// ================================================================================================
|
|
//! \note the cache works in FILO mode
|
|
//! Hands GPU memory back to the runtime: either to a suballocator or to the resource cache.
//!
//! \param desc   descriptor of the resource that owned the memory
//! \param ref    memory reference to give back
//! \param offset offset of the suballocation inside ref
//! \return true if the memory was taken by a suballocator or by the cache
bool ResourceCache::addGpuMemory(Resource::Descriptor* desc, GpuMemoryReference* ref,
                                 Pal::gpusize offset) {
  // Read the size up front: a suballocator may release the reference below
  const size_t size = ref->iMem()->Desc().size;
  bool result = false;

  // Check if runtime can free suballocation
  if (desc->type_ == Resource::Local) {
    result = desc->SVMRes_ ? mem_sub_alloc_coarse_.Free(&lockCacheOps_, ref, offset)
                           : mem_sub_alloc_local_.Free(&lockCacheOps_, ref, offset);
  } else if (desc->SVMRes_) {
    result = desc->gl2CacheDisabled_
        ? mem_sub_alloc_fine_uncached_.Free(&lockCacheOps_, ref, offset)
        : mem_sub_alloc_fine_.Free(&lockCacheOps_, ref, offset);
  }

  // If a resource was a suballocation, don't try to cache it
  if (result) {
    return result;
  }

  // Only some memory types are cached, and never SVM or anything as big as the cache itself
  const bool cacheableType =
      (desc->type_ == Resource::Local) || (desc->type_ == Resource::Persistent) ||
      (desc->type_ == Resource::Remote) || (desc->type_ == Resource::RemoteUSWC);
  if (!(cacheableType && (size < cacheSizeLimit_) && !desc->SVMRes_)) {
    return result;
  }

  // Evict the oldest entries until the new one fits under the limit
  while ((cacheSize_ + size) > cacheSizeLimit_) {
    removeLast();
  }

  Resource::Descriptor* descCached = new Resource::Descriptor;
  if (descCached == nullptr) {
    return result;
  }

  // The cache keeps its own copy of the descriptor
  memcpy(descCached, desc, sizeof(Resource::Descriptor));

  amd::ScopedLock l(&lockCacheOps_);
  // Newest entries go to the front
  resCache_.push_front({descCached, ref});
  ref->gpu_ = nullptr;
  cacheSize_ += size;
  if (desc->type_ == Resource::Local) {
    lclCacheSize_ += size;
  }
  return true;
}
|
|
|
|
// ================================================================================================
|
|
//! Looks for GPU memory that can back a new resource: first in the suballocators,
//! then in the list of cached allocations.
//!
//! \param desc        descriptor of the resource to be created
//! \param size        required size
//! \param alignment   required alignment of the GPU virtual address
//! \param reserved_va optional reserved VA, forwarded to the suballocators
//! \param offset      [out] offset of a suballocation, written by the suballocators
//! \return memory reference to reuse, or nullptr if nothing suitable was found
GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal::gpusize size,
                                                 Pal::gpusize alignment,
                                                 const Pal::IGpuMemory* reserved_va,
                                                 Pal::gpusize* offset) {
  amd::ScopedLock l(&lockCacheOps_);
  GpuMemoryReference* ref = nullptr;

  // Check if the runtime can suballocate memory
  if (desc->type_ == Resource::Local) {
    ref = desc->SVMRes_ ? mem_sub_alloc_coarse_.Allocate(size, alignment, reserved_va, offset)
                        : mem_sub_alloc_local_.Allocate(size, alignment, reserved_va, offset);
  } else if (desc->SVMRes_) {
    ref = desc->gl2CacheDisabled_
        ? mem_sub_alloc_fine_uncached_.Allocate(size, alignment, reserved_va, offset)
        : mem_sub_alloc_fine_.Allocate(size, alignment, reserved_va, offset);
  }
  if (ref != nullptr) {
    return ref;
  }

  // Early exit if resource is too big. SVM resources are never taken from the cache.
  if (size >= cacheSizeLimit_ || desc->SVMRes_) {
    //! \note we may need to free the cache here to reduce memory pressure
    return ref;
  }

  // Search the cache list for the first entry that can be reused
  for (const auto& cached : resCache_) {
    Resource::Descriptor* entry = cached.first;
    const size_t sizeRes = cached.second->iMem()->Desc().size;

    // Type, flags and execute permission must match
    if (entry->type_ != desc->type_) continue;
    if (entry->flags_ != desc->flags_) continue;
    // The cached allocation must be big enough, but less than twice the request
    if (size > sizeRes) continue;
    if (size <= (sizeRes >> 1)) continue;
    if ((cached.second->iMem()->Desc().gpuVirtAddr % alignment) != 0) continue;
    if (entry->isAllocExecute_ != desc->isAllocExecute_) continue;

    ref = cached.second;
    cacheSize_ -= sizeRes;
    if (entry->type_ == Resource::Local) {
      lclCacheSize_ -= sizeRes;
    }
    delete cached.first;
    // Remove the found entry from the cache
    resCache_.remove(cached);
    break;
  }

  return ref;
}
|
|
|
|
// ================================================================================================
|
|
bool ResourceCache::free(size_t minCacheEntries) {
|
|
bool result = false;
|
|
if (minCacheEntries < resCache_.size()) {
|
|
result = true;
|
|
// Clear the cache
|
|
while (static_cast<int>(cacheSize_) > 0) {
|
|
removeLast();
|
|
}
|
|
CondLog((cacheSize_ != 0), "Incorrect size for cache release!");
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// ================================================================================================
|
|
void ResourceCache::removeLast() {
|
|
std::pair<Resource::Descriptor*, GpuMemoryReference*> entry;
|
|
{
|
|
// Protect access to the global data
|
|
amd::ScopedLock l(&lockCacheOps_);
|
|
entry = resCache_.back();
|
|
resCache_.pop_back();
|
|
cacheSize_ -= entry.second->iMem()->Desc().size;
|
|
if (entry.first->type_ == Resource::Local) {
|
|
lclCacheSize_ -= entry.second->iMem()->Desc().size;
|
|
}
|
|
// Delete Descriptor
|
|
delete entry.first;
|
|
}
|
|
|
|
// Destroy PAL resource
|
|
entry.second->release();
|
|
}
|
|
|
|
} // namespace pal
|