From 7ae94da05b856ea8f678d5565d72e8ec37fbab15 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 15 Mar 2018 17:26:25 -0400
Subject: [PATCH] P4 to Git Change 1527848 by gandryey@gera-w8 on 2018/03/15
17:11:43
SWDEV-79445 - OCL generic changes and code clean-up
- Add suballocations support for local(invisible) memory. It should significantly improve memory footprint and TLB usage with 2MB pages
- Implementation uses BuddyAllocator provided in PAL
- The chunk allocation size is 64MB, min allocation 4KB and max 4MB. GPU_MAX_SUBALLOC_SIZE controls the max size in KB
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#76 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#56 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#77 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#285 edit
---
rocclr/runtime/device/pal/paldefs.hpp | 1 +
rocclr/runtime/device/pal/paldevice.cpp | 25 +-
rocclr/runtime/device/pal/paldevice.hpp | 25 +-
rocclr/runtime/device/pal/palprogram.cpp | 4 +-
rocclr/runtime/device/pal/palresource.cpp | 1427 ++++++++++++---------
rocclr/runtime/device/pal/palresource.hpp | 101 +-
rocclr/runtime/device/pal/palsettings.cpp | 6 +
rocclr/runtime/device/pal/palsettings.hpp | 4 +
rocclr/runtime/device/pal/palvirtual.cpp | 14 +-
rocclr/runtime/device/pal/palvirtual.hpp | 2 +-
rocclr/runtime/utils/flags.hpp | 2 +
11 files changed, 947 insertions(+), 664 deletions(-)
diff --git a/rocclr/runtime/device/pal/paldefs.hpp b/rocclr/runtime/device/pal/paldefs.hpp
index 7a21fb6852..06740582cf 100644
--- a/rocclr/runtime/device/pal/paldefs.hpp
+++ b/rocclr/runtime/device/pal/paldefs.hpp
@@ -8,6 +8,7 @@
#include "palGpuMemory.h"
#include "palImage.h"
#include "palFormatInfo.h"
+#include "util/palSysMemory.h"
//
/// Memory Object Type
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index b8c59aa5df..1a71e3adf4 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -54,6 +54,10 @@ void PalDeviceUnload() { pal::Device::tearDown(); }
namespace pal {
+Util::GenericAllocator NullDevice::allocator_;
+char* Device::platformObj_;
+Pal::IPlatform* Device::platform_;
+
NullDevice::Compiler* NullDevice::compiler_;
AppProfile Device::appProfile_;
@@ -183,6 +187,7 @@ bool NullDevice::init() {
return true;
}
+
bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
uint xNACKSupported) {
online_ = false;
@@ -736,7 +741,7 @@ bool Device::create(Pal::IDevice* device) {
if (!amd::Device::create()) {
return false;
}
- resourceList_ = new std::list();
+ resourceList_ = new std::list();
if (nullptr == resourceList_) {
return false;
}
@@ -865,7 +870,7 @@ bool Device::create(Pal::IDevice* device) {
size_t resourceCacheSize = settings().resourceCacheSize_;
// Create resource cache.
// \note Cache must be created before any resource creation to avoid nullptr check
- resourceCache_ = new ResourceCache(resourceCacheSize);
+ resourceCache_ = new ResourceCache(this, resourceCacheSize);
if (nullptr == resourceCache_) {
return false;
}
@@ -925,8 +930,6 @@ bool Device::create(Pal::IDevice* device) {
return true;
}
-static Pal::IPlatform* platform;
-
bool Device::initializeHeapResources() {
amd::ScopedLock k(lockForInitHeap_);
if (!heapInitComplete_) {
@@ -998,7 +1001,7 @@ bool Device::initializeHeapResources() {
xferQueue_->enableSyncedBlit();
// Create RGP capture manager
- rgpCaptureMgr_ = RgpCaptureMgr::Create(platform, *this);
+ rgpCaptureMgr_ = RgpCaptureMgr::Create(platform_, *this);
}
return true;
}
@@ -1096,8 +1099,6 @@ static int reportHook(int reportType, char* message, int* returnValue) {
}
#endif // _WIN32 & DEBUG
-static char* platformObj;
-
bool Device::init() {
uint32_t numDevices = 0;
bool useDeviceList = false;
@@ -1123,7 +1124,7 @@ bool Device::init() {
#endif // !defined(WITH_LIGHTNING_COMPILER)
size_t size = Pal::GetPlatformSize();
- platformObj = new char[size];
+ platformObj_ = new char[size];
Pal::PlatformCreateInfo info = {};
info.flags.disableGpuTimeout = true;
#if !defined(PAL_BUILD_DTIF)
@@ -1138,14 +1139,14 @@ bool Device::init() {
info.maxSvmSize = static_cast(OCL_SET_SVM_SIZE * Mi);
// PAL init
- if (Pal::Result::Success != Pal::CreatePlatform(info, platformObj, &platform)) {
+ if (Pal::Result::Success != Pal::CreatePlatform(info, platformObj_, &platform_)) {
return false;
}
// Get the total number of active devices
// Count up all the devices in the system.
Pal::IDevice* deviceList[Pal::MaxDevices] = {};
- platform->EnumerateDevices(&numDevices, &deviceList[0]);
+ platform_->EnumerateDevices(&numDevices, &deviceList[0]);
uint ordinal = 0;
const char* selectDeviceByName = nullptr;
@@ -1175,8 +1176,8 @@ bool Device::init() {
}
void Device::tearDown() {
- platform->Destroy();
- delete platformObj;
+ platform_->Destroy();
+ delete platformObj_;
#if !defined(WITH_LIGHTNING_COMPILER)
if (compiler_ != nullptr) {
diff --git a/rocclr/runtime/device/pal/paldevice.hpp b/rocclr/runtime/device/pal/paldevice.hpp
index b92ea73456..fc0640c917 100644
--- a/rocclr/runtime/device/pal/paldevice.hpp
+++ b/rocclr/runtime/device/pal/paldevice.hpp
@@ -120,7 +120,12 @@ class NullDevice : public amd::Device {
amd::CacheCompilation* cacheCompilation() const { return cacheCompilation_.get(); }
#endif
+ void* Alloc(const Util::AllocInfo& allocInfo) { return allocator_.Alloc(allocInfo); }
+ void Free(const Util::FreeInfo& freeInfo) { allocator_.Free(freeInfo); }
+
protected:
+ static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL
+
Pal::AsicRevision asicRevision_; //!< ASIC revision
Pal::GfxIpLevel ipLevel_; //!< Device IP level
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
@@ -464,6 +469,9 @@ class Device : public NullDevice {
//! Returns PAL device properties
const Pal::DeviceProperties& properties() const { return properties_; }
+ //! Returns PAL platform interface
+ Pal::IPlatform* iPlat() const { return platform_; }
+
//! Returns PAL device interface
Pal::IDevice* iDev() const { return device_; }
@@ -496,19 +504,19 @@ class Device : public NullDevice {
bool resGLFree(void* GLplatformContext, void* mbResHandle, uint type) const;
//! Adds a resource to the global list
- void addResource(GpuMemoryReference* mem) const {
+ void addResource(Resource* res) const {
amd::ScopedLock lock(lockResources());
- auto findIt = std::find(resourceList_->begin(), resourceList_->end(), mem);
- mem->events_.resize(numOfVgpus());
+ auto findIt = std::find(resourceList_->begin(), resourceList_->end(), res);
+ res->resizeGpuEvents(numOfVgpus() - 1);
if (resourceList_->end() == findIt) {
- resourceList_->push_back(mem);
+ resourceList_->push_back(res);
}
}
//! Removes a resource from the global list
- void removeResource(GpuMemoryReference* mem) const {
+ void removeResource(Resource* res) const {
amd::ScopedLock lock(lockResources());
- resourceList_->remove(mem);
+ resourceList_->remove(res);
}
//! Resizes global resource list to accumulate a new queue
@@ -566,6 +574,9 @@ class Device : public NullDevice {
bool glAssociate(void* GLplatformContext, void* GLdeviceContext) const;
bool glDissociate(void* GLplatformContext, void* GLdeviceContext) const;
+ static char* platformObj_; //!< Memory allocated for PAL platform object
+ static Pal::IPlatform* platform_; //!< Pointer to the PAL platform object
+
amd::Context* context_; //!< A dummy context for internal allocations
amd::Monitor* lockAsyncOps_; //!< Lock to serialise all async ops on this device
amd::Monitor*
@@ -592,7 +603,7 @@ class Device : public NullDevice {
Pal::IDevice* device_; //!< PAL device object
std::atomic freeMem[Pal::GpuHeap::GpuHeapCount]; //!< Free memory counter
amd::Monitor* lockResourceOps_; //!< Lock to serialise resource access
- std::list* resourceList_; //!< Active resource list
+ std::list* resourceList_; //!< Active resource list
RgpCaptureMgr* rgpCaptureMgr_; //!< RGP capture manager
};
diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp
index 54c0839063..9e9bb5d356 100644
--- a/rocclr/runtime/device/pal/palprogram.cpp
+++ b/rocclr/runtime/device/pal/palprogram.cpp
@@ -89,14 +89,14 @@ void Segment::copy(size_t offset, const void* src, size_t size) {
amd::ScopedLock k(gpuAccess_->dev().xferMgr().lockXfer());
VirtualGPU& gpu = *gpuAccess_->dev().xferQueue();
Memory& xferBuf = gpuAccess_->dev().xferWrite().acquire();
- size_t tmpSize = std::min(static_cast(xferBuf.vmSize()), size);
+ size_t tmpSize = std::min(static_cast(xferBuf.size()), size);
size_t srcOffs = 0;
while (size != 0) {
xferBuf.hostWrite(&gpu, reinterpret_cast(src) + srcOffs, 0, tmpSize);
xferBuf.partialMemCopyTo(gpu, 0, (offset + srcOffs), tmpSize, *gpuAccess_, false, true);
size -= tmpSize;
srcOffs += tmpSize;
- tmpSize = std::min(static_cast(xferBuf.vmSize()), size);
+ tmpSize = std::min(static_cast(xferBuf.size()), size);
}
gpu.waitAllEngines();
}
diff --git a/rocclr/runtime/device/pal/palresource.cpp b/rocclr/runtime/device/pal/palresource.cpp
index 9e67e72b7b..0524ad2c72 100644
--- a/rocclr/runtime/device/pal/palresource.cpp
+++ b/rocclr/runtime/device/pal/palresource.cpp
@@ -28,6 +28,7 @@
namespace pal {
+// ================================================================================================
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
const Pal::GpuMemoryCreateInfo& createInfo) {
Pal::Result result;
@@ -48,10 +49,10 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
}
// Update free memory size counters
const_cast(dev).updateFreeMemory(createInfo.heaps[0], createInfo.size, false);
- dev.addResource(memRef);
return memRef;
}
+// ================================================================================================
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
const Pal::PinnedGpuMemoryCreateInfo& createInfo) {
Pal::Result result;
@@ -71,10 +72,10 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
}
// Update free memory size counters
const_cast(dev).updateFreeMemory(Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size, false);
- dev.addResource(memRef);
return memRef;
}
+// ================================================================================================
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
const Pal::SvmGpuMemoryCreateInfo& createInfo) {
Pal::Result result;
@@ -94,10 +95,10 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
// Update free memory size counters
const_cast(dev).updateFreeMemory(Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size,
false);
- dev.addResource(memRef);
return memRef;
}
+// ================================================================================================
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
const Pal::ExternalGpuMemoryOpenInfo& openInfo) {
Pal::Result result;
@@ -116,10 +117,10 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
return nullptr;
}
}
- dev.addResource(memRef);
return memRef;
}
+// ================================================================================================
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
const Pal::ExternalImageOpenInfo& openInfo,
Pal::ImageCreateInfo* imgCreateInfo,
@@ -143,33 +144,34 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
return nullptr;
}
}
- dev.addResource(memRef);
return memRef;
}
+// ================================================================================================
GpuMemoryReference::GpuMemoryReference(const Device& dev)
- : gpuMem_(nullptr), cpuAddress_(nullptr), device_(dev), gpu_(nullptr), events_(dev.numOfVgpus()) {}
+ : gpuMem_(nullptr), cpuAddress_(nullptr), device_(dev), gpu_(nullptr)
+{}
+// ================================================================================================
GpuMemoryReference::~GpuMemoryReference() {
if (gpu_ == nullptr) {
- {
- Device::ScopedLockVgpus lock(device_);
- // Release all memory objects on all virtual GPUs
- for (uint idx = 1; idx < device_.vgpus().size(); ++idx) {
- device_.vgpus()[idx]->releaseMemory(this, &events_[idx]);
- }
+ Device::ScopedLockVgpus lock(device_);
+ // Release all memory objects on all virtual GPUs
+ for (uint idx = 1; idx < device_.vgpus().size(); ++idx) {
+ device_.vgpus()[idx]->releaseMemory(this);
}
} else {
amd::ScopedLock l(gpu_->execution());
- gpu_->releaseMemory(this, &events_[gpu_->index()]);
+ gpu_->releaseMemory(this);
}
if (device_.vgpus().size() != 0) {
assert(device_.vgpus()[0] == device_.xferQueue() && "Wrong transfer queue!");
// Lock the transfer queue, since it's not handled by ScopedLockVgpus
amd::ScopedLock k(device_.xferMgr().lockXfer());
- device_.vgpus()[0]->releaseMemory(this, &events_[0]);
+ device_.vgpus()[0]->releaseMemory(this);
}
+ // Destroy PAL object if it's not a suballocation
if (cpuAddress_ != nullptr) {
iMem()->Unmap();
}
@@ -177,9 +179,9 @@ GpuMemoryReference::~GpuMemoryReference() {
iMem()->Destroy();
gpuMem_ = nullptr;
}
- device_.removeResource(this);
}
+// ================================================================================================
Resource::Resource(const Device& gpuDev, size_t size)
: elementSize_(0),
gpuDevice_(gpuDev),
@@ -188,9 +190,11 @@ Resource::Resource(const Device& gpuDev, size_t size)
offset_(0),
curRename_(0),
memRef_(nullptr),
+ subOffset_(0),
viewOwner_(nullptr),
image_(nullptr),
- hwSrd_(0) {
+ hwSrd_(0),
+ events_(gpuDev.numOfVgpus()) {
// Fill resource descriptor fields
desc_.state_ = 0;
desc_.type_ = Empty;
@@ -213,8 +217,10 @@ Resource::Resource(const Device& gpuDev, size_t size)
desc_.scratch_ = false;
desc_.isAllocExecute_ = false;
desc_.baseLevel_ = 0;
+ gpuDev.addResource(this);
}
+// ================================================================================================
Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t depth,
cl_image_format format, cl_mem_object_type imageType, uint mipLevels)
: elementSize_(0),
@@ -224,9 +230,11 @@ Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t dep
offset_(0),
curRename_(0),
memRef_(nullptr),
+ subOffset_(0),
viewOwner_(nullptr),
image_(nullptr),
- hwSrd_(0) {
+ hwSrd_(0),
+ events_(gpuDev.numOfVgpus()) {
// Fill resource descriptor fields
desc_.state_ = 0;
desc_.type_ = Empty;
@@ -273,8 +281,10 @@ Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t dep
LogError("Unknown image type!");
break;
}
+ gpuDev.addResource(this);
}
+// ================================================================================================
Resource::~Resource() {
Pal::GpuHeap heap = Pal::GpuHeapCount;
switch (memoryType()) {
@@ -313,8 +323,10 @@ Resource::~Resource() {
image_->Destroy();
delete[] reinterpret_cast(image_);
}
+ gpuDevice_.removeResource(this);
}
+// ================================================================================================
static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
static const uint32_t FormatType[] = {HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8,
HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16,
@@ -338,6 +350,7 @@ static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
return FormatType[idx];
}
+// ================================================================================================
static uint32_t GetHSAILImageOrderType(const cl_image_format& format) {
static const uint32_t OrderType[] = {HSA_EXT_IMAGE_CHANNEL_ORDER_R,
HSA_EXT_IMAGE_CHANNEL_ORDER_A,
@@ -365,6 +378,7 @@ static uint32_t GetHSAILImageOrderType(const cl_image_format& format) {
return OrderType[idx];
}
+// ================================================================================================
void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
createInfo->heapCount = 1;
switch (memoryType()) {
@@ -400,18 +414,623 @@ void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
}
}
-bool Resource::create(MemoryType memType, CreateParams* params) {
- static const Pal::gpusize MaxGpuAlignment = 64 * Ki;
- const amd::HostMemoryReference* hostMemRef = nullptr;
- bool imageCreateView = false;
+// ================================================================================================
+bool Resource::CreateImage(CreateParams* params)
+{
+ Pal::Result result;
+ Pal::SubresId ImgSubresId = { Pal::ImageAspect::Color, 0, 0 };
+ Pal::SubresRange ImgSubresRange = { ImgSubresId, 1, 1 };
+ Pal::ChannelMapping channels;
+ Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
+
+ if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+ if (memoryType() == ImageBuffer) {
+ ImageBufferParams* imageBuffer = reinterpret_cast(params);
+ viewOwner_ = imageBuffer->resource_;
+ memRef_ = viewOwner_->memRef_;
+ memRef_->retain();
+ desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
+ }
+ else {
+ Pal::GpuMemoryCreateInfo createInfo = {};
+ createInfo.size = desc().width_ * elementSize();
+ createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
+ createInfo.alignment = MaxGpuAlignment;
+ createInfo.vaRange = Pal::VaRange::Default;
+ createInfo.priority = Pal::GpuMemPriority::Normal;
+ memTypeToHeap(&createInfo);
+ // createInfo.priority;
+ memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
+ createInfo.alignment, &subOffset_);
+ if (nullptr == memRef_) {
+ memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+ if (nullptr == memRef_) {
+ LogError("Failed PAL memory allocation!");
+ return false;
+ }
+ }
+ offset_ += static_cast(subOffset_);
+ }
+ // Check if memory is locked already and restore CPU pointer
+ if (memRef_->cpuAddress_ != nullptr) {
+ address_ = memRef_->cpuAddress_;
+ memRef_->cpuAddress_ = nullptr;
+ mapCount_++;
+ }
+ Pal::BufferViewInfo viewInfo = {};
+ viewInfo.gpuAddr = vmAddress();
+ viewInfo.range = memRef_->iMem()->Desc().size;
+ viewInfo.stride = elementSize();
+ viewInfo.swizzledFormat.format = format;
+ viewInfo.swizzledFormat.swizzle = channels;
+ // viewInfo.channels = channels;
+ hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
+ if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+ return false;
+ }
+
+ dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
+ hwState_[8] = GetHSAILImageFormatType(desc().format_);
+ hwState_[9] = GetHSAILImageOrderType(desc().format_);
+ hwState_[10] = static_cast(desc().width_);
+ hwState_[11] = 0; // one extra reserved field in the argument
+ return true;
+ }
+
+ Pal::ImageViewInfo viewInfo = {};
+ Pal::ImageCreateInfo imgCreateInfo = {};
+ Pal::GpuMemoryRequirements req = {};
+ imgCreateInfo.imageType = Pal::ImageType::Tex2d;
+ viewInfo.viewType = Pal::ImageViewType::Tex2d;
+ imgCreateInfo.extent.width = desc_.width_;
+ imgCreateInfo.extent.height = desc_.height_;
+ imgCreateInfo.extent.depth = desc_.depth_;
+ imgCreateInfo.arraySize = 1;
+
+ switch (desc_.topology_) {
+ case CL_MEM_OBJECT_IMAGE3D:
+ imgCreateInfo.imageType = Pal::ImageType::Tex3d;
+ viewInfo.viewType = Pal::ImageViewType::Tex3d;
+ break;
+ case CL_MEM_OBJECT_IMAGE1D:
+ case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+ case CL_MEM_OBJECT_IMAGE1D_BUFFER:
+ imgCreateInfo.imageType = Pal::ImageType::Tex1d;
+ viewInfo.viewType = Pal::ImageViewType::Tex1d;
+ break;
+ }
+ if (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.height_;
+ imgCreateInfo.extent.depth = desc_.height_;
+ imgCreateInfo.extent.height = 1;
+ }
+ if (desc_.topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.depth_;
+ }
+
+ if (memoryType() == ImageView) {
+ ImageViewParams* imageView = reinterpret_cast(params);
+ ImgSubresRange.startSubres.mipLevel = imageView->level_;
+ desc_.baseLevel_ = imageView->level_;
+ ImgSubresRange.startSubres.arraySlice = imageView->layer_;
+ viewOwner_ = imageView->resource_;
+ image_ = viewOwner_->image_;
+ }
+ else if (memoryType() == ImageBuffer) {
+ ImageBufferParams* imageBuffer = reinterpret_cast(params);
+ viewOwner_ = imageBuffer->resource_;
+ }
+ if (nullptr != viewOwner_) {
+ offset_ = viewOwner_->offset();
+ }
+ ImgSubresRange.numMips = desc().mipLevels_;
+
+ if ((memoryType() != ImageView) ||
+ //! @todo PAL doesn't allow an SRD view creation with different pixel size
+ (elementSize() != viewOwner_->elementSize())) {
+ imgCreateInfo.usageFlags.shaderRead = true;
+ imgCreateInfo.usageFlags.shaderWrite =
+ (format == Pal::ChNumFormat::X8Y8Z8W8_Srgb) ? false : true;
+ imgCreateInfo.swizzledFormat.format = format;
+ imgCreateInfo.swizzledFormat.swizzle = channels;
+ imgCreateInfo.mipLevels = (desc_.mipLevels_) ? desc_.mipLevels_ : 1;
+ imgCreateInfo.samples = 1;
+ imgCreateInfo.fragments = 1;
+ Pal::ImageTiling tiling = Pal::ImageTiling::Optimal;
+ uint32_t rowPitch = 0;
+
+ if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) ||
+ (memoryType() == ImageBuffer)) {
+ tiling = Pal::ImageTiling::Linear;
+ }
+ else if (memoryType() == ImageView) {
+ tiling = viewOwner_->image_->GetImageCreateInfo().tiling;
+ // Find the new pitch in pixels for the new format
+ rowPitch = viewOwner_->desc().pitch_ * viewOwner_->elementSize() / elementSize();
+ }
+
+ if (memoryType() == ImageBuffer) {
+ if ((params->owner_ != NULL) && params->owner_->asImage() &&
+ (params->owner_->asImage()->getRowPitch() != 0)) {
+ rowPitch = params->owner_->asImage()->getRowPitch() / elementSize();
+ }
+ else {
+ rowPitch = desc().width_;
+ }
+ }
+ desc_.pitch_ = rowPitch;
+ // Make sure the row pitch is aligned to pixels
+ imgCreateInfo.rowPitch =
+ elementSize() * amd::alignUp(rowPitch, dev().info().imagePitchAlignment_);
+ imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+ imgCreateInfo.tiling = tiling;
+
+ size_t imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
+ if (result != Pal::Result::Success) {
+ return false;
+ }
+
+ char* memImg = new char[imageSize];
+ if (memImg != nullptr) {
+ result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
+ if (result != Pal::Result::Success) {
+ delete[] memImg;
+ return false;
+ }
+ }
+ image_->GetGpuMemoryRequirements(&req);
+ // createInfo.priority;
+ }
+
+ if ((memoryType() != ImageView) && (memoryType() != ImageBuffer)) {
+ Pal::GpuMemoryCreateInfo createInfo = {};
+ createInfo.size = amd::alignUp(req.size, MaxGpuAlignment);
+ createInfo.alignment = std::max(req.alignment, MaxGpuAlignment);
+ createInfo.vaRange = Pal::VaRange::Default;
+ createInfo.priority = Pal::GpuMemPriority::Normal;
+ memTypeToHeap(&createInfo);
+
+ memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
+ createInfo.alignment, &subOffset_);
+ if (nullptr == memRef_) {
+ memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+ if (nullptr == memRef_) {
+ LogError("Failed PAL memory allocation!");
+ return false;
+ }
+ }
+ offset_ += static_cast(subOffset_);
+ }
+ else {
+ memRef_ = viewOwner_->memRef_;
+ memRef_->retain();
+ desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
+ if (req.size > viewOwner_->iMem()->Desc().size) {
+ LogWarning("Image is bigger than the original mem object!");
+ }
+ }
+ // Check if memory is locked already and restore CPU pointer
+ if (memRef_->cpuAddress_ != nullptr) {
+ address_ = memRef_->cpuAddress_;
+ memRef_->cpuAddress_ = nullptr;
+ mapCount_++;
+ }
+ result = image_->BindGpuMemory(memRef_->gpuMem_, offset_);
+ if (result != Pal::Result::Success) {
+ return false;
+ }
+
+ hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
+ if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+ return false;
+ }
+ viewInfo.pImage = image_;
+ viewInfo.swizzledFormat.format = format;
+ viewInfo.swizzledFormat.swizzle = channels;
+ viewInfo.subresRange = ImgSubresRange;
+ dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
+
+ hwState_[8] = GetHSAILImageFormatType(desc().format_);
+ hwState_[9] = GetHSAILImageOrderType(desc().format_);
+ hwState_[10] = static_cast(desc().width_);
+ hwState_[11] = 0; // one extra reserved field in the argument
+ return true;
+}
+
+// ================================================================================================
+bool Resource::CreateInterop(CreateParams* params)
+{
+ Pal::Result result;
+ Pal::SubresId ImgSubresId = { Pal::ImageAspect::Color, 0, 0 };
+ Pal::SubresRange ImgSubresRange = { ImgSubresId, 1, 1 };
+ Pal::ChannelMapping channels;
+ Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
+ Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {};
+ Pal::ExternalResourceOpenInfo& openInfo = gpuMemOpenInfo.resourceInfo;
+ uint misc = 0;
+ uint layer = 0;
+ uint mipLevel = 0;
+ InteropType type = InteropTypeless;
+
+ if (memoryType() == OGLInterop) {
+ OGLInteropParams* oglRes = reinterpret_cast(params);
+ assert(oglRes->glPlatformContext_ && "We don't have OGL context!");
+ switch (oglRes->type_) {
+ case InteropVertexBuffer:
+ glType_ = GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD;
+ break;
+ case InteropRenderBuffer:
+ glType_ = GL_RESOURCE_ATTACH_RENDERBUFFER_AMD;
+ break;
+ case InteropTexture:
+ case InteropTextureViewLevel:
+ case InteropTextureViewCube:
+ glType_ = GL_RESOURCE_ATTACH_TEXTURE_AMD;
+ break;
+ default:
+ LogError("Unknown OGL interop type!");
+ return false;
+ break;
+ }
+ glPlatformContext_ = oglRes->glPlatformContext_;
+ layer = oglRes->layer_;
+ type = oglRes->type_;
+ mipLevel = oglRes->mipLevel_;
+
+ if (!dev().resGLAssociate(oglRes->glPlatformContext_, oglRes->handle_, glType_,
+ &openInfo.hExternalResource, &glInteropMbRes_, &offset_, desc_.format_
+#ifdef ATI_OS_WIN
+ , openInfo.doppDesktopInfo
+#endif
+ )) {
+ return false;
+ }
+ desc_.isDoppTexture_ = (openInfo.doppDesktopInfo.gpuVirtAddr != 0);
+ format = dev().getPalFormat(desc().format_, &channels);
+ }
+#ifdef ATI_OS_WIN
+ else {
+ D3DInteropParams* d3dRes = reinterpret_cast(params);
+ openInfo.hExternalResource = d3dRes->handle_;
+ misc = d3dRes->misc;
+ layer = d3dRes->layer_;
+ type = d3dRes->type_;
+ mipLevel = d3dRes->mipLevel_;
+ }
+#endif
+ //! @todo PAL query for image/buffer object doesn't work properly!
+#if 0
+ bool isImage = false;
+ if (Pal::Result::Success !=
+ dev().iDev()->DetermineExternalSharedResourceType(openInfo, &isImage)) {
+ return false;
+ }
+#endif // 0
+ if (desc().buffer_ || misc) {
+ memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
+ if (nullptr == memRef_) {
+ return false;
+ }
+
+ if (misc) {
+ Pal::ImageCreateInfo imgCreateInfo = {};
+ Pal::ExternalImageOpenInfo imgOpenInfo = {};
+ imgOpenInfo.resourceInfo = openInfo;
+ imgOpenInfo.swizzledFormat.format = format;
+ imgOpenInfo.swizzledFormat.swizzle = channels;
+ imgOpenInfo.usage.shaderRead = true;
+ imgOpenInfo.usage.shaderWrite = true;
+ size_t imageSize;
+ size_t gpuMemSize;
+
+ if (Pal::Result::Success !=
+ dev().iDev()->GetExternalSharedImageSizes(imgOpenInfo, &imageSize, &gpuMemSize,
+ &imgCreateInfo)) {
+ return false;
+ }
+
+ Pal::gpusize viewOffset = 0;
+ imgCreateInfo.flags.shareable = false;
+ imgCreateInfo.imageType = Pal::ImageType::Tex2d;
+ imgCreateInfo.extent.width = desc().width_;
+ imgCreateInfo.extent.height = desc().height_;
+ imgCreateInfo.extent.depth = desc().depth_;
+ imgCreateInfo.arraySize = 1;
+ imgCreateInfo.usageFlags.shaderRead = true;
+ imgCreateInfo.usageFlags.shaderWrite = true;
+ imgCreateInfo.swizzledFormat.format = format;
+ imgCreateInfo.swizzledFormat.swizzle = channels;
+ imgCreateInfo.mipLevels = 1;
+ imgCreateInfo.samples = 1;
+ imgCreateInfo.fragments = 1;
+ imgCreateInfo.tiling = Pal::ImageTiling::Linear;
+ imgCreateInfo.depthPitch = desc().height_ * imgCreateInfo.rowPitch;
+
+ switch (misc) {
+ case 1: // NV12 format
+ switch (layer) {
+ case -1:
+ case 0:
+ break;
+ case 1:
+ // Y - plane size to the offset
+ // NV12 format. UV is 2 times smaller plane Y
+ viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
+ imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+ break;
+ default:
+ LogError("Unknown Interop View Type");
+ return false;
+ }
+ break;
+ case 2: // YV12 format
+ switch (layer) {
+ case -1:
+ case 0:
+ break;
+ case 1:
+ // Y - plane size to the offset
+ // YV12 format. U is 4 times smaller plane than Y
+ viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
+ imgCreateInfo.rowPitch >>= 1;
+ break;
+ case 2:
+ // Y + U plane sizes to the offest.
+ // U plane is 4 times smaller than Y and U == V
+ viewOffset = 5 * imgCreateInfo.rowPitch * desc().height_ / 2;
+ imgCreateInfo.rowPitch >>= 1;
+ break;
+ default:
+ LogError("Unknown Interop View Type");
+ return false;
+ }
+ imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+ break;
+ case 3: // YUY2 format
+ imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+ break;
+ default:
+ LogError("Unknown Interop View Type");
+ return false;
+ }
+
+ imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
+ if (result != Pal::Result::Success) {
+ return false;
+ }
+
+ char* memImg = new char[imageSize];
+ if (memImg != nullptr) {
+ result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
+ if (result != Pal::Result::Success) {
+ delete[] memImg;
+ return false;
+ }
+ }
+ offset_ += static_cast(viewOffset);
+ result = image_->BindGpuMemory(iMem(), offset_);
+ if (result != Pal::Result::Success) {
+ return false;
+ }
+ hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
+ if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+ return false;
+ }
+ Pal::ImageViewInfo viewInfo = {};
+ viewInfo.viewType = Pal::ImageViewType::Tex2d;
+ viewInfo.pImage = image_;
+ viewInfo.swizzledFormat.format = format;
+ viewInfo.swizzledFormat.swizzle = channels;
+ viewInfo.subresRange = ImgSubresRange;
+ dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
+
+ hwState_[8] = GetHSAILImageFormatType(desc().format_);
+ hwState_[9] = GetHSAILImageOrderType(desc().format_);
+ hwState_[10] = static_cast(desc().width_);
+ hwState_[11] = 0; // one extra reserved field in the argument
+ }
+ }
+ else if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+ memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
+ if (nullptr == memRef_) {
+ return false;
+ }
+ Pal::BufferViewInfo viewInfo = {};
+ viewInfo.gpuAddr = vmAddress();
+ viewInfo.range = memRef_->iMem()->Desc().size;
+ viewInfo.stride = elementSize();
+ viewInfo.swizzledFormat.format = format;
+ viewInfo.swizzledFormat.swizzle = channels;
+ hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
+ if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+ return false;
+ }
+
+ dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
+ hwState_[8] = GetHSAILImageFormatType(desc().format_);
+ hwState_[9] = GetHSAILImageOrderType(desc().format_);
+ hwState_[10] = static_cast(desc().width_);
+ hwState_[11] = 0; // one extra reserved field in the argument
+ }
+ else {
+ Pal::ExternalImageOpenInfo imgOpenInfo = {};
+ Pal::ImageCreateInfo imgCreateInfo = {};
+ imgOpenInfo.resourceInfo = openInfo;
+ imgOpenInfo.swizzledFormat.format = format;
+ imgOpenInfo.swizzledFormat.swizzle = channels;
+ imgOpenInfo.usage.shaderRead = true;
+ imgOpenInfo.usage.shaderWrite = true;
+ memRef_ = GpuMemoryReference::Create(dev(), imgOpenInfo, &imgCreateInfo, &image_);
+ if (nullptr == memRef_) {
+ return false;
+ }
+
+ hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
+ if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+ return false;
+ }
+ Pal::ImageViewInfo viewInfo = {};
+ viewInfo.viewType = Pal::ImageViewType::Tex2d;
+ switch (imgCreateInfo.imageType) {
+ case Pal::ImageType::Tex3d:
+ viewInfo.viewType = Pal::ImageViewType::Tex3d;
+ break;
+ case Pal::ImageType::Tex1d:
+ viewInfo.viewType = Pal::ImageViewType::Tex1d;
+ break;
+ default:
+ break;
+ }
+ viewInfo.pImage = image_;
+ viewInfo.swizzledFormat.format = format;
+ viewInfo.swizzledFormat.swizzle = channels;
+ if ((type == InteropTextureViewLevel) || (type == InteropTextureViewCube)) {
+ ImgSubresRange.startSubres.mipLevel = mipLevel;
+ if (type == InteropTextureViewCube) {
+ ImgSubresRange.startSubres.arraySlice = layer;
+ viewInfo.viewType = Pal::ImageViewType::Tex2d;
+ }
+ }
+ if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ ImgSubresRange.numSlices = desc_.height_;
+ }
+ if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ ImgSubresRange.numSlices = desc_.depth_;
+ }
+ ImgSubresRange.numMips = desc().mipLevels_;
+ viewInfo.subresRange = ImgSubresRange;
+
+ dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
+ //! It's a workaround for D24S8 format, since PAL doesn't support this format
+ //! and GSL decompresses 24bit DEPTH into D24S8 for OGL compatibility
+ if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
+ (desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
+ hwState_[1] &= ~0x3c000000;
+ hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
+ }
+ hwState_[8] = GetHSAILImageFormatType(desc().format_);
+ hwState_[9] = GetHSAILImageOrderType(desc().format_);
+ hwState_[10] = static_cast(desc().width_);
+ hwState_[11] = 0; // one extra reserved field in the argument
+ }
+ return true;
+}
+
+// ================================================================================================
+bool Resource::CreatePinned(CreateParams* params)
+{
+ PinnedParams* pinned = reinterpret_cast(params);
+ size_t allocSize = pinned->size_;
+ const amd::HostMemoryReference* hostMemRef = pinned->hostMemRef_;
+ void* pinAddress = address_ = hostMemRef->hostMem();
uint hostMemOffset = 0;
+ // assert((allocSize == (desc().width_ * elementSize())) && "Sizes don't match");
+ if (desc().topology_ == CL_MEM_OBJECT_BUFFER) {
+ // Allign offset to 4K boundary (Vista/Win7 limitation)
+ char* tmpHost = const_cast(
+ amd::alignDown(reinterpret_cast(address_), PinnedMemoryAlignment));
+
+ // Find the partial size for unaligned copy
+ hostMemOffset = static_cast(reinterpret_cast(address_) - tmpHost);
+
+ offset_ = hostMemOffset;
+
+ pinAddress = tmpHost;
+
+ if (hostMemOffset != 0) {
+ allocSize += hostMemOffset;
+ }
+ allocSize = amd::alignUp(allocSize, PinnedMemoryAlignment);
+ // hostMemOffset &= ~(0xff);
+ }
+ else if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D) {
+ //! @todo: Width has to be aligned for 3D.
+ //! Need to be replaced with a compute copy
+ // Width aligned by 8 texels
+ if (((desc().width_ % 0x8) != 0) ||
+ // Pitch aligned by 64 bytes
+ (((desc().width_ * elementSize()) % 0x40) != 0)) {
+ return false;
+ }
+ }
+ else {
+ //! @todo GSL doesn't support pinning with resAlloc_
+ return false;
+ }
+
+ if (dev().settings().svmFineGrainSystem_) {
+ desc_.SVMRes_ = true;
+ }
+
+ // Ensure page alignment
+ if ((uint64_t)(pinAddress) & (amd::Os::pageSize() - 1)) {
+ return false;
+ }
+ Pal::PinnedGpuMemoryCreateInfo createInfo = {};
+ createInfo.pSysMem = pinAddress;
+ createInfo.size = allocSize;
+ createInfo.vaRange = Pal::VaRange::Default;
+ memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+ if (nullptr == memRef_) {
+ LogError("Failed PAL memory allocation!");
+ return false;
+ }
+ desc_.cardMemory_ = false;
+ return true;
+}
+
+// ================================================================================================
+bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr)
+{
+ size_t allocSize = amd::alignUp(desc().width_ * elementSize_, MaxGpuAlignment);
+ if ((memoryType() == RemoteUSWC) || (memoryType() == Remote)) {
+ Pal::SvmGpuMemoryCreateInfo createInfo = {};
+ createInfo.isUsedForKernel = desc_.isAllocExecute_;
+ createInfo.size = allocSize;
+ createInfo.alignment = MaxGpuAlignment;
+ if (svmPtr != 0) {
+ createInfo.flags.useReservedGpuVa = true;
+ createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
+ }
+ else {
+ createInfo.flags.useReservedGpuVa = false;
+ createInfo.pReservedGpuVaOwner = nullptr;
+ }
+ memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+ }
+ else {
+ Pal::GpuMemoryCreateInfo createInfo = {};
+ createInfo.size = allocSize;
+ createInfo.alignment = MaxGpuAlignment;
+ createInfo.vaRange = Pal::VaRange::Svm;
+ createInfo.priority = Pal::GpuMemPriority::Normal;
+ if (svmPtr != 0) {
+ createInfo.flags.useReservedGpuVa = true;
+ createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
+ }
+ memTypeToHeap(&createInfo);
+ memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+ }
+ if (nullptr == memRef_) {
+ LogError("Failed PAL memory allocation!");
+ return false;
+ }
+ desc_.cardMemory_ = false;
+ if ((nullptr != params) && (nullptr != params->owner_) &&
+ (nullptr != params->owner_->getSvmPtr())) {
+ params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr));
+ }
+ return true;
+}
+
+// ================================================================================================
+bool Resource::create(MemoryType memType, CreateParams* params) {
+ bool imageCreateView = false;
bool foundCalRef = false;
bool viewDefined = false;
uint viewLayer = 0;
uint viewLevel = 0;
uint viewFlags = 0;
- Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, 0, 0};
- Pal::SubresRange ImgSubresRange = {ImgSubresId, 1, 1};
Pal::ChannelMapping channels;
Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
@@ -453,486 +1072,17 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
desc_.type_ = RemoteUSWC;
}
- Pal::Result result;
-
if ((memoryType() == OGLInterop) || (memoryType() == D3D9Interop) ||
(memoryType() == D3D10Interop) || (memoryType() == D3D11Interop)) {
- Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {};
- Pal::ExternalResourceOpenInfo& openInfo = gpuMemOpenInfo.resourceInfo;
- uint misc = 0;
- uint layer = 0;
- uint mipLevel = 0;
- InteropType type = InteropTypeless;
-
- if (memoryType() == OGLInterop) {
- OGLInteropParams* oglRes = reinterpret_cast(params);
- assert(oglRes->glPlatformContext_ && "We don't have OGL context!");
- switch (oglRes->type_) {
- case InteropVertexBuffer:
- glType_ = GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD;
- break;
- case InteropRenderBuffer:
- glType_ = GL_RESOURCE_ATTACH_RENDERBUFFER_AMD;
- break;
- case InteropTexture:
- case InteropTextureViewLevel:
- case InteropTextureViewCube:
- glType_ = GL_RESOURCE_ATTACH_TEXTURE_AMD;
- break;
- default:
- LogError("Unknown OGL interop type!");
- return false;
- break;
- }
- glPlatformContext_ = oglRes->glPlatformContext_;
- layer = oglRes->layer_;
- type = oglRes->type_;
- mipLevel = oglRes->mipLevel_;
-
- if (!dev().resGLAssociate(oglRes->glPlatformContext_, oglRes->handle_, glType_,
- &openInfo.hExternalResource, &glInteropMbRes_, &offset_, desc_.format_
-#ifdef ATI_OS_WIN
- , openInfo.doppDesktopInfo
-#endif
- )) {
- return false;
- }
- desc_.isDoppTexture_ = (openInfo.doppDesktopInfo.gpuVirtAddr != 0);
- format = dev().getPalFormat(desc().format_, &channels);
- }
-#ifdef ATI_OS_WIN
- else {
- D3DInteropParams* d3dRes = reinterpret_cast(params);
- openInfo.hExternalResource = d3dRes->handle_;
- misc = d3dRes->misc;
- layer = d3dRes->layer_;
- type = d3dRes->type_;
- mipLevel = d3dRes->mipLevel_;
- }
-#endif
-//! @todo PAL query for image/buffer object doesn't work properly!
-#if 0
- bool isImage = false;
- if (Pal::Result::Success !=
- dev().iDev()->DetermineExternalSharedResourceType(openInfo, &isImage)) {
- return false;
- }
-#endif // 0
- if (desc().buffer_ || misc) {
- memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
- if (nullptr == memRef_) {
- return false;
- }
-
- if (misc) {
- Pal::ImageCreateInfo imgCreateInfo = {};
- Pal::ExternalImageOpenInfo imgOpenInfo = {};
- imgOpenInfo.resourceInfo = openInfo;
- imgOpenInfo.swizzledFormat.format = format;
- imgOpenInfo.swizzledFormat.swizzle = channels;
- imgOpenInfo.usage.shaderRead = true;
- imgOpenInfo.usage.shaderWrite = true;
- size_t imageSize;
- size_t gpuMemSize;
-
- if (Pal::Result::Success !=
- dev().iDev()->GetExternalSharedImageSizes(imgOpenInfo, &imageSize, &gpuMemSize,
- &imgCreateInfo)) {
- return false;
- }
-
- Pal::gpusize viewOffset = 0;
- imgCreateInfo.flags.shareable = false;
- imgCreateInfo.imageType = Pal::ImageType::Tex2d;
- imgCreateInfo.extent.width = desc().width_;
- imgCreateInfo.extent.height = desc().height_;
- imgCreateInfo.extent.depth = desc().depth_;
- imgCreateInfo.arraySize = 1;
- imgCreateInfo.usageFlags.shaderRead = true;
- imgCreateInfo.usageFlags.shaderWrite = true;
- imgCreateInfo.swizzledFormat.format = format;
- imgCreateInfo.swizzledFormat.swizzle = channels;
- imgCreateInfo.mipLevels = 1;
- imgCreateInfo.samples = 1;
- imgCreateInfo.fragments = 1;
- imgCreateInfo.tiling = Pal::ImageTiling::Linear;
- imgCreateInfo.depthPitch = desc().height_ * imgCreateInfo.rowPitch;
-
- switch (misc) {
- case 1: // NV12 format
- switch (layer) {
- case -1:
- break;
- case 0:
- break;
- case 1:
- // Y - plane size to the offset
- // NV12 format. UV is 2 times smaller plane Y
- viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
- imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
- break;
- default:
- LogError("Unknown Interop View Type");
- return false;
- }
- break;
- case 2: // YV12 format
- switch (layer) {
- case -1:
- break;
- case 0:
- break;
- case 1:
- // Y - plane size to the offset
- // YV12 format. U is 4 times smaller plane than Y
- viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
- imgCreateInfo.rowPitch >>= 1;
- break;
- case 2:
- // Y + U plane sizes to the offest.
- // U plane is 4 times smaller than Y and U == V
- viewOffset = 5 * imgCreateInfo.rowPitch * desc().height_ / 2;
- imgCreateInfo.rowPitch >>= 1;
- break;
- default:
- LogError("Unknown Interop View Type");
- return false;
- }
- imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
- break;
- case 3: // YUY2 format
- imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
- break;
- default:
- LogError("Unknown Interop View Type");
- return false;
- }
-
- imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
- if (result != Pal::Result::Success) {
- return false;
- }
-
- char* memImg = new char[imageSize];
- if (memImg != nullptr) {
- result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
- if (result != Pal::Result::Success) {
- delete [] memImg;
- return false;
- }
- }
- result = image_->BindGpuMemory(iMem(), viewOffset);
- if (result != Pal::Result::Success) {
- return false;
- }
- offset_ = static_cast(viewOffset);
- hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
- if ((0 == hwSrd_) && (memoryType() != ImageView)) {
- return false;
- }
- Pal::ImageViewInfo viewInfo = {};
- viewInfo.viewType = Pal::ImageViewType::Tex2d;
- viewInfo.pImage = image_;
- viewInfo.swizzledFormat.format = format;
- viewInfo.swizzledFormat.swizzle = channels;
- viewInfo.subresRange = ImgSubresRange;
- dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
-
- hwState_[8] = GetHSAILImageFormatType(desc().format_);
- hwState_[9] = GetHSAILImageOrderType(desc().format_);
- hwState_[10] = static_cast(desc().width_);
- hwState_[11] = 0; // one extra reserved field in the argument
- }
- } else if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
- memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
- if (nullptr == memRef_) {
- return false;
- }
- Pal::BufferViewInfo viewInfo = {};
- viewInfo.gpuAddr = vmAddress();
- viewInfo.range = memRef_->iMem()->Desc().size;
- viewInfo.stride = elementSize();
- viewInfo.swizzledFormat.format = format;
- viewInfo.swizzledFormat.swizzle = channels;
- hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
- if ((0 == hwSrd_) && (memoryType() != ImageView)) {
- return false;
- }
-
- dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
- hwState_[8] = GetHSAILImageFormatType(desc().format_);
- hwState_[9] = GetHSAILImageOrderType(desc().format_);
- hwState_[10] = static_cast(desc().width_);
- hwState_[11] = 0; // one extra reserved field in the argument
- } else {
- Pal::ExternalImageOpenInfo imgOpenInfo = {};
- Pal::ImageCreateInfo imgCreateInfo = {};
- imgOpenInfo.resourceInfo = openInfo;
- imgOpenInfo.swizzledFormat.format = format;
- imgOpenInfo.swizzledFormat.swizzle = channels;
- imgOpenInfo.usage.shaderRead = true;
- imgOpenInfo.usage.shaderWrite = true;
- memRef_ = GpuMemoryReference::Create(dev(), imgOpenInfo, &imgCreateInfo, &image_);
- if (nullptr == memRef_) {
- return false;
- }
-
- hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
- if ((0 == hwSrd_) && (memoryType() != ImageView)) {
- return false;
- }
- Pal::ImageViewInfo viewInfo = {};
- viewInfo.viewType = Pal::ImageViewType::Tex2d;
- switch (imgCreateInfo.imageType) {
- case Pal::ImageType::Tex3d:
- viewInfo.viewType = Pal::ImageViewType::Tex3d;
- break;
- case Pal::ImageType::Tex1d:
- viewInfo.viewType = Pal::ImageViewType::Tex1d;
- break;
- default:
- break;
- }
- viewInfo.pImage = image_;
- viewInfo.swizzledFormat.format = format;
- viewInfo.swizzledFormat.swizzle = channels;
- if ((type == InteropTextureViewLevel) || (type == InteropTextureViewCube)) {
- ImgSubresRange.startSubres.mipLevel = mipLevel;
- if (type == InteropTextureViewCube) {
- ImgSubresRange.startSubres.arraySlice = layer;
- viewInfo.viewType = Pal::ImageViewType::Tex2d;
- }
- }
- if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
- ImgSubresRange.numSlices = desc_.height_;
- }
- if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
- ImgSubresRange.numSlices = desc_.depth_;
- }
- ImgSubresRange.numMips = desc().mipLevels_;
- viewInfo.subresRange = ImgSubresRange;
-
- dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
- //! It's a workaround for D24S8 format, since PAL doesn't support this format
- //! and GSL decompresses 24bit DEPTH into D24S8 for OGL compatibility
- if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
- (desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
- hwState_[1] &= ~0x3c000000;
- hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
- }
- hwState_[8] = GetHSAILImageFormatType(desc().format_);
- hwState_[9] = GetHSAILImageOrderType(desc().format_);
- hwState_[10] = static_cast(desc().width_);
- hwState_[11] = 0; // one extra reserved field in the argument
- }
- return true;
+ return CreateInterop(params);
}
if (!desc_.buffer_) {
- if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
- if (memoryType() == ImageBuffer) {
- ImageBufferParams* imageBuffer = reinterpret_cast(params);
- viewOwner_ = imageBuffer->resource_;
- memRef_ = viewOwner_->memRef_;
- memRef_->retain();
- desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
- } else {
- Pal::GpuMemoryCreateInfo createInfo = {};
- createInfo.size = desc().width_ * elementSize();
- // @todo 64K alignment is too big
- createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
- createInfo.alignment = MaxGpuAlignment;
- createInfo.vaRange = Pal::VaRange::Default;
- createInfo.priority = Pal::GpuMemPriority::Normal;
- memTypeToHeap(&createInfo);
- // createInfo.priority;
- memRef_ =
- dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
- if (nullptr == memRef_) {
- memRef_ = GpuMemoryReference::Create(dev(), createInfo);
- if (nullptr == memRef_) {
- LogError("Failed PAL memory allocation!");
- return false;
- }
- }
- }
- // Check if memory is locked already and restore CPU pointer
- if (memRef_->cpuAddress_ != nullptr) {
- address_ = memRef_->cpuAddress_;
- memRef_->cpuAddress_ = nullptr;
- mapCount_++;
- }
- Pal::BufferViewInfo viewInfo = {};
- viewInfo.gpuAddr = vmAddress();
- viewInfo.range = memRef_->iMem()->Desc().size;
- viewInfo.stride = elementSize();
- viewInfo.swizzledFormat.format = format;
- viewInfo.swizzledFormat.swizzle = channels;
- // viewInfo.channels = channels;
- hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
- if ((0 == hwSrd_) && (memoryType() != ImageView)) {
- return false;
- }
-
- dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
- hwState_[8] = GetHSAILImageFormatType(desc().format_);
- hwState_[9] = GetHSAILImageOrderType(desc().format_);
- hwState_[10] = static_cast(desc().width_);
- hwState_[11] = 0; // one extra reserved field in the argument
- return true;
- }
-
- Pal::ImageViewInfo viewInfo = {};
- Pal::ImageCreateInfo imgCreateInfo = {};
- Pal::GpuMemoryRequirements req = {};
- imgCreateInfo.imageType = Pal::ImageType::Tex2d;
- viewInfo.viewType = Pal::ImageViewType::Tex2d;
- imgCreateInfo.extent.width = desc_.width_;
- imgCreateInfo.extent.height = desc_.height_;
- imgCreateInfo.extent.depth = desc_.depth_;
- imgCreateInfo.arraySize = 1;
-
- switch (desc_.topology_) {
- case CL_MEM_OBJECT_IMAGE3D:
- imgCreateInfo.imageType = Pal::ImageType::Tex3d;
- viewInfo.viewType = Pal::ImageViewType::Tex3d;
- break;
- case CL_MEM_OBJECT_IMAGE1D:
- case CL_MEM_OBJECT_IMAGE1D_ARRAY:
- case CL_MEM_OBJECT_IMAGE1D_BUFFER:
- imgCreateInfo.imageType = Pal::ImageType::Tex1d;
- viewInfo.viewType = Pal::ImageViewType::Tex1d;
- break;
- }
- if (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
- ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.height_;
- imgCreateInfo.extent.depth = desc_.height_;
- imgCreateInfo.extent.height = 1;
- }
- if (desc_.topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
- ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.depth_;
- }
-
- if (memoryType() == ImageView) {
- ImageViewParams* imageView = reinterpret_cast(params);
- ImgSubresRange.startSubres.mipLevel = imageView->level_;
- desc_.baseLevel_ = imageView->level_;
- ImgSubresRange.startSubres.arraySlice = imageView->layer_;
- viewOwner_ = imageView->resource_;
- image_ = viewOwner_->image_;
- offset_ = viewOwner_->offset_;
- } else if (memoryType() == ImageBuffer) {
- ImageBufferParams* imageBuffer = reinterpret_cast(params);
- viewOwner_ = imageBuffer->resource_;
- }
- ImgSubresRange.numMips = desc().mipLevels_;
-
- if ((memoryType() != ImageView) ||
- //! @todo PAL doesn't allow an SRD view creation with different pixel size
- (elementSize() != viewOwner_->elementSize())) {
- imgCreateInfo.usageFlags.shaderRead = true;
- imgCreateInfo.usageFlags.shaderWrite =
- (format == Pal::ChNumFormat::X8Y8Z8W8_Srgb) ? false : true;
- imgCreateInfo.swizzledFormat.format = format;
- imgCreateInfo.swizzledFormat.swizzle = channels;
- imgCreateInfo.mipLevels = (desc_.mipLevels_) ? desc_.mipLevels_ : 1;
- imgCreateInfo.samples = 1;
- imgCreateInfo.fragments = 1;
- Pal::ImageTiling tiling = Pal::ImageTiling::Optimal;
- uint32_t rowPitch = 0;
-
- if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) ||
- (memoryType() == ImageBuffer)) {
- tiling = Pal::ImageTiling::Linear;
- } else if (memoryType() == ImageView) {
- tiling = viewOwner_->image_->GetImageCreateInfo().tiling;
- // Find the new pitch in pixels for the new format
- rowPitch = viewOwner_->desc().pitch_ * viewOwner_->elementSize() / elementSize();
- }
-
- if (memoryType() == ImageBuffer) {
- if ((params->owner_ != NULL) && params->owner_->asImage() &&
- (params->owner_->asImage()->getRowPitch() != 0)) {
- rowPitch = params->owner_->asImage()->getRowPitch() / elementSize();
- } else {
- rowPitch = desc().width_;
- }
- }
- desc_.pitch_ = rowPitch;
- // Make sure the row pitch is aligned to pixels
- imgCreateInfo.rowPitch =
- elementSize() * amd::alignUp(rowPitch, dev().info().imagePitchAlignment_);
- imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
- imgCreateInfo.tiling = tiling;
-
- size_t imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
- if (result != Pal::Result::Success) {
- return false;
- }
-
- char* memImg = new char[imageSize];
- if (memImg != nullptr) {
- result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
- if (result != Pal::Result::Success) {
- delete [] memImg;
- return false;
- }
- }
- image_->GetGpuMemoryRequirements(&req);
- // createInfo.priority;
- }
-
- if ((memoryType() != ImageView) && (memoryType() != ImageBuffer)) {
- Pal::GpuMemoryCreateInfo createInfo = {};
- createInfo.size = amd::alignUp(req.size, MaxGpuAlignment);
- createInfo.alignment = std::max(req.alignment, MaxGpuAlignment);
- createInfo.vaRange = Pal::VaRange::Default;
- createInfo.priority = Pal::GpuMemPriority::Normal;
- memTypeToHeap(&createInfo);
-
- memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
- if (nullptr == memRef_) {
- memRef_ = GpuMemoryReference::Create(dev(), createInfo);
- if (nullptr == memRef_) {
- LogError("Failed PAL memory allocation!");
- return false;
- }
- }
- } else {
- memRef_ = viewOwner_->memRef_;
- memRef_->retain();
- desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
- if (req.size > viewOwner_->iMem()->Desc().size) {
- LogWarning("Image is bigger than the original mem object!");
- }
- }
- // Check if memory is locked already and restore CPU pointer
- if (memRef_->cpuAddress_ != nullptr) {
- address_ = memRef_->cpuAddress_;
- memRef_->cpuAddress_ = nullptr;
- mapCount_++;
- }
-
- result = image_->BindGpuMemory(memRef_->gpuMem_, offset_);
- if (result != Pal::Result::Success) {
- return false;
- }
-
- hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast(&hwState_));
- if ((0 == hwSrd_) && (memoryType() != ImageView)) {
- return false;
- }
- viewInfo.pImage = image_;
- viewInfo.swizzledFormat.format = format;
- viewInfo.swizzledFormat.swizzle = channels;
- viewInfo.subresRange = ImgSubresRange;
- dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
-
- hwState_[8] = GetHSAILImageFormatType(desc().format_);
- hwState_[9] = GetHSAILImageOrderType(desc().format_);
- hwState_[10] = static_cast(desc().width_);
- hwState_[11] = 0; // one extra reserved field in the argument
- return true;
+ return CreateImage(params);
+ }
+
+ if (memoryType() == Pinned) {
+ return CreatePinned(params);
}
if (memoryType() == View) {
@@ -956,116 +1106,19 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
return true;
}
- if (memoryType() == Pinned) {
- PinnedParams* pinned = reinterpret_cast(params);
- size_t allocSize = pinned->size_;
- void* pinAddress;
- hostMemRef = pinned->hostMemRef_;
- pinAddress = address_ = hostMemRef->hostMem();
- // assert((allocSize == (desc().width_ * elementSize())) && "Sizes don't match");
- if (desc().topology_ == CL_MEM_OBJECT_BUFFER) {
- // Allign offset to 4K boundary (Vista/Win7 limitation)
- char* tmpHost = const_cast(
- amd::alignDown(reinterpret_cast(address_), PinnedMemoryAlignment));
-
- // Find the partial size for unaligned copy
- hostMemOffset = static_cast(reinterpret_cast(address_) - tmpHost);
-
- offset_ = hostMemOffset;
-
- pinAddress = tmpHost;
-
- if (hostMemOffset != 0) {
- allocSize += hostMemOffset;
- }
- allocSize = amd::alignUp(allocSize, PinnedMemoryAlignment);
- // hostMemOffset &= ~(0xff);
- } else if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D) {
- //! @todo: Width has to be aligned for 3D.
- //! Need to be replaced with a compute copy
- // Width aligned by 8 texels
- if (((desc().width_ % 0x8) != 0) ||
- // Pitch aligned by 64 bytes
- (((desc().width_ * elementSize()) % 0x40) != 0)) {
- return false;
- }
- } else {
- //! @todo GSL doesn't support pinning with resAlloc_
- return false;
- }
-
- if (dev().settings().svmFineGrainSystem_) {
- desc_.SVMRes_ = true;
- }
-
- // Ensure page alignment
- if ((uint64_t)(pinAddress) & (amd::Os::pageSize() - 1)) {
- return false;
- }
- Pal::PinnedGpuMemoryCreateInfo createInfo = {};
- createInfo.pSysMem = pinAddress;
- createInfo.size = allocSize;
- createInfo.vaRange = Pal::VaRange::Default;
- memRef_ = GpuMemoryReference::Create(dev(), createInfo);
- if (nullptr == memRef_) {
- LogError("Failed PAL memory allocation!");
- return false;
- }
- desc_.cardMemory_ = false;
- return true;
- }
-
Pal::gpusize svmPtr = 0;
if ((nullptr != params) && (nullptr != params->owner_) &&
(nullptr != params->owner_->getSvmPtr())) {
- svmPtr = reinterpret_cast(params->owner_->getSvmPtr());
- desc_.SVMRes_ = true;
- svmPtr = (svmPtr == 1) ? 0 : svmPtr;
+ svmPtr = reinterpret_cast(params->owner_->getSvmPtr());
+ desc_.SVMRes_ = true;
+ svmPtr = (svmPtr == 1) ? 0 : svmPtr;
}
if (desc_.SVMRes_) {
- // @todo 64K alignment is too big
- size_t allocSize = amd::alignUp(desc().width_ * elementSize_, MaxGpuAlignment);
- if ((memoryType() == RemoteUSWC) || (memoryType() == Remote)) {
- Pal::SvmGpuMemoryCreateInfo createInfo = {};
- createInfo.isUsedForKernel = desc_.isAllocExecute_;
- createInfo.size = allocSize;
- createInfo.alignment = MaxGpuAlignment;
- if (svmPtr != 0) {
- createInfo.flags.useReservedGpuVa = true;
- createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
- } else {
- createInfo.flags.useReservedGpuVa = false;
- createInfo.pReservedGpuVaOwner = nullptr;
- }
- memRef_ = GpuMemoryReference::Create(dev(), createInfo);
- } else {
- Pal::GpuMemoryCreateInfo createInfo = {};
- createInfo.size = allocSize;
- createInfo.alignment = MaxGpuAlignment;
- createInfo.vaRange = Pal::VaRange::Svm;
- createInfo.priority = Pal::GpuMemPriority::Normal;
- if (svmPtr != 0) {
- createInfo.flags.useReservedGpuVa = true;
- createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
- }
- memTypeToHeap(&createInfo);
- memRef_ = GpuMemoryReference::Create(dev(), createInfo);
- }
- if (nullptr == memRef_) {
- LogError("Failed PAL memory allocation!");
- return false;
- }
- desc_.cardMemory_ = false;
- if ((nullptr != params) && (nullptr != params->owner_) &&
- (nullptr != params->owner_->getSvmPtr())) {
- params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr));
- }
- return true;
+ return CreateSvm(params, svmPtr);
}
Pal::GpuMemoryCreateInfo createInfo = {};
createInfo.size = desc().width_ * elementSize_;
- // @todo 64K alignment is too big
createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
createInfo.alignment = MaxGpuAlignment;
createInfo.vaRange = Pal::VaRange::Default;
@@ -1082,7 +1135,8 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
memTypeToHeap(&createInfo);
// createInfo.priority;
- memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
+ memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
+ createInfo.alignment, &subOffset_);
if (nullptr == memRef_) {
memRef_ = GpuMemoryReference::Create(dev(), createInfo);
if (nullptr == memRef_) {
@@ -1090,6 +1144,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
return false;
}
}
+ offset_ += static_cast(subOffset_);
// Check if memory is locked already and restore CPU pointer
if (memRef_->cpuAddress_ != nullptr) {
address_ = memRef_->cpuAddress_;
@@ -1099,7 +1154,9 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
return true;
}
-void Resource::free() {
+// ================================================================================================
+void Resource::free()
+{
if (memRef_ == nullptr) {
return;
}
@@ -1112,17 +1169,19 @@ void Resource::free() {
const bool wait =
(memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);
+ // OCL has to wait, even if resource is placed in the cache, since reallocation can occur
+ // and resource can be reused on another async queue without a wait on a busy operation
if (wait) {
if (memRef_->gpu_ == nullptr) {
Device::ScopedLockVgpus lock(dev());
// Release all memory objects on all virtual GPUs
for (uint idx = 1; idx < dev().vgpus().size(); ++idx) {
- dev().vgpus()[idx]->waitForEvent(&memRef_->events_[idx]);
+ dev().vgpus()[idx]->waitForEvent(&events_[idx]);
}
}
else {
amd::ScopedLock l(memRef_->gpu_->execution());
- memRef_->gpu_->waitForEvent(&memRef_->events_[memRef_->gpu_->index()]);
+ memRef_->gpu_->waitForEvent(&events_[memRef_->gpu_->index()]);
}
} else {
// After a view destruction the original object is no longer can be associated with a vgpu
@@ -1144,8 +1203,8 @@ void Resource::free() {
}
}
- // Add resource to the cache if it's not assigned to a specific queue
- if ((memRef_->gpu_ != nullptr) || !dev().resourceCache().addGpuMemory(&desc_, memRef_)) {
+ // Add resource to the cache
+ if (!dev().resourceCache().addGpuMemory(&desc_, memRef_, subOffset_)) {
palFree();
}
}
@@ -1166,8 +1225,10 @@ void Resource::free() {
}
}
+// ================================================================================================
void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const void* data,
- bool waitForEvent) const {
+ bool waitForEvent) const
+{
GpuEvent event;
// Write data size bytes to surface
@@ -1175,7 +1236,8 @@ void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const v
assert((size & 3) == 0);
gpu.eventBegin(MainEngine);
gpu.queue(MainEngine).addCmdMemRef(memRef());
- gpu.iCmd()->CmdUpdateMemory(*iMem(), offset, size, reinterpret_cast(data));
+ gpu.iCmd()->CmdUpdateMemory(*iMem(), offset_ + offset, size,
+ reinterpret_cast(data));
gpu.eventEnd(MainEngine, event);
if (waitForEvent) {
@@ -1190,7 +1252,10 @@ void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const v
gpu.setGpuEvent(event, false);
}
}
-static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement) {
+
+// ================================================================================================
+static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement)
+{
if (bytesPerElement == 16) {
return Pal::ChNumFormat::X32Y32Z32W32_Uint;
} else if (bytesPerElement == 8) {
@@ -1204,6 +1269,7 @@ static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement) {
}
}
+// ================================================================================================
bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
Resource& dstResource, bool enableCopyRect, bool flushDMA,
@@ -1351,6 +1417,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
return true;
}
+// ================================================================================================
void Resource::setBusy(VirtualGPU& gpu, GpuEvent gpuEvent) const {
addGpuEvent(gpu, gpuEvent);
@@ -1360,6 +1427,7 @@ void Resource::setBusy(VirtualGPU& gpu, GpuEvent gpuEvent) const {
}
}
+// ================================================================================================
void Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const {
GpuEvent* gpuEvent = getGpuEvent(gpu);
@@ -1377,6 +1445,7 @@ void Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const {
}
}
+// ================================================================================================
bool Resource::hostWrite(VirtualGPU* gpu, const void* hostPtr, const amd::Coord3D& origin,
const amd::Coord3D& size, uint flags, size_t rowPitch, size_t slicePitch) {
void* dst;
@@ -1446,6 +1515,7 @@ bool Resource::hostWrite(VirtualGPU* gpu, const void* hostPtr, const amd::Coord3
return true;
}
+// ================================================================================================
bool Resource::hostRead(VirtualGPU* gpu, void* hostPtr, const amd::Coord3D& origin,
const amd::Coord3D& size, size_t rowPitch, size_t slicePitch) {
void* src;
@@ -1515,6 +1585,7 @@ bool Resource::hostRead(VirtualGPU* gpu, void* hostPtr, const amd::Coord3D& orig
return true;
}
+// ================================================================================================
void* Resource::gpuMemoryMap(size_t* pitch, uint flags, Pal::IGpuMemory* resource) const {
if (desc_.cardMemory_ && !isPersistentDirectMap()) {
// @todo remove const cast
@@ -1540,6 +1611,7 @@ void* Resource::gpuMemoryMap(size_t* pitch, uint flags, Pal::IGpuMemory* resourc
}
}
+// ================================================================================================
void Resource::gpuMemoryUnmap(Pal::IGpuMemory* resource) const {
if (desc_.cardMemory_ && !isPersistentDirectMap()) {
// @todo remove const cast
@@ -1553,6 +1625,7 @@ void Resource::gpuMemoryUnmap(Pal::IGpuMemory* resource) const {
}
}
+// ================================================================================================
bool Resource::glAcquire() {
bool retVal = true;
if (desc().type_ == OGLInterop) {
@@ -1561,6 +1634,7 @@ bool Resource::glAcquire() {
return retVal;
}
+// ================================================================================================
bool Resource::glRelease() {
bool retVal = true;
if (desc().type_ == OGLInterop) {
@@ -1569,18 +1643,21 @@ bool Resource::glRelease() {
return retVal;
}
+// ================================================================================================
void Resource::addGpuEvent(const VirtualGPU& gpu, GpuEvent event) const {
uint idx = gpu.index();
- assert(idx < memRef_->events_.size());
- memRef_->events_[idx] = event;
+ assert(idx < events_.size());
+ events_[idx] = event;
}
+// ================================================================================================
GpuEvent* Resource::getGpuEvent(const VirtualGPU& gpu) const {
uint idx = gpu.index();
- assert((idx < memRef_->events_.size()) && "Undeclared queue access!");
- return &memRef_->events_[idx];
+ assert((idx < events_.size()) && "Undeclared queue access!");
+ return &events_[idx];
}
+// ================================================================================================
void Resource::palFree() const {
if (desc().type_ == OGLInterop) {
amd::ScopedLock lk(dev().lockPAL());
@@ -1589,6 +1666,7 @@ void Resource::palFree() const {
memRef_->release();
}
+// ================================================================================================
bool Resource::isMemoryType(MemoryType memType) const {
if (memoryType() == memType) {
return true;
@@ -1599,6 +1677,7 @@ bool Resource::isMemoryType(MemoryType memType) const {
return false;
}
+// ================================================================================================
bool Resource::isPersistentDirectMap() const {
bool directMap =
((memoryType() == Resource::Persistent) && (desc().dimSize_ < 3) && !desc().imageArray_);
@@ -1613,6 +1692,7 @@ bool Resource::isPersistentDirectMap() const {
return directMap;
}
+// ================================================================================================
void* Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers) {
if (isMemoryType(Pinned)) {
// Check if we have to wait
@@ -1682,11 +1762,13 @@ void* Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers
return address_;
}
+// ================================================================================================
void* Resource::mapLayers(VirtualGPU* gpu, uint flags) {
Unimplemented();
return nullptr;
}
+// ================================================================================================
void Resource::unmap(VirtualGPU* gpu) {
if (isMemoryType(Pinned)) {
return;
@@ -1713,22 +1795,26 @@ void Resource::unmap(VirtualGPU* gpu) {
}
}
+// ================================================================================================
void Resource::unmapLayers(VirtualGPU* gpu) {
Unimplemented();
}
+// ================================================================================================
void Resource::setActiveRename(VirtualGPU& gpu, GpuMemoryReference* rename) {
// Copy the unique GSL data
memRef_ = rename;
address_ = rename->cpuAddress_;
}
+// ================================================================================================
bool Resource::getActiveRename(VirtualGPU& gpu, GpuMemoryReference** rename) {
// Copy the old data to the rename descriptor
*rename = memRef_;
return true;
}
+// ================================================================================================
bool Resource::rename(VirtualGPU& gpu, bool force) {
GpuEvent* gpuEvent = getGpuEvent(gpu);
if (!gpuEvent->isValid() && !force) {
@@ -1809,6 +1895,7 @@ bool Resource::rename(VirtualGPU& gpu, bool force) {
return true;
}
+// ================================================================================================
void Resource::warmUpRenames(VirtualGPU& gpu) {
// Make sure OCL touches every command buffer in the queue to avoid delays on the first submit
uint flush = dev().settings().maxRenames_ / VirtualGPU::Queue::MaxCmdBuffers;
@@ -1823,13 +1910,113 @@ void Resource::warmUpRenames(VirtualGPU& gpu) {
}
}
+// ================================================================================================
+MemorySubAllocator::~MemorySubAllocator()
+{
+ // Release memory heap for suballocations
+ for (auto it : mem_heap_) {
+ it.first->release();
+ delete it.second;
+ }
+}
+
+// ================================================================================================
+GpuMemoryReference* MemorySubAllocator::Allocate(
+ Pal::gpusize size, Pal::gpusize alignment, Pal::gpusize* offset)
+{
+ GpuMemoryReference* mem_ref = nullptr;
+ // Check if resource size is allowed for suballocation
+ if (size < device_->settings().subAllocationMaxSize_) {
+ uint i = 0;
+ size = amd::alignUp(size, device_->settings().subAllocationMinSize_);
+ do {
+ MemBuddyAllocator* allocator = nullptr;
+ // Find if current heap has enough empty space
+ for (auto it : mem_heap_) {
+ mem_ref = it.first;
+ allocator = it.second;
+ // If we have found a valid chunk, then suballocate memory
+ if (Pal::Result::Success == allocator->Allocate(size, alignment, offset)) {
+ return mem_ref;
+ } else {
+ mem_ref = nullptr;
+ }
+ }
+
+ // Check if a chunk for suballocation doesn't exist
+ if (mem_ref == nullptr) {
+ // Allocate a new chunk in memory
+ Pal::GpuMemoryCreateInfo createInfo = {};
+ createInfo.size = device_->settings().subAllocationChunkSize_;
+ createInfo.alignment = 0;
+ createInfo.vaRange = Pal::VaRange::Default;
+ createInfo.priority = Pal::GpuMemPriority::Normal;
+ createInfo.heapCount = 1;
+ createInfo.heaps[0] = Pal::GpuHeapInvisible;
+ mem_ref = GpuMemoryReference::Create(*device_, createInfo);
+ // If chunk was allocated, then allocate BuddyAllocator object
+ if (mem_ref != nullptr) {
+ allocator = new MemBuddyAllocator(device_,
+ device_->settings().subAllocationChunkSize_,
+ device_->settings().subAllocationMinSize_);
+ if ((allocator != nullptr) &&
+ (Pal::Result::Success == allocator->Init())) {
+ // Add the chunk and suballocator into the heap
+ mem_heap_.insert(std::pair(
+ mem_ref, allocator));
+ } else {
+ delete allocator;
+ mem_ref->release();
+ return nullptr;
+ }
+ } else {
+ return nullptr;
+ }
+ }
+ i++;
+ } while (i < 2);
+ }
+ return mem_ref;
+}
+
+// ================================================================================================
+bool MemorySubAllocator::Free(GpuMemoryReference* ref, Pal::gpusize offset)
+{
+ // Find if current memory reference is a chunk allocation
+ auto it = mem_heap_.find(ref);
+ if (it == mem_heap_.end()) {
+ return false;
+ }
+ // Free suballocation at the specified offset
+ it->second->Free(offset);
+ // If this suballocator empty, then release memory chunk
+ if (it->second->IsEmpty()) {
+ delete it->second;
+ it->first->release();
+ mem_heap_.erase(it);
+ }
+ return true;
+}
+
+// ================================================================================================
ResourceCache::~ResourceCache() { free(); }
+// ================================================================================================
//! \note the cache works in FILO mode
-bool ResourceCache::addGpuMemory(Resource::Descriptor* desc, GpuMemoryReference* ref) {
+bool ResourceCache::addGpuMemory(Resource::Descriptor* desc,
+ GpuMemoryReference* ref, Pal::gpusize offset)
+{
bool result = false;
size_t size = ref->iMem()->Desc().size;
+ if (desc->type_ == Resource::Local) {
+ amd::ScopedLock l(&lockCacheOps_);
+ // Check if runtime can free suballocation in local memory
+ if (memSubAllocLocal_.Free(ref, offset)) {
+ return true;
+ }
+ }
+
// Make sure current allocation isn't bigger than cache
if (((desc->type_ == Resource::Local) || (desc->type_ == Resource::Persistent) ||
(desc->type_ == Resource::Remote) || (desc->type_ == Resource::RemoteUSWC)) &&
@@ -1855,8 +2042,9 @@ bool ResourceCache::addGpuMemory(Resource::Descriptor* desc, GpuMemoryReference*
return result;
}
+// ================================================================================================
GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal::gpusize size,
- Pal::gpusize alignment) {
+ Pal::gpusize alignment, Pal::gpusize* offset) {
amd::ScopedLock l(&lockCacheOps_);
GpuMemoryReference* ref = nullptr;
@@ -1866,6 +2054,13 @@ GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal
return ref;
}
+ if (desc->type_ == Resource::Local) {
+ ref = memSubAllocLocal_.Allocate(size, alignment, offset);
+ if (ref != nullptr) {
+ return ref;
+ }
+ }
+
// Serach the right resource through the cache list
for (const auto& it : resCache_) {
Resource::Descriptor* entry = it.first;
@@ -1886,6 +2081,7 @@ GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal
return ref;
}
+// ================================================================================================
bool ResourceCache::free(size_t minCacheEntries) {
amd::ScopedLock l(&lockCacheOps_);
bool result = false;
@@ -1903,6 +2099,7 @@ bool ResourceCache::free(size_t minCacheEntries) {
return result;
}
+// ================================================================================================
void ResourceCache::removeLast() {
std::pair entry;
entry = resCache_.back();
@@ -1913,7 +2110,7 @@ void ResourceCache::removeLast() {
// Delete Descriptor
delete entry.first;
- // Destroy GSL resource
+ // Destroy PAL resource
entry.second->release();
cacheSize_ -= size;
}
diff --git a/rocclr/runtime/device/pal/palresource.hpp b/rocclr/runtime/device/pal/palresource.hpp
index 0118681006..3329ee077b 100644
--- a/rocclr/runtime/device/pal/palresource.hpp
+++ b/rocclr/runtime/device/pal/palresource.hpp
@@ -6,6 +6,7 @@
#include "platform/command.hpp"
#include "platform/program.hpp"
#include "device/pal/paldefs.hpp"
+#include "util/palBuddyAllocatorImpl.h"
//! \namespace pal PAL Resource Implementation
namespace pal {
@@ -16,7 +17,6 @@ class VirtualGPU;
/*! \addtogroup PAL PAL Resource Implementation
* @{
*/
-
class GpuMemoryReference : public amd::ReferenceCountedObject {
public:
static GpuMemoryReference* Create(const Device& dev, const Pal::GpuMemoryCreateInfo& createInfo);
@@ -36,12 +36,6 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
//! Default constructor
GpuMemoryReference(const Device& dev);
- //! Resizes the events array to account the new queue
- void resizeGpuEvents(uint index) { events_.resize(index + 1); }
-
- //! Erase an entry in the array for provided queue index
- void eraseGpuEvents(uint index) { events_.erase(events_.begin() + index); }
-
//! Get PAL memory object
Pal::IGpuMemory* iMem() const { return gpuMem_; }
@@ -50,7 +44,6 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
const Device& device_; //!< GPU device
//! @note: This field is necessary for the thread safe release only
VirtualGPU* gpu_; //!< Resource will be used only on this queue
- std::vector events_; //!< GPU events associated with the resource
protected:
//! Default destructor
@@ -64,6 +57,8 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
GpuMemoryReference& operator=(const GpuMemoryReference&);
};
+static constexpr Pal::gpusize MaxGpuAlignment = 4 * Ki;
+
//! GPU resource
class Resource : public amd::HeapObject {
public:
@@ -178,7 +173,7 @@ class Resource : public amd::HeapObject {
uint imageArray_ : 1; //!< PAL resource is an array of images
uint buffer_ : 1; //!< PAL resource is a buffer
uint tiled_ : 1; //!< PAL resource is tiled
- uint SVMRes_ : 1; //!< SVM flag to the cal resource
+ uint SVMRes_ : 1; //!< SVM flag to the pal resource
uint scratch_ : 1; //!< Scratch buffer
uint isAllocExecute_ : 1; //!< SVM resource allocation attribute for shader\cmdbuf
uint isDoppTexture_ : 1; //!< PAL resource is for a DOPP desktop texture
@@ -205,9 +200,9 @@ class Resource : public amd::HeapObject {
//! Destructor of the resource
virtual ~Resource();
- /*! \brief Creates a CAL object, associated with the resource
+ /*! \brief Creates a PAL object, associated with the resource
*
- * \return True if we succesfully created a CAL resource
+ * \return True if we succesfully created a PAL resource
*/
virtual bool create(MemoryType memType, //!< memory type
CreateParams* params = 0 //!< special parameters for resource allocation
@@ -263,7 +258,7 @@ class Resource : public amd::HeapObject {
uint64_t vmAddress() const { return iMem()->Desc().gpuVirtAddr + offset_; }
//! Returns global memory offset
- uint64_t vmSize() const { return iMem()->Desc().size - offset_; }
+ uint64_t vmSize() const { return desc_.width_ * elementSize(); }
//! Returns global memory offset
bool mipMapped() const { return (desc().mipLevels_ > 1) ? true : false; }
@@ -290,7 +285,7 @@ class Resource : public amd::HeapObject {
//! Marks the resource as busy
void setBusy(VirtualGPU& gpu, //!< Virtual GPU device object
- GpuEvent calEvent //!< CAL event
+ GpuEvent calEvent //!< PAL event
) const;
//! Wait for the resource
@@ -326,7 +321,7 @@ class Resource : public amd::HeapObject {
//! Get the mapped address of this resource
address data() const { return reinterpret_cast(address_); }
- //! Frees all allocated CAL memories and resources,
+ //! Frees all allocated PAL memories and resources,
//! associated with this objects. And also destroys all rename structures
//! Note: doesn't destroy the object itself
void free();
@@ -360,7 +355,42 @@ class Resource : public amd::HeapObject {
//! Returns GPU event associated with this resource and specified queue
GpuEvent* getGpuEvent(const VirtualGPU& gpu) const;
+ //! Resizes the events array to account the new queue
+ void resizeGpuEvents(uint index) { events_.resize(index + 1); }
+
+ //! Erase an entry in the array for provided queue index
+ void eraseGpuEvents(uint index) { events_.erase(events_.begin() + index); }
+
protected:
+ /*! \brief Creates a PAL iamge object, associated with the resource
+ *
+ * \return True if we succesfully created a PAL resource
+ */
+ bool CreateImage(CreateParams* params //!< special parameters for resource allocation
+ );
+
+ /*! \brief Creates a PAL interop object, associated with the resource
+ *
+ * \return True if we succesfully created a PAL interop resource
+ */
+ bool CreateInterop(CreateParams* params //!< special parameters for resource allocation
+ );
+
+ /*! \brief Creates a PAL pinned object, associated with the resource
+ *
+ * \return True if we succesfully created a PAL pinned resource
+ */
+ bool CreatePinned(CreateParams* params //!< special parameters for resource allocation
+ );
+
+ /*! \brief Creates a PAL SVM object, associated with the resource
+ *
+ * \return True if we succesfully created a PAL SVM resource
+ */
+ bool CreateSvm(CreateParams* params, //!< special parameters for resource allocation
+ Pal::gpusize svmPtr
+ );
+
uint elementSize_; //!< Size of a single element in bytes
private:
@@ -424,6 +454,7 @@ class Resource : public amd::HeapObject {
uint32_t curRename_; //!< Current active rename in the list
RenameList renames_; //!< Rename resource list
GpuMemoryReference* memRef_; //!< PAL resource reference
+ Pal::gpusize subOffset_; //!< GPU memory offset in the oririnal resource
const Resource* viewOwner_; //!< GPU resource, which owns this view
void* glInteropMbRes_; //!< Mb Res handle
uint32_t glType_; //!< GL interop type
@@ -438,26 +469,50 @@ class Resource : public amd::HeapObject {
uint32_t* hwState_; //!< HW state for image object
uint64_t hwSrd_; //!< GPU pointer to HW SRD
+
+ //! Note: Access to the events are thread safe.
+ mutable std::vector events_; //!< GPU events associated with the resource
+};
+
+typedef Util::BuddyAllocator MemBuddyAllocator;
+
+class MemorySubAllocator : public amd::HeapObject {
+public:
+ MemorySubAllocator(Device* device) : device_(device) {}
+
+ ~MemorySubAllocator();
+
+ GpuMemoryReference* Allocate(Pal::gpusize size,
+ Pal::gpusize alignment, Pal::gpusize* offset);
+ bool Free(GpuMemoryReference* ref, Pal::gpusize offset);
+
+private:
+ Device* device_;
+ std::map mem_heap_;
};
class ResourceCache : public amd::HeapObject {
public:
//! Default constructor
- ResourceCache(size_t cacheSizeLimit)
- : lockCacheOps_("PAL resource cache", true), cacheSize_(0), cacheSizeLimit_(cacheSizeLimit) {}
+ ResourceCache(Device* device, size_t cacheSizeLimit)
+ : lockCacheOps_("PAL resource cache", true)
+ , cacheSize_(0)
+ , cacheSizeLimit_(cacheSizeLimit)
+ , memSubAllocLocal_(device) {}
//! Default destructor
~ResourceCache();
- //! Adds a CAL resource to the cache
- bool addGpuMemory(Resource::Descriptor* desc, //!< Resource descriptor - cache key
- GpuMemoryReference* ref //!< Resource reference
+ //! Adds a PAL resource to the cache
+ bool addGpuMemory(Resource::Descriptor* desc, //!< Resource descriptor - cache key
+ GpuMemoryReference* ref, //!< Resource reference
+ Pal::gpusize offset //!< Original resource offset
);
- //! Finds a CAL resource from the cache
+ //! Finds a PAL resource from the cache
GpuMemoryReference* findGpuMemory(
Resource::Descriptor* desc, //!< Resource descriptor - cache key
- Pal::gpusize size, Pal::gpusize alignment);
+ Pal::gpusize size, Pal::gpusize alignment, Pal::gpusize* offset);
//! Destroys cache
bool free(size_t minCacheEntries = 0);
@@ -477,8 +532,10 @@ class ResourceCache : public amd::HeapObject {
size_t cacheSize_; //!< Current cache size in bytes
const size_t cacheSizeLimit_; //!< Cache size limit in bytes
- //! CAL resource cache
+ //! PAL resource cache
std::list > resCache_;
+
+ MemorySubAllocator memSubAllocLocal_; //!< Allocator for suballocations in Local
};
/*@}*/} // namespace pal
diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp
index a0b20f21ea..60d1d30cba 100644
--- a/rocclr/runtime/device/pal/palsettings.cpp
+++ b/rocclr/runtime/device/pal/palsettings.cpp
@@ -138,6 +138,12 @@ Settings::Settings() {
rgpSqttDispCount_ = PAL_RGP_DISP_COUNT;
rgpSqttWaitIdle_ = true;
rgpSqttForceDisable_ = false;
+
+ // Sub allocation parameters
+ subAllocationMinSize_ = 4 * Ki;
+ subAllocationChunkSize_ = 64 * Mi;
+ subAllocationMaxSize_ =
+ std::min(static_cast(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_);
}
bool Settings::create(const Pal::DeviceProperties& palProp,
diff --git a/rocclr/runtime/device/pal/palsettings.hpp b/rocclr/runtime/device/pal/palsettings.hpp
index a6755da9d5..1cdad9c095 100644
--- a/rocclr/runtime/device/pal/palsettings.hpp
+++ b/rocclr/runtime/device/pal/palsettings.hpp
@@ -98,6 +98,10 @@ class Settings : public device::Settings {
uint64_t maxAllocSize_; //!< Maximum single allocation size
uint rgpSqttDispCount_; //!< The number of dispatches captured in SQTT
+ uint64_t subAllocationMinSize_; //!< Minimum size allowed for suballocations
+ uint64_t subAllocationMaxSize_; //!< Maximum size allowed with suballocations
+ uint64_t subAllocationChunkSize_; //!< Chunk size for suballocaitons
+
amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler
//! Default constructor
diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp
index f7564b328f..85dccb0d00 100644
--- a/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/rocclr/runtime/device/pal/palvirtual.cpp
@@ -409,7 +409,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
}
uint64_t curStart = memory->vmAddress();
- uint64_t curEnd = curStart + memory->vmSize();
+ uint64_t curEnd = curStart + memory->size();
// Loop through all memory objects in the queue and find dependency
// @note don't include objects from the current kernel
@@ -1974,6 +1974,7 @@ void VirtualGPU::PostDeviceEnqueue(
uint64_t vmParentWrap,
GpuEvent* gpuEvent)
{
+ uint32_t id = gpuEvent->id;
amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());
// Make sure exculsive access to the device queue
@@ -2055,6 +2056,9 @@ void VirtualGPU::PostDeviceEnqueue(
iCmd()->CmdVirtualQueueHandshake(vmParentWrap + offsetof(AmdAqlWrap, state), AQL_WRAP_DONE,
vmParentWrap + offsetof(AmdAqlWrap, child_counter),
signalAddr, dev().settings().useDeviceQueue_);
+ if (id != gpuEvent->id) {
+ LogError("Something is wrong. ID mismatch!\n");
+ }
eventEnd(MainEngine, *gpuEvent);
}
@@ -2203,6 +2207,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
if (profiling() || state_.profileEnabled_) {
addBarrier();
}
+ if (id != gpuEvent.id) {
+ LogError("Something is wrong. ID mismatch!\n");
+ }
eventEnd(MainEngine, gpuEvent);
// Execute scheduler for device enqueue
@@ -2210,9 +2217,6 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
PostDeviceEnqueue(kernel, hsaKernel, gpuDefQueue, vmDefQueue, vmParentWrap, &gpuEvent);
}
- if (id != gpuEvent.id) {
- LogError("Something is wrong. ID mismatch!\n");
- }
// Update the global GPU event
setGpuEvent(gpuEvent, needFlush);
@@ -2266,7 +2270,7 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) {
}
}
-void VirtualGPU::releaseMemory(GpuMemoryReference* mem, GpuEvent* event) {
+void VirtualGPU::releaseMemory(GpuMemoryReference* mem) {
queues_[MainEngine]->removeCmdMemRef(mem);
queues_[SdmaEngine]->removeCmdMemRef(mem);
}
diff --git a/rocclr/runtime/device/pal/palvirtual.hpp b/rocclr/runtime/device/pal/palvirtual.hpp
index 43b67b17f1..fff4332f8b 100644
--- a/rocclr/runtime/device/pal/palvirtual.hpp
+++ b/rocclr/runtime/device/pal/palvirtual.hpp
@@ -314,7 +314,7 @@ class VirtualGPU : public device::VirtualDevice {
virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd);
virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd);
- void releaseMemory(GpuMemoryReference* mem, GpuEvent* event);
+ void releaseMemory(GpuMemoryReference* mem);
void flush(amd::Command* list = nullptr, bool wait = false);
bool terminate() { return true; }
diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp
index 7db658aee6..eb29a04ed4 100644
--- a/rocclr/runtime/utils/flags.hpp
+++ b/rocclr/runtime/utils/flags.hpp
@@ -86,6 +86,8 @@ release(size_t, GPU_PINNED_MIN_XFER_SIZE, 512, \
"The minimal buffer size for pinned read/write transfers in KBytes") \
release(size_t, GPU_RESOURCE_CACHE_SIZE, 64, \
"The resource cache size in MB") \
+release(size_t, GPU_MAX_SUBALLOC_SIZE, 4096, \
+ "The maximum size accepted for suballocaitons in KB") \
release(uint, GPU_ASYNC_MEM_COPY, 0, \
"Enables async memory transfers with DRM engine") \
release(bool, GPU_FORCE_64BIT_PTR, 0, \