From 7ae94da05b856ea8f678d5565d72e8ec37fbab15 Mon Sep 17 00:00:00 2001
From: foreman <dl.constructicon@amd.com>
Date: Thu, 15 Mar 2018 17:26:25 -0400
Subject: [PATCH] P4 to Git Change 1527848 by gandryey@gera-w8 on 2018/03/15
 17:11:43

	SWDEV-79445 - OCL generic changes and code clean-up
	- Add suballocation support for local (invisible) memory. It should significantly improve memory footprint and TLB usage with 2MB pages
	- The implementation uses the BuddyAllocator provided by PAL
	- The chunk allocation size is 64MB; the min allocation is 4KB and the max is 4MB. GPU_MAX_SUBALLOC_SIZE controls the max size in KB

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#76 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#56 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#77 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#285 edit
---
 rocclr/runtime/device/pal/paldefs.hpp     |    1 +
 rocclr/runtime/device/pal/paldevice.cpp   |   25 +-
 rocclr/runtime/device/pal/paldevice.hpp   |   25 +-
 rocclr/runtime/device/pal/palprogram.cpp  |    4 +-
 rocclr/runtime/device/pal/palresource.cpp | 1427 ++++++++++++---------
 rocclr/runtime/device/pal/palresource.hpp |  101 +-
 rocclr/runtime/device/pal/palsettings.cpp |    6 +
 rocclr/runtime/device/pal/palsettings.hpp |    4 +
 rocclr/runtime/device/pal/palvirtual.cpp  |   14 +-
 rocclr/runtime/device/pal/palvirtual.hpp  |    2 +-
 rocclr/runtime/utils/flags.hpp            |    2 +
 11 files changed, 947 insertions(+), 664 deletions(-)

diff --git a/rocclr/runtime/device/pal/paldefs.hpp b/rocclr/runtime/device/pal/paldefs.hpp
index 7a21fb6852..06740582cf 100644
--- a/rocclr/runtime/device/pal/paldefs.hpp
+++ b/rocclr/runtime/device/pal/paldefs.hpp
@@ -8,6 +8,7 @@
 #include "palGpuMemory.h"
 #include "palImage.h"
 #include "palFormatInfo.h"
+#include "util/palSysMemory.h"
 
 //
 /// Memory Object Type
diff --git a/rocclr/runtime/device/pal/paldevice.cpp b/rocclr/runtime/device/pal/paldevice.cpp
index b8c59aa5df..1a71e3adf4 100644
--- a/rocclr/runtime/device/pal/paldevice.cpp
+++ b/rocclr/runtime/device/pal/paldevice.cpp
@@ -54,6 +54,10 @@ void PalDeviceUnload() { pal::Device::tearDown(); }
 
 namespace pal {
 
+Util::GenericAllocator  NullDevice::allocator_;
+char* Device::platformObj_;
+Pal::IPlatform*  Device::platform_;
+
 NullDevice::Compiler* NullDevice::compiler_;
 AppProfile Device::appProfile_;
 
@@ -183,6 +187,7 @@ bool NullDevice::init() {
   return true;
 }
 
+
 bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
                         uint xNACKSupported) {
   online_ = false;
@@ -736,7 +741,7 @@ bool Device::create(Pal::IDevice* device) {
   if (!amd::Device::create()) {
     return false;
   }
-  resourceList_ = new std::list<GpuMemoryReference*>();
+  resourceList_ = new std::list<Resource*>();
   if (nullptr == resourceList_) {
     return false;
   }
@@ -865,7 +870,7 @@ bool Device::create(Pal::IDevice* device) {
   size_t resourceCacheSize = settings().resourceCacheSize_;
   // Create resource cache.
   // \note Cache must be created before any resource creation to avoid nullptr check
-  resourceCache_ = new ResourceCache(resourceCacheSize);
+  resourceCache_ = new ResourceCache(this, resourceCacheSize);
   if (nullptr == resourceCache_) {
     return false;
   }
@@ -925,8 +930,6 @@ bool Device::create(Pal::IDevice* device) {
   return true;
 }
 
-static Pal::IPlatform* platform;
-
 bool Device::initializeHeapResources() {
   amd::ScopedLock k(lockForInitHeap_);
   if (!heapInitComplete_) {
@@ -998,7 +1001,7 @@ bool Device::initializeHeapResources() {
     xferQueue_->enableSyncedBlit();
 
     // Create RGP capture manager
-    rgpCaptureMgr_ = RgpCaptureMgr::Create(platform, *this);
+    rgpCaptureMgr_ = RgpCaptureMgr::Create(platform_, *this);
   }
   return true;
 }
@@ -1096,8 +1099,6 @@ static int reportHook(int reportType, char* message, int* returnValue) {
 }
 #endif  // _WIN32 & DEBUG
 
-static char* platformObj;
-
 bool Device::init() {
   uint32_t numDevices = 0;
   bool useDeviceList = false;
@@ -1123,7 +1124,7 @@ bool Device::init() {
 #endif  // !defined(WITH_LIGHTNING_COMPILER)
 
   size_t size = Pal::GetPlatformSize();
-  platformObj = new char[size];
+  platformObj_ = new char[size];
   Pal::PlatformCreateInfo info = {};
   info.flags.disableGpuTimeout = true;
 #if !defined(PAL_BUILD_DTIF)
@@ -1138,14 +1139,14 @@ bool Device::init() {
   info.maxSvmSize = static_cast<Pal::gpusize>(OCL_SET_SVM_SIZE * Mi);
 
   // PAL init
-  if (Pal::Result::Success != Pal::CreatePlatform(info, platformObj, &platform)) {
+  if (Pal::Result::Success != Pal::CreatePlatform(info, platformObj_, &platform_)) {
     return false;
   }
 
   // Get the total number of active devices
   // Count up all the devices in the system.
   Pal::IDevice* deviceList[Pal::MaxDevices] = {};
-  platform->EnumerateDevices(&numDevices, &deviceList[0]);
+  platform_->EnumerateDevices(&numDevices, &deviceList[0]);
 
   uint ordinal = 0;
   const char* selectDeviceByName = nullptr;
@@ -1175,8 +1176,8 @@ bool Device::init() {
 }
 
 void Device::tearDown() {
-  platform->Destroy();
-  delete platformObj;
+  platform_->Destroy();
+  delete[] platformObj_;  // array form: platformObj_ comes from new char[size] in Device::init()
 
 #if !defined(WITH_LIGHTNING_COMPILER)
   if (compiler_ != nullptr) {
diff --git a/rocclr/runtime/device/pal/paldevice.hpp b/rocclr/runtime/device/pal/paldevice.hpp
index b92ea73456..fc0640c917 100644
--- a/rocclr/runtime/device/pal/paldevice.hpp
+++ b/rocclr/runtime/device/pal/paldevice.hpp
@@ -120,7 +120,12 @@ class NullDevice : public amd::Device {
   amd::CacheCompilation* cacheCompilation() const { return cacheCompilation_.get(); }
 #endif
 
+  void* Alloc(const Util::AllocInfo& allocInfo) { return allocator_.Alloc(allocInfo); }
+  void  Free(const Util::FreeInfo& freeInfo) { allocator_.Free(freeInfo); }
+
  protected:
+  static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL
+
   Pal::AsicRevision asicRevision_;  //!< ASIC revision
   Pal::GfxIpLevel ipLevel_;         //!< Device IP level
   const AMDDeviceInfo* hwInfo_;     //!< Device HW info structure
@@ -464,6 +469,9 @@ class Device : public NullDevice {
   //! Returns PAL device properties
   const Pal::DeviceProperties& properties() const { return properties_; }
 
+  //! Returns PAL platform interface
+  Pal::IPlatform* iPlat() const { return platform_; }
+
   //! Returns PAL device interface
   Pal::IDevice* iDev() const { return device_; }
 
@@ -496,19 +504,19 @@ class Device : public NullDevice {
   bool resGLFree(void* GLplatformContext, void* mbResHandle, uint type) const;
 
   //! Adds a resource to the global list
-  void addResource(GpuMemoryReference* mem) const {
+  void addResource(Resource* res) const {
     amd::ScopedLock lock(lockResources());
-    auto findIt = std::find(resourceList_->begin(), resourceList_->end(), mem);
-    mem->events_.resize(numOfVgpus());
+    auto findIt = std::find(resourceList_->begin(), resourceList_->end(), res);
+    res->resizeGpuEvents(numOfVgpus() - 1);
     if (resourceList_->end() == findIt) {
-      resourceList_->push_back(mem);
+      resourceList_->push_back(res);
     }
   }
 
   //! Removes a resource from the global list
-  void removeResource(GpuMemoryReference* mem) const {
+  void removeResource(Resource* res) const {
     amd::ScopedLock lock(lockResources());
-    resourceList_->remove(mem);
+    resourceList_->remove(res);
   }
 
   //! Resizes global resource list to accumulate a new queue
@@ -566,6 +574,9 @@ class Device : public NullDevice {
   bool glAssociate(void* GLplatformContext, void* GLdeviceContext) const;
   bool glDissociate(void* GLplatformContext, void* GLdeviceContext) const;
 
+  static char* platformObj_;          //!< Memory allocated for PAL platform object
+  static Pal::IPlatform*  platform_;  //!< Pointer to the PAL platform object
+
   amd::Context* context_;       //!< A dummy context for internal allocations
   amd::Monitor* lockAsyncOps_;  //!< Lock to serialise all async ops on this device
   amd::Monitor*
@@ -592,7 +603,7 @@ class Device : public NullDevice {
   Pal::IDevice* device_;                 //!< PAL device object
   std::atomic<Pal::gpusize> freeMem[Pal::GpuHeap::GpuHeapCount];  //!< Free memory counter
   amd::Monitor* lockResourceOps_;        //!< Lock to serialise resource access
-  std::list<GpuMemoryReference*>* resourceList_;     //!< Active resource list
+  std::list<Resource*>* resourceList_;   //!< Active resource list
   RgpCaptureMgr*   rgpCaptureMgr_;       //!< RGP capture manager
 };
 
diff --git a/rocclr/runtime/device/pal/palprogram.cpp b/rocclr/runtime/device/pal/palprogram.cpp
index 54c0839063..9e9bb5d356 100644
--- a/rocclr/runtime/device/pal/palprogram.cpp
+++ b/rocclr/runtime/device/pal/palprogram.cpp
@@ -89,14 +89,14 @@ void Segment::copy(size_t offset, const void* src, size_t size) {
     amd::ScopedLock k(gpuAccess_->dev().xferMgr().lockXfer());
     VirtualGPU& gpu = *gpuAccess_->dev().xferQueue();
     Memory& xferBuf = gpuAccess_->dev().xferWrite().acquire();
-    size_t tmpSize = std::min(static_cast<size_t>(xferBuf.vmSize()), size);
+    size_t tmpSize = std::min(static_cast<size_t>(xferBuf.size()), size);
     size_t srcOffs = 0;
     while (size != 0) {
       xferBuf.hostWrite(&gpu, reinterpret_cast<const_address>(src) + srcOffs, 0, tmpSize);
       xferBuf.partialMemCopyTo(gpu, 0, (offset + srcOffs), tmpSize, *gpuAccess_, false, true);
       size -= tmpSize;
       srcOffs += tmpSize;
-      tmpSize = std::min(static_cast<size_t>(xferBuf.vmSize()), size);
+      tmpSize = std::min(static_cast<size_t>(xferBuf.size()), size);
     }
     gpu.waitAllEngines();
   }
diff --git a/rocclr/runtime/device/pal/palresource.cpp b/rocclr/runtime/device/pal/palresource.cpp
index 9e67e72b7b..0524ad2c72 100644
--- a/rocclr/runtime/device/pal/palresource.cpp
+++ b/rocclr/runtime/device/pal/palresource.cpp
@@ -28,6 +28,7 @@
 
 namespace pal {
 
+// ================================================================================================
 GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                                const Pal::GpuMemoryCreateInfo& createInfo) {
   Pal::Result result;
@@ -48,10 +49,10 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
   }
   // Update free memory size counters
   const_cast<Device&>(dev).updateFreeMemory(createInfo.heaps[0], createInfo.size, false);
-  dev.addResource(memRef);
   return memRef;
 }
 
+// ================================================================================================
 GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                                const Pal::PinnedGpuMemoryCreateInfo& createInfo) {
   Pal::Result result;
@@ -71,10 +72,10 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
   }
   // Update free memory size counters
   const_cast<Device&>(dev).updateFreeMemory(Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size, false);
-  dev.addResource(memRef);
   return memRef;
 }
 
+// ================================================================================================
 GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                                const Pal::SvmGpuMemoryCreateInfo& createInfo) {
   Pal::Result result;
@@ -94,10 +95,10 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
   // Update free memory size counters
   const_cast<Device&>(dev).updateFreeMemory(Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size,
                                             false);
-  dev.addResource(memRef);
   return memRef;
 }
 
+// ================================================================================================
 GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                                const Pal::ExternalGpuMemoryOpenInfo& openInfo) {
   Pal::Result result;
@@ -116,10 +117,10 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
       return nullptr;
     }
   }
-  dev.addResource(memRef);
   return memRef;
 }
 
+// ================================================================================================
 GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                                const Pal::ExternalImageOpenInfo& openInfo,
                                                Pal::ImageCreateInfo* imgCreateInfo,
@@ -143,33 +144,34 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
       return nullptr;
     }
   }
-  dev.addResource(memRef);
   return memRef;
 }
 
+// ================================================================================================
 GpuMemoryReference::GpuMemoryReference(const Device& dev)
-  : gpuMem_(nullptr), cpuAddress_(nullptr), device_(dev), gpu_(nullptr), events_(dev.numOfVgpus()) {}
+  : gpuMem_(nullptr), cpuAddress_(nullptr), device_(dev), gpu_(nullptr)
+{}
 
+// ================================================================================================
 GpuMemoryReference::~GpuMemoryReference() {
   if (gpu_ == nullptr) {
-    {
-      Device::ScopedLockVgpus lock(device_);
-      // Release all memory objects on all virtual GPUs
-      for (uint idx = 1; idx < device_.vgpus().size(); ++idx) {
-        device_.vgpus()[idx]->releaseMemory(this, &events_[idx]);
-      }
+    Device::ScopedLockVgpus lock(device_);
+    // Release all memory objects on all virtual GPUs
+    for (uint idx = 1; idx < device_.vgpus().size(); ++idx) {
+      device_.vgpus()[idx]->releaseMemory(this);
     }
   } else {
     amd::ScopedLock l(gpu_->execution());
-    gpu_->releaseMemory(this, &events_[gpu_->index()]);
+    gpu_->releaseMemory(this);
   }
   if (device_.vgpus().size() != 0) {
     assert(device_.vgpus()[0] == device_.xferQueue() && "Wrong transfer queue!");
     // Lock the transfer queue, since it's not handled by ScopedLockVgpus
     amd::ScopedLock k(device_.xferMgr().lockXfer());
-    device_.vgpus()[0]->releaseMemory(this, &events_[0]);
+    device_.vgpus()[0]->releaseMemory(this);
   }
 
+  // Destroy PAL object if it's not a suballocation
   if (cpuAddress_ != nullptr) {
     iMem()->Unmap();
   }
@@ -177,9 +179,9 @@ GpuMemoryReference::~GpuMemoryReference() {
     iMem()->Destroy();
     gpuMem_ = nullptr;
   }
-  device_.removeResource(this);
 }
 
+// ================================================================================================
 Resource::Resource(const Device& gpuDev, size_t size)
     : elementSize_(0),
       gpuDevice_(gpuDev),
@@ -188,9 +190,11 @@ Resource::Resource(const Device& gpuDev, size_t size)
       offset_(0),
       curRename_(0),
       memRef_(nullptr),
+      subOffset_(0),
       viewOwner_(nullptr),
       image_(nullptr),
-      hwSrd_(0) {
+      hwSrd_(0),
+      events_(gpuDev.numOfVgpus()) {
   // Fill resource descriptor fields
   desc_.state_ = 0;
   desc_.type_ = Empty;
@@ -213,8 +217,10 @@ Resource::Resource(const Device& gpuDev, size_t size)
   desc_.scratch_ = false;
   desc_.isAllocExecute_ = false;
   desc_.baseLevel_ = 0;
+  gpuDev.addResource(this);
 }
 
+// ================================================================================================
 Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t depth,
                    cl_image_format format, cl_mem_object_type imageType, uint mipLevels)
     : elementSize_(0),
@@ -224,9 +230,11 @@ Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t dep
       offset_(0),
       curRename_(0),
       memRef_(nullptr),
+      subOffset_(0),
       viewOwner_(nullptr),
       image_(nullptr),
-      hwSrd_(0) {
+      hwSrd_(0),
+      events_(gpuDev.numOfVgpus()) {
   // Fill resource descriptor fields
   desc_.state_ = 0;
   desc_.type_ = Empty;
@@ -273,8 +281,10 @@ Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t dep
       LogError("Unknown image type!");
       break;
   }
+  gpuDev.addResource(this);
 }
 
+// ================================================================================================
 Resource::~Resource() {
   Pal::GpuHeap heap = Pal::GpuHeapCount;
   switch (memoryType()) {
@@ -313,8 +323,10 @@ Resource::~Resource() {
     image_->Destroy();
     delete[] reinterpret_cast<char*>(image_);
   }
+  gpuDevice_.removeResource(this);
 }
 
+// ================================================================================================
 static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
   static const uint32_t FormatType[] = {HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8,
                                         HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16,
@@ -338,6 +350,7 @@ static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
   return FormatType[idx];
 }
 
+// ================================================================================================
 static uint32_t GetHSAILImageOrderType(const cl_image_format& format) {
   static const uint32_t OrderType[] = {HSA_EXT_IMAGE_CHANNEL_ORDER_R,
                                        HSA_EXT_IMAGE_CHANNEL_ORDER_A,
@@ -365,6 +378,7 @@ static uint32_t GetHSAILImageOrderType(const cl_image_format& format) {
   return OrderType[idx];
 }
 
+// ================================================================================================
 void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
   createInfo->heapCount = 1;
   switch (memoryType()) {
@@ -400,18 +414,623 @@ void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
   }
 }
 
-bool Resource::create(MemoryType memType, CreateParams* params) {
-  static const Pal::gpusize MaxGpuAlignment = 64 * Ki;
-  const amd::HostMemoryReference* hostMemRef = nullptr;
-  bool imageCreateView = false;
+// ================================================================================================
+bool Resource::CreateImage(CreateParams* params)
+{
+  Pal::Result result;
+  Pal::SubresId ImgSubresId = { Pal::ImageAspect::Color, 0, 0 };
+  Pal::SubresRange ImgSubresRange = { ImgSubresId, 1, 1 };
+  Pal::ChannelMapping channels;
+  Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
+
+  if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+    if (memoryType() == ImageBuffer) {
+      ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
+      viewOwner_ = imageBuffer->resource_;
+      memRef_ = viewOwner_->memRef_;
+      memRef_->retain();
+      desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
+    }
+    else {
+      Pal::GpuMemoryCreateInfo createInfo = {};
+      createInfo.size = desc().width_ * elementSize();
+      createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
+      createInfo.alignment = MaxGpuAlignment;
+      createInfo.vaRange = Pal::VaRange::Default;
+      createInfo.priority = Pal::GpuMemPriority::Normal;
+      memTypeToHeap(&createInfo);
+      // createInfo.priority;
+      memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
+        createInfo.alignment, &subOffset_);
+      if (nullptr == memRef_) {
+        memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+        if (nullptr == memRef_) {
+          LogError("Failed PAL memory allocation!");
+          return false;
+        }
+      }
+      offset_ += static_cast<size_t>(subOffset_);
+    }
+    // Check if memory is locked already and restore CPU pointer
+    if (memRef_->cpuAddress_ != nullptr) {
+      address_ = memRef_->cpuAddress_;
+      memRef_->cpuAddress_ = nullptr;
+      mapCount_++;
+    }
+    Pal::BufferViewInfo viewInfo = {};
+    viewInfo.gpuAddr = vmAddress();
+    viewInfo.range = memRef_->iMem()->Desc().size;
+    viewInfo.stride = elementSize();
+    viewInfo.swizzledFormat.format = format;
+    viewInfo.swizzledFormat.swizzle = channels;
+    // viewInfo.channels = channels;
+    hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
+    if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+      return false;
+    }
+
+    dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
+    hwState_[8] = GetHSAILImageFormatType(desc().format_);
+    hwState_[9] = GetHSAILImageOrderType(desc().format_);
+    hwState_[10] = static_cast<uint32_t>(desc().width_);
+    hwState_[11] = 0;  // one extra reserved field in the argument
+    return true;
+  }
+
+  Pal::ImageViewInfo viewInfo = {};
+  Pal::ImageCreateInfo imgCreateInfo = {};
+  Pal::GpuMemoryRequirements req = {};
+  imgCreateInfo.imageType = Pal::ImageType::Tex2d;
+  viewInfo.viewType = Pal::ImageViewType::Tex2d;
+  imgCreateInfo.extent.width = desc_.width_;
+  imgCreateInfo.extent.height = desc_.height_;
+  imgCreateInfo.extent.depth = desc_.depth_;
+  imgCreateInfo.arraySize = 1;
+
+  switch (desc_.topology_) {
+  case CL_MEM_OBJECT_IMAGE3D:
+    imgCreateInfo.imageType = Pal::ImageType::Tex3d;
+    viewInfo.viewType = Pal::ImageViewType::Tex3d;
+    break;
+  case CL_MEM_OBJECT_IMAGE1D:
+  case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+  case CL_MEM_OBJECT_IMAGE1D_BUFFER:
+    imgCreateInfo.imageType = Pal::ImageType::Tex1d;
+    viewInfo.viewType = Pal::ImageViewType::Tex1d;
+    break;
+  }
+  if (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+    ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.height_;
+    imgCreateInfo.extent.depth = desc_.height_;
+    imgCreateInfo.extent.height = 1;
+  }
+  if (desc_.topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+    ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.depth_;
+  }
+
+  if (memoryType() == ImageView) {
+    ImageViewParams* imageView = reinterpret_cast<ImageViewParams*>(params);
+    ImgSubresRange.startSubres.mipLevel = imageView->level_;
+    desc_.baseLevel_ = imageView->level_;
+    ImgSubresRange.startSubres.arraySlice = imageView->layer_;
+    viewOwner_ = imageView->resource_;
+    image_ = viewOwner_->image_;
+  }
+  else if (memoryType() == ImageBuffer) {
+    ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
+    viewOwner_ = imageBuffer->resource_;
+  }
+  if (nullptr != viewOwner_) {
+    offset_ = viewOwner_->offset();
+  }
+  ImgSubresRange.numMips = desc().mipLevels_;
+
+  if ((memoryType() != ImageView) ||
+    //! @todo PAL doesn't allow an SRD view creation with different pixel size
+    (elementSize() != viewOwner_->elementSize())) {
+    imgCreateInfo.usageFlags.shaderRead = true;
+    imgCreateInfo.usageFlags.shaderWrite =
+      (format == Pal::ChNumFormat::X8Y8Z8W8_Srgb) ? false : true;
+    imgCreateInfo.swizzledFormat.format = format;
+    imgCreateInfo.swizzledFormat.swizzle = channels;
+    imgCreateInfo.mipLevels = (desc_.mipLevels_) ? desc_.mipLevels_ : 1;
+    imgCreateInfo.samples = 1;
+    imgCreateInfo.fragments = 1;
+    Pal::ImageTiling tiling = Pal::ImageTiling::Optimal;
+    uint32_t rowPitch = 0;
+
+    if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) ||
+      (memoryType() == ImageBuffer)) {
+      tiling = Pal::ImageTiling::Linear;
+    }
+    else if (memoryType() == ImageView) {
+      tiling = viewOwner_->image_->GetImageCreateInfo().tiling;
+      // Find the new pitch in pixels for the new format
+      rowPitch = viewOwner_->desc().pitch_ * viewOwner_->elementSize() / elementSize();
+    }
+
+    if (memoryType() == ImageBuffer) {
+      if ((params->owner_ != NULL) && params->owner_->asImage() &&
+        (params->owner_->asImage()->getRowPitch() != 0)) {
+        rowPitch = params->owner_->asImage()->getRowPitch() / elementSize();
+      }
+      else {
+        rowPitch = desc().width_;
+      }
+    }
+    desc_.pitch_ = rowPitch;
+    // Make sure the row pitch is aligned to pixels
+    imgCreateInfo.rowPitch =
+      elementSize() * amd::alignUp(rowPitch, dev().info().imagePitchAlignment_);
+    imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+    imgCreateInfo.tiling = tiling;
+
+    size_t imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
+    if (result != Pal::Result::Success) {
+      return false;
+    }
+
+    char* memImg = new char[imageSize];
+    if (memImg != nullptr) {
+      result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
+      if (result != Pal::Result::Success) {
+        delete[] memImg;
+        return false;
+      }
+    }
+    image_->GetGpuMemoryRequirements(&req);
+    // createInfo.priority;
+  }
+
+  if ((memoryType() != ImageView) && (memoryType() != ImageBuffer)) {
+    Pal::GpuMemoryCreateInfo createInfo = {};
+    createInfo.size = amd::alignUp(req.size, MaxGpuAlignment);
+    createInfo.alignment = std::max(req.alignment, MaxGpuAlignment);
+    createInfo.vaRange = Pal::VaRange::Default;
+    createInfo.priority = Pal::GpuMemPriority::Normal;
+    memTypeToHeap(&createInfo);
+
+    memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
+      createInfo.alignment, &subOffset_);
+    if (nullptr == memRef_) {
+      memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+      if (nullptr == memRef_) {
+        LogError("Failed PAL memory allocation!");
+        return false;
+      }
+    }
+    offset_ += static_cast<size_t>(subOffset_);
+  }
+  else {
+    memRef_ = viewOwner_->memRef_;
+    memRef_->retain();
+    desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
+    if (req.size > viewOwner_->iMem()->Desc().size) {
+      LogWarning("Image is bigger than the original mem object!");
+    }
+  }
+  // Check if memory is locked already and restore CPU pointer
+  if (memRef_->cpuAddress_ != nullptr) {
+    address_ = memRef_->cpuAddress_;
+    memRef_->cpuAddress_ = nullptr;
+    mapCount_++;
+  }
+  result = image_->BindGpuMemory(memRef_->gpuMem_, offset_);
+  if (result != Pal::Result::Success) {
+    return false;
+  }
+
+  hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
+  if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+    return false;
+  }
+  viewInfo.pImage = image_;
+  viewInfo.swizzledFormat.format = format;
+  viewInfo.swizzledFormat.swizzle = channels;
+  viewInfo.subresRange = ImgSubresRange;
+  dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
+
+  hwState_[8] = GetHSAILImageFormatType(desc().format_);
+  hwState_[9] = GetHSAILImageOrderType(desc().format_);
+  hwState_[10] = static_cast<uint32_t>(desc().width_);
+  hwState_[11] = 0;  // one extra reserved field in the argument
+  return true;
+}
+
+// ================================================================================================
+bool Resource::CreateInterop(CreateParams* params)
+{
+  Pal::Result result;
+  Pal::SubresId ImgSubresId = { Pal::ImageAspect::Color, 0, 0 };
+  Pal::SubresRange ImgSubresRange = { ImgSubresId, 1, 1 };
+  Pal::ChannelMapping channels;
+  Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
+  Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {};
+  Pal::ExternalResourceOpenInfo& openInfo = gpuMemOpenInfo.resourceInfo;
+  uint misc = 0;
+  uint layer = 0;
+  uint mipLevel = 0;
+  InteropType type = InteropTypeless;
+
+  if (memoryType() == OGLInterop) {
+    OGLInteropParams* oglRes = reinterpret_cast<OGLInteropParams*>(params);
+    assert(oglRes->glPlatformContext_ && "We don't have OGL context!");
+    switch (oglRes->type_) {
+    case InteropVertexBuffer:
+      glType_ = GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD;
+      break;
+    case InteropRenderBuffer:
+      glType_ = GL_RESOURCE_ATTACH_RENDERBUFFER_AMD;
+      break;
+    case InteropTexture:
+    case InteropTextureViewLevel:
+    case InteropTextureViewCube:
+      glType_ = GL_RESOURCE_ATTACH_TEXTURE_AMD;
+      break;
+    default:
+      LogError("Unknown OGL interop type!");
+      return false;
+      break;
+    }
+    glPlatformContext_ = oglRes->glPlatformContext_;
+    layer = oglRes->layer_;
+    type = oglRes->type_;
+    mipLevel = oglRes->mipLevel_;
+
+    if (!dev().resGLAssociate(oglRes->glPlatformContext_, oglRes->handle_, glType_,
+      &openInfo.hExternalResource, &glInteropMbRes_, &offset_, desc_.format_
+#ifdef ATI_OS_WIN
+      , openInfo.doppDesktopInfo
+#endif
+    )) {
+      return false;
+    }
+    desc_.isDoppTexture_ = (openInfo.doppDesktopInfo.gpuVirtAddr != 0);
+    format = dev().getPalFormat(desc().format_, &channels);
+  }
+#ifdef ATI_OS_WIN	
+  else {
+    D3DInteropParams* d3dRes = reinterpret_cast<D3DInteropParams*>(params);
+    openInfo.hExternalResource = d3dRes->handle_;
+    misc = d3dRes->misc;
+    layer = d3dRes->layer_;
+    type = d3dRes->type_;
+    mipLevel = d3dRes->mipLevel_;
+  }
+#endif
+  //! @todo PAL query for image/buffer object doesn't work properly!
+#if 0
+  bool    isImage = false;
+  if (Pal::Result::Success !=
+    dev().iDev()->DetermineExternalSharedResourceType(openInfo, &isImage)) {
+    return false;
+  }
+#endif  // 0
+  if (desc().buffer_ || misc) {
+    memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
+    if (nullptr == memRef_) {
+      return false;
+    }
+
+    if (misc) {
+      Pal::ImageCreateInfo imgCreateInfo = {};
+      Pal::ExternalImageOpenInfo imgOpenInfo = {};
+      imgOpenInfo.resourceInfo = openInfo;
+      imgOpenInfo.swizzledFormat.format = format;
+      imgOpenInfo.swizzledFormat.swizzle = channels;
+      imgOpenInfo.usage.shaderRead = true;
+      imgOpenInfo.usage.shaderWrite = true;
+      size_t imageSize;
+      size_t gpuMemSize;
+
+      if (Pal::Result::Success !=
+        dev().iDev()->GetExternalSharedImageSizes(imgOpenInfo, &imageSize, &gpuMemSize,
+          &imgCreateInfo)) {
+        return false;
+      }
+
+      Pal::gpusize viewOffset = 0;
+      imgCreateInfo.flags.shareable = false;
+      imgCreateInfo.imageType = Pal::ImageType::Tex2d;
+      imgCreateInfo.extent.width = desc().width_;
+      imgCreateInfo.extent.height = desc().height_;
+      imgCreateInfo.extent.depth = desc().depth_;
+      imgCreateInfo.arraySize = 1;
+      imgCreateInfo.usageFlags.shaderRead = true;
+      imgCreateInfo.usageFlags.shaderWrite = true;
+      imgCreateInfo.swizzledFormat.format = format;
+      imgCreateInfo.swizzledFormat.swizzle = channels;
+      imgCreateInfo.mipLevels = 1;
+      imgCreateInfo.samples = 1;
+      imgCreateInfo.fragments = 1;
+      imgCreateInfo.tiling = Pal::ImageTiling::Linear;
+      imgCreateInfo.depthPitch = desc().height_ * imgCreateInfo.rowPitch;
+
+      switch (misc) {
+      case 1:  // NV12 format
+        switch (layer) {
+        case -1:
+        case 0:
+          break;
+        case 1:
+          // Y - plane size to the offset
+          // NV12 format. UV is 2 times smaller plane Y
+          viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
+          imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+          break;
+        default:
+          LogError("Unknown Interop View Type");
+          return false;
+        }
+        break;
+      case 2:  // YV12 format
+        switch (layer) {
+        case -1:
+        case 0:
+          break;
+        case 1:
+          // Y - plane size to the offset
+          // YV12 format. U is 4 times smaller plane than Y
+          viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
+          imgCreateInfo.rowPitch >>= 1;
+          break;
+        case 2:
+          // Y + U plane sizes to the offset.
+          // U plane is 4 times smaller than Y and U == V
+          viewOffset = 5 * imgCreateInfo.rowPitch * desc().height_ / 2;
+          imgCreateInfo.rowPitch >>= 1;
+          break;
+        default:
+          LogError("Unknown Interop View Type");
+          return false;
+        }
+        imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+        break;
+      case 3:  // YUY2 format
+        imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+        break;
+      default:
+        LogError("Unknown Interop View Type");
+        return false;
+      }
+
+      imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
+      if (result != Pal::Result::Success) {
+        return false;
+      }
+
+      char* memImg = new char[imageSize];
+      if (memImg != nullptr) {
+        result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
+        if (result != Pal::Result::Success) {
+          delete[] memImg;
+          return false;
+        }
+      }
+      offset_ += static_cast<size_t>(viewOffset);
+      result = image_->BindGpuMemory(iMem(), offset_);
+      if (result != Pal::Result::Success) {
+        return false;
+      }
+      hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
+      if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+        return false;
+      }
+      Pal::ImageViewInfo viewInfo = {};
+      viewInfo.viewType = Pal::ImageViewType::Tex2d;
+      viewInfo.pImage = image_;
+      viewInfo.swizzledFormat.format = format;
+      viewInfo.swizzledFormat.swizzle = channels;
+      viewInfo.subresRange = ImgSubresRange;
+      dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
+
+      hwState_[8] = GetHSAILImageFormatType(desc().format_);
+      hwState_[9] = GetHSAILImageOrderType(desc().format_);
+      hwState_[10] = static_cast<uint32_t>(desc().width_);
+      hwState_[11] = 0;  // one extra reserved field in the argument
+    }
+  }
+  else if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+    memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
+    if (nullptr == memRef_) {
+      return false;
+    }
+    Pal::BufferViewInfo viewInfo = {};
+    viewInfo.gpuAddr = vmAddress();
+    viewInfo.range = memRef_->iMem()->Desc().size;
+    viewInfo.stride = elementSize();
+    viewInfo.swizzledFormat.format = format;
+    viewInfo.swizzledFormat.swizzle = channels;
+    hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
+    if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+      return false;
+    }
+
+    dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
+    hwState_[8] = GetHSAILImageFormatType(desc().format_);
+    hwState_[9] = GetHSAILImageOrderType(desc().format_);
+    hwState_[10] = static_cast<uint32_t>(desc().width_);
+    hwState_[11] = 0;  // one extra reserved field in the argument
+  }
+  else {
+    Pal::ExternalImageOpenInfo imgOpenInfo = {};
+    Pal::ImageCreateInfo imgCreateInfo = {};
+    imgOpenInfo.resourceInfo = openInfo;
+    imgOpenInfo.swizzledFormat.format = format;
+    imgOpenInfo.swizzledFormat.swizzle = channels;
+    imgOpenInfo.usage.shaderRead = true;
+    imgOpenInfo.usage.shaderWrite = true;
+    memRef_ = GpuMemoryReference::Create(dev(), imgOpenInfo, &imgCreateInfo, &image_);
+    if (nullptr == memRef_) {
+      return false;
+    }
+
+    hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
+    if ((0 == hwSrd_) && (memoryType() != ImageView)) {
+      return false;
+    }
+    Pal::ImageViewInfo viewInfo = {};
+    viewInfo.viewType = Pal::ImageViewType::Tex2d;
+    switch (imgCreateInfo.imageType) {
+    case Pal::ImageType::Tex3d:
+      viewInfo.viewType = Pal::ImageViewType::Tex3d;
+      break;
+    case Pal::ImageType::Tex1d:
+      viewInfo.viewType = Pal::ImageViewType::Tex1d;
+      break;
+    default:
+      break;
+    }
+    viewInfo.pImage = image_;
+    viewInfo.swizzledFormat.format = format;
+    viewInfo.swizzledFormat.swizzle = channels;
+    if ((type == InteropTextureViewLevel) || (type == InteropTextureViewCube)) {
+      ImgSubresRange.startSubres.mipLevel = mipLevel;
+      if (type == InteropTextureViewCube) {
+        ImgSubresRange.startSubres.arraySlice = layer;
+        viewInfo.viewType = Pal::ImageViewType::Tex2d;
+      }
+    }
+    if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+      ImgSubresRange.numSlices = desc_.height_;
+    }
+    if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+      ImgSubresRange.numSlices = desc_.depth_;
+    }
+    ImgSubresRange.numMips = desc().mipLevels_;
+    viewInfo.subresRange = ImgSubresRange;
+
+    dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
+    //! It's a workaround for D24S8 format, since PAL doesn't support this format
+    //! and GSL decompresses 24bit DEPTH into D24S8 for OGL compatibility
+    if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
+      (desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
+      hwState_[1] &= ~0x3c000000;
+      hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
+    }
+    hwState_[8] = GetHSAILImageFormatType(desc().format_);
+    hwState_[9] = GetHSAILImageOrderType(desc().format_);
+    hwState_[10] = static_cast<uint32_t>(desc().width_);
+    hwState_[11] = 0;  // one extra reserved field in the argument
+  }
+  return true;
+}
+
+// ================================================================================================
+bool Resource::CreatePinned(CreateParams* params)  // pin host memory described by PinnedParams
+{  // Returns false on unsupported topology, failed alignment checks or failed PAL creation
+  PinnedParams* pinned = reinterpret_cast<PinnedParams*>(params);  // NOTE(review): no null check
+  size_t allocSize = pinned->size_;
+  const amd::HostMemoryReference* hostMemRef = pinned->hostMemRef_;
+  void* pinAddress = address_ = hostMemRef->hostMem();  // address_ keeps the caller's pointer
   uint hostMemOffset = 0;
+  // Disabled check: assert((allocSize == (desc().width_ * elementSize())) && "Sizes don't match");
+  if (desc().topology_ == CL_MEM_OBJECT_BUFFER) {
+    // Align the host pointer down to PinnedMemoryAlignment (4K boundary, Vista/Win7 limitation)
+    char* tmpHost = const_cast<char*>(
+      amd::alignDown(reinterpret_cast<const char*>(address_), PinnedMemoryAlignment));
+
+    // Byte distance from the aligned base to the caller's pointer (partial size, unaligned copy)
+    hostMemOffset = static_cast<uint>(reinterpret_cast<const char*>(address_) - tmpHost);
+
+    offset_ = hostMemOffset;
+    // Pin from the aligned base rather than from the caller's pointer
+    pinAddress = tmpHost;
+    // Grow the pinned range by the leading bytes, then round it up to the pinning alignment
+    if (hostMemOffset != 0) {
+      allocSize += hostMemOffset;
+    }
+    allocSize = amd::alignUp(allocSize, PinnedMemoryAlignment);
+    // Disabled: hostMemOffset &= ~(0xff);
+  }
+  else if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D) {
+    //! @todo: Width has to be aligned for 3D.
+    //! Need to be replaced with a compute copy
+    // Reject the image unless the width is a multiple of 8 texels
+    if (((desc().width_ % 0x8) != 0) ||
+      // ... and the row size in bytes is a multiple of 64
+      (((desc().width_ * elementSize()) % 0x40) != 0)) {
+      return false;
+    }
+  }
+  else {
+    //! @todo GSL doesn't support pinning with resAlloc_
+    return false;
+  }
+
+  if (dev().settings().svmFineGrainSystem_) {
+    desc_.SVMRes_ = true;  // flag the resource as SVM when the fine-grain system setting is on
+  }
+
+  // Ensure page alignment: the address to pin must be a multiple of the OS page size
+  if ((uint64_t)(pinAddress) & (amd::Os::pageSize() - 1)) {
+    return false;
+  }
+  Pal::PinnedGpuMemoryCreateInfo createInfo = {};
+  createInfo.pSysMem = pinAddress;
+  createInfo.size = allocSize;
+  createInfo.vaRange = Pal::VaRange::Default;
+  memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+  if (nullptr == memRef_) {
+    LogError("Failed PAL memory allocation!");
+    return false;
+  }
+  desc_.cardMemory_ = false;  // not card memory: the allocation wraps the caller's host pages
+  return true;
+}
+
+// ================================================================================================
+bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr)  // allocate SVM memory
+{  // svmPtr != 0 requests the GPU VA reserved by params->svmBase_; returns false on PAL failure
+  size_t allocSize = amd::alignUp(desc().width_ * elementSize_, MaxGpuAlignment);  // bytes
+  if ((memoryType() == RemoteUSWC) || (memoryType() == Remote)) {  // remote types: SVM create info
+    Pal::SvmGpuMemoryCreateInfo createInfo = {};
+    createInfo.isUsedForKernel = desc_.isAllocExecute_;
+    createInfo.size = allocSize;
+    createInfo.alignment = MaxGpuAlignment;
+    if (svmPtr != 0) {  // NOTE(review): params and params->svmBase_ are dereferenced unchecked
+      createInfo.flags.useReservedGpuVa = true;
+      createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
+    }
+    else {
+      createInfo.flags.useReservedGpuVa = false;
+      createInfo.pReservedGpuVaOwner = nullptr;
+    }
+    memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+  }
+  else {  // every other memory type: regular GPU memory created in the SVM VA range
+    Pal::GpuMemoryCreateInfo createInfo = {};
+    createInfo.size = allocSize;
+    createInfo.alignment = MaxGpuAlignment;
+    createInfo.vaRange = Pal::VaRange::Svm;
+    createInfo.priority = Pal::GpuMemPriority::Normal;
+    if (svmPtr != 0) {  // NOTE(review): params and params->svmBase_ are dereferenced unchecked
+      createInfo.flags.useReservedGpuVa = true;
+      createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
+    }
+    memTypeToHeap(&createInfo);  // presumably fills in the heap selection - confirm in helper
+    memRef_ = GpuMemoryReference::Create(dev(), createInfo);
+  }
+  if (nullptr == memRef_) {
+    LogError("Failed PAL memory allocation!");
+    return false;
+  }
+  desc_.cardMemory_ = false;
+  if ((nullptr != params) && (nullptr != params->owner_) &&
+    (nullptr != params->owner_->getSvmPtr())) {  // owner already carries a non-null SVM pointer
+    params->owner_->setSvmPtr(reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr));
+  }
+  return true;
+}
+
+// ================================================================================================
+bool Resource::create(MemoryType memType, CreateParams* params) {
+  bool imageCreateView = false;
   bool foundCalRef = false;
   bool viewDefined = false;
   uint viewLayer = 0;
   uint viewLevel = 0;
   uint viewFlags = 0;
-  Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, 0, 0};
-  Pal::SubresRange ImgSubresRange = {ImgSubresId, 1, 1};
   Pal::ChannelMapping channels;
   Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
 
@@ -453,486 +1072,17 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
     desc_.type_ = RemoteUSWC;
   }
 
-  Pal::Result result;
-
   if ((memoryType() == OGLInterop) || (memoryType() == D3D9Interop) ||
       (memoryType() == D3D10Interop) || (memoryType() == D3D11Interop)) {
-    Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {};
-    Pal::ExternalResourceOpenInfo& openInfo = gpuMemOpenInfo.resourceInfo;
-    uint misc = 0;
-    uint layer = 0;
-    uint mipLevel = 0;
-    InteropType type = InteropTypeless;
-
-    if (memoryType() == OGLInterop) {
-      OGLInteropParams* oglRes = reinterpret_cast<OGLInteropParams*>(params);
-      assert(oglRes->glPlatformContext_ && "We don't have OGL context!");
-      switch (oglRes->type_) {
-        case InteropVertexBuffer:
-          glType_ = GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD;
-          break;
-        case InteropRenderBuffer:
-          glType_ = GL_RESOURCE_ATTACH_RENDERBUFFER_AMD;
-          break;
-        case InteropTexture:
-        case InteropTextureViewLevel:
-        case InteropTextureViewCube:
-          glType_ = GL_RESOURCE_ATTACH_TEXTURE_AMD;
-          break;
-        default:
-          LogError("Unknown OGL interop type!");
-          return false;
-          break;
-      }
-      glPlatformContext_ = oglRes->glPlatformContext_;
-      layer = oglRes->layer_;
-      type = oglRes->type_;
-      mipLevel = oglRes->mipLevel_;
-
-      if (!dev().resGLAssociate(oglRes->glPlatformContext_, oglRes->handle_, glType_,
-                                &openInfo.hExternalResource, &glInteropMbRes_, &offset_, desc_.format_
-#ifdef ATI_OS_WIN
-								, openInfo.doppDesktopInfo
-#endif
-								)) {
-        return false;
-      }
-      desc_.isDoppTexture_ = (openInfo.doppDesktopInfo.gpuVirtAddr != 0);
-      format = dev().getPalFormat(desc().format_, &channels);
-    }
-#ifdef ATI_OS_WIN	
-    else {
-      D3DInteropParams* d3dRes = reinterpret_cast<D3DInteropParams*>(params);
-      openInfo.hExternalResource = d3dRes->handle_;
-      misc = d3dRes->misc;
-      layer = d3dRes->layer_;
-      type = d3dRes->type_;
-      mipLevel = d3dRes->mipLevel_;
-    }
-#endif
-//! @todo PAL query for image/buffer object doesn't work properly!
-#if 0
-        bool    isImage = false;
-        if (Pal::Result::Success !=
-            dev().iDev()->DetermineExternalSharedResourceType(openInfo, &isImage)) {
-            return false;
-        }
-#endif  // 0
-    if (desc().buffer_ || misc) {
-      memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
-      if (nullptr == memRef_) {
-        return false;
-      }
-
-      if (misc) {
-        Pal::ImageCreateInfo imgCreateInfo = {};
-        Pal::ExternalImageOpenInfo imgOpenInfo = {};
-        imgOpenInfo.resourceInfo = openInfo;
-        imgOpenInfo.swizzledFormat.format = format;
-        imgOpenInfo.swizzledFormat.swizzle = channels;
-        imgOpenInfo.usage.shaderRead = true;
-        imgOpenInfo.usage.shaderWrite = true;
-        size_t imageSize;
-        size_t gpuMemSize;
-
-        if (Pal::Result::Success !=
-            dev().iDev()->GetExternalSharedImageSizes(imgOpenInfo, &imageSize, &gpuMemSize,
-                                                      &imgCreateInfo)) {
-          return false;
-        }
-
-        Pal::gpusize viewOffset = 0;
-        imgCreateInfo.flags.shareable = false;
-        imgCreateInfo.imageType = Pal::ImageType::Tex2d;
-        imgCreateInfo.extent.width = desc().width_;
-        imgCreateInfo.extent.height = desc().height_;
-        imgCreateInfo.extent.depth = desc().depth_;
-        imgCreateInfo.arraySize = 1;
-        imgCreateInfo.usageFlags.shaderRead = true;
-        imgCreateInfo.usageFlags.shaderWrite = true;
-        imgCreateInfo.swizzledFormat.format = format;
-        imgCreateInfo.swizzledFormat.swizzle = channels;
-        imgCreateInfo.mipLevels = 1;
-        imgCreateInfo.samples = 1;
-        imgCreateInfo.fragments = 1;
-        imgCreateInfo.tiling = Pal::ImageTiling::Linear;
-        imgCreateInfo.depthPitch = desc().height_ * imgCreateInfo.rowPitch;
-
-        switch (misc) {
-          case 1:  // NV12 format
-            switch (layer) {
-              case -1:
-                break;
-              case 0:
-                break;
-              case 1:
-                // Y - plane size to the offset
-                // NV12 format. UV is 2 times smaller plane Y
-                viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
-                imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
-                break;
-              default:
-                LogError("Unknown Interop View Type");
-                return false;
-            }
-            break;
-          case 2:  // YV12 format
-            switch (layer) {
-              case -1:
-                break;
-              case 0:
-                break;
-              case 1:
-                // Y - plane size to the offset
-                // YV12 format. U is 4 times smaller plane than Y
-                viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
-                imgCreateInfo.rowPitch >>= 1;
-                break;
-              case 2:
-                // Y + U plane sizes to the offest.
-                // U plane is 4 times smaller than Y and U == V
-                viewOffset = 5 * imgCreateInfo.rowPitch * desc().height_ / 2;
-                imgCreateInfo.rowPitch >>= 1;
-                break;
-              default:
-                LogError("Unknown Interop View Type");
-                return false;
-            }
-            imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
-            break;
-          case 3:  // YUY2 format
-            imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
-            break;
-          default:
-            LogError("Unknown Interop View Type");
-            return false;
-        }
-
-        imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
-        if (result != Pal::Result::Success) {
-          return false;
-        }
-
-        char* memImg = new char[imageSize];
-        if (memImg != nullptr) {
-          result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
-          if (result != Pal::Result::Success) {
-            delete [] memImg;
-            return false;
-          }
-        }
-        result = image_->BindGpuMemory(iMem(), viewOffset);
-        if (result != Pal::Result::Success) {
-          return false;
-        }
-        offset_ = static_cast<size_t>(viewOffset);
-        hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
-        if ((0 == hwSrd_) && (memoryType() != ImageView)) {
-          return false;
-        }
-        Pal::ImageViewInfo viewInfo = {};
-        viewInfo.viewType = Pal::ImageViewType::Tex2d;
-        viewInfo.pImage = image_;
-        viewInfo.swizzledFormat.format = format;
-        viewInfo.swizzledFormat.swizzle = channels;
-        viewInfo.subresRange = ImgSubresRange;
-        dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
-
-        hwState_[8] = GetHSAILImageFormatType(desc().format_);
-        hwState_[9] = GetHSAILImageOrderType(desc().format_);
-        hwState_[10] = static_cast<uint32_t>(desc().width_);
-        hwState_[11] = 0;  // one extra reserved field in the argument
-      }
-    } else if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
-      memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
-      if (nullptr == memRef_) {
-        return false;
-      }
-      Pal::BufferViewInfo viewInfo = {};
-      viewInfo.gpuAddr = vmAddress();
-      viewInfo.range = memRef_->iMem()->Desc().size;
-      viewInfo.stride = elementSize();
-      viewInfo.swizzledFormat.format = format;
-      viewInfo.swizzledFormat.swizzle = channels;
-      hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
-      if ((0 == hwSrd_) && (memoryType() != ImageView)) {
-        return false;
-      }
-
-      dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
-      hwState_[8] = GetHSAILImageFormatType(desc().format_);
-      hwState_[9] = GetHSAILImageOrderType(desc().format_);
-      hwState_[10] = static_cast<uint32_t>(desc().width_);
-      hwState_[11] = 0;  // one extra reserved field in the argument
-    } else {
-      Pal::ExternalImageOpenInfo imgOpenInfo = {};
-      Pal::ImageCreateInfo imgCreateInfo = {};
-      imgOpenInfo.resourceInfo = openInfo;
-      imgOpenInfo.swizzledFormat.format = format;
-      imgOpenInfo.swizzledFormat.swizzle = channels;
-      imgOpenInfo.usage.shaderRead = true;
-      imgOpenInfo.usage.shaderWrite = true;
-      memRef_ = GpuMemoryReference::Create(dev(), imgOpenInfo, &imgCreateInfo, &image_);
-      if (nullptr == memRef_) {
-        return false;
-      }
-
-      hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
-      if ((0 == hwSrd_) && (memoryType() != ImageView)) {
-        return false;
-      }
-      Pal::ImageViewInfo viewInfo = {};
-      viewInfo.viewType = Pal::ImageViewType::Tex2d;
-      switch (imgCreateInfo.imageType) {
-        case Pal::ImageType::Tex3d:
-          viewInfo.viewType = Pal::ImageViewType::Tex3d;
-          break;
-        case Pal::ImageType::Tex1d:
-          viewInfo.viewType = Pal::ImageViewType::Tex1d;
-          break;
-        default:
-          break;
-      }
-      viewInfo.pImage = image_;
-      viewInfo.swizzledFormat.format = format;
-      viewInfo.swizzledFormat.swizzle = channels;
-      if ((type == InteropTextureViewLevel) || (type == InteropTextureViewCube)) {
-        ImgSubresRange.startSubres.mipLevel = mipLevel;
-        if (type == InteropTextureViewCube) {
-          ImgSubresRange.startSubres.arraySlice = layer;
-          viewInfo.viewType = Pal::ImageViewType::Tex2d;
-        }
-      }
-      if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
-        ImgSubresRange.numSlices = desc_.height_;
-      }
-      if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
-        ImgSubresRange.numSlices = desc_.depth_;
-      }
-      ImgSubresRange.numMips = desc().mipLevels_;
-      viewInfo.subresRange = ImgSubresRange;
-
-      dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
-      //! It's a workaround for D24S8 format, since PAL doesn't support this format
-      //! and GSL decompresses 24bit DEPTH into D24S8 for OGL compatibility
-      if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
-          (desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
-        hwState_[1] &= ~0x3c000000;
-        hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
-      }
-      hwState_[8] = GetHSAILImageFormatType(desc().format_);
-      hwState_[9] = GetHSAILImageOrderType(desc().format_);
-      hwState_[10] = static_cast<uint32_t>(desc().width_);
-      hwState_[11] = 0;  // one extra reserved field in the argument
-    }
-    return true;
+    return CreateInterop(params);
   }
 
   if (!desc_.buffer_) {
-    if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
-      if (memoryType() == ImageBuffer) {
-        ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
-        viewOwner_ = imageBuffer->resource_;
-        memRef_ = viewOwner_->memRef_;
-        memRef_->retain();
-        desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
-      } else {
-        Pal::GpuMemoryCreateInfo createInfo = {};
-        createInfo.size = desc().width_ * elementSize();
-        // @todo 64K alignment is too big
-        createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
-        createInfo.alignment = MaxGpuAlignment;
-        createInfo.vaRange = Pal::VaRange::Default;
-        createInfo.priority = Pal::GpuMemPriority::Normal;
-        memTypeToHeap(&createInfo);
-        // createInfo.priority;
-        memRef_ =
-            dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
-        if (nullptr == memRef_) {
-          memRef_ = GpuMemoryReference::Create(dev(), createInfo);
-          if (nullptr == memRef_) {
-            LogError("Failed PAL memory allocation!");
-            return false;
-          }
-        }
-      }
-      // Check if memory is locked already and restore CPU pointer
-      if (memRef_->cpuAddress_ != nullptr) {
-        address_ = memRef_->cpuAddress_;
-        memRef_->cpuAddress_ = nullptr;
-        mapCount_++;
-      }
-      Pal::BufferViewInfo viewInfo = {};
-      viewInfo.gpuAddr = vmAddress();
-      viewInfo.range = memRef_->iMem()->Desc().size;
-      viewInfo.stride = elementSize();
-      viewInfo.swizzledFormat.format = format;
-      viewInfo.swizzledFormat.swizzle = channels;
-      // viewInfo.channels = channels;
-      hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
-      if ((0 == hwSrd_) && (memoryType() != ImageView)) {
-        return false;
-      }
-
-      dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
-      hwState_[8] = GetHSAILImageFormatType(desc().format_);
-      hwState_[9] = GetHSAILImageOrderType(desc().format_);
-      hwState_[10] = static_cast<uint32_t>(desc().width_);
-      hwState_[11] = 0;  // one extra reserved field in the argument
-      return true;
-    }
-
-    Pal::ImageViewInfo viewInfo = {};
-    Pal::ImageCreateInfo imgCreateInfo = {};
-    Pal::GpuMemoryRequirements req = {};
-    imgCreateInfo.imageType = Pal::ImageType::Tex2d;
-    viewInfo.viewType = Pal::ImageViewType::Tex2d;
-    imgCreateInfo.extent.width = desc_.width_;
-    imgCreateInfo.extent.height = desc_.height_;
-    imgCreateInfo.extent.depth = desc_.depth_;
-    imgCreateInfo.arraySize = 1;
-
-    switch (desc_.topology_) {
-      case CL_MEM_OBJECT_IMAGE3D:
-        imgCreateInfo.imageType = Pal::ImageType::Tex3d;
-        viewInfo.viewType = Pal::ImageViewType::Tex3d;
-        break;
-      case CL_MEM_OBJECT_IMAGE1D:
-      case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-      case CL_MEM_OBJECT_IMAGE1D_BUFFER:
-        imgCreateInfo.imageType = Pal::ImageType::Tex1d;
-        viewInfo.viewType = Pal::ImageViewType::Tex1d;
-        break;
-    }
-    if (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
-      ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.height_;
-      imgCreateInfo.extent.depth = desc_.height_;
-      imgCreateInfo.extent.height = 1;
-    }
-    if (desc_.topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
-      ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.depth_;
-    }
-
-    if (memoryType() == ImageView) {
-      ImageViewParams* imageView = reinterpret_cast<ImageViewParams*>(params);
-      ImgSubresRange.startSubres.mipLevel = imageView->level_;
-      desc_.baseLevel_ = imageView->level_;
-      ImgSubresRange.startSubres.arraySlice = imageView->layer_;
-      viewOwner_ = imageView->resource_;
-      image_ = viewOwner_->image_;
-      offset_ = viewOwner_->offset_;
-    } else if (memoryType() == ImageBuffer) {
-      ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
-      viewOwner_ = imageBuffer->resource_;
-    }
-    ImgSubresRange.numMips = desc().mipLevels_;
-
-    if ((memoryType() != ImageView) ||
-        //! @todo PAL doesn't allow an SRD view creation with different pixel size
-        (elementSize() != viewOwner_->elementSize())) {
-      imgCreateInfo.usageFlags.shaderRead = true;
-      imgCreateInfo.usageFlags.shaderWrite =
-          (format == Pal::ChNumFormat::X8Y8Z8W8_Srgb) ? false : true;
-      imgCreateInfo.swizzledFormat.format = format;
-      imgCreateInfo.swizzledFormat.swizzle = channels;
-      imgCreateInfo.mipLevels = (desc_.mipLevels_) ? desc_.mipLevels_ : 1;
-      imgCreateInfo.samples = 1;
-      imgCreateInfo.fragments = 1;
-      Pal::ImageTiling tiling = Pal::ImageTiling::Optimal;
-      uint32_t rowPitch = 0;
-
-      if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) ||
-          (memoryType() == ImageBuffer)) {
-        tiling = Pal::ImageTiling::Linear;
-      } else if (memoryType() == ImageView) {
-        tiling = viewOwner_->image_->GetImageCreateInfo().tiling;
-        // Find the new pitch in pixels for the new format
-        rowPitch = viewOwner_->desc().pitch_ * viewOwner_->elementSize() / elementSize();
-      }
-
-      if (memoryType() == ImageBuffer) {
-        if ((params->owner_ != NULL) && params->owner_->asImage() &&
-            (params->owner_->asImage()->getRowPitch() != 0)) {
-          rowPitch = params->owner_->asImage()->getRowPitch() / elementSize();
-        } else {
-          rowPitch = desc().width_;
-        }
-      }
-      desc_.pitch_ = rowPitch;
-      // Make sure the row pitch is aligned to pixels
-      imgCreateInfo.rowPitch =
-          elementSize() * amd::alignUp(rowPitch, dev().info().imagePitchAlignment_);
-      imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
-      imgCreateInfo.tiling = tiling;
-
-      size_t imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
-      if (result != Pal::Result::Success) {
-        return false;
-      }
-
-      char* memImg = new char[imageSize];
-      if (memImg != nullptr) {
-        result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
-        if (result != Pal::Result::Success) {
-          delete [] memImg;
-          return false;
-        }
-      }
-      image_->GetGpuMemoryRequirements(&req);
-      // createInfo.priority;
-    }
-
-    if ((memoryType() != ImageView) && (memoryType() != ImageBuffer)) {
-      Pal::GpuMemoryCreateInfo createInfo = {};
-      createInfo.size = amd::alignUp(req.size, MaxGpuAlignment);
-      createInfo.alignment = std::max(req.alignment, MaxGpuAlignment);
-      createInfo.vaRange = Pal::VaRange::Default;
-      createInfo.priority = Pal::GpuMemPriority::Normal;
-      memTypeToHeap(&createInfo);
-
-      memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
-      if (nullptr == memRef_) {
-        memRef_ = GpuMemoryReference::Create(dev(), createInfo);
-        if (nullptr == memRef_) {
-          LogError("Failed PAL memory allocation!");
-          return false;
-        }
-      }
-    } else {
-      memRef_ = viewOwner_->memRef_;
-      memRef_->retain();
-      desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
-      if (req.size > viewOwner_->iMem()->Desc().size) {
-        LogWarning("Image is bigger than the original mem object!");
-      }
-    }
-    // Check if memory is locked already and restore CPU pointer
-    if (memRef_->cpuAddress_ != nullptr) {
-      address_ = memRef_->cpuAddress_;
-      memRef_->cpuAddress_ = nullptr;
-      mapCount_++;
-    }
-
-    result = image_->BindGpuMemory(memRef_->gpuMem_, offset_);
-    if (result != Pal::Result::Success) {
-      return false;
-    }
-
-    hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
-    if ((0 == hwSrd_) && (memoryType() != ImageView)) {
-      return false;
-    }
-    viewInfo.pImage = image_;
-    viewInfo.swizzledFormat.format = format;
-    viewInfo.swizzledFormat.swizzle = channels;
-    viewInfo.subresRange = ImgSubresRange;
-    dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
-
-    hwState_[8] = GetHSAILImageFormatType(desc().format_);
-    hwState_[9] = GetHSAILImageOrderType(desc().format_);
-    hwState_[10] = static_cast<uint32_t>(desc().width_);
-    hwState_[11] = 0;  // one extra reserved field in the argument
-    return true;
+    return CreateImage(params);
+  }
+  
+  if (memoryType() == Pinned) {
+    return CreatePinned(params);
   }
 
   if (memoryType() == View) {
@@ -956,116 +1106,19 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
     return true;
   }
 
-  if (memoryType() == Pinned) {
-    PinnedParams* pinned = reinterpret_cast<PinnedParams*>(params);
-    size_t allocSize = pinned->size_;
-    void* pinAddress;
-    hostMemRef = pinned->hostMemRef_;
-    pinAddress = address_ = hostMemRef->hostMem();
-    // assert((allocSize == (desc().width_ * elementSize())) && "Sizes don't match");
-    if (desc().topology_ == CL_MEM_OBJECT_BUFFER) {
-      // Allign offset to 4K boundary (Vista/Win7 limitation)
-      char* tmpHost = const_cast<char*>(
-          amd::alignDown(reinterpret_cast<const char*>(address_), PinnedMemoryAlignment));
-
-      // Find the partial size for unaligned copy
-      hostMemOffset = static_cast<uint>(reinterpret_cast<const char*>(address_) - tmpHost);
-
-      offset_ = hostMemOffset;
-
-      pinAddress = tmpHost;
-
-      if (hostMemOffset != 0) {
-        allocSize += hostMemOffset;
-      }
-      allocSize = amd::alignUp(allocSize, PinnedMemoryAlignment);
-      //            hostMemOffset &= ~(0xff);
-    } else if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D) {
-      //! @todo: Width has to be aligned for 3D.
-      //! Need to be replaced with a compute copy
-      // Width aligned by 8 texels
-      if (((desc().width_ % 0x8) != 0) ||
-          // Pitch aligned by 64 bytes
-          (((desc().width_ * elementSize()) % 0x40) != 0)) {
-        return false;
-      }
-    } else {
-      //! @todo GSL doesn't support pinning with resAlloc_
-      return false;
-    }
-
-    if (dev().settings().svmFineGrainSystem_) {
-      desc_.SVMRes_ = true;
-    }
-
-    // Ensure page alignment
-    if ((uint64_t)(pinAddress) & (amd::Os::pageSize() - 1)) {
-      return false;
-    }
-    Pal::PinnedGpuMemoryCreateInfo createInfo = {};
-    createInfo.pSysMem = pinAddress;
-    createInfo.size = allocSize;
-    createInfo.vaRange = Pal::VaRange::Default;
-    memRef_ = GpuMemoryReference::Create(dev(), createInfo);
-    if (nullptr == memRef_) {
-      LogError("Failed PAL memory allocation!");
-      return false;
-    }
-    desc_.cardMemory_ = false;
-    return true;
-  }
-
   Pal::gpusize svmPtr = 0;
   if ((nullptr != params) && (nullptr != params->owner_) &&
       (nullptr != params->owner_->getSvmPtr())) {
-    svmPtr = reinterpret_cast<Pal::gpusize>(params->owner_->getSvmPtr());
-    desc_.SVMRes_ = true;
-    svmPtr = (svmPtr == 1) ? 0 : svmPtr;
+      svmPtr = reinterpret_cast<Pal::gpusize>(params->owner_->getSvmPtr());
+      desc_.SVMRes_ = true;
+      svmPtr = (svmPtr == 1) ? 0 : svmPtr;
   }
   if (desc_.SVMRes_) {
-    // @todo 64K alignment is too big
-    size_t allocSize = amd::alignUp(desc().width_ * elementSize_, MaxGpuAlignment);
-    if ((memoryType() == RemoteUSWC) || (memoryType() == Remote)) {
-      Pal::SvmGpuMemoryCreateInfo createInfo = {};
-      createInfo.isUsedForKernel = desc_.isAllocExecute_;
-      createInfo.size = allocSize;
-      createInfo.alignment = MaxGpuAlignment;
-      if (svmPtr != 0) {
-        createInfo.flags.useReservedGpuVa = true;
-        createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
-      } else {
-        createInfo.flags.useReservedGpuVa = false;
-        createInfo.pReservedGpuVaOwner = nullptr;
-      }
-      memRef_ = GpuMemoryReference::Create(dev(), createInfo);
-    } else {
-      Pal::GpuMemoryCreateInfo createInfo = {};
-      createInfo.size = allocSize;
-      createInfo.alignment = MaxGpuAlignment;
-      createInfo.vaRange = Pal::VaRange::Svm;
-      createInfo.priority = Pal::GpuMemPriority::Normal;
-      if (svmPtr != 0) {
-        createInfo.flags.useReservedGpuVa = true;
-        createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
-      }
-      memTypeToHeap(&createInfo);
-      memRef_ = GpuMemoryReference::Create(dev(), createInfo);
-    }
-    if (nullptr == memRef_) {
-      LogError("Failed PAL memory allocation!");
-      return false;
-    }
-    desc_.cardMemory_ = false;
-    if ((nullptr != params) && (nullptr != params->owner_) &&
-        (nullptr != params->owner_->getSvmPtr())) {
-      params->owner_->setSvmPtr(reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr));
-    }
-    return true;
+      return CreateSvm(params, svmPtr);
   }
 
   Pal::GpuMemoryCreateInfo createInfo = {};
   createInfo.size = desc().width_ * elementSize_;
-  // @todo 64K alignment is too big
   createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
   createInfo.alignment = MaxGpuAlignment;
   createInfo.vaRange = Pal::VaRange::Default;
@@ -1082,7 +1135,8 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
 
   memTypeToHeap(&createInfo);
   // createInfo.priority;
-  memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
+  memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
+    createInfo.alignment, &subOffset_);
   if (nullptr == memRef_) {
     memRef_ = GpuMemoryReference::Create(dev(), createInfo);
     if (nullptr == memRef_) {
@@ -1090,6 +1144,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
       return false;
     }
   }
+  offset_ += static_cast<size_t>(subOffset_);
   // Check if memory is locked already and restore CPU pointer
   if (memRef_->cpuAddress_ != nullptr) {
     address_ = memRef_->cpuAddress_;
@@ -1099,7 +1154,9 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
   return true;
 }
 
-void Resource::free() {
+// ================================================================================================
+void Resource::free()
+{
   if (memRef_ == nullptr) {
     return;
   }
@@ -1112,17 +1169,19 @@ void Resource::free() {
   const bool wait =
     (memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);
 
+  // OCL has to wait, even if resource is placed in the cache, since reallocation can occur
+  // and resource can be reused on another async queue without a wait on a busy operation
   if (wait) {
     if (memRef_->gpu_ == nullptr) {
       Device::ScopedLockVgpus lock(dev());
       // Release all memory objects on all virtual GPUs
       for (uint idx = 1; idx < dev().vgpus().size(); ++idx) {
-        dev().vgpus()[idx]->waitForEvent(&memRef_->events_[idx]);
+        dev().vgpus()[idx]->waitForEvent(&events_[idx]);
       }
     }
     else {
       amd::ScopedLock l(memRef_->gpu_->execution());
-      memRef_->gpu_->waitForEvent(&memRef_->events_[memRef_->gpu_->index()]);
+      memRef_->gpu_->waitForEvent(&events_[memRef_->gpu_->index()]);
     }
   } else {
     // After a view destruction the original object is no longer can be associated with a vgpu
@@ -1144,8 +1203,8 @@ void Resource::free() {
         }
       }
 
-      // Add resource to the cache if it's not assigned to a specific queue
-      if ((memRef_->gpu_ != nullptr) || !dev().resourceCache().addGpuMemory(&desc_, memRef_)) {
+      // Add resource to the cache
+      if (!dev().resourceCache().addGpuMemory(&desc_, memRef_, subOffset_)) {
         palFree();
       }
     }
@@ -1166,8 +1225,10 @@ void Resource::free() {
   }
 }
 
+// ================================================================================================
 void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const void* data,
-                            bool waitForEvent) const {
+                            bool waitForEvent) const
+{
   GpuEvent event;
 
   // Write data size bytes to surface
@@ -1175,7 +1236,8 @@ void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const v
   assert((size & 3) == 0);
   gpu.eventBegin(MainEngine);
   gpu.queue(MainEngine).addCmdMemRef(memRef());
-  gpu.iCmd()->CmdUpdateMemory(*iMem(), offset, size, reinterpret_cast<const uint32_t*>(data));
+  gpu.iCmd()->CmdUpdateMemory(*iMem(), offset_ + offset, size,
+    reinterpret_cast<const uint32_t*>(data));
   gpu.eventEnd(MainEngine, event);
 
   if (waitForEvent) {
@@ -1190,7 +1252,10 @@ void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const v
     gpu.setGpuEvent(event, false);
   }
 }
-static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement) {
+
+// ================================================================================================
+static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement)
+{
   if (bytesPerElement == 16) {
     return Pal::ChNumFormat::X32Y32Z32W32_Uint;
   } else if (bytesPerElement == 8) {
@@ -1204,6 +1269,7 @@ static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement) {
   }
 }
 
+// ================================================================================================
 bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
                                 const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
                                 Resource& dstResource, bool enableCopyRect, bool flushDMA,
@@ -1351,6 +1417,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
   return true;
 }
 
+// ================================================================================================
 void Resource::setBusy(VirtualGPU& gpu, GpuEvent gpuEvent) const {
   addGpuEvent(gpu, gpuEvent);
 
@@ -1360,6 +1427,7 @@ void Resource::setBusy(VirtualGPU& gpu, GpuEvent gpuEvent) const {
   }
 }
 
+// ================================================================================================
 void Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const {
   GpuEvent* gpuEvent = getGpuEvent(gpu);
 
@@ -1377,6 +1445,7 @@ void Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const {
   }
 }
 
+// ================================================================================================
 bool Resource::hostWrite(VirtualGPU* gpu, const void* hostPtr, const amd::Coord3D& origin,
                          const amd::Coord3D& size, uint flags, size_t rowPitch, size_t slicePitch) {
   void* dst;
@@ -1446,6 +1515,7 @@ bool Resource::hostWrite(VirtualGPU* gpu, const void* hostPtr, const amd::Coord3
   return true;
 }
 
+// ================================================================================================
 bool Resource::hostRead(VirtualGPU* gpu, void* hostPtr, const amd::Coord3D& origin,
                         const amd::Coord3D& size, size_t rowPitch, size_t slicePitch) {
   void* src;
@@ -1515,6 +1585,7 @@ bool Resource::hostRead(VirtualGPU* gpu, void* hostPtr, const amd::Coord3D& orig
   return true;
 }
 
+// ================================================================================================
 void* Resource::gpuMemoryMap(size_t* pitch, uint flags, Pal::IGpuMemory* resource) const {
   if (desc_.cardMemory_ && !isPersistentDirectMap()) {
     // @todo remove const cast
@@ -1540,6 +1611,7 @@ void* Resource::gpuMemoryMap(size_t* pitch, uint flags, Pal::IGpuMemory* resourc
   }
 }
 
+// ================================================================================================
 void Resource::gpuMemoryUnmap(Pal::IGpuMemory* resource) const {
   if (desc_.cardMemory_ && !isPersistentDirectMap()) {
     // @todo remove const cast
@@ -1553,6 +1625,7 @@ void Resource::gpuMemoryUnmap(Pal::IGpuMemory* resource) const {
   }
 }
 
+// ================================================================================================
 bool Resource::glAcquire() {
   bool retVal = true;
   if (desc().type_ == OGLInterop) {
@@ -1561,6 +1634,7 @@ bool Resource::glAcquire() {
   return retVal;
 }
 
+// ================================================================================================
 bool Resource::glRelease() {
   bool retVal = true;
   if (desc().type_ == OGLInterop) {
@@ -1569,18 +1643,21 @@ bool Resource::glRelease() {
   return retVal;
 }
 
+// ================================================================================================
 void Resource::addGpuEvent(const VirtualGPU& gpu, GpuEvent event) const {
   uint idx = gpu.index();
-  assert(idx < memRef_->events_.size());
-  memRef_->events_[idx] = event;
+  assert(idx < events_.size());
+  events_[idx] = event;
 }
 
+// ================================================================================================
 GpuEvent* Resource::getGpuEvent(const VirtualGPU& gpu) const {
   uint idx = gpu.index();
-  assert((idx < memRef_->events_.size()) && "Undeclared queue access!");
-  return &memRef_->events_[idx];
+  assert((idx < events_.size()) && "Undeclared queue access!");
+  return &events_[idx];
 }
 
+// ================================================================================================
 void Resource::palFree() const {
   if (desc().type_ == OGLInterop) {
     amd::ScopedLock lk(dev().lockPAL());
@@ -1589,6 +1666,7 @@ void Resource::palFree() const {
   memRef_->release();
 }
 
+// ================================================================================================
 bool Resource::isMemoryType(MemoryType memType) const {
   if (memoryType() == memType) {
     return true;
@@ -1599,6 +1677,7 @@ bool Resource::isMemoryType(MemoryType memType) const {
   return false;
 }
 
+// ================================================================================================
 bool Resource::isPersistentDirectMap() const {
   bool directMap =
       ((memoryType() == Resource::Persistent) && (desc().dimSize_ < 3) && !desc().imageArray_);
@@ -1613,6 +1692,7 @@ bool Resource::isPersistentDirectMap() const {
   return directMap;
 }
 
+// ================================================================================================
 void* Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers) {
   if (isMemoryType(Pinned)) {
     // Check if we have to wait
@@ -1682,11 +1762,13 @@ void* Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers
   return address_;
 }
 
+// ================================================================================================
 void* Resource::mapLayers(VirtualGPU* gpu, uint flags) {
   Unimplemented();
   return nullptr;
 }
 
+// ================================================================================================
 void Resource::unmap(VirtualGPU* gpu) {
   if (isMemoryType(Pinned)) {
     return;
@@ -1713,22 +1795,26 @@ void Resource::unmap(VirtualGPU* gpu) {
   }
 }
 
+// ================================================================================================
 void Resource::unmapLayers(VirtualGPU* gpu) {
   Unimplemented();
 }
 
+// ================================================================================================
 void Resource::setActiveRename(VirtualGPU& gpu, GpuMemoryReference* rename) {
   // Copy the unique GSL data
   memRef_ = rename;
   address_ = rename->cpuAddress_;
 }
 
+// ================================================================================================
 bool Resource::getActiveRename(VirtualGPU& gpu, GpuMemoryReference** rename) {
   // Copy the old data to the rename descriptor
   *rename = memRef_;
   return true;
 }
 
+// ================================================================================================
 bool Resource::rename(VirtualGPU& gpu, bool force) {
   GpuEvent* gpuEvent = getGpuEvent(gpu);
   if (!gpuEvent->isValid() && !force) {
@@ -1809,6 +1895,7 @@ bool Resource::rename(VirtualGPU& gpu, bool force) {
   return true;
 }
 
+// ================================================================================================
 void Resource::warmUpRenames(VirtualGPU& gpu) {
   // Make sure OCL touches every command buffer in the queue to avoid delays on the first submit
   uint flush = dev().settings().maxRenames_ / VirtualGPU::Queue::MaxCmdBuffers;
@@ -1823,13 +1910,113 @@ void Resource::warmUpRenames(VirtualGPU& gpu) {
   }
 }
 
+// ================================================================================================
+MemorySubAllocator::~MemorySubAllocator()
+{
+  // Drop the reference on every chunk and destroy the suballocator that managed it
+  for (auto chunk = mem_heap_.begin(); chunk != mem_heap_.end(); ++chunk) {
+    chunk->first->release();
+    delete chunk->second;
+  }
+}
+
+// ================================================================================================
+/*! \brief Suballocates a block of local memory from one of the tracked chunks
+ *
+ *  Requests of subAllocationMaxSize_ or more are rejected. Smaller requests are rounded up to
+ *  a multiple of subAllocationMinSize_ and served from the first chunk that can fit them.
+ *  If no chunk can, then a new chunk is created in the invisible heap and used instead.
+ *
+ *  \param size      Requested size in bytes
+ *  \param alignment Requested alignment, passed through to the buddy allocator
+ *  \param offset    Receives the offset of the block inside the returned chunk
+ *
+ *  \return The chunk that holds the suballocation or nullptr if no suballocation was made
+ */
+GpuMemoryReference* MemorySubAllocator::Allocate(
+  Pal::gpusize size, Pal::gpusize alignment, Pal::gpusize* offset)
+{
+  const auto& settings = device_->settings();
+  // Check if resource size is allowed for suballocation
+  if (size >= settings.subAllocationMaxSize_) {
+    return nullptr;
+  }
+  size = amd::alignUp(size, settings.subAllocationMinSize_);
+
+  // Find if any existing chunk has enough empty space
+  for (const auto& it : mem_heap_) {
+    if (Pal::Result::Success == it.second->Allocate(size, alignment, offset)) {
+      return it.first;
+    }
+  }
+
+  // No chunk can serve the request, so allocate a new chunk in memory
+  Pal::GpuMemoryCreateInfo createInfo = {};
+  createInfo.size       = settings.subAllocationChunkSize_;
+  createInfo.alignment  = 0;
+  createInfo.vaRange    = Pal::VaRange::Default;
+  createInfo.priority   = Pal::GpuMemPriority::Normal;
+  createInfo.heapCount  = 1;
+  createInfo.heaps[0]   = Pal::GpuHeapInvisible;
+  GpuMemoryReference* mem_ref = GpuMemoryReference::Create(*device_, createInfo);
+  if (mem_ref == nullptr) {
+    return nullptr;
+  }
+
+  // Create the BuddyAllocator for the chunk and suballocate from it right away. If any step
+  // fails the chunk is released, so a chunk is never returned without a valid suballocation
+  MemBuddyAllocator* allocator = new MemBuddyAllocator(device_,
+    settings.subAllocationChunkSize_, settings.subAllocationMinSize_);
+  if ((allocator == nullptr) ||
+      (Pal::Result::Success != allocator->Init()) ||
+      (Pal::Result::Success != allocator->Allocate(size, alignment, offset))) {
+    delete allocator;
+    mem_ref->release();
+    return nullptr;
+  }
+  // Add the chunk and suballocator into the heap
+  mem_heap_.insert(std::pair<GpuMemoryReference*, MemBuddyAllocator*>(mem_ref, allocator));
+  return mem_ref;
+}
+
+// ================================================================================================
+bool MemorySubAllocator::Free(GpuMemoryReference* ref, Pal::gpusize offset)
+{
+  // Only chunk allocations are tracked in the heap, anything else isn't a suballocation
+  const auto chunk = mem_heap_.find(ref);
+  if (mem_heap_.end() == chunk) {
+    return false;
+  }
+  MemBuddyAllocator* const allocator = chunk->second;
+  allocator->Free(offset);  // Give the block at the specified offset back to the allocator
+  // Once the suballocator is empty, destroy it and release the memory chunk
+  if (allocator->IsEmpty()) {
+    delete allocator;
+    chunk->first->release();
+    mem_heap_.erase(chunk);
+  }
+  return true;
+}
+
+// ================================================================================================
 ResourceCache::~ResourceCache() { free(); }
 
+// ================================================================================================
 //! \note the cache works in FILO mode
-bool ResourceCache::addGpuMemory(Resource::Descriptor* desc, GpuMemoryReference* ref) {
+bool ResourceCache::addGpuMemory(Resource::Descriptor* desc,
+  GpuMemoryReference* ref, Pal::gpusize offset)
+{
   bool result = false;
   size_t size = ref->iMem()->Desc().size;
 
+  if (desc->type_ == Resource::Local) {
+      amd::ScopedLock l(&lockCacheOps_);
+    // Check if runtime can free suballocation in local memory
+    if (memSubAllocLocal_.Free(ref, offset)) {
+      return true;
+    }
+  }
+
   // Make sure current allocation isn't bigger than cache
   if (((desc->type_ == Resource::Local) || (desc->type_ == Resource::Persistent) ||
        (desc->type_ == Resource::Remote) || (desc->type_ == Resource::RemoteUSWC)) &&
@@ -1855,8 +2042,9 @@ bool ResourceCache::addGpuMemory(Resource::Descriptor* desc, GpuMemoryReference*
   return result;
 }
 
+// ================================================================================================
 GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal::gpusize size,
-                                                 Pal::gpusize alignment) {
+                                                 Pal::gpusize alignment, Pal::gpusize* offset) {
   amd::ScopedLock l(&lockCacheOps_);
   GpuMemoryReference* ref = nullptr;
 
@@ -1866,6 +2054,13 @@ GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal
     return ref;
   }
 
+  if (desc->type_ == Resource::Local) {
+    ref = memSubAllocLocal_.Allocate(size, alignment, offset);
+    if (ref != nullptr) {
+      return ref;
+    }
+  }
+
   // Serach the right resource through the cache list
   for (const auto& it : resCache_) {
     Resource::Descriptor* entry = it.first;
@@ -1886,6 +2081,7 @@ GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal
   return ref;
 }
 
+// ================================================================================================
 bool ResourceCache::free(size_t minCacheEntries) {
   amd::ScopedLock l(&lockCacheOps_);
   bool result = false;
@@ -1903,6 +2099,7 @@ bool ResourceCache::free(size_t minCacheEntries) {
   return result;
 }
 
+// ================================================================================================
 void ResourceCache::removeLast() {
   std::pair<Resource::Descriptor*, GpuMemoryReference*> entry;
   entry = resCache_.back();
@@ -1913,7 +2110,7 @@ void ResourceCache::removeLast() {
   // Delete Descriptor
   delete entry.first;
 
-  // Destroy GSL resource
+  // Destroy PAL resource
   entry.second->release();
   cacheSize_ -= size;
 }
diff --git a/rocclr/runtime/device/pal/palresource.hpp b/rocclr/runtime/device/pal/palresource.hpp
index 0118681006..3329ee077b 100644
--- a/rocclr/runtime/device/pal/palresource.hpp
+++ b/rocclr/runtime/device/pal/palresource.hpp
@@ -6,6 +6,7 @@
 #include "platform/command.hpp"
 #include "platform/program.hpp"
 #include "device/pal/paldefs.hpp"
+#include "util/palBuddyAllocatorImpl.h"
 
 //! \namespace pal PAL Resource Implementation
 namespace pal {
@@ -16,7 +17,6 @@ class VirtualGPU;
 /*! \addtogroup PAL PAL Resource Implementation
  *  @{
  */
-
 class GpuMemoryReference : public amd::ReferenceCountedObject {
  public:
   static GpuMemoryReference* Create(const Device& dev, const Pal::GpuMemoryCreateInfo& createInfo);
@@ -36,12 +36,6 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
   //! Default constructor
   GpuMemoryReference(const Device& dev);
 
-  //! Resizes the events array to account the new queue
-  void resizeGpuEvents(uint index) { events_.resize(index + 1); }
-
-  //! Erase an entry in the array for provided queue index
-  void eraseGpuEvents(uint index) { events_.erase(events_.begin() + index); }
-
   //! Get PAL memory object
   Pal::IGpuMemory* iMem() const { return gpuMem_; }
 
@@ -50,7 +44,6 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
   const Device& device_;      //!< GPU device
   //! @note: This field is necessary for the thread safe release only
   VirtualGPU* gpu_;           //!< Resource will be used only on this queue
-  std::vector<GpuEvent> events_;  //!< GPU events associated with the resource
 
  protected:
   //! Default destructor
@@ -64,6 +57,8 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
   GpuMemoryReference& operator=(const GpuMemoryReference&);
 };
 
+static constexpr Pal::gpusize MaxGpuAlignment = 4 * Ki;
+
 //! GPU resource
 class Resource : public amd::HeapObject {
  public:
@@ -178,7 +173,7 @@ class Resource : public amd::HeapObject {
         uint imageArray_ : 1;      //!< PAL resource is an array of images
         uint buffer_ : 1;          //!< PAL resource is a buffer
         uint tiled_ : 1;           //!< PAL resource is tiled
-        uint SVMRes_ : 1;          //!< SVM flag to the cal resource
+        uint SVMRes_ : 1;          //!< SVM flag to the pal resource
         uint scratch_ : 1;         //!< Scratch buffer
         uint isAllocExecute_ : 1;  //!< SVM resource allocation attribute for shader\cmdbuf
         uint isDoppTexture_ : 1;   //!< PAL resource is for a DOPP desktop texture
@@ -205,9 +200,9 @@ class Resource : public amd::HeapObject {
   //! Destructor of the resource
   virtual ~Resource();
 
-  /*! \brief Creates a CAL object, associated with the resource
+  /*! \brief Creates a PAL object, associated with the resource
    *
-   *  \return True if we succesfully created a CAL resource
+   *  \return True if we successfully created a PAL resource
    */
   virtual bool create(MemoryType memType,       //!< memory type
                       CreateParams* params = 0  //!< special parameters for resource allocation
@@ -263,7 +258,7 @@ class Resource : public amd::HeapObject {
   uint64_t vmAddress() const { return iMem()->Desc().gpuVirtAddr + offset_; }
 
   //! Returns global memory offset
-  uint64_t vmSize() const { return iMem()->Desc().size - offset_; }
+  uint64_t vmSize() const { return desc_.width_ * elementSize(); }
 
   //! Returns global memory offset
   bool mipMapped() const { return (desc().mipLevels_ > 1) ? true : false; }
@@ -290,7 +285,7 @@ class Resource : public amd::HeapObject {
 
   //! Marks the resource as busy
   void setBusy(VirtualGPU& gpu,   //!< Virtual GPU device object
-               GpuEvent calEvent  //!< CAL event
+               GpuEvent calEvent  //!< PAL event
                ) const;
 
   //! Wait for the resource
@@ -326,7 +321,7 @@ class Resource : public amd::HeapObject {
   //! Get the mapped address of this resource
   address data() const { return reinterpret_cast<address>(address_); }
 
-  //! Frees all allocated CAL memories and resources,
+  //! Frees all allocated PAL memories and resources,
   //! associated with this objects. And also destroys all rename structures
   //! Note: doesn't destroy the object itself
   void free();
@@ -360,7 +355,42 @@ class Resource : public amd::HeapObject {
   //! Returns GPU event associated with this resource and specified queue
   GpuEvent* getGpuEvent(const VirtualGPU& gpu) const;
 
+  //! Resizes the events array to account the new queue
+  void resizeGpuEvents(uint index) { events_.resize(index + 1); }
+
+  //! Erase an entry in the array for provided queue index
+  void eraseGpuEvents(uint index) { events_.erase(events_.begin() + index); }
+
  protected:
+  /*! \brief Creates a PAL image object, associated with the resource
+  *
+  *  \return True if we successfully created a PAL resource
+  */
+  bool CreateImage(CreateParams* params //!< special parameters for resource allocation
+                   );
+
+  /*! \brief Creates a PAL interop object, associated with the resource
+  *
+  *  \return True if we successfully created a PAL interop resource
+  */
+  bool CreateInterop(CreateParams* params //!< special parameters for resource allocation
+                     );
+
+  /*! \brief Creates a PAL pinned object, associated with the resource
+  *
+  *  \return True if we successfully created a PAL pinned resource
+  */
+  bool CreatePinned(CreateParams* params //!< special parameters for resource allocation
+                    );
+
+  /*! \brief Creates a PAL SVM object, associated with the resource
+  *
+  *  \return True if we successfully created a PAL SVM resource
+  */
+  bool CreateSvm(CreateParams* params,  //!< special parameters for resource allocation
+                 Pal::gpusize svmPtr
+                 );
+
   uint elementSize_;  //!< Size of a single element in bytes
 
  private:
@@ -424,6 +454,7 @@ class Resource : public amd::HeapObject {
   uint32_t curRename_;          //!< Current active rename in the list
   RenameList renames_;          //!< Rename resource list
   GpuMemoryReference* memRef_;  //!< PAL resource reference
+  Pal::gpusize  subOffset_;     //!< GPU memory offset in the original resource
   const Resource* viewOwner_;   //!< GPU resource, which owns this view
   void* glInteropMbRes_;        //!< Mb Res handle
   uint32_t glType_;             //!< GL interop type
@@ -438,26 +469,50 @@ class Resource : public amd::HeapObject {
 
   uint32_t* hwState_;  //!< HW state for image object
   uint64_t hwSrd_;     //!< GPU pointer to HW SRD
+
+  //! Note: Access to the events are thread safe.
+  mutable std::vector<GpuEvent> events_;  //!< GPU events associated with the resource
+};
+
+typedef Util::BuddyAllocator<Device> MemBuddyAllocator;
+
+//! Suballocator of local memory. Owns the memory chunks and a BuddyAllocator per each chunk
+class MemorySubAllocator : public amd::HeapObject {
+public:
+  explicit MemorySubAllocator(Device* device) : device_(device) {}
+  ~MemorySubAllocator();
+  MemorySubAllocator(const MemorySubAllocator&) = delete;             //!< Not copyable, because
+  MemorySubAllocator& operator=(const MemorySubAllocator&) = delete;  //!< it owns raw pointers
+  //! Suballocates memory. Returns the chunk and stores the offset of the block in \p offset
+  GpuMemoryReference* Allocate(Pal::gpusize size, Pal::gpusize alignment, Pal::gpusize* offset);
+  bool Free(GpuMemoryReference* ref, Pal::gpusize offset);  //!< False if ref isn't a chunk
+private:
+  Device* device_;  //!< GPU device
+  std::map<GpuMemoryReference*, MemBuddyAllocator*>  mem_heap_;  //!< Chunks with allocators
+};
 
 class ResourceCache : public amd::HeapObject {
  public:
   //! Default constructor
-  ResourceCache(size_t cacheSizeLimit)
-      : lockCacheOps_("PAL resource cache", true), cacheSize_(0), cacheSizeLimit_(cacheSizeLimit) {}
+  ResourceCache(Device* device, size_t cacheSizeLimit)
+      : lockCacheOps_("PAL resource cache", true)
+      , cacheSize_(0)
+      , cacheSizeLimit_(cacheSizeLimit)
+      , memSubAllocLocal_(device) {}
 
   //! Default destructor
   ~ResourceCache();
 
-  //! Adds a CAL resource to the cache
-  bool addGpuMemory(Resource::Descriptor* desc,  //!< Resource descriptor - cache key
-                    GpuMemoryReference* ref      //!< Resource reference
+  //! Adds a PAL resource to the cache
+  bool addGpuMemory(Resource::Descriptor* desc,   //!< Resource descriptor - cache key
+                    GpuMemoryReference*   ref,    //!< Resource reference
+                    Pal::gpusize          offset  //!< Original resource offset
                     );
 
-  //! Finds a CAL resource from the cache
+  //! Finds a PAL resource from the cache
   GpuMemoryReference* findGpuMemory(
       Resource::Descriptor* desc,  //!< Resource descriptor - cache key
-      Pal::gpusize size, Pal::gpusize alignment);
+      Pal::gpusize size, Pal::gpusize alignment, Pal::gpusize* offset);
 
   //! Destroys cache
   bool free(size_t minCacheEntries = 0);
@@ -477,8 +532,10 @@ class ResourceCache : public amd::HeapObject {
   size_t cacheSize_;            //!< Current cache size in bytes
   const size_t cacheSizeLimit_; //!< Cache size limit in bytes
 
-  //! CAL resource cache
+  //! PAL resource cache
   std::list<std::pair<Resource::Descriptor*, GpuMemoryReference*> > resCache_;
+
+  MemorySubAllocator  memSubAllocLocal_;  //!< Allocator for suballocations in Local
 };
 
 /*@}*/} // namespace pal
diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp
index a0b20f21ea..60d1d30cba 100644
--- a/rocclr/runtime/device/pal/palsettings.cpp
+++ b/rocclr/runtime/device/pal/palsettings.cpp
@@ -138,6 +138,12 @@ Settings::Settings() {
   rgpSqttDispCount_ = PAL_RGP_DISP_COUNT;
   rgpSqttWaitIdle_ = true;
   rgpSqttForceDisable_ = false;
+
+  // Sub allocation parameters
+  subAllocationMinSize_ = 4 * Ki;
+  subAllocationChunkSize_ = 64 * Mi;
+  subAllocationMaxSize_ =
+    std::min(static_cast<uint64_t>(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_);
 }
 
 bool Settings::create(const Pal::DeviceProperties& palProp,
diff --git a/rocclr/runtime/device/pal/palsettings.hpp b/rocclr/runtime/device/pal/palsettings.hpp
index a6755da9d5..1cdad9c095 100644
--- a/rocclr/runtime/device/pal/palsettings.hpp
+++ b/rocclr/runtime/device/pal/palsettings.hpp
@@ -98,6 +98,10 @@ class Settings : public device::Settings {
   uint64_t maxAllocSize_;      //!< Maximum single allocation size
   uint rgpSqttDispCount_;      //!< The number of dispatches captured in SQTT
 
+  uint64_t subAllocationMinSize_;   //!< Minimum size allowed for suballocations
+  uint64_t subAllocationMaxSize_;   //!< Maximum size allowed with suballocations
+  uint64_t subAllocationChunkSize_; //!< Chunk size for suballocations
+  
   amd::LibrarySelector libSelector_;  //!< Select linking libraries for compiler
 
   //! Default constructor
diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp
index f7564b328f..85dccb0d00 100644
--- a/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/rocclr/runtime/device/pal/palvirtual.cpp
@@ -409,7 +409,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
   }
 
   uint64_t curStart = memory->vmAddress();
-  uint64_t curEnd = curStart + memory->vmSize();
+  uint64_t curEnd = curStart + memory->size();
 
   // Loop through all memory objects in the queue and find dependency
   // @note don't include objects from the current kernel
@@ -1974,6 +1974,7 @@ void VirtualGPU::PostDeviceEnqueue(
     uint64_t vmParentWrap,
     GpuEvent* gpuEvent)
 {
+  uint32_t id  = gpuEvent->id;
   amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());
 
   // Make sure exculsive access to the device queue
@@ -2055,6 +2056,9 @@ void VirtualGPU::PostDeviceEnqueue(
     iCmd()->CmdVirtualQueueHandshake(vmParentWrap + offsetof(AmdAqlWrap, state), AQL_WRAP_DONE,
       vmParentWrap + offsetof(AmdAqlWrap, child_counter),
       signalAddr, dev().settings().useDeviceQueue_);
+    if (id != gpuEvent->id) {
+        LogError("Something is wrong. ID mismatch!\n");
+    }
     eventEnd(MainEngine, *gpuEvent);
   }
 
@@ -2203,6 +2207,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
     if (profiling() || state_.profileEnabled_) {
       addBarrier();
     }
+    if (id != gpuEvent.id) {
+      LogError("Something is wrong. ID mismatch!\n");
+    }
     eventEnd(MainEngine, gpuEvent);
 
     // Execute scheduler for device enqueue
@@ -2210,9 +2217,6 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
       PostDeviceEnqueue(kernel, hsaKernel, gpuDefQueue, vmDefQueue, vmParentWrap, &gpuEvent);
     }
 
-    if (id != gpuEvent.id) {
-      LogError("Something is wrong. ID mismatch!\n");
-    }
     // Update the global GPU event
     setGpuEvent(gpuEvent, needFlush);
 
@@ -2266,7 +2270,7 @@ void VirtualGPU::submitMarker(amd::Marker& vcmd) {
   }
 }
 
-void VirtualGPU::releaseMemory(GpuMemoryReference* mem, GpuEvent* event) {
+void VirtualGPU::releaseMemory(GpuMemoryReference* mem) {
   queues_[MainEngine]->removeCmdMemRef(mem);
   queues_[SdmaEngine]->removeCmdMemRef(mem);
 }
diff --git a/rocclr/runtime/device/pal/palvirtual.hpp b/rocclr/runtime/device/pal/palvirtual.hpp
index 43b67b17f1..fff4332f8b 100644
--- a/rocclr/runtime/device/pal/palvirtual.hpp
+++ b/rocclr/runtime/device/pal/palvirtual.hpp
@@ -314,7 +314,7 @@ class VirtualGPU : public device::VirtualDevice {
   virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd);
   virtual void submitTransferBufferFromFile(amd::TransferBufferFileCommand& cmd);
 
-  void releaseMemory(GpuMemoryReference* mem, GpuEvent* event);
+  void releaseMemory(GpuMemoryReference* mem);
 
   void flush(amd::Command* list = nullptr, bool wait = false);
   bool terminate() { return true; }
diff --git a/rocclr/runtime/utils/flags.hpp b/rocclr/runtime/utils/flags.hpp
index 7db658aee6..eb29a04ed4 100644
--- a/rocclr/runtime/utils/flags.hpp
+++ b/rocclr/runtime/utils/flags.hpp
@@ -86,6 +86,8 @@ release(size_t, GPU_PINNED_MIN_XFER_SIZE, 512,                                \
         "The minimal buffer size for pinned read/write transfers in KBytes")  \
 release(size_t, GPU_RESOURCE_CACHE_SIZE, 64,                                  \
         "The resource cache size in MB")                                      \
+release(size_t, GPU_MAX_SUBALLOC_SIZE, 4096,                                  \
+        "The maximum size accepted for suballocaitons in KB")                 \
 release(uint, GPU_ASYNC_MEM_COPY, 0,                                          \
         "Enables async memory transfers with DRM engine")                     \
 release(bool, GPU_FORCE_64BIT_PTR, 0,                                         \