rocm-systems/rocclr/runtime/device/pal/palresource.cpp

// Copyright (c) 2015 Advanced Micro Devices, Inc. All rights reserved.
//
#include "platform/program.hpp"
#include "platform/kernel.hpp"
#include "os/os.hpp"
#include "device/device.hpp"
#include "utils/flags.hpp"
#include "thread/monitor.hpp"
#include "device/pal/palresource.hpp"
#include "device/pal/paldevice.hpp"
#include "device/pal/palblit.hpp"
#include "device/pal/paltimestamp.hpp"
#include "thread/atomic.hpp"
#include "hsa_ext_image.h"
#ifdef _WIN32
#include <d3d10_1.h>
#include "CL/cl_d3d10.h"
#include "CL/cl_d3d11.h"
#endif  // _WIN32
#include <GL/gl.h>
#include "GL/glATIInternal.h"

#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
#include <cmath>

namespace pal {

//! Creates a reference-counted wrapper around a newly allocated PAL GPU memory object.
//!
//! @param dev         Device the allocation is made on and registered with.
//! @param createInfo  PAL creation parameters; heaps[0] and size are also used
//!                    for the device free-memory accounting.
//! @return The new reference, or nullptr if the size query, the wrapper
//!         allocation or the PAL allocation failed.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::GpuMemoryCreateInfo& createInfo) {
  Pal::Result result;
  const size_t gpuMemSize = dev.iDev()->GetGpuMemorySize(createInfo, &result);
  if (result != Pal::Result::Success) {
    return nullptr;
  }

  // The PAL object is constructed in the storage right behind the wrapper (&memRef[1]);
  // the sized operator new presumably reserves those gpuMemSize bytes - verify in the header.
  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Nothing was allocated: leave the free memory counters alone and
    // don't register a null pointer with the device
    return nullptr;
  }

  result = dev.iDev()->CreateGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
  if (result != Pal::Result::Success) {
    memRef->release();
    // Free cache if PAL failed allocation
    dev.resourceCache().free();
    return nullptr;
  }

  // Update free memory size counters
  const_cast<Device&>(dev).updateFreeMemory(createInfo.heaps[0], createInfo.size, false);
  dev.addResource(memRef);
  return memRef;
}

//! Creates a reference-counted wrapper around a PAL pinned (host) memory object.
//!
//! @param dev         Device the allocation is made on and registered with.
//! @param createInfo  PAL pinned memory parameters; size is also charged against
//!                    the GART cacheable heap in the free-memory accounting.
//! @return The new reference, or nullptr if the size query, the wrapper
//!         allocation or the PAL call failed.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::PinnedGpuMemoryCreateInfo& createInfo) {
  Pal::Result result;
  const size_t gpuMemSize = dev.iDev()->GetPinnedGpuMemorySize(createInfo, &result);
  if (result != Pal::Result::Success) {
    return nullptr;
  }

  // PAL constructs its object in the storage right behind the wrapper (&memRef[1])
  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Nothing was allocated: leave the free memory counters alone and
    // don't register a null pointer with the device
    return nullptr;
  }

  result = dev.iDev()->CreatePinnedGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
  if (result != Pal::Result::Success) {
    memRef->release();
    return nullptr;
  }

  // Update free memory size counters
  const_cast<Device&>(dev).updateFreeMemory(Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size,
                                            false);
  dev.addResource(memRef);
  return memRef;
}

//! Creates a reference-counted wrapper around a PAL SVM memory object.
//!
//! @param dev         Device the allocation is made on and registered with.
//! @param createInfo  PAL SVM memory parameters; size is also charged against
//!                    the GART cacheable heap in the free-memory accounting.
//! @return The new reference, or nullptr if the size query, the wrapper
//!         allocation or the PAL call failed.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::SvmGpuMemoryCreateInfo& createInfo) {
  Pal::Result result;
  const size_t gpuMemSize = dev.iDev()->GetSvmGpuMemorySize(createInfo, &result);
  if (result != Pal::Result::Success) {
    return nullptr;
  }

  // PAL constructs its object in the storage right behind the wrapper (&memRef[1])
  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Nothing was allocated: leave the free memory counters alone and
    // don't register a null pointer with the device
    return nullptr;
  }

  result = dev.iDev()->CreateSvmGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
  if (result != Pal::Result::Success) {
    memRef->release();
    return nullptr;
  }

  // Update free memory size counters
  const_cast<Device&>(dev).updateFreeMemory(Pal::GpuHeap::GpuHeapGartCacheable, createInfo.size,
                                            false);
  dev.addResource(memRef);
  return memRef;
}

//! Creates a reference-counted wrapper around an externally shared GPU memory object.
//!
//! The free-memory counters are not touched here: the memory is opened, not allocated.
//!
//! @param dev       Device the memory is opened on and registered with.
//! @param openInfo  PAL description of the external resource to open.
//! @return The new reference, or nullptr if the size query, the wrapper
//!         allocation or the PAL open call failed.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::ExternalGpuMemoryOpenInfo& openInfo) {
  Pal::Result result;
  const size_t gpuMemSize = dev.iDev()->GetExternalSharedGpuMemorySize(&result);
  if (result != Pal::Result::Success) {
    return nullptr;
  }

  // PAL constructs its object in the storage right behind the wrapper (&memRef[1])
  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Don't register a null pointer with the device
    return nullptr;
  }

  // Filled in by PAL with the properties of the opened memory; not used afterwards
  Pal::GpuMemoryCreateInfo createInfo = {};
  result = dev.iDev()->OpenExternalSharedGpuMemory(openInfo, &memRef[1], &createInfo,
                                                   &memRef->gpuMem_);
  if (result != Pal::Result::Success) {
    memRef->release();
    return nullptr;
  }

  dev.addResource(memRef);
  return memRef;
}

//! Opens an externally shared image and wraps its backing GPU memory.
//!
//! On success the PAL image object lives in a char array allocated here; the
//! pointer is handed back through \a image and Resource::~Resource() releases it
//! with delete[] after Destroy(). On failure the array is freed here.
//!
//! @param dev            Device the image is opened on and registered with.
//! @param openInfo       PAL description of the external image to open.
//! @param imgCreateInfo  Receives the creation info reported by PAL for the image.
//! @param image          Receives the opened PAL image; reset to nullptr on failure.
//! @return The new memory reference, or nullptr if the size query, any host
//!         allocation or the PAL open call failed.
GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
                                               const Pal::ExternalImageOpenInfo& openInfo,
                                               Pal::ImageCreateInfo* imgCreateInfo,
                                               Pal::IImage** image) {
  size_t gpuMemSize = 0;
  size_t imageSize = 0;
  if (Pal::Result::Success !=
      dev.iDev()->GetExternalSharedImageSizes(openInfo, &imageSize, &gpuMemSize, imgCreateInfo)) {
    return nullptr;
  }

  // PAL constructs the memory object in the storage right behind the wrapper (&memRef[1])
  GpuMemoryReference* memRef = new (gpuMemSize) GpuMemoryReference(dev);
  if (memRef == nullptr) {
    // Bail out before the image storage is allocated, so nothing can leak and
    // no null pointer gets registered with the device
    return nullptr;
  }

  // Host storage for the PAL image object itself
  char* imgMem = new char[imageSize];
  if (imgMem == nullptr) {
    memRef->release();
    return nullptr;
  }

  // Filled in by PAL with the properties of the opened memory; not used afterwards
  Pal::GpuMemoryCreateInfo createInfo = {};
  const Pal::Result result = dev.iDev()->OpenExternalSharedImage(
      openInfo, imgMem, &memRef[1], &createInfo, image, &memRef->gpuMem_);
  if (result != Pal::Result::Success) {
    // The image was never created, so nobody else will free its storage.
    // Clear the out pointer so the caller can't end up with a dangling image.
    if (image != nullptr) {
      *image = nullptr;
    }
    delete[] imgMem;
    memRef->release();
    return nullptr;
  }

  dev.addResource(memRef);
  return memRef;
}

//! Builds an empty reference for \a dev: no PAL memory, no CPU mapping, no owning
//! queue, and one event slot per virtual GPU reported by the device.
GpuMemoryReference::GpuMemoryReference(const Device& dev)
    : gpuMem_(nullptr),
      cpuAddress_(nullptr),
      device_(dev),
      gpu_(nullptr),
      events_(dev.numOfVgpus()) {}

//! Detaches the memory from every queue that may still track it, then unmaps and
//! destroys the PAL object and unregisters the reference from the device.
GpuMemoryReference::~GpuMemoryReference() {
  if (gpu_ != nullptr) {
    // Memory is tied to a single queue: only that queue has to let go of it
    amd::ScopedLock queueLock(gpu_->execution());
    gpu_->releaseMemory(this, &events_[gpu_->index()]);
  } else {
    // No specific queue: release on every virtual GPU except index 0,
    // which is handled separately below
    Device::ScopedLockVgpus allQueuesLock(device_);
    for (uint queueIdx = 1; queueIdx < device_.vgpus().size(); ++queueIdx) {
      device_.vgpus()[queueIdx]->releaseMemory(this, &events_[queueIdx]);
    }
  }

  if (0 != device_.vgpus().size()) {
    assert(device_.vgpus()[0] == device_.xferQueue() && "Wrong transfer queue!");
    // ScopedLockVgpus doesn't cover the transfer queue, so take its own lock
    amd::ScopedLock xferLock(device_.xferMgr().lockXfer());
    device_.vgpus()[0]->releaseMemory(this, &events_[0]);
  }

  // A non-null CPU address means the memory is still mapped
  if (nullptr != cpuAddress_) {
    iMem()->Unmap();
  }

  if (iMem() != nullptr) {
    iMem()->Destroy();
    gpuMem_ = nullptr;
  }

  device_.removeResource(this);
}

//! Constructs an unallocated buffer resource of \a size bytes on \a gpuDev.
//! The descriptor is set up as a 1D CL_R/CL_FLOAT buffer whose width is the
//! size expressed in X32_Uint elements, rounded up.
Resource::Resource(const Device& gpuDev, size_t size)
    : elementSize_(0), gpuDevice_(gpuDev), mapCount_(0), address_(nullptr), offset_(0),
      curRename_(0), memRef_(nullptr), viewOwner_(nullptr), pinOffset_(0), image_(nullptr),
      hwSrd_(0) {
  // Width is counted in 32-bit elements
  const auto dwordBytes = Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint);

  // NOTE(review): state_ is cleared before the individual flags are set; it
  // presumably overlays them, so keep this assignment first - confirm in the header
  desc_.state_ = 0;
  desc_.type_ = Empty;

  // Geometry: a single row of 32-bit elements, one mip level
  desc_.width_ = amd::alignUp(size, dwordBytes) / dwordBytes;
  desc_.height_ = 1;
  desc_.depth_ = 1;
  desc_.mipLevels_ = 1;

  // Format
  desc_.format_.image_channel_order = CL_R;
  desc_.format_.image_channel_data_type = CL_FLOAT;

  // Layout and placement
  desc_.flags_ = 0;
  desc_.pitch_ = 0;
  desc_.slice_ = 0;
  desc_.cardMemory_ = true;
  desc_.dimSize_ = 1;

  // Kind of object
  desc_.buffer_ = true;
  desc_.imageArray_ = false;
  desc_.topology_ = CL_MEM_OBJECT_BUFFER;
  desc_.SVMRes_ = false;
  desc_.scratch_ = false;
  desc_.isAllocExecute_ = false;
  desc_.baseLevel_ = 0;
}

//! Constructs an unallocated image resource on \a gpuDev.
//!
//! @param width, height, depth  Image extents stored in the descriptor as given.
//! @param format                OpenCL image format.
//! @param imageType             OpenCL image object type; selects the dimension
//!                              count and whether the image is an array.
//! @param mipLevels             Number of mip levels stored in the descriptor.
Resource::Resource(const Device& gpuDev, size_t width, size_t height, size_t depth,
                   cl_image_format format, cl_mem_object_type imageType, uint mipLevels)
    : elementSize_(0), gpuDevice_(gpuDev), mapCount_(0), address_(nullptr), offset_(0),
      curRename_(0), memRef_(nullptr), viewOwner_(nullptr), pinOffset_(0), image_(nullptr),
      hwSrd_(0) {
  // NOTE(review): state_ is cleared before the individual flags are set; it
  // presumably overlays them, so keep this assignment first - confirm in the header
  desc_.state_ = 0;
  desc_.type_ = Empty;

  // Geometry and format exactly as requested by the caller
  desc_.width_ = width;
  desc_.height_ = height;
  desc_.depth_ = depth;
  desc_.mipLevels_ = mipLevels;
  desc_.format_ = format;

  // Layout and placement
  desc_.flags_ = 0;
  desc_.pitch_ = 0;
  desc_.slice_ = 0;
  desc_.cardMemory_ = true;

  // Kind of object; imageArray_ is switched on below for the array types
  desc_.buffer_ = false;
  desc_.imageArray_ = false;
  desc_.topology_ = imageType;
  desc_.SVMRes_ = false;
  desc_.scratch_ = false;
  desc_.isAllocExecute_ = false;
  desc_.baseLevel_ = 0;

  // Dimension count per image type; array types count the array index as a dimension
  switch (imageType) {
    case CL_MEM_OBJECT_IMAGE1D:
    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
      desc_.dimSize_ = 1;
      break;
    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
      desc_.dimSize_ = 2;
      desc_.imageArray_ = true;
      break;
    case CL_MEM_OBJECT_IMAGE2D:
      desc_.dimSize_ = 2;
      break;
    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
      desc_.dimSize_ = 3;
      desc_.imageArray_ = true;
      break;
    case CL_MEM_OBJECT_IMAGE3D:
      desc_.dimSize_ = 3;
      break;
    default:
      desc_.dimSize_ = 1;
      LogError("Unknown image type!");
      break;
  }
}

//! Returns the resource's size to the device free-memory counters, frees the
//! resource and destroys the PAL image object if this resource created its own.
Resource::~Resource() {
  // Heap the memory is credited back to, chosen from the memory type
  Pal::GpuHeap creditedHeap = Pal::GpuHeapCount;
  switch (memoryType()) {
    case RemoteUSWC:
      creditedHeap = Pal::GpuHeapGartUswc;
      break;
    case Pinned:
    case Remote:
      creditedHeap = Pal::GpuHeapGartCacheable;
      break;
    case Shader:
    case BusAddressable:
    case ExternalPhysical:
    case Local:
      creditedHeap = Pal::GpuHeapInvisible;
      break;
    case Persistent:
    default:
      creditedHeap = Pal::GpuHeapLocal;
      break;
  }

  const bool creditMemory = (nullptr != memRef_) && (Pal::GpuHeapCount != creditedHeap);
  if (creditMemory) {
    // Update free memory size counters
    const_cast<Device&>(dev()).updateFreeMemory(creditedHeap, iMem()->Desc().size, true);
  }

  free();

  if (image_ != nullptr) {
    // An image view normally borrows the owner's PAL image. It only has one of
    // its own when the pixel sizes differ.
    //! @todo PAL doesn't allow an SRD view creation with different pixel size
    const bool borrowsOwnerImage =
        (memoryType() == ImageView) && (elementSize() == viewOwner_->elementSize());
    if (!borrowsOwnerImage) {
      image_->Destroy();
      // The image object was placed in a char array, so release it as one
      delete[] reinterpret_cast<char*>(image_);
    }
  }
}

static uint32_t GetHSAILImageFormatType(const cl_image_format& format) {
  static const uint32_t FormatType[] = {HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT,
                                        HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24};

  uint idx = format.image_channel_data_type - CL_SNORM_INT8;
  assert((idx <= (CL_UNORM_INT24 - CL_SNORM_INT8)) && "Out of range format channel!");
  return FormatType[idx];
}

static uint32_t GetHSAILImageOrderType(const cl_image_format& format) {
  static const uint32_t OrderType[] = {HSA_EXT_IMAGE_CHANNEL_ORDER_R,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_A,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_RG,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_RA,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_RGB,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_RX,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_RGX,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA,
                                       HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR};

  uint idx = format.image_channel_order - CL_R;
  assert((idx <= (CL_ABGR - CL_R)) && "Out of range format order!");
  return OrderType[idx];
}

//! Fills the heap selection of \a createInfo from the resource's memory type and
//! clears desc_.cardMemory_ for the types that don't use card memory.
void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
  Pal::GpuMemoryCreateInfo& info = *createInfo;

  // A single heap unless the memory type below asks for a second one
  info.heapCount = 1;

  switch (memoryType()) {
    case ExternalPhysical:
      desc_.cardMemory_ = false;
      // Fall through - heap selection is shared with Shader and Local
    case Shader:
    case Local:
      // Invisible heap first, local heap as the second choice
      info.heaps[0] = Pal::GpuHeapInvisible;
      info.heaps[1] = Pal::GpuHeapLocal;
      info.heapCount = 2;
      break;
    case Remote:
      desc_.cardMemory_ = false;
      info.heaps[0] = Pal::GpuHeapGartCacheable;
      break;
    case RemoteUSWC:
      desc_.cardMemory_ = false;
      info.heaps[0] = Pal::GpuHeapGartUswc;
      break;
    case Persistent:
      info.heaps[0] = Pal::GpuHeapLocal;
#ifdef ATI_OS_LINUX
      // Note: SSG in Linux requires DGMA heap
      if (dev().properties().gpuMemoryProperties.busAddressableMemSize > 0) {
        info.flags.busAddressable = true;
      }
#endif
      break;
    default:
      info.heaps[0] = Pal::GpuHeapLocal;
      break;
  }
}

bool Resource::create(MemoryType memType, CreateParams* params) {
  static const Pal::gpusize MaxGpuAlignment = 64 * Ki;
  const amd::HostMemoryReference* hostMemRef = nullptr;
  bool imageCreateView = false;
  uint hostMemOffset = 0;
  bool foundCalRef = false;
  bool viewDefined = false;
  uint viewLayer = 0;
  uint viewLevel = 0;
  uint viewFlags = 0;
  Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, 0, 0};
  Pal::SubresRange ImgSubresRange = {ImgSubresId, 1, 1};
  Pal::ChannelMapping channels;
  Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);

  // This is a thread safe operation
  const_cast<Device&>(dev()).initializeHeapResources();

  if (memType == Shader) {
    if (dev().settings().svmFineGrainSystem_) {
      desc_.isAllocExecute_ = true;
      desc_.SVMRes_ = true;
      memType = RemoteUSWC;
    } else {
      memType = Local;
    }
    // force to use remote memory for HW DEBUG or use
    // local memory once we determine if FGS is supported
    // memType = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC;
  }

  // Get the element size
  elementSize_ = Pal::Formats::BytesPerPixel(format);
  desc_.type_ = memType;
  if (memType == Scratch) {
    // use local memory for scratch buffer unless it is using HW DEBUG
    desc_.type_ = (!dev().settings().enableHwDebug_) ? Local : RemoteUSWC;
    desc_.scratch_ = true;
  }

  // Force remote allocation if it was requested in the settings
  if (dev().settings().remoteAlloc_ && ((memoryType() == Local) || (memoryType() == Persistent))) {
    if (dev().settings().apuSystem_ && dev().settings().viPlus_) {
      desc_.type_ = Remote;
    } else {
      desc_.type_ = RemoteUSWC;
    }
  }

  if (dev().settings().disablePersistent_ && (memoryType() == Persistent)) {
    desc_.type_ = RemoteUSWC;
  }

  Pal::Result result;

  if ((memoryType() == OGLInterop) || (memoryType() == D3D9Interop) ||
      (memoryType() == D3D10Interop) || (memoryType() == D3D11Interop)) {
    Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {};
    Pal::ExternalResourceOpenInfo& openInfo = gpuMemOpenInfo.resourceInfo;
    uint misc = 0;
    uint layer = 0;
    uint mipLevel = 0;
    InteropType type = InteropTypeless;

    if (memoryType() == OGLInterop) {
      OGLInteropParams* oglRes = reinterpret_cast<OGLInteropParams*>(params);
      assert(oglRes->glPlatformContext_ && "We don't have OGL context!");
      switch (oglRes->type_) {
        case InteropVertexBuffer:
          glType_ = GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD;
          break;
        case InteropRenderBuffer:
          glType_ = GL_RESOURCE_ATTACH_RENDERBUFFER_AMD;
          break;
        case InteropTexture:
        case InteropTextureViewLevel:
        case InteropTextureViewCube:
          glType_ = GL_RESOURCE_ATTACH_TEXTURE_AMD;
          break;
        default:
          LogError("Unknown OGL interop type!");
          return false;
          break;
      }
      glPlatformContext_ = oglRes->glPlatformContext_;
      layer = oglRes->layer_;
      type = oglRes->type_;
      mipLevel = oglRes->mipLevel_;

      if (!dev().resGLAssociate(oglRes->glPlatformContext_, oglRes->handle_, glType_,
                                &openInfo.hExternalResource, &glInteropMbRes_, &offset_, desc_.format_
#ifdef ATI_OS_WIN
								, openInfo.doppDesktopInfo
#endif
								)) {
        return false;
      }
      desc_.isDoppTexture_ = (openInfo.doppDesktopInfo.gpuVirtAddr != 0);
      format = dev().getPalFormat(desc().format_, &channels);
    }
#ifdef ATI_OS_WIN
    else {
      D3DInteropParams* d3dRes = reinterpret_cast<D3DInteropParams*>(params);
      openInfo.hExternalResource = d3dRes->handle_;
      misc = d3dRes->misc;
      layer = d3dRes->layer_;
      type = d3dRes->type_;
      mipLevel = d3dRes->mipLevel_;
    }
#endif
//! @todo PAL query for image/buffer object doesn't work properly!
#if 0
        bool    isImage = false;
        if (Pal::Result::Success !=
            dev().iDev()->DetermineExternalSharedResourceType(openInfo, &isImage)) {
            return false;
        }
#endif  // 0
    if (desc().buffer_ || misc) {
      memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
      if (nullptr == memRef_) {
        return false;
      }

      if (misc) {
        Pal::ImageCreateInfo imgCreateInfo = {};
        Pal::ExternalImageOpenInfo imgOpenInfo = {};
        imgOpenInfo.resourceInfo = openInfo;
        imgOpenInfo.swizzledFormat.format = format;
        imgOpenInfo.swizzledFormat.swizzle = channels;
        imgOpenInfo.usage.shaderRead = true;
        imgOpenInfo.usage.shaderWrite = true;
        size_t imageSize;
        size_t gpuMemSize;

        if (Pal::Result::Success !=
            dev().iDev()->GetExternalSharedImageSizes(imgOpenInfo, &imageSize, &gpuMemSize,
                                                      &imgCreateInfo)) {
          return false;
        }

        Pal::gpusize viewOffset = 0;
        imgCreateInfo.flags.shareable = false;
        imgCreateInfo.imageType = Pal::ImageType::Tex2d;
        imgCreateInfo.extent.width = desc().width_;
        imgCreateInfo.extent.height = desc().height_;
        imgCreateInfo.extent.depth = desc().depth_;
        imgCreateInfo.arraySize = 1;
        imgCreateInfo.usageFlags.shaderRead = true;
        imgCreateInfo.usageFlags.shaderWrite = true;
        imgCreateInfo.swizzledFormat.format = format;
        imgCreateInfo.swizzledFormat.swizzle = channels;
        imgCreateInfo.mipLevels = 1;
        imgCreateInfo.samples = 1;
        imgCreateInfo.fragments = 1;
        imgCreateInfo.tiling = Pal::ImageTiling::Linear;
        imgCreateInfo.depthPitch = desc().height_ * imgCreateInfo.rowPitch;

        switch (misc) {
          case 1:  // NV12 format
            switch (layer) {
              case -1:
                break;
              case 0:
                break;
              case 1:
                // Y - plane size to the offset
                // NV12 format. UV is 2 times smaller plane Y
                viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
                imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
                break;
              default:
                LogError("Unknown Interop View Type");
                return false;
            }
            break;
          case 2:  // YV12 format
            switch (layer) {
              case -1:
                break;
              case 0:
                break;
              case 1:
                // Y - plane size to the offset
                // YV12 format. U is 4 times smaller plane than Y
                viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
                imgCreateInfo.rowPitch >>= 1;
                break;
              case 2:
                // Y + U plane sizes to the offest.
                // U plane is 4 times smaller than Y and U == V
                viewOffset = 5 * imgCreateInfo.rowPitch * desc().height_ / 2;
                imgCreateInfo.rowPitch >>= 1;
                break;
              default:
                LogError("Unknown Interop View Type");
                return false;
            }
            imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
            break;
          default:
            LogError("Unknown Interop View Type");
            return false;
        }

        imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
        if (result != Pal::Result::Success) {
          return false;
        }

        char* memImg = new char[imageSize];
        if (memImg != nullptr) {
          result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
          if (result != Pal::Result::Success) {
            delete [] memImg;
            return false;
          }
        }
        result = image_->BindGpuMemory(iMem(), viewOffset);
        if (result != Pal::Result::Success) {
          return false;
        }
        offset_ = static_cast<size_t>(viewOffset);
        hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
        if ((0 == hwSrd_) && (memoryType() != ImageView)) {
          return false;
        }
        Pal::ImageViewInfo viewInfo = {};
        viewInfo.viewType = Pal::ImageViewType::Tex2d;
        viewInfo.pImage = image_;
        viewInfo.swizzledFormat.format = format;
        viewInfo.swizzledFormat.swizzle = channels;
        viewInfo.subresRange = ImgSubresRange;
        dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);

        hwState_[8] = GetHSAILImageFormatType(desc().format_);
        hwState_[9] = GetHSAILImageOrderType(desc().format_);
        hwState_[10] = static_cast<uint32_t>(desc().width_);
        hwState_[11] = 0;  // one extra reserved field in the argument
      }
    } else if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
      memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
      if (nullptr == memRef_) {
        return false;
      }
      Pal::BufferViewInfo viewInfo = {};
      viewInfo.gpuAddr = vmAddress();
      viewInfo.range = memRef_->iMem()->Desc().size;
      viewInfo.stride = elementSize();
      viewInfo.swizzledFormat.format = format;
      viewInfo.swizzledFormat.swizzle = channels;
      hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
      if ((0 == hwSrd_) && (memoryType() != ImageView)) {
        return false;
      }

      dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
      hwState_[8] = GetHSAILImageFormatType(desc().format_);
      hwState_[9] = GetHSAILImageOrderType(desc().format_);
      hwState_[10] = static_cast<uint32_t>(desc().width_);
      hwState_[11] = 0;  // one extra reserved field in the argument
    } else {
      Pal::ExternalImageOpenInfo imgOpenInfo = {};
      Pal::ImageCreateInfo imgCreateInfo = {};
      imgOpenInfo.resourceInfo = openInfo;
      imgOpenInfo.swizzledFormat.format = format;
      imgOpenInfo.swizzledFormat.swizzle = channels;
      imgOpenInfo.usage.shaderRead = true;
      imgOpenInfo.usage.shaderWrite = true;
      memRef_ = GpuMemoryReference::Create(dev(), imgOpenInfo, &imgCreateInfo, &image_);
      if (nullptr == memRef_) {
        return false;
      }

      hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
      if ((0 == hwSrd_) && (memoryType() != ImageView)) {
        return false;
      }
      Pal::ImageViewInfo viewInfo = {};
      viewInfo.viewType = Pal::ImageViewType::Tex2d;
      switch (imgCreateInfo.imageType) {
        case Pal::ImageType::Tex3d:
          viewInfo.viewType = Pal::ImageViewType::Tex3d;
          break;
        case Pal::ImageType::Tex1d:
          viewInfo.viewType = Pal::ImageViewType::Tex1d;
          break;
        default:
          break;
      }
      viewInfo.pImage = image_;
      viewInfo.swizzledFormat.format = format;
      viewInfo.swizzledFormat.swizzle = channels;
      if ((type == InteropTextureViewLevel) || (type == InteropTextureViewCube)) {
        ImgSubresRange.startSubres.mipLevel = mipLevel;
        if (type == InteropTextureViewCube) {
          ImgSubresRange.startSubres.arraySlice = layer;
          viewInfo.viewType = Pal::ImageViewType::Tex2d;
        }
      }
      if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
        ImgSubresRange.numSlices = desc_.height_;
      }
      if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
        ImgSubresRange.numSlices = desc_.depth_;
      }
      ImgSubresRange.numMips = desc().mipLevels_;
      viewInfo.subresRange = ImgSubresRange;

      dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);
      //! It's a workaround for D24S8 format, since PAL doesn't support this format
      //! and GSL decompresses 24bit DEPTH into D24S8 for OGL compatibility
      if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
          (desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
        hwState_[1] &= ~0x3c000000;
        hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
      }
      hwState_[8] = GetHSAILImageFormatType(desc().format_);
      hwState_[9] = GetHSAILImageOrderType(desc().format_);
      hwState_[10] = static_cast<uint32_t>(desc().width_);
      hwState_[11] = 0;  // one extra reserved field in the argument
    }
    return true;
  }

  if (!desc_.buffer_) {
    if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
      if (memoryType() == ImageBuffer) {
        ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
        viewOwner_ = imageBuffer->resource_;
        memRef_ = viewOwner_->memRef_;
        memRef_->retain();
        desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
      } else {
        Pal::GpuMemoryCreateInfo createInfo = {};
        createInfo.size = desc().width_ * elementSize();
        // @todo 64K alignment is too big
        createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
        createInfo.alignment = MaxGpuAlignment;
        createInfo.vaRange = Pal::VaRange::Default;
        createInfo.priority = Pal::GpuMemPriority::Normal;
        memTypeToHeap(&createInfo);
        // createInfo.priority;
        memRef_ =
            dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
        if (nullptr == memRef_) {
          memRef_ = GpuMemoryReference::Create(dev(), createInfo);
          if (nullptr == memRef_) {
            LogError("Failed PAL memory allocation!");
            return false;
          }
        }
      }
      // Check if memory is locked already and restore CPU pointer
      if (memRef_->cpuAddress_ != nullptr) {
        address_ = memRef_->cpuAddress_;
        memRef_->cpuAddress_ = nullptr;
        mapCount_++;
      }
      Pal::BufferViewInfo viewInfo = {};
      viewInfo.gpuAddr = vmAddress();
      viewInfo.range = memRef_->iMem()->Desc().size;
      viewInfo.stride = elementSize();
      viewInfo.swizzledFormat.format = format;
      viewInfo.swizzledFormat.swizzle = channels;
      // viewInfo.channels = channels;
      hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
      if ((0 == hwSrd_) && (memoryType() != ImageView)) {
        return false;
      }

      dev().iDev()->CreateTypedBufferViewSrds(1, &viewInfo, hwState_);
      hwState_[8] = GetHSAILImageFormatType(desc().format_);
      hwState_[9] = GetHSAILImageOrderType(desc().format_);
      hwState_[10] = static_cast<uint32_t>(desc().width_);
      hwState_[11] = 0;  // one extra reserved field in the argument
      return true;
    }

    Pal::ImageViewInfo viewInfo = {};
    Pal::ImageCreateInfo imgCreateInfo = {};
    Pal::GpuMemoryRequirements req = {};
    imgCreateInfo.imageType = Pal::ImageType::Tex2d;
    viewInfo.viewType = Pal::ImageViewType::Tex2d;
    imgCreateInfo.extent.width = desc_.width_;
    imgCreateInfo.extent.height = desc_.height_;
    imgCreateInfo.extent.depth = desc_.depth_;
    imgCreateInfo.arraySize = 1;

    switch (desc_.topology_) {
      case CL_MEM_OBJECT_IMAGE3D:
        imgCreateInfo.imageType = Pal::ImageType::Tex3d;
        viewInfo.viewType = Pal::ImageViewType::Tex3d;
        break;
      case CL_MEM_OBJECT_IMAGE1D:
      case CL_MEM_OBJECT_IMAGE1D_ARRAY:
      case CL_MEM_OBJECT_IMAGE1D_BUFFER:
        imgCreateInfo.imageType = Pal::ImageType::Tex1d;
        viewInfo.viewType = Pal::ImageViewType::Tex1d;
        break;
    }
    if (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
      ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.height_;
      imgCreateInfo.extent.depth = desc_.height_;
      imgCreateInfo.extent.height = 1;
    }
    if (desc_.topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
      ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.depth_;
    }

    if (memoryType() == ImageView) {
      ImageViewParams* imageView = reinterpret_cast<ImageViewParams*>(params);
      ImgSubresRange.startSubres.mipLevel = imageView->level_;
      desc_.baseLevel_ = imageView->level_;
      ImgSubresRange.startSubres.arraySlice = imageView->layer_;
      viewOwner_ = imageView->resource_;
      image_ = viewOwner_->image_;
      offset_ = viewOwner_->offset_;
    } else if (memoryType() == ImageBuffer) {
      ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
      viewOwner_ = imageBuffer->resource_;
    }
    ImgSubresRange.numMips = desc().mipLevels_;

    if ((memoryType() != ImageView) ||
        //! @todo PAL doesn't allow an SRD view creation with different pixel size
        (elementSize() != viewOwner_->elementSize())) {
      imgCreateInfo.usageFlags.shaderRead = true;
      imgCreateInfo.usageFlags.shaderWrite =
          (format == Pal::ChNumFormat::X8Y8Z8W8_Srgb) ? false : true;
      imgCreateInfo.swizzledFormat.format = format;
      imgCreateInfo.swizzledFormat.swizzle = channels;
      imgCreateInfo.mipLevels = (desc_.mipLevels_) ? desc_.mipLevels_ : 1;
      imgCreateInfo.samples = 1;
      imgCreateInfo.fragments = 1;
      Pal::ImageTiling tiling = Pal::ImageTiling::Optimal;
      uint32_t rowPitch = 0;

      if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) ||
          (memoryType() == ImageBuffer)) {
        tiling = Pal::ImageTiling::Linear;
      } else if (memoryType() == ImageView) {
        tiling = viewOwner_->image_->GetImageCreateInfo().tiling;
        // Find the new pitch in pixels for the new format
        rowPitch = viewOwner_->desc().pitch_ * viewOwner_->elementSize() / elementSize();
      }

      if (memoryType() == ImageBuffer) {
        if ((params->owner_ != NULL) && params->owner_->asImage() &&
            (params->owner_->asImage()->getRowPitch() != 0)) {
          rowPitch = params->owner_->asImage()->getRowPitch() / elementSize();
        } else {
          rowPitch = desc().width_;
        }
      }
      desc_.pitch_ = rowPitch;
      // Make sure the row pitch is aligned to pixels
      imgCreateInfo.rowPitch =
          elementSize() * amd::alignUp(rowPitch, dev().info().imagePitchAlignment_);
      imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
      imgCreateInfo.tiling = tiling;

      size_t imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
      if (result != Pal::Result::Success) {
        return false;
      }

      char* memImg = new char[imageSize];
      if (memImg != nullptr) {
        result = dev().iDev()->CreateImage(imgCreateInfo, memImg, &image_);
        if (result != Pal::Result::Success) {
          delete [] memImg;
          return false;
        }
      }
      image_->GetGpuMemoryRequirements(&req);
      // createInfo.priority;
    }

    if ((memoryType() != ImageView) && (memoryType() != ImageBuffer)) {
      Pal::GpuMemoryCreateInfo createInfo = {};
      createInfo.size = amd::alignUp(req.size, MaxGpuAlignment);
      createInfo.alignment = std::max(req.alignment, MaxGpuAlignment);
      createInfo.vaRange = Pal::VaRange::Default;
      createInfo.priority = Pal::GpuMemPriority::Normal;
      memTypeToHeap(&createInfo);

      memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
      if (nullptr == memRef_) {
        memRef_ = GpuMemoryReference::Create(dev(), createInfo);
        if (nullptr == memRef_) {
          LogError("Failed PAL memory allocation!");
          return false;
        }
      }
    } else {
      memRef_ = viewOwner_->memRef_;
      memRef_->retain();
      desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
      if (req.size > viewOwner_->iMem()->Desc().size) {
        LogWarning("Image is bigger than the original mem object!");
      }
    }
    // Check if memory is locked already and restore CPU pointer
    if (memRef_->cpuAddress_ != nullptr) {
      address_ = memRef_->cpuAddress_;
      memRef_->cpuAddress_ = nullptr;
      mapCount_++;
    }

    result = image_->BindGpuMemory(memRef_->gpuMem_, offset_);
    if (result != Pal::Result::Success) {
      return false;
    }

    hwSrd_ = dev().srds().allocSrdSlot(reinterpret_cast<address*>(&hwState_));
    if ((0 == hwSrd_) && (memoryType() != ImageView)) {
      return false;
    }
    viewInfo.pImage = image_;
    viewInfo.swizzledFormat.format = format;
    viewInfo.swizzledFormat.swizzle = channels;
    viewInfo.subresRange = ImgSubresRange;
    dev().iDev()->CreateImageViewSrds(1, &viewInfo, hwState_);

    hwState_[8] = GetHSAILImageFormatType(desc().format_);
    hwState_[9] = GetHSAILImageOrderType(desc().format_);
    hwState_[10] = static_cast<uint32_t>(desc().width_);
    hwState_[11] = 0;  // one extra reserved field in the argument
    return true;
  }

  if (memoryType() == View) {
    // Save the offset in the global heap
    ViewParams* view = reinterpret_cast<ViewParams*>(params);
    offset_ = view->offset_;

    // Make sure parent was provided
    if (nullptr != view->resource_) {
      viewOwner_ = view->resource_;
      offset_ += viewOwner_->offset();
      if (viewOwner_->data() != nullptr) {
        address_ = viewOwner_->data() + view->offset_;
      }
      pinOffset_ = viewOwner_->pinOffset();
      memRef_ = viewOwner_->memRef_;
      memRef_->retain();
      desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
    } else {
      desc_.type_ = Empty;
    }
    return true;
  }

  if (memoryType() == Pinned) {
    PinnedParams* pinned = reinterpret_cast<PinnedParams*>(params);
    size_t allocSize = pinned->size_;
    void* pinAddress;
    hostMemRef = pinned->hostMemRef_;
    pinAddress = address_ = hostMemRef->hostMem();
    // assert((allocSize == (desc().width_ * elementSize())) && "Sizes don't match");
    if (desc().topology_ == CL_MEM_OBJECT_BUFFER) {
      // Allign offset to 4K boundary (Vista/Win7 limitation)
      char* tmpHost = const_cast<char*>(
          amd::alignDown(reinterpret_cast<const char*>(address_), PinnedMemoryAlignment));

      // Find the partial size for unaligned copy
      hostMemOffset = static_cast<uint>(reinterpret_cast<const char*>(address_) - tmpHost);

      pinOffset_ = hostMemOffset;

      pinAddress = tmpHost;

      if (hostMemOffset != 0) {
        allocSize += hostMemOffset;
      }
      allocSize = amd::alignUp(allocSize, PinnedMemoryAlignment);
      //            hostMemOffset &= ~(0xff);
    } else if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D) {
      //! @todo: Width has to be aligned for 3D.
      //! Need to be replaced with a compute copy
      // Width aligned by 8 texels
      if (((desc().width_ % 0x8) != 0) ||
          // Pitch aligned by 64 bytes
          (((desc().width_ * elementSize()) % 0x40) != 0)) {
        return false;
      }
    } else {
      //! @todo GSL doesn't support pinning with resAlloc_
      return false;
    }

    if (dev().settings().svmFineGrainSystem_) {
      desc_.SVMRes_ = true;
    }

    // Ensure page alignment
    if ((uint64_t)(pinAddress) & (amd::Os::pageSize() - 1)) {
      return false;
    }
    Pal::PinnedGpuMemoryCreateInfo createInfo = {};
    createInfo.pSysMem = pinAddress;
    createInfo.size = allocSize;
    createInfo.vaRange = Pal::VaRange::Default;
    memRef_ = GpuMemoryReference::Create(dev(), createInfo);
    if (nullptr == memRef_) {
      LogError("Failed PAL memory allocation!");
      pinOffset_ = 0;
      return false;
    }
    desc_.cardMemory_ = false;
    return true;
  }

  Pal::gpusize svmPtr = 0;
  if ((nullptr != params) && (nullptr != params->owner_) &&
      (nullptr != params->owner_->getSvmPtr())) {
    svmPtr = reinterpret_cast<Pal::gpusize>(params->owner_->getSvmPtr());
    desc_.SVMRes_ = true;
    svmPtr = (svmPtr == 1) ? 0 : svmPtr;
  }
  if (desc_.SVMRes_) {
    // @todo 64K alignment is too big
    size_t allocSize = amd::alignUp(desc().width_ * elementSize_, MaxGpuAlignment);
    if ((memoryType() == RemoteUSWC) || (memoryType() == Remote)) {
      Pal::SvmGpuMemoryCreateInfo createInfo = {};
      createInfo.isUsedForKernel = desc_.isAllocExecute_;
      createInfo.size = allocSize;
      createInfo.alignment = MaxGpuAlignment;
      if (svmPtr != 0) {
        createInfo.flags.useReservedGpuVa = true;
        createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
      } else {
        createInfo.flags.useReservedGpuVa = false;
        createInfo.pReservedGpuVaOwner = nullptr;
      }
      memRef_ = GpuMemoryReference::Create(dev(), createInfo);
    } else {
      Pal::GpuMemoryCreateInfo createInfo = {};
      createInfo.size = allocSize;
      createInfo.alignment = MaxGpuAlignment;
      createInfo.vaRange = Pal::VaRange::Svm;
      createInfo.priority = Pal::GpuMemPriority::Normal;
      if (svmPtr != 0) {
        createInfo.flags.useReservedGpuVa = true;
        createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
      }
      memTypeToHeap(&createInfo);
      memRef_ = GpuMemoryReference::Create(dev(), createInfo);
    }
    if (nullptr == memRef_) {
      LogError("Failed PAL memory allocation!");
      return false;
    }
    desc_.cardMemory_ = false;
    if ((nullptr != params) && (nullptr != params->owner_) &&
        (nullptr != params->owner_->getSvmPtr())) {
      params->owner_->setSvmPtr(reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr));
    }
    return true;
  }

  Pal::GpuMemoryCreateInfo createInfo = {};
  createInfo.size = desc().width_ * elementSize_;
  // @todo 64K alignment is too big
  createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
  createInfo.alignment = MaxGpuAlignment;
  createInfo.vaRange = Pal::VaRange::Default;
  createInfo.priority = Pal::GpuMemPriority::Normal;

  if (memoryType() == ExternalPhysical) {
    cl_bus_address_amd bus_address = (reinterpret_cast<amd::Buffer*>(params->owner_))->busAddress();
    createInfo.surfaceBusAddr = bus_address.surface_bus_address;
    createInfo.markerBusAddr = bus_address.marker_bus_address;
    createInfo.flags.sdiExternal = true;
  } else if (memoryType() == BusAddressable) {
    createInfo.flags.busAddressable = true;
  }

  memTypeToHeap(&createInfo);
  // createInfo.priority;
  memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment);
  if (nullptr == memRef_) {
    memRef_ = GpuMemoryReference::Create(dev(), createInfo);
    if (nullptr == memRef_) {
      LogError("Failed PAL memory allocation!");
      return false;
    }
  }
  // Check if memory is locked already and restore CPU pointer
  if (memRef_->cpuAddress_ != nullptr) {
    address_ = memRef_->cpuAddress_;
    memRef_->cpuAddress_ = nullptr;
    mapCount_++;
  }
  return true;
}

//! Releases the GPU memory held by this resource.
//!
//! Nothing is done when no memory reference is attached. Otherwise the function
//!  1. warns if the resource is still mapped (Remote, RemoteUSWC and Persistent are exempt),
//!  2. waits for the GPU events recorded on the memory, unless the resource is a
//!     View, ImageView or ImageBuffer,
//!  3. hands the allocation to the device resource cache or releases it via palFree();
//!     if renames exist, every renamed allocation is released instead,
//!  4. returns the SRD slot for non-buffer resources.
void Resource::free() {
  if (memRef_ == nullptr) {
    return;
  }

  // Sanity check for the map calls
  if ((mapCount_ != 0) && (memoryType() != Remote) &&
      (memoryType() != RemoteUSWC) && (memoryType() != Persistent)) {
    LogWarning("Resource wasn't unlocked, but destroyed!");
  }
  // Views share the memory reference of their owner, hence they don't wait here
  const bool wait =
    (memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);

  if (wait) {
    if (memRef_->gpu_ == nullptr) {
      // The memory isn't bound to a particular queue: wait on the virtual GPUs
      // while the vgpu list is locked
      Device::ScopedLockVgpus lock(dev());
      // Wait for the event of every virtual GPU, starting from index 1
      // NOTE(review): index 0 is skipped - presumably an internal queue; confirm
      for (uint idx = 1; idx < dev().vgpus().size(); ++idx) {
        dev().vgpus()[idx]->waitForEvent(&memRef_->events_[idx]);
      }
    }
    else {
      // The memory is bound to a single queue: wait for that queue's event only,
      // holding the queue execution lock
      amd::ScopedLock l(memRef_->gpu_->execution());
      memRef_->gpu_->waitForEvent(&memRef_->events_[memRef_->gpu_->index()]);
    }
  } else {
    // After a view is destroyed the original object can no longer be associated with a vgpu
    memRef_->gpu_ = nullptr;
  }

  if (renames_.size() == 0) {
    // No renames: there is a single allocation to release
    if (iMem() != 0) {
      if (mapCount_ != 0) {
        if ((memoryType() != Remote) && (memoryType() != RemoteUSWC)) {
          //! @note: This is a workaround for bad applications that
          //! don't unmap memory
          unmap(nullptr);
        } else {
          // Delay CPU address unmap until memRef_ destruction
          assert(memRef_->cpuAddress_ == nullptr && "Memref shouldn't have a valid CPU address");
          memRef_->cpuAddress_ = address_;
        }
      }

      // Add resource to the cache if it's not assigned to a specific queue,
      // otherwise (or if the cache rejects it) drop the reference right away
      if ((memRef_->gpu_ != nullptr) || !dev().resourceCache().addGpuMemory(&desc_, memRef_)) {
        palFree();
      }
    }
  } else {
    // Clear the CPU address stored in the active rename before releasing the renames
    renames_[curRename_]->cpuAddress_ = 0;
    for (size_t i = 0; i < renames_.size(); ++i) {
      // palFree() and iMem() operate on memRef_, so point it at each rename in turn
      memRef_ = renames_[i];
      // Destroy PAL resource
      if (iMem() != 0) {
        palFree();
      }
    }
  }

  // Free SRD for images
  if (!desc().buffer_) {
    dev().srds().freeSrdSlot(hwSrd_);
  }
}

//! Writes raw data into the resource with a CmdUpdateMemory command on the main engine.
//!
//! \param gpu           Virtual GPU that records and submits the update
//! \param offset        Destination offset passed to CmdUpdateMemory
//! \param size          Number of bytes to write; must be a multiple of 4
//! \param data          Source data, consumed as 32-bit words
//! \param waitForEvent  If true, block until the update completes; otherwise mark the
//!                      resource busy and publish the event as the queue's GPU event
void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const void* data,
                            bool waitForEvent) const {
  GpuEvent updateEvent;

  // The data is handed over as DWORDs, so the size has to be DWORD aligned
  assert((size & 3) == 0);

  const uint32_t* dwords = reinterpret_cast<const uint32_t*>(data);
  gpu.eventBegin(MainEngine);
  gpu.queue(MainEngine).addCmdMemRef(memRef());
  gpu.iCmd()->CmdUpdateMemory(*iMem(), offset, size, dwords);
  gpu.eventEnd(MainEngine, updateEvent);

  if (!waitForEvent) {
    // Asynchronous path: track the pending update on the resource and on the queue
    setBusy(gpu, updateEvent);
    gpu.setGpuEvent(updateEvent, false);
    return;
  }

  //! @note: There is no need to mark the allocation as busy
  //! when the transfer is waited for right here
  gpu.waitForEvent(&updateEvent);
}
//! Picks the unsigned-integer PAL channel format that matches an element size in bytes.
//! Sizes 16, 8, 4 and 2 map to the format of the same width; any other size
//! falls back to the 8-bit single-channel format.
static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement) {
  switch (bytesPerElement) {
    case 16:
      return Pal::ChNumFormat::X32Y32Z32W32_Uint;
    case 8:
      return Pal::ChNumFormat::X32Y32_Uint;
    case 4:
      return Pal::ChNumFormat::X32_Uint;
    case 2:
      return Pal::ChNumFormat::X16_Uint;
    default:
      return Pal::ChNumFormat::X8_Uint;
  }
}

//! Copies a region from this resource to \p dstResource on the SDMA engine.
//!
//! Three cases are handled, selected by the buffer_ flag of both descriptors:
//! buffer -> image, image -> buffer, and everything else (treated as memory -> memory,
//! either as a typed-buffer rectangle copy or as a plain linear copy).
//!
//! \param gpu             Virtual GPU used for the transfer; its engineID_ is switched to
//!                        SdmaEngine for the copy and restored before returning true
//! \param srcOrigin       Source origin. For a buffer side, [0] is the byte offset while
//!                        [1] and [2] carry the row and depth pitch (0 selects a tight pitch
//!                        in the buffer<->image cases)
//! \param dstOrigin       Destination origin, same convention as \p srcOrigin
//! \param size            Copy extent; [0] is in bytes for the memory -> memory cases
//! \param dstResource     Destination resource
//! \param enableCopyRect  Use CmdCopyTypedBuffer (rectangle copy) for memory -> memory
//! \param flushDMA        Forwarded to VirtualGPU::setGpuEvent()
//! \param bytesPerElement Element size used to pick the typed-buffer format and to
//!                        convert the rectangle width from bytes to elements
//! \return false if the buffer<->image copy violates the SDMA alignment restrictions
//!         (nothing is recorded in that case), true otherwise
bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
                                const amd::Coord3D& dstOrigin, const amd::Coord3D& size,
                                Resource& dstResource, bool enableCopyRect, bool flushDMA,
                                uint bytesPerElement) const {
  GpuEvent event;
  // Remember the engine the queue was using, so it can be restored after the SDMA copy
  EngineType activeEngineID = gpu.engineID_;
  static const bool waitOnBusyEngine = true;

  assert(!(desc().cardMemory_ && dstResource.desc().cardMemory_) && "Unsupported configuraiton!");

  // Local copies of the coordinates; the X origins are shifted by the pin offsets
  size_t calSrcOrigin[3], calDstOrigin[3], calSize[3];
  calSrcOrigin[0] = srcOrigin[0] + pinOffset();
  calSrcOrigin[1] = srcOrigin[1];
  calSrcOrigin[2] = srcOrigin[2];
  calDstOrigin[0] = dstOrigin[0] + dstResource.pinOffset();
  calDstOrigin[1] = dstOrigin[1];
  calDstOrigin[2] = dstOrigin[2];
  calSize[0] = size[0];
  calSize[1] = size[1];
  calSize[2] = size[2];

  uint64_t gpuMemoryOffset = 0;
  uint64_t gpuMemoryRowPitch = 0;
  uint64_t imageOffsetx = 0;
  bool img1Darray = false;
  bool img2Darray = false;

  // Collect the linear-memory parameters of a buffer<->image copy.
  // The buffer side passes its row pitch in origin[1]; 0 means tightly packed rows.
  if (desc().buffer_ && !dstResource.desc().buffer_) {
    imageOffsetx = calDstOrigin[0] % dstResource.elementSize();
    gpuMemoryOffset = calSrcOrigin[0] + offset();
    gpuMemoryRowPitch =
        (calSrcOrigin[1]) ? calSrcOrigin[1] : calSize[0] * dstResource.elementSize();
    img1Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
    img2Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
  } else if (!desc().buffer_ && dstResource.desc().buffer_) {
    imageOffsetx = calSrcOrigin[0] % elementSize();
    gpuMemoryOffset = calDstOrigin[0] + dstResource.offset();
    gpuMemoryRowPitch = (calDstOrigin[1]) ? calDstOrigin[1] : calSize[0] * elementSize();
    img1Darray = (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
    img2Darray = (desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
  }

  if ((desc().buffer_ && !dstResource.desc().buffer_) ||
      (!desc().buffer_ && dstResource.desc().buffer_)) {
    // sDMA cannot be used for the below conditions
    // Make sure linear pitch in bytes is 4 bytes aligned
    if (((gpuMemoryRowPitch % 4) != 0) ||
        // another DRM restriction... SI has 4 pixels
        (gpuMemoryOffset % 4 != 0) || (dev().settings().sdamPageFaultWar_ && (imageOffsetx != 0))) {
      return false;
    }
  }

  // From here on the copy is committed to the SDMA engine
  gpu.engineID_ = SdmaEngine;

  // Wait for the resources, since runtime may use async transfers
  wait(gpu, waitOnBusyEngine);
  dstResource.wait(gpu, waitOnBusyEngine);

  if (gpu.validateSdmaOverlap(*this, dstResource)) {
    // Note: PAL should insert a NOP into the command buffer for synchronization
    gpu.addBarrier();
  }

  Pal::ImageLayout imgLayout = {};
  gpu.eventBegin(gpu.engineID_);
  // Both allocations have to be referenced by the command buffer
  gpu.queue(gpu.engineID_).addCmdMemRef(memRef());
  gpu.queue(gpu.engineID_).addCmdMemRef(dstResource.memRef());
  if (desc().buffer_ && !dstResource.desc().buffer_) {
    // Buffer -> image
    Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, dstResource.desc().baseLevel_, 0};
    Pal::MemoryImageCopyRegion copyRegion = {};
    copyRegion.imageSubres = ImgSubresId;
    copyRegion.imageOffset.x = calDstOrigin[0];
    copyRegion.imageOffset.y = calDstOrigin[1];
    copyRegion.imageOffset.z = calDstOrigin[2];
    copyRegion.imageExtent.width = calSize[0];
    copyRegion.imageExtent.height = calSize[1];
    copyRegion.imageExtent.depth = calSize[2];
    copyRegion.numSlices = 1;
    // For image arrays the layer count is moved from the extent into numSlices
    if (img1Darray) {
      copyRegion.numSlices = copyRegion.imageExtent.height;
      copyRegion.imageExtent.height = 1;
    } else if (img2Darray) {
      copyRegion.numSlices = copyRegion.imageExtent.depth;
      copyRegion.imageExtent.depth = 1;
    }
    copyRegion.gpuMemoryOffset = gpuMemoryOffset;
    copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
    // The buffer side passes its depth pitch in origin[2]; 0 means tightly packed slices
    copyRegion.gpuMemoryDepthPitch = (calSrcOrigin[2])
        ? calSrcOrigin[2]
        : copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
    gpu.iCmd()->CmdCopyMemoryToImage(*iMem(), *dstResource.image_, imgLayout, 1, &copyRegion);
  } else if (!desc().buffer_ && dstResource.desc().buffer_) {
    // Image -> buffer
    Pal::MemoryImageCopyRegion copyRegion = {};
    Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, desc().baseLevel_, 0};
    copyRegion.imageSubres = ImgSubresId;
    copyRegion.imageOffset.x = calSrcOrigin[0];
    copyRegion.imageOffset.y = calSrcOrigin[1];
    copyRegion.imageOffset.z = calSrcOrigin[2];
    copyRegion.imageExtent.width = calSize[0];
    copyRegion.imageExtent.height = calSize[1];
    copyRegion.imageExtent.depth = calSize[2];
    copyRegion.numSlices = 1;
    // For image arrays the layer count is moved from the extent into numSlices
    if (img1Darray) {
      copyRegion.numSlices = copyRegion.imageExtent.height;
      copyRegion.imageExtent.height = 1;
    } else if (img2Darray) {
      copyRegion.numSlices = copyRegion.imageExtent.depth;
      copyRegion.imageExtent.depth = 1;
    }
    copyRegion.gpuMemoryOffset = gpuMemoryOffset;
    copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
    // The buffer side passes its depth pitch in origin[2]; 0 means tightly packed slices
    copyRegion.gpuMemoryDepthPitch = (calDstOrigin[2])
        ? calDstOrigin[2]
        : copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
    gpu.iCmd()->CmdCopyImageToMemory(*image_, imgLayout, *dstResource.iMem(), 1, &copyRegion);
  } else {
    // Memory -> memory
    if (enableCopyRect) {
      // Rectangle copy: origin[1]/origin[2] are the row/depth pitches of each side and
      // the width is converted from bytes to elements of bytesPerElement
      Pal::TypedBufferCopyRegion copyRegion = {};
      Pal::ChannelMapping channels = {Pal::ChannelSwizzle::X, Pal::ChannelSwizzle::Y,
                                      Pal::ChannelSwizzle::Z, Pal::ChannelSwizzle::W};
      copyRegion.srcBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
      copyRegion.srcBuffer.swizzledFormat.swizzle = channels;
      copyRegion.srcBuffer.offset = calSrcOrigin[0] + offset();
      copyRegion.srcBuffer.rowPitch = calSrcOrigin[1];
      copyRegion.srcBuffer.depthPitch = calSrcOrigin[2];
      copyRegion.extent.width = calSize[0] / bytesPerElement;
      copyRegion.extent.height = calSize[1];
      copyRegion.extent.depth = calSize[2];
      copyRegion.dstBuffer.swizzledFormat.format = ChannelFmt(bytesPerElement);
      copyRegion.dstBuffer.swizzledFormat.swizzle = channels;
      copyRegion.dstBuffer.offset = calDstOrigin[0] + dstResource.offset();
      copyRegion.dstBuffer.rowPitch = calDstOrigin[1];
      copyRegion.dstBuffer.depthPitch = calDstOrigin[2];
      gpu.iCmd()->CmdCopyTypedBuffer(*iMem(), *dstResource.iMem(), 1, &copyRegion);
    } else {
      // Linear copy of size[0] bytes
      Pal::MemoryCopyRegion copyRegion = {};
      copyRegion.srcOffset = calSrcOrigin[0] + offset();
      copyRegion.dstOffset = calDstOrigin[0] + dstResource.offset();
      copyRegion.copySize = calSize[0];
      gpu.iCmd()->CmdCopyMemory(*iMem(), *dstResource.iMem(), 1, &copyRegion);
    }
  }

  gpu.eventEnd(gpu.engineID_, event);

  // Mark source and destination as busy
  setBusy(gpu, event);
  dstResource.setBusy(gpu, event);

  // Update the global GPU event
  gpu.setGpuEvent(event, flushDMA);

  // Restore the original engine
  gpu.engineID_ = activeEngineID;

  return true;
}

//! Records \p gpuEvent as the latest event of \p gpu on this resource and,
//! recursively, on every resource up the view-owner chain.
void Resource::setBusy(VirtualGPU& gpu, GpuEvent gpuEvent) const {
  addGpuEvent(gpu, gpuEvent);

  // Only views have an owner to propagate the event to
  if (viewOwner_ == nullptr) {
    return;
  }
  viewOwner_->setBusy(gpu, gpuEvent);
}

//! Waits for the event that \p gpu recorded on this resource, then repeats the
//! same for the view owner (if any).
//!
//! \param gpu               Virtual GPU whose event is checked
//! \param waitOnBusyEngine  If false, always wait. If true, wait only when the recorded
//!                          event belongs to an engine other than gpu.engineID_
void Resource::wait(VirtualGPU& gpu, bool waitOnBusyEngine) const {
  GpuEvent* lastEvent = getGpuEvent(gpu);

  const bool mustWait = !waitOnBusyEngine || (lastEvent->engineId_ != gpu.engineID_);
  if (mustWait) {
    gpu.waitForEvent(lastEvent);
  }

  // A view has to honour the pending work of its parent as well
  if (viewOwner_ != nullptr) {
    viewOwner_->wait(gpu, waitOnBusyEngine);
  }
}

//! Copies data from host memory into the resource through a CPU mapping.
//!
//! \param gpu        Virtual GPU passed to map()/unmap()
//! \param hostPtr    Source host memory
//! \param origin     Destination origin; added as-is to the mapped pointer for
//!                   1-dimensional resources, scaled by the element size otherwise
//! \param size       Region size; size[0] is used as-is for buffers and scaled by the
//!                   element size for everything else
//! \param flags      Map flags forwarded to map()
//! \param rowPitch   Host row pitch in bytes, 0 selects tightly packed rows
//! \param slicePitch Host slice pitch in bytes, 0 selects tightly packed slices
//! \return false if the resource couldn't be mapped, true otherwise
bool Resource::hostWrite(VirtualGPU* gpu, const void* hostPtr, const amd::Coord3D& origin,
                         const amd::Coord3D& size, uint flags, size_t rowPitch, size_t slicePitch) {
  // 1D image arrays keep the layer range in the Y coordinate, everything else in Z
  const bool layersInY = (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
  const size_t firstLayer = layersInY ? origin[1] : origin[2];
  const size_t layerCount = layersInY ? size[1] : size[2];

  // Get a CPU pointer to the GPU memory
  char* const mapped = static_cast<char*>(map(gpu, flags, firstLayer, layerCount));
  if (mapped == nullptr) {
    LogError("Couldn't map GPU memory for host write");
    return false;
  }

  if (1 == desc().dimSize_) {
    // Linear case: a single contiguous copy
    const size_t copySize = (desc().buffer_) ? size[0] : size[0] * elementSize_;
    amd::Os::fastMemcpy(mapped + origin[0], hostPtr, copySize);
  } else {
    const char* const host = static_cast<const char*>(hostPtr);
    const size_t lineSize = size[0] * elementSize_;

    // Fall back to tightly packed host data if the pitches aren't specified
    if (rowPitch == 0) {
      rowPitch = lineSize;
    }
    if (slicePitch == 0) {
      slicePitch = size[0] * size[1] * elementSize_;
    }

    // Byte offset of the region origin (X, Y and Z contributions) in the mapped memory
    const size_t dstBase = origin[0] * elementSize_ + desc().pitch_ * origin[1] * elementSize_ +
        desc().slice_ * origin[2] * elementSize_;

    // Walk the region slice by slice, row by row
    for (size_t slice = 0; slice < size[2]; ++slice) {
      const size_t dstSlice = dstBase + slice * desc().slice_ * elementSize_;
      const size_t srcSlice = slice * slicePitch;

      for (size_t row = 0; row < size[1]; ++row) {
        const size_t dstOffs = dstSlice + row * desc().pitch_ * elementSize_;
        const size_t srcOffs = srcSlice + row * rowPitch;
        amd::Os::fastMemcpy(mapped + dstOffs, host + srcOffs, lineSize);
      }
    }
  }

  // Release the CPU mapping
  unmap(gpu);

  return true;
}

//! Copies data from the resource into host memory through a read-only CPU mapping.
//!
//! \param gpu        Virtual GPU passed to map()/unmap()
//! \param hostPtr    Destination host memory
//! \param origin     Source origin; added as-is to the mapped pointer for
//!                   1-dimensional resources, scaled by the element size otherwise
//! \param size       Region size; size[0] is used as-is for buffers and scaled by the
//!                   element size for everything else
//! \param rowPitch   Host row pitch in bytes, 0 selects tightly packed rows
//! \param slicePitch Host slice pitch in bytes, 0 selects tightly packed slices
//! \return false if the resource couldn't be mapped, true otherwise
bool Resource::hostRead(VirtualGPU* gpu, void* hostPtr, const amd::Coord3D& origin,
                        const amd::Coord3D& size, size_t rowPitch, size_t slicePitch) {
  // 1D image arrays keep the layer range in the Y coordinate, everything else in Z
  const bool layersInY = (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
  const size_t firstLayer = layersInY ? origin[1] : origin[2];
  const size_t layerCount = layersInY ? size[1] : size[2];

  // Get a CPU pointer to the GPU memory
  const char* const mapped =
      static_cast<const char*>(map(gpu, ReadOnly, firstLayer, layerCount));
  if (mapped == nullptr) {
    LogError("Couldn't map GPU memory for host read");
    return false;
  }

  if (1 == desc().dimSize_) {
    // Linear case: a single contiguous copy
    const size_t copySize = (desc().buffer_) ? size[0] : size[0] * elementSize_;
    amd::Os::fastMemcpy(hostPtr, mapped + origin[0], copySize);
  } else {
    char* const host = static_cast<char*>(hostPtr);
    const size_t lineSize = size[0] * elementSize_;

    // Fall back to tightly packed host data if the pitches aren't specified
    if (rowPitch == 0) {
      rowPitch = lineSize;
    }
    if (slicePitch == 0) {
      slicePitch = size[0] * size[1] * elementSize_;
    }

    // Byte offset of the region origin (X, Y and Z contributions) in the mapped memory
    const size_t srcBase = origin[0] * elementSize_ + desc().pitch_ * origin[1] * elementSize_ +
        desc().slice_ * origin[2] * elementSize_;

    // Walk the region slice by slice, row by row
    for (size_t slice = 0; slice < size[2]; ++slice) {
      const size_t srcSlice = srcBase + slice * desc().slice_ * elementSize_;
      const size_t dstSlice = slice * slicePitch;

      for (size_t row = 0; row < size[1]; ++row) {
        const size_t srcOffs = srcSlice + row * desc().pitch_ * elementSize_;
        const size_t dstOffs = dstSlice + row * rowPitch;
        amd::Os::fastMemcpy(host + dstOffs, mapped + srcOffs, lineSize);
      }
    }
  }

  // Release the CPU mapping
  unmap(gpu);

  return true;
}

//! Maps a PAL GPU memory object for CPU access and reports the row pitch.
//!
//! \param pitch     [out] Row pitch in elements: for images the pitch of subresource
//!                  (Color, mip 0, slice 0) as reported by PAL divided by the element
//!                  size, for everything else the width of the resource
//! \param flags     Map flags; not used by this implementation
//! \param resource  PAL memory object to map
//! \return CPU address of the mapping, or nullptr if the resource is in card memory
//!         without a persistent direct map (unimplemented path) or if PAL failed to map
void* Resource::gpuMemoryMap(size_t* pitch, uint flags, Pal::IGpuMemory* resource) const {
  if (desc_.cardMemory_ && !isPersistentDirectMap()) {
    // @todo remove const cast
    Unimplemented();
    return nullptr;
    //        return const_cast<Device&>(dev()).resMapLocal(*pitch, resource, flags);
  }

  // PAL calls are serialized with the device-wide PAL lock
  amd::ScopedLock lk(dev().lockPAL());

  if (image_ != nullptr) {
    // Images may have padded rows, so the real pitch has to come from PAL
    constexpr Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, 0, 0};
    Pal::SubresLayout layout;
    image_->GetSubresourceLayout(ImgSubresId, &layout);
    *pitch = layout.rowPitch / elementSize();
  } else {
    // Only non-image resources use the width as the pitch. The width must not
    // overwrite the image pitch queried above.
    *pitch = desc().width_;
  }

  void* address = nullptr;
  if (Pal::Result::Success == resource->Map(&address)) {
    return address;
  }

  LogError("PAL GpuMemory->Map() failed!");
  return nullptr;
}

//! Unmaps a PAL GPU memory object that was mapped for CPU access.
//! Card memory without a persistent direct map goes to the unimplemented path;
//! a PAL failure is only logged.
void Resource::gpuMemoryUnmap(Pal::IGpuMemory* resource) const {
  const bool stagedUnmap = desc_.cardMemory_ && !isPersistentDirectMap();
  if (stagedUnmap) {
    // @todo remove const cast
    Unimplemented();
    //        const_cast<Device&>(dev()).resUnmapLocal(resource);
    return;
  }

  if (resource->Unmap() != Pal::Result::Success) {
    LogError("PAL GpuMemory->Unmap() failed!");
  }
}

//! Acquires the GL interop object behind this resource.
//! \return true for non-interop resources, otherwise the result of Device::resGLAcquire()
bool Resource::glAcquire() {
  if (desc().type_ != OGLInterop) {
    // Nothing to acquire for regular resources
    return true;
  }
  return dev().resGLAcquire(glPlatformContext_, glInteropMbRes_, glType_);
}

//! Releases the GL interop object behind this resource.
//! \return true for non-interop resources, otherwise the result of Device::resGLRelease()
bool Resource::glRelease() {
  if (desc().type_ != OGLInterop) {
    // Nothing to release for regular resources
    return true;
  }
  return dev().resGLRelease(glPlatformContext_, glInteropMbRes_, glType_);
}

void Resource::addGpuEvent(const VirtualGPU& gpu, GpuEvent event) const {
  uint idx = gpu.index();
  assert(idx < memRef_->events_.size());
  memRef_->events_[idx] = event;
}

//! Returns a pointer to the event slot that belongs to \p gpu in the memory reference.
//! The pointer refers to storage owned by the memory reference, not to a copy.
GpuEvent* Resource::getGpuEvent(const VirtualGPU& gpu) const {
  const uint queueIdx = gpu.index();
  assert((queueIdx < memRef_->events_.size()) && "Undeclared queue access!");
  GpuEvent& slot = memRef_->events_[queueIdx];
  return &slot;
}

void Resource::palFree() const {
  if (desc().type_ == OGLInterop) {
    amd::ScopedLock lk(dev().lockPAL());
    dev().resGLFree(glPlatformContext_, glInteropMbRes_, glType_);
  }
  memRef_->release();
}

//! Checks if this resource, or the resource it is a View of, has the given memory type.
//! Only resources of type View delegate the question to their owner.
bool Resource::isMemoryType(MemoryType memType) const {
  if (memoryType() == memType) {
    return true;
  }

  // A view inherits the memory type of the resource it was created from
  return (memoryType() == View) && viewOwner_->isMemoryType(memType);
}

//! Tells if the resource can be mapped directly as persistent memory.
//! Requires the Persistent memory type, fewer than 3 dimensions and no image array.
//! Tiled resources additionally require Windows with linearPersistentImage_ disabled.
bool Resource::isPersistentDirectMap() const {
  if ((memoryType() != Resource::Persistent) || (desc().dimSize_ >= 3) || desc().imageArray_) {
    return false;
  }

  // Linear resources need no further validation
  if (!desc().tiled_) {
    return true;
  }

  //!@note IOL for Linux doesn't support tiling aperture
  // and runtime doesn't force linear images in persistent
  return IS_WINDOWS && !dev().settings().linearPersistentImage_;
}

//! Maps the resource for CPU access and returns the CPU address.
//!
//! Pinned resources (directly or through a view) simply return their address. For
//! other resources the map is reference counted: only the first map performs the
//! actual mapping, later calls return the address that is already established.
//!
//! \param gpu         Virtual GPU used for waiting/renaming, may be nullptr (no wait)
//! \param flags       Map flags (NoWait, ReadOnly, WriteOnly, Discard)
//! \param startLayer  First layer, saved for the layered map path
//! \param numLayers   Number of layers, saved for the layered map path
//! \return CPU address, or nullptr if the direct PAL map failed
void* Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers) {
  if (isMemoryType(Pinned)) {
    // Pinned memory is always CPU accessible, only the GPU work has to be honoured
    if (!(flags & NoWait) && (gpu != nullptr)) {
      wait(*gpu);
    }
    return address_;
  }

  if (flags & ReadOnly) {
    assert(!(flags & Discard) && "We can't use lock discard with read only!");
  }

  // Discard allows switching to a renamed allocation; a fresh one needs no wait
  if ((flags & Discard) && (gpu != nullptr) && rename(*gpu)) {
    flags |= NoWait;
  }

  // Wait for pending GPU work unless the caller (or the rename) waived it
  if (!(flags & NoWait) && (gpu != nullptr)) {
    wait(*gpu);
  }

  // Only the very first map has to establish the CPU address
  if (++mapCount_ == 1) {
    const bool layered = (desc().dimSize_ == 3) || desc().imageArray_ ||
        ((desc().type_ == ImageView) && viewOwner_->mipMapped());
    if (layered) {
      // Keep the map arguments for the multilayer map/unmap
      startLayer_ = startLayer;
      numLayers_ = numLayers;
      mapFlags_ = flags;
      address_ = mapLayers(gpu, flags);
    } else {
      // Direct map of the current resource, which also refreshes the pitch
      address_ = gpuMemoryMap(&desc_.pitch_, flags, iMem());
      if (address_ == nullptr) {
        LogError("cal::ResMap failed!");
        --mapCount_;
        return nullptr;
      }
    }
  }

  //! \note the atomic operation with counter doesn't
  // guarantee that the address will be valid,
  // since PAL could still process the first map
  if (address_ == nullptr) {
    uint attempt = 0;
    while ((address_ == nullptr) && (attempt < 10)) {
      amd::Os::sleep(1);
      ++attempt;
    }
    assert((address_ != nullptr) && "Multiple maps failed!");
  }

  return address_;
}

//! Layered map used by map() for 3D resources, image arrays and views of mipmapped images.
//! Not implemented in this backend: reports Unimplemented() and always returns nullptr.
//! Both parameters are currently unused.
void* Resource::mapLayers(VirtualGPU* gpu, uint flags) {
  Unimplemented();
  return nullptr;
}

//! Releases one CPU mapping of the resource.
//!
//! Pinned resources are never unmapped. Otherwise the map counter is decremented and
//! the memory is really unmapped when it reaches zero. An unbalanced unmap is logged
//! and the counter is restored.
//!
//! \param gpu  Virtual GPU forwarded to the layered unmap path
void Resource::unmap(VirtualGPU* gpu) {
  if (isMemoryType(Pinned)) {
    return;
  }

  // Decrement the map counter and remember the outcome of this very decrement
  const int remaining = --mapCount_;

  if (remaining < 0) {
    // More unmaps than maps: undo the decrement
    LogError("dev().serialCalResUnmap failed!");
    ++mapCount_;
    return;
  }

  if (remaining != 0) {
    // Other maps are still active
    return;
  }

  // Last unmap: release the mapping the same way it was established in map()
  const bool layered = (desc().dimSize_ == 3) || desc().imageArray_ ||
      ((desc().type_ == ImageView) && viewOwner_->mipMapped());
  if (layered) {
    unmapLayers(gpu);
  } else {
    gpuMemoryUnmap(iMem());
  }
  address_ = nullptr;
}

//! Layered unmap used by unmap() for 3D resources, image arrays and views of mipmapped
//! images. Not implemented in this backend: only reports Unimplemented().
//! The parameter is currently unused.
void Resource::unmapLayers(VirtualGPU* gpu) {
  Unimplemented();
}

//! Makes \p rename the active allocation of the resource: the memory reference is
//! switched and the CPU address stored in the rename becomes the current address.
//! \p gpu is not used.
void Resource::setActiveRename(VirtualGPU& gpu, GpuMemoryReference* rename) {
  GpuMemoryReference* const active = rename;
  // Take over the CPU address saved in the rename together with the allocation itself
  address_ = active->cpuAddress_;
  memRef_ = active;
}

//! Returns the currently active allocation of the resource.
//!
//! \param gpu     Not used
//! \param rename  [out] Receives the current memory reference (no extra retain is done here)
//! \return Always true
bool Resource::getActiveRename(VirtualGPU& gpu, GpuMemoryReference** rename) {
  // Copy the old data to the rename descriptor
  *rename = memRef_;
  return true;
}

//! Switches the resource to another ("renamed") allocation, so the CPU can access it
//! while the GPU may still use the previous allocation.
//!
//! A new allocation is created while the rename count and the total rename size stay
//! within the device settings; after that the existing renames are recycled in a
//! round-robin fashion.
//!
//! \param gpu    Virtual GPU whose event on this resource decides if a rename is needed
//! \param force  Rename even if no valid GPU event is recorded for \p gpu
//! \return true if the active allocation can be accessed without a wait (no pending
//!         event, GPU work already done, or a new allocation was created). false if
//!         renaming isn't available for this memory type/settings, or an older rename
//!         was recycled - the caller has to wait in that case (see map())
bool Resource::rename(VirtualGPU& gpu, bool force) {
  GpuEvent* gpuEvent = getGpuEvent(gpu);
  if (!gpuEvent->isValid() && !force) {
    return true;
  }

  bool useNext = false;
  // Resource size in bytes. It's kept in size_t, since a 32-bit value would wrap for
  // resources of 4 GiB and more, and defeat both size checks below
  const size_t resSize = static_cast<size_t>(desc().width_) *
      ((desc().height_) ? desc().height_ : 1) * elementSize_;

  // Rename works with real allocations only and has to be enabled in the settings
  if (((memoryType() != Local) && (memoryType() != Persistent) && (memoryType() != Remote) &&
       (memoryType() != RemoteUSWC)) ||
      (dev().settings().maxRenames_ == 0)) {
    return false;
  }

  // If the resource for renaming is too big, then lets check the current status first
  // at the cost of an extra flush
  if (resSize >= (dev().settings().maxRenameSize_ / dev().settings().maxRenames_)) {
    if (gpu.isDone(gpuEvent)) {
      return true;
    }
  }

  // The first rename: the original allocation becomes entry 0 of the rename list
  if (renames_.size() == 0) {
    GpuMemoryReference* rename;
    if (mapCount_ > 0) {
      // Preserve the CPU address of a mapped allocation, so it can be restored later
      memRef_->cpuAddress_ = address_;
    }
    if (!getActiveRename(gpu, &rename)) {
      return false;
    }

    curRename_ = renames_.size();
    renames_.push_back(rename);
  }

  // Can we use a new rename?
  if ((renames_.size() <= dev().settings().maxRenames_) &&
      ((renames_.size() * resSize) <= dev().settings().maxRenameSize_)) {
    GpuMemoryReference* rename;

    // Create a new allocation of the same memory type; memRef_ refers to it afterwards
    if (create(memoryType())) {
      if (mapCount_ > 0) {
        // The resource is mapped at the moment, so the new allocation must be mapped too
        assert(!desc().cardMemory_ && "Unsupported memory type!");
        memRef_->cpuAddress_ = gpuMemoryMap(&desc_.pitch_, 0, iMem());
        if (memRef_->cpuAddress_ == nullptr) {
          LogError("gslMap fails on rename!");
        }
        address_ = memRef_->cpuAddress_;
      }
      if (getActiveRename(gpu, &rename)) {
        curRename_ = renames_.size();
        renames_.push_back(rename);
      } else {
        // The new allocation can't be tracked: drop it and recycle an old one
        memRef_->release();
        useNext = true;
      }
    } else {
      useNext = true;
    }
  } else {
    useNext = true;
  }

  if (useNext) {
    // Recycle the next rename in the list, wrapping around at the end
    curRename_++;
    if (curRename_ >= renames_.size()) {
      curRename_ = 0;
    }
    setActiveRename(gpu, renames_[curRename_]);
    return false;
  }

  return true;
}

//! Pre-populates the rename list of this resource by issuing a small write
//! followed by a forced rename, once per allowed rename.
//!
//! \param gpu  Virtual GPU (queue) used for the writes and the renames
void Resource::warmUpRenames(VirtualGPU& gpu) {
  const auto renameLimit = dev().settings().maxRenames_;

  // Make sure OCL touches every command buffer in the queue to avoid delays on the first submit:
  // spread the waiting writes evenly over the command buffers, at least every iteration
  uint waitInterval = renameLimit / VirtualGPU::Queue::MaxCmdBuffers;
  if (waitInterval == 0) {
    waitInterval = 1;
  }

  for (uint iter = 1; iter <= renameLimit; ++iter) {
    // Request the wait flag only on every waitInterval-th write
    const bool waitOnWrite = ((iter % waitInterval) == 0);

    // Write 0 for the buffer paging by VidMM
    uint zero = 0;
    writeRawData(gpu, 0, sizeof(zero), &zero, waitOnWrite);

    // Always switch to another rename, regardless of the GPU state
    const bool forceRename = true;
    rename(gpu, forceRename);
  }
}

//! Destructor: hands the cached entries back through free(), called with its default argument
ResourceCache::~ResourceCache() {
  free();
}

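//! \note New entries are pushed to the front of the list, so lookups see the most recently
//! cached allocation first, while eviction (removeLast) releases the oldest entry at the back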
//! Tries to park a GPU memory allocation in the cache instead of destroying it.
//!
//! \param desc  Descriptor of the resource that owned the memory; a copy is stored in the cache
//! \param ref   GPU memory reference to keep; on success its gpu_ field is cleared
//! \return true if the allocation was added to the cache, false if it is not cacheable
//!         (unsupported memory type, SVM resource, size not below the cache limit)
//!         or the descriptor copy could not be allocated
bool ResourceCache::addGpuMemory(Resource::Descriptor* desc, GpuMemoryReference* ref) {
  const size_t memSize = ref->iMem()->Desc().size;

  // Only these memory types are eligible for caching
  const bool cacheableType =
      (desc->type_ == Resource::Local) || (desc->type_ == Resource::Persistent) ||
      (desc->type_ == Resource::Remote) || (desc->type_ == Resource::RemoteUSWC);

  // Make sure current allocation isn't bigger than cache and isn't a SVM resource
  if (!cacheableType || !(memSize < cacheSizeLimit_) || desc->SVMRes_) {
    return false;
  }

  amd::ScopedLock lock(&lockCacheOps_);

  // Evict entries from the back of the list until the new allocation fits under the limit
  while ((cacheSize_ + memSize) > cacheSizeLimit_) {
    removeLast();
  }

  Resource::Descriptor* cachedDesc = new Resource::Descriptor;
  if (cachedDesc == nullptr) {
    return false;
  }

  // The cache keeps its own bitwise copy of the caller's descriptor
  memcpy(cachedDesc, desc, sizeof(Resource::Descriptor));

  // Newest entry goes to the front of the list
  resCache_.push_front(std::make_pair(cachedDesc, ref));
  ref->gpu_ = nullptr;
  cacheSize_ += memSize;
  return true;
}

//! Searches the cache for a GPU memory allocation that can be reused for a new resource.
//!
//! An entry matches when its memory type, flags and isAllocExecute_ setting equal those of
//! \p desc, its size is at least \p size but less than four times \p size, and its GPU virtual
//! address is a multiple of \p alignment.
//!
//! \param desc       Descriptor of the resource being created
//! \param size       Requested allocation size
//! \param alignment  Required alignment of the GPU virtual address; must be non-zero,
//!                   since it is used as a modulo divisor
//! \return The matching memory reference, already unlinked from the cache (the caller takes it
//!         over), or nullptr if nothing matches, the request is not below the cache limit,
//!         or \p desc describes a SVM resource
GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal::gpusize size,
                                                 Pal::gpusize alignment) {
  amd::ScopedLock l(&lockCacheOps_);
  GpuMemoryReference* ref = nullptr;

  // Early exit if resource is too big
  if (size >= cacheSizeLimit_ || desc->SVMRes_) {
    //! \note we may need to free the cache here to reduce memory pressure
    return ref;
  }

  // Search for the right resource through the cache list, newest entry first
  for (auto it = resCache_.begin(); it != resCache_.end(); ++it) {
    Resource::Descriptor* entry = it->first;
    const auto& memDesc = it->second->iMem()->Desc();
    size_t sizeRes = memDesc.size;
    // Find if we can reuse this entry
    if ((entry->type_ == desc->type_) && (entry->flags_ == desc->flags_) && (size <= sizeRes) &&
        (size > (sizeRes >> 2)) && ((memDesc.gpuVirtAddr % alignment) == 0) &&
        (entry->isAllocExecute_ == desc->isAllocExecute_)) {
      ref = it->second;
      // Unlink exactly this node in O(1). A value-based remove() would rescan the whole list
      // and compare against an element whose descriptor pointer has already been deleted.
      resCache_.erase(it);
      // The cached descriptor copy is owned by the cache and no longer needed
      delete entry;
      cacheSize_ -= sizeRes;
      break;
    }
  }

  return ref;
}

//! Releases all cached allocations, provided the cache holds more than \p minCacheEntries.
//!
//! \param minCacheEntries  The cache is left untouched unless it currently holds more entries
//!                         than this value
//! \return true if at least one cached entry was released, false otherwise
bool ResourceCache::free(size_t minCacheEntries) {
  amd::ScopedLock l(&lockCacheOps_);
  bool result = false;

  if (minCacheEntries < resCache_.size()) {
    // Clear the cache. The loop is driven by the list itself rather than by the byte counter:
    // truncating the counter to int misreads sizes of 2 GiB and above as "nothing to free",
    // and a drifted counter must never make removeLast() run on an empty list.
    while (!resCache_.empty()) {
      removeLast();
      result = true;
    }
    // With every entry gone the byte counter has to be back at zero
    CondLog((cacheSize_ != 0), "Incorrect size for cache release!");
  }
  return result;
}

void ResourceCache::removeLast() {
  std::pair<Resource::Descriptor*, GpuMemoryReference*> entry;
  entry = resCache_.back();
  resCache_.pop_back();

  size_t size = entry.second->iMem()->Desc().size;

  // Delete Descriptor
  delete entry.first;

  // Destroy GSL resource
  entry.second->release();
  cacheSize_ -= size;
}

}  // namespace pal