P4 to Git Change 1191682 by gandryey@gera-dev-w7 on 2015/09/17 11:14:23

ECR #304775 - Remove EG/NI support - Remove the heap emulation (non-vm) Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#77 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#12 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#31 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#186 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#253 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#118 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#523 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#148 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuheap.cpp#28 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuheap.hpp#16 delete ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#297 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#116 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#122 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#48 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#227 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#83 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#329 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#94 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#379 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp#143 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.h#57 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsasettings.cpp#38 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsasettings.cpp#9 edit ... 
//depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#242 edit
2015-09-17 11:24:31 -04:00
@@ -10,8 +10,6 @@ namespace cpu {
 bool
 Settings::create()
 {
-    largeHostMemAlloc_ = true;
-
    // This code is temporary until cl_khr_fp64 is unconditional
    if (flagIsDefault(CL_KHR_FP64) || CL_KHR_FP64) {
        enableExtension(ClKhrFp64);
@@ -517,7 +517,6 @@ Settings::Settings()
    extensions_          = 0;
    partialDispatch_     = false;
    supportRA_           = true;
-    largeHostMemAlloc_   = false;
    customHostAllocator_ = false;
    waitCommand_         = AMD_OCL_WAIT_COMMAND;
    supportDepthsRGB_    = false;
@@ -577,13 +577,12 @@ public:
        struct {
            uint    partialDispatch_: 1;    //!< Enables partial dispatch
            uint    supportRA_: 1;          //!< Support RA channel order format
-            uint    largeHostMemAlloc_: 1;  //!< Allow large host mem allocations (> maxSingleAlloc)
            uint    waitCommand_: 1;        //!< Enables a wait for every submitted command
            uint    customHostAllocator_: 1;//!< True if device has custom host allocator
                                            //  that replaces generic OS allocation routines
            uint    supportDepthsRGB_: 1;   //!< Support DEPTH and sRGB channel order format
            uint    enableHwDebug_: 1;      //!< Enable HW debug support
-            uint    reserved_: 25;
+            uint    reserved_: 26;
        };
        uint    value_;
    };
@@ -1955,20 +1955,9 @@ KernelBlitManager::copyBufferRect(

    // Fall into the CAL path for rejected transfers
    if (setup_.disableCopyBufferRect_ ||
-        (gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) ||
-        (!dev().heap()->isVirtual() &&
-         ((gpuMem(dstMemory).hb() == NULL) || (gpuMem(srcMemory).hb() == NULL)))) {
-        // Copy data with CAL (no VM mode only)
-        if (gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) {
-            result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory,
-                srcRectIn, dstRectIn, sizeIn, entire);
-        }
-
-        if ((!dev().heap()->isVirtual() && ((gpuMem(dstMemory).hb() == NULL) || (gpuMem(srcMemory).hb() == NULL)))
-                && !result) {
-            result = HostBlitManager::copyBufferRect(srcMemory, dstMemory,
-                srcRectIn, dstRectIn, sizeIn, entire);
-        }
+        gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) {
+        result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory,
+            srcRectIn, dstRectIn, sizeIn, entire);

        if (result) {
            synchronize();
@@ -2395,11 +2384,9 @@ KernelBlitManager::copyBuffer(
 {
    amd::ScopedLock k(lockXferOps_);
    bool    result = false;
-    bool    forceCal = !dev().heap()->isVirtual() &&
-        ((gpuMem(srcMemory).hb() == NULL) || (gpuMem(dstMemory).hb() == NULL));

-    if ((!forceCal && !gpuMem(srcMemory).isHostMemDirectAccess() &&
-         !gpuMem(dstMemory).isHostMemDirectAccess())) {
+    if (!gpuMem(srcMemory).isHostMemDirectAccess() &&
+        !gpuMem(dstMemory).isHostMemDirectAccess()) {
        uint    blitType = BlitCopyBuffer;
        size_t  dim = 1;
        size_t  globalWorkOffset[3] = { 0, 0, 0 };
@@ -2489,7 +2476,6 @@ KernelBlitManager::copyBuffer(
        result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters);
    }
    else {
-        // Copy data with CAL (no VM mode only)
        result = DmaBlitManager::copyBuffer(
            srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire);
    }
@@ -173,7 +173,7 @@ NullDevice::create(CALtarget target)
    calAttr.localRAM = 512;

    // Fill the device info structure
-    fillDeviceInfo(calAttr, memInfo, 4096, 1, true);
+    fillDeviceInfo(calAttr, memInfo, 4096, 1);

    if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) {
        // Runtime doesn't know what local size could be on the real board
@@ -225,9 +225,7 @@ void NullDevice::fillDeviceInfo(
    const CALdeviceattribs& calAttr,
    const gslMemInfo& memInfo,
    size_t  maxTextureSize,
-    uint    numComputeRings,
-    bool    isVirtualMode
-    )
+    uint    numComputeRings)
 {
    info_.type_     = CL_DEVICE_TYPE_GPU;
    info_.vendorId_ = 0x1002;
@@ -276,56 +274,45 @@ void NullDevice::fillDeviceInfo(
        info_.globalMemCacheType_   = CL_NONE;
    }

-    if (isVirtualMode) {
 #if defined(ATI_OS_LINUX)
-        info_.globalMemSize_   =
-            (static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
-            // globalMemSize is the actual available size for app on Linux
-            // Because Linux base driver doesn't support paging
-            static_cast<cl_ulong>(memInfo.cardMemAvailableBytes + memInfo.cardExtMemAvailableBytes) / 100u);
+    info_.globalMemSize_   =
+        (static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
+        // globalMemSize is the actual available size for app on Linux
+        // Because Linux base driver doesn't support paging
+        static_cast<cl_ulong>(memInfo.cardMemAvailableBytes + memInfo.cardExtMemAvailableBytes) / 100u);
 #else
-        info_.globalMemSize_   =
-            (static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
-            static_cast<cl_ulong>(calAttr.localRAM) / 100u) * Mi;
+    info_.globalMemSize_   =
+        (static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
+        static_cast<cl_ulong>(calAttr.localRAM) / 100u) * Mi;
 #endif
-        if (settings().apuSystem_) {
-            info_.globalMemSize_   +=
-                (static_cast<cl_ulong>(calAttr.uncachedRemoteRAM) * Mi * 75)/100;
-        }
+    if (settings().apuSystem_) {
+        info_.globalMemSize_   +=
+            (static_cast<cl_ulong>(calAttr.uncachedRemoteRAM) * Mi * 75)/100;
+    }

-        // We try to calculate the largest available memory size from
-        // the largest available block in either heap.  In theory this
-        // should be the size we can actually allocate at application
-        // start.  Note that it may not be a guarantee still as the
-        // application progresses.
-        info_.maxMemAllocSize_ = std::max(
-            cl_ulong(memInfo.cardLargestFreeBlockBytes),
-            cl_ulong(memInfo.cardExtLargestFreeBlockBytes));
+    // We try to calculate the largest available memory size from
+    // the largest available block in either heap.  In theory this
+    // should be the size we can actually allocate at application
+    // start.  Note that it may not be a guarantee still as the
+    // application progresses.
+    info_.maxMemAllocSize_ = std::max(
+        cl_ulong(memInfo.cardLargestFreeBlockBytes),
+        cl_ulong(memInfo.cardExtLargestFreeBlockBytes));

 #if defined(ATI_OS_WIN)
-        if (settings().apuSystem_) {
-            info_.maxMemAllocSize_ = std::max(
-                (static_cast<cl_ulong>(calAttr.uncachedRemoteRAM) * Mi * 75)/100,
-                info_.maxMemAllocSize_);
-        }
+    if (settings().apuSystem_) {
+        info_.maxMemAllocSize_ = std::max(
+            (static_cast<cl_ulong>(calAttr.uncachedRemoteRAM) * Mi * 75)/100,
+            info_.maxMemAllocSize_);
+    }
 #endif
-        info_.maxMemAllocSize_ = cl_ulong(info_.maxMemAllocSize_ *
-            std::min(GPU_SINGLE_ALLOC_PERCENT, 100u) / 100u);
+    info_.maxMemAllocSize_ = cl_ulong(info_.maxMemAllocSize_ *
+        std::min(GPU_SINGLE_ALLOC_PERCENT, 100u) / 100u);

-        //! \note Force max single allocation size.
-        //! 4GB limit for the blit kernels and 64 bit optimizations.
-        info_.maxMemAllocSize_ = std::min(info_.maxMemAllocSize_,
-                static_cast<cl_ulong>(settings().maxAllocSize_));
-    }
-    else {
-        uint    maxHeapSize = flagIsDefault(GPU_MAX_HEAP_SIZE) ? 50 : GPU_MAX_HEAP_SIZE;
-        info_.globalMemSize_   = (std::min(maxHeapSize, 100u)
-            * calAttr.localRAM / 100u) * Mi;
-
-        uint    maxAllocSize = flagIsDefault(GPU_SINGLE_ALLOC_PERCENT) ? 25 : GPU_SINGLE_ALLOC_PERCENT;
-        info_.maxMemAllocSize_ = cl_ulong(info_.globalMemSize_ *
-            std::min(maxAllocSize, 100u) / 100u);
-    }
+    //! \note Force max single allocation size.
+    //! 4GB limit for the blit kernels and 64 bit optimizations.
+    info_.maxMemAllocSize_ = std::min(info_.maxMemAllocSize_,
+            static_cast<cl_ulong>(settings().maxAllocSize_));

    if (info_.maxMemAllocSize_ < cl_ulong(128 * Mi)) {
        LogError("We are unable to get a heap large enough to support the OpenCL minimum "\
@@ -377,7 +364,7 @@ void NullDevice::fillDeviceInfo(
        info_.imagePitchAlignment_       = 256; // XXX: 256 pixel pitch alignment for now
        info_.imageBaseAddressAlignment_ = 256; // XXX: 256 byte base address alignment for now

-        info_.bufferFromImageSupport_ = (isVirtualMode) ? CL_TRUE : CL_FALSE;
+        info_.bufferFromImageSupport_ = CL_TRUE;
    }

    info_.errorCorrectionSupport_    = CL_FALSE;
@@ -404,7 +391,7 @@ void NullDevice::fillDeviceInfo(
    ::strcpy(info_.name_, hwInfo()->targetName_);
    ::strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
    ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1,
-         AMD_BUILD_STRING "%s", (isVirtualMode) ? " (VM)": "");
+         AMD_BUILD_STRING "%s", " (VM)");

    info_.profile_ = "FULL_PROFILE";
    if (settings().oclVersion_ == OpenCL20) {
@@ -508,6 +495,25 @@ void NullDevice::fillDeviceInfo(
    }
 }

+bool
+Device::Heap::create(Device& device)
+{
+    // Create a new GPU resource
+    resource_ = new Resource(device, 0, CM_SURF_FMT_R32I);
+    if (resource_ == NULL) {
+        return false;
+    }
+
+    if (!resource_->create(Resource::Heap)) {
+        return false;
+    }
+
+    if (!device.settings().hsail_) {
+        baseAddress_ = resource_->gslResource()->getSurfaceAddress();
+    }
+    return true;
+}
+
 void
 Device::Engines::create(uint num, gslEngineDescriptor* desc, uint maxNumComputeRings)
 {
@@ -670,7 +676,7 @@ Device::Device()
    , CALGSLDevice()
    , numOfVgpus_(0)
    , context_(NULL)
-    , heap_(NULL)
+    , heap_()
    , dummyPage_(NULL)
    , lockAsyncOps_(NULL)
    , lockAsyncOpsForInitHeap_(NULL)
@@ -731,11 +737,6 @@ Device::~Device()
        dummyPage_->release();
    }

-    // Destroy global heap
-    if (heap_ != NULL) {
-        delete heap_;
-    }
-
    // Destroy resource cache
    delete resourceCache_;

@@ -837,26 +838,6 @@ Device::create(CALuint ordinal, CALuint numOfDevices)

    size_t  resourceCacheSize = settings().resourceCacheSize_;

-    // Allocate heap
-    heapSize_ = settings().heapSize_;
-
-    // Check if BE supports virtual addressing mode
-    if (isVmMode()) {
-        heap_ = new VirtualHeap(*this);
-        gpuSettings->largeHostMemAlloc_ = (NULL != heap_) ? true : false;
-    }
-
-    // If virtual heap allocation failed, then try static allocation
-    if (heap_ == NULL) {
-        heap_ = new Heap(*this);
-        // Disable resource cache if VM is disable
-        resourceCacheSize = 0;
-        if (NULL == heap_) {
-            return false;
-        }
-    }
-
-
 #ifdef DEBUG
    std::stringstream  message;
    if (settings().remoteAlloc_) {
@@ -865,10 +846,7 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
    else {
        message << "Using *Local* memory";
    }
-    if (!heap()->isVirtual()) {
-        message << ": " << settings().heapSize_ / Mi << "MB, growth: " <<  \
-            settings().heapSizeGrowth_ / Mi << "MB";
-    }
+
    message << std::endl;
    LogInfo(message.str().c_str());
 #endif // DEBUG
@@ -883,8 +861,7 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
    // Fill the device info structure
    fillDeviceInfo(getAttribs(), getMemInfo(),
        static_cast<size_t>(getMaxTextureSize()),
-        engines().numComputeRings(), heap()->isVirtual()
-    );
+        engines().numComputeRings());

    if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) {
        if (NULL == hsaCompiler_) {
@@ -955,7 +932,7 @@ Device::initializeHeapResources()
        }

        // Complete initialization of the heap and other buffers
-        if ((heap_ == NULL) || !heap_->create(heapSize_, settings().remoteAlloc_)) {
+        if (!heap_.create(*this)) {
            LogError("Failed GPU heap creation");
            return false;
        }
@@ -987,7 +964,7 @@ Device::initializeHeapResources()
                    type = Resource::RemoteUSWC;
                }
                xferWrite_ = new XferBuffers(*this, type,
-                    amd::alignUp(settings().stagedXferSize_, heap()->granularityB()));
+                    amd::alignUp(settings().stagedXferSize_, 4 * Ki));
                if ((xferWrite_ == NULL) || !xferWrite_->create()) {
                    LogError("Couldn't allocate transfer buffer objects for read");
                    return false;
@@ -997,7 +974,7 @@ Device::initializeHeapResources()
            // Initialize staged read buffers
            if (settings().stagedXferRead_) {
                xferRead_ = new XferBuffers(*this, Resource::Remote,
-                    amd::alignUp(settings().stagedXferSize_, heap()->granularityB()));
+                    amd::alignUp(settings().stagedXferSize_, 4 * Ki));
                if ((xferRead_ == NULL) || !xferRead_->create()) {
                    LogError("Couldn't allocate transfer buffer objects for write");
                    return false;
@@ -1086,52 +1063,6 @@ Device::createVirtualDevice(
    }
 }

-bool
-Device::reallocHeap(size_t size, bool remoteAlloc)
-{
-    size_t  heapSize    =  heapSize_ + ((size != 0) ?
-        amd::alignUp(size, settings().heapSizeGrowth_) : 0);
-    Heap*   oldHeap     = heap_;
-    // Maximum heap limit size = reported size + internal memory
-    size_t  maxHeapLimit = static_cast<size_t>(info().globalMemSize_) +
-        // an extra 10MB for the alignments of allocations,
-        // since the conformance test doesn't expect any
-        10 * Mi;
-
-    if ((settings().heapSizeGrowth_ == 0) ||
-        // Allow the heap growth up to the global memory limit
-        (heapSize_ + size > maxHeapLimit)) {
-        return false;
-    }
-    heapSize = std::min(maxHeapLimit, heapSize);
-
-    heap_ = new Heap(*this);
-
-    // Make sure we have allocated a new global heap
-    if (NULL == heap_) {
-        heap_ = oldHeap;
-        return false;
-    }
-
-    if (!heap_->create(heapSize, remoteAlloc)) {
-        delete heap_;
-        heap_ = oldHeap;
-        return false;
-    }
-
-    // Copy the old heap to the new one
-    if (!oldHeap->copyTo(heap_)) {
-        delete heap_;
-        heap_ = oldHeap;
-        return false;
-    }
-
-    delete oldHeap;
-    heapSize_ = heapSize;
-
-    return true;
-}
-
 device::Program*
 Device::createProgram(int oclVer)
 {
@@ -1288,65 +1219,6 @@ Device::tearDown()
    }
 }

-//! @note This funciton must be lock protected from a caller
-HeapBlock*
-Device::allocHeapBlock(size_t size) const
-{
-    HeapBlock* hb = NULL;
-
-    // Allocate the underlying heap block
-    hb = heap_->alloc(size);
-
-    // Virtual heap should never fail allocation
-    if ((hb == NULL) && (!heap_->isVirtual())) {
-        // Queues can't process commands,
-        // while the global heap reallocation occurs.
-        // So stall all queues and then reallocate the global heap
-        ScopedLockVgpus lock(*this);
-
-        // Wait for idle
-        for (uint idx = 0; idx < vgpus().size(); ++idx) {
-            vgpus()[idx]->waitAllEngines();
-        }
-
-        // Acount memory alignment for the new allocation
-        size_t  extraSpace = heap_->granularityB();
-        if (size >= heap_->freeSpace()) {
-            // Required extra space = requested size - free space
-            extraSpace += size - heap_->freeSpace();
-        }
-
-        //! @note the const cast here looks bad, but the device object
-        //  is a lock protected above. The rest of the code
-        //  doesn't change the device object.
-        //  So the const methods can be safly used everywhere else.
-        //  In general we should avoid changing the device object after initialization
-
-        // Try to reallocate the heap with the same memory type
-        if (const_cast<Device*>(this)->reallocHeap(extraSpace, settings().remoteAlloc_)) {
-            hb = heap_->alloc(size);
-        }
-
-        if (hb == NULL) {
-            // Use reversed memory type as a temporary storage
-            bool    remoteAlloc = settings().remoteAlloc_ ^ true;
-
-            // Try to reallocate the heap
-            if (const_cast<Device*>(this)->reallocHeap(extraSpace, remoteAlloc)) {
-                // Back to the default location of the global heap
-                remoteAlloc ^= true;
-                if (!const_cast<Device*>(this)->reallocHeap(0, remoteAlloc)) {
-                    LogWarning("New memory type for the \
-                        global heap after reallocation!");
-                }
-                hb = heap_->alloc(size);
-            }
-        }
-    }
-
-    return hb;
-}
-
 gpu::Memory*
 Device::getGpuMemory(amd::Memory* mem) const
 {
@@ -1392,99 +1264,20 @@ Device::createScratchBuffer(size_t size) const
 {
    Memory* gpuMemory = NULL;

-    // Use virtual heap allocation
-    if (heap()->isVirtual()) {
-        // Create a memory object
-        gpuMemory = new gpu::Memory(*this, size);
-        if (NULL == gpuMemory || !gpuMemory->create(Resource::Local)) {
-            delete gpuMemory;
-            gpuMemory = NULL;
-        }
-    }
-    else {
-        // We have to lock the heap block allocation,
-        // so possible reallocation won't occur twice or
-        // another thread could destroy a heap block,
-        // while we didn't finish allocation
-        amd::ScopedLock k(lockAsyncOps());
-
-        HeapBlock* hb = allocHeapBlock(size);
-        if (hb != NULL) {
-            // wrap it
-            gpuMemory = new gpu::Memory(*this, *hb);
-
-            // Create resource
-            if (NULL != gpuMemory) {
-                Resource::ViewParams   params;
-                params.offset_  = hb->offset_;
-                params.size_    = hb->size_;
-                params.resource_ = &(globalMem());
-                params.memory_  = NULL;
-                if (!gpuMemory->create(Resource::View, &params)) {
-                    delete gpuMemory;
-                    gpuMemory = NULL;
-                }
-            }
-        }
-    }
-
-    return gpuMemory;
-}
-
-gpu::Memory*
-Device::createBufferFromHeap(amd::Memory& owner) const
-{
-    size_t  size = owner.getSize();
-    gpu::Memory* gpuMemory;
-
-    // We have to lock the heap block allocation,
-    // so possible reallocation won't occur twice or
-    // another thread could destroy a heap block,
-    // while we didn't finish allocation
-    amd::ScopedLock k(lockAsyncOps());
-
-    HeapBlock* hb = allocHeapBlock(size);
-    if (hb == NULL) {
-        LogError("We don't have enough video memory!");
-        return NULL;
-    }
-
    // Create a memory object
-    gpuMemory = new gpu::Memory(*this, owner, hb);
-    if (NULL == gpuMemory) {
-        hb->setMemory(NULL);
-        hb->free();
-        return NULL;
-    }
-
-    Resource::ViewParams params;
-    params.owner_       = &owner;
-    params.offset_      = hb->offset_;
-    params.size_        = hb->size_;
-    params.resource_    = &(globalMem());
-    params.memory_      = NULL;
-
-    if (!gpuMemory->create(Resource::View, &params)) {
+    gpuMemory = new gpu::Memory(*this, size);
+    if (NULL == gpuMemory || !gpuMemory->create(Resource::Local)) {
        delete gpuMemory;
-        return NULL;
+        gpuMemory = NULL;
    }

-    // Check if owner is interop memory
-    if (owner.isInterop()) {
-        if (!gpuMemory->createInterop(Memory::InteropHwEmulation)) {
-            LogError("HW interop creation failed!");
-            delete gpuMemory;
-            return NULL;
-        }
-    }
    return gpuMemory;
 }

 gpu::Memory*
 Device::createBuffer(
    amd::Memory&    owner,
-    bool            directAccess,
-    bool            bufferAlloc) const
+    bool            directAccess) const
 {
    size_t  size = owner.getSize();
    gpu::Memory* gpuMemory;
@@ -1504,39 +1297,7 @@ Device::createBuffer(
            return NULL;
        }

-        if (!heap()->isVirtual()) {
-            bool    uhpAlloc =
-                (owner.parent()->getMemFlags() & CL_MEM_USE_HOST_PTR) ? true : false;
-
-            if (owner.parent()->getType() != CL_MEM_OBJECT_IMAGE1D_BUFFER) {
-                //! \note This extra line is necessary to make sure that subbuffer
-                //! allocation is a synch operation,
-                //! due to a possible realloc of heap(no VM) or parent(UHP)
-                amd::ScopedLock k(lockAsyncOps());
-
-                //! @note: For now make sure the parent is allocated in the global heap
-                //! or if it's the UHP optimization for prepinned memory
-                if (((gpuParent->hb() == NULL) || uhpAlloc) &&
-                    !owner.parent()->reallocedDeviceMemory(this)) {
-                    if (reallocMemory(*owner.parent())) {
-                        gpuParent = getGpuMemory(owner.parent());
-                    }
-                    else {
-                        LogError("Can't reallocate the owner object for subbuffer allocation");
-                        return NULL;
-                    }
-                }
-
-                return gpuParent->createBufferView(owner);
-            }
-            else {
-                gpuParent = getGpuMemory(owner.parent()->parent());
-                return gpuParent->createBufferView(*owner.parent()->parent());
-            }
-        }
-        else {
-            return gpuParent->createBufferView(owner);
-        }
+        return gpuParent->createBufferView(owner);
    }

    Resource::MemoryType    type = (owner.forceSysMemAlloc() || (owner.getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) ?
@@ -1550,138 +1311,123 @@ Device::createBuffer(
    }

    // Use direct access if it's possible
-    if (bufferAlloc || (type == Resource::Remote)) {
-        bool    forceHeapAlloc = false;
-        bool    remoteAlloc = false;
-        // Internal means VirtualDevice!=NULL
-        bool    internalAlloc = ((owner.getMemFlags() & CL_MEM_USE_HOST_PTR) &&
-              (owner.getVirtualDevice() != NULL)) ? true : false;
+    bool    remoteAlloc = false;
+    // Internal means VirtualDevice!=NULL
+    bool    internalAlloc = ((owner.getMemFlags() & CL_MEM_USE_HOST_PTR) &&
+            (owner.getVirtualDevice() != NULL)) ? true : false;

-        // Create a memory object
-        gpuMemory = new gpu::Buffer(*this, owner, owner.getSize());
-        if (NULL == gpuMemory) {
-            return NULL;
-        }
+    // Create a memory object
+    gpuMemory = new gpu::Buffer(*this, owner, owner.getSize());
+    if (NULL == gpuMemory) {
+        return NULL;
+    }

-        // Check if owner is interop memory
-        if (owner.isInterop()) {
-            result = gpuMemory->createInterop(Memory::InteropDirectAccess);
-        }
-        else if (owner.getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
-            // Attempt to allocate from persistent heap
-            result = gpuMemory->create(Resource::Persistent);
-        }
-        else if (directAccess || (type == Resource::Remote)) {
-            // Check for system memory allocations
-            if ((owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR))
-                || (settings().remoteAlloc_)) {
-                // Allocate remote memory if AHP allocation and context has just 1 device
-                if ((owner.getMemFlags() & CL_MEM_ALLOC_HOST_PTR) &&
-                    (owner.getContext().devices().size() == 1)) {
-                    if (owner.getMemFlags() & (CL_MEM_READ_ONLY |
-                        CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
-                        // GPU will be reading from this host memory buffer,
-                        // so assume Host write into it
-                        type = Resource::RemoteUSWC;
-                        remoteAlloc = true;
-                    }
+    // Check if owner is interop memory
+    if (owner.isInterop()) {
+        result = gpuMemory->createInterop(Memory::InteropDirectAccess);
+    }
+    else if (owner.getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
+        // Attempt to allocate from persistent heap
+        result = gpuMemory->create(Resource::Persistent);
+    }
+    else if (directAccess || (type == Resource::Remote)) {
+        // Check for system memory allocations
+        if ((owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR))
+            || (settings().remoteAlloc_)) {
+            // Allocate remote memory if AHP allocation and context has just 1 device
+            if ((owner.getMemFlags() & CL_MEM_ALLOC_HOST_PTR) &&
+                (owner.getContext().devices().size() == 1)) {
+                if (owner.getMemFlags() & (CL_MEM_READ_ONLY |
+                    CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
+                    // GPU will be reading from this host memory buffer,
+                    // so assume Host write into it
+                    type = Resource::RemoteUSWC;
+                    remoteAlloc = true;
                }
-                // Make sure owner has a valid hostmem pointer and it's not COPY
-                if (!remoteAlloc && (owner.getHostMem() != NULL)) {
-                    Resource::PinnedParams params;
-                    params.owner_ = &owner;
-                    params.gpu_ =
-                        reinterpret_cast<VirtualGPU*>(owner.getVirtualDevice());
+            }
+            // Make sure owner has a valid hostmem pointer and it's not COPY
+            if (!remoteAlloc && (owner.getHostMem() != NULL)) {
+                Resource::PinnedParams params;
+                params.owner_ = &owner;
+                params.gpu_ =
+                    reinterpret_cast<VirtualGPU*>(owner.getVirtualDevice());

-                    params.hostMemRef_  = owner.getHostMemRef();
-                    params.size_        = owner.getHostMemRef()->size();
-                    if (0 == params.size_) {
-                        params.size_ = owner.getSize();
-                    }
-                    // Create memory object
-                    result = gpuMemory->create(Resource::Pinned, &params);
+                params.hostMemRef_  = owner.getHostMemRef();
+                params.size_        = owner.getHostMemRef()->size();
+                if (0 == params.size_) {
+                    params.size_ = owner.getSize();
+                }
+                // Create memory object
+                result = gpuMemory->create(Resource::Pinned, &params);

-                    // If direct access failed
-                    if (!result) {
-                        // and VM off, then force a heap allocation
-                        if (!heap()->isVirtual()) {
-                            // Internal pinning doesn't need a heap allocation
-                            if (!internalAlloc) {
-                                forceHeapAlloc = true;
-                            }
-                        }
-                        // Don't use cached allocation
-                        // if size is biger than max single alloc
-                        if (owner.getSize() > info().maxMemAllocSize_) {
-                            delete gpuMemory;
-                            return NULL;
-                        }
+                // If direct access failed
+                if (!result) {
+                    // Don't use cached allocation
+                    // if size is bigger than max single alloc
+                    if (owner.getSize() > info().maxMemAllocSize_) {
+                        delete gpuMemory;
+                        return NULL;
                    }
                }
            }
        }
+    }

-        if (!result && !forceHeapAlloc &&
-            // Make sure it's not internal alloc
-            !internalAlloc) {
-            Resource::CreateParams  params;
-            params.owner_ = &owner;
-            params.gpu_ = static_cast<VirtualGPU*>(owner.getVirtualDevice());
+    if (!result &&
+        // Make sure it's not internal alloc
+        !internalAlloc) {
+        Resource::CreateParams  params;
+        params.owner_ = &owner;
+        params.gpu_ = static_cast<VirtualGPU*>(owner.getVirtualDevice());

-            // Create memory object
-            result = gpuMemory->create(type, &params);
+        // Create memory object
+        result = gpuMemory->create(type, &params);

-            // If allocation was successful
-            if (result) {
-                // Initialize if the memory is a pipe object
-                if (owner.getType() == CL_MEM_OBJECT_PIPE) {
-                    // Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
-                    // Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
-                    size_t pipeInit[3] = {0 , 0, owner.asPipe()->getMaxNumPackets()};
-                    gpuMemory->writeRawData(*xferQueue_, sizeof(pipeInit), pipeInit, true);
+        // If allocation was successful
+        if (result) {
+            // Initialize if the memory is a pipe object
+            if (owner.getType() == CL_MEM_OBJECT_PIPE) {
+                // Pipe is initialized in the order read_idx, write_idx, end_idx. Refer to the clk_pipe_t structure.
+                // Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
+                size_t pipeInit[3] = {0 , 0, owner.asPipe()->getMaxNumPackets()};
+                gpuMemory->writeRawData(*xferQueue_, sizeof(pipeInit), pipeInit, true);
+            }
+            // If memory has direct access from host, then get CPU address
+            if (gpuMemory->isHostMemDirectAccess() &&
+                (type != Resource::ExternalPhysical)) {
+                void* address = gpuMemory->map(NULL);
+                if (address != NULL) {
+                    // Copy saved memory
+                    if (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) {
+                        memcpy(address, owner.getHostMem(), owner.getSize());
+                    }
+                    // It should be safe to change the host memory pointer,
+                    // because it's lock protected from the upper caller
+                    owner.setHostMem(address);
                }
-                // If memory has direct access from host, then get CPU address
-                if (gpuMemory->isHostMemDirectAccess() &&
-                   (type != Resource::ExternalPhysical)) {
-                    void* address = gpuMemory->map(NULL);
-                    if (address != NULL) {
-                        // Copy saved memory
-                        if (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) {
-                            memcpy(address, owner.getHostMem(), owner.getSize());
-                        }
-                        // It should be safe to change the host memory pointer,
-                        // because it's lock protected from the upper caller
-                        owner.setHostMem(address);
-                    }
-                    else {
-                        result = false;
-                    }
-                }
-                // An optimization for CHP. Copy memory and destroy sysmem allocation
-                else if ((gpuMemory->memoryType() != Resource::Pinned) &&
-                         (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
-                         (owner.getContext().devices().size() == 1)) {
-                    amd::Coord3D    origin(0, 0, 0);
-                    amd::Coord3D    region(owner.getSize());
-                    static const bool Entire  = true;
-                    if (xferMgr().writeBuffer(owner.getHostMem(),
-                        *gpuMemory, origin, region, Entire)) {
-                        // Clear CHP memory
-                        owner.setHostMem(NULL);
-                    }
+                else {
+                    result = false;
+                }
+            }
+            // An optimization for CHP. Copy memory and destroy sysmem allocation
+            else if ((gpuMemory->memoryType() != Resource::Pinned) &&
+                        (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
+                        (owner.getContext().devices().size() == 1)) {
+                amd::Coord3D    origin(0, 0, 0);
+                amd::Coord3D    region(owner.getSize());
+                static const bool Entire  = true;
+                if (xferMgr().writeBuffer(owner.getHostMem(),
+                    *gpuMemory, origin, region, Entire)) {
+                    // Clear CHP memory
+                    owner.setHostMem(NULL);
                }
            }
-        }
-
-        if (!result && !forceHeapAlloc) {
-            delete gpuMemory;
-            return NULL;
        }
    }

    if (!result) {
-        assert(!heap()->isVirtual() && "Can't have static heap allocation with VM");
-        gpuMemory = createBufferFromHeap(owner);
+        delete gpuMemory;
+        return NULL;
    }

    return gpuMemory;
@@ -1703,10 +1449,10 @@ Device::createImage(amd::Memory& owner, bool directAccess) const
        }
        // Create a view on the specified device
        gpuImage = (gpu::Memory*)createView(owner, *devParent);
-        if (heap()->isVirtual() && (NULL != gpuImage) && (gpuImage->owner() != NULL)) {
+        if ((NULL != gpuImage) && (gpuImage->owner() != NULL)) {
            gpuImage->owner()->setHostMem((address)(owner.parent()->getHostMem()) + gpuImage->owner()->getOrigin());
        }
-        return gpuImage ;
+        return gpuImage;
    }

    gpuImage = new gpu::Image(*this, owner,
@@ -1778,11 +1524,11 @@ Device::createImage(amd::Memory& owner, bool directAccess) const
                 (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
                 (owner.getContext().devices().size() == 1)) {
            // Ignore copy for image1D_buffer, since it was already done for buffer
-            if (heap()->isVirtual() && imageBuffer) {
+            if (imageBuffer) {
                // Clear CHP memory
                owner.setHostMem(NULL);
            }
-            else if (!imageBuffer) {
+            else {
                amd::Coord3D    origin(0, 0, 0);
                static const bool Entire  = true;
                if (xferMgr().writeImage(owner.getHostMem(),
@@ -1809,25 +1555,12 @@ Device::createMemory(
    amd::Memory&    owner) const
 {
    bool directAccess   = false;
-    bool bufferAlloc    = false;
    gpu::Memory* memory = NULL;

-    if (heap()->isVirtual()) {
-        bufferAlloc = true;
-    }
-    //!@todo Remove this code when VM is always on.
-    // Use zero-copy transfers for sysmem allocations or persistent memory
-    else {
-        if (owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR |
-                                   CL_MEM_USE_HOST_PTR)) {
-            bufferAlloc = true;
-        }
-    }
-
    if (owner.asBuffer()) {
        directAccess = (settings().hostMemDirectAccess_ & Settings::HostMemBuffer)
            ? true : false;
-        memory = createBuffer(owner, directAccess, bufferAlloc);
+        memory = createBuffer(owner, directAccess);
    }
    else if (owner.asImage()) {
        directAccess = (settings().hostMemDirectAccess_ & Settings::HostMemImage)
@@ -1878,7 +1611,6 @@ bool
 Device::reallocMemory(amd::Memory& owner) const
 {
    bool directAccess   = false;
-    bool bufferAlloc    = heap()->isVirtual();

    // For now we have to serialize reallocation code
    amd::ScopedLock lk(*lockAsyncOps_);
@@ -1889,35 +1621,18 @@ Device::reallocMemory(amd::Memory& owner) const
    if (gpuMemory == NULL) {
        return false;
    }
-    if (gpuMemory->hb() != NULL) {
+
+    if (gpuMemory->pinOffset() == 0) {
        return true;
    }
-
-    if (bufferAlloc) {
-        if (gpuMemory->pinOffset() == 0) {
-            return true;
-        }
-        else if (NULL != owner.parent()) {
-            if (!reallocMemory(*owner.parent())) {
-                return false;
-            }
+    else if (NULL != owner.parent()) {
+        if (!reallocMemory(*owner.parent())) {
+            return false;
        }
    }

    if (owner.asBuffer()) {
-        // Disable remote allocation if no VM
-        if ((gpuMemory != NULL) &&
-            ((gpuMemory->memoryType() == Resource::Remote) ||
-             (gpuMemory->memoryType() == Resource::RemoteUSWC)) && !bufferAlloc) {
-            // Make sure we don't have a stale memory in VA cache before reallocation
-            // of system memory.
-            // \note: the app must unmap() memory before kernel launch
-            removeVACache(gpuMemory);
-            static const bool forceAllocHostMem = true;
-            static const bool forceCopy = true;
-            owner.allocHostMemory(owner.getHostMem(), forceAllocHostMem, forceCopy);
-        }
-        gpuMemory = createBuffer(owner, directAccess, bufferAlloc);
+        gpuMemory = createBuffer(owner, directAccess);
    }
    else if (owner.asImage()) {
        return true;
@@ -2113,24 +1828,18 @@ Device::globalFreeMemory(size_t* freeMemory) const
    if (!(const_cast<Device*>(this)->initializeHeapResources())) {
        return false;
    }
-    if (heap()->isVirtual()) {
-        gslMemInfo memInfo = {0};
-        gslCtx()->getMemInfo(&memInfo, GSL_MEMINFO_BASIC);

-         // Fill free memory info
-        freeMemory[TotalFreeMemory] = (memInfo.cardMemAvailableBytes +
-            memInfo.cardExtMemAvailableBytes) / Ki;
-        freeMemory[LargestFreeBlock] = std::max(memInfo.cardLargestFreeBlockBytes,
-           memInfo.cardExtLargestFreeBlockBytes) / Ki;
-        if (settings().apuSystem_) {
-            freeMemory[TotalFreeMemory] += memInfo.agpMemAvailableBytes / Ki;
-            freeMemory[LargestFreeBlock] += memInfo.agpLargestFreeBlockBytes / Ki;
-        }
-    }
-    else {
-        freeMemory[TotalFreeMemory] = static_cast<size_t>((info().globalMemSize_ -
-            static_cast<cl_ulong>(heapSize_) + heap()->freeSpace()) / Ki);
-        freeMemory[LargestFreeBlock] = freeMemory[TotalFreeMemory];
+    gslMemInfo memInfo = {0};
+    gslCtx()->getMemInfo(&memInfo, GSL_MEMINFO_BASIC);
+
+    // Fill free memory info
+    freeMemory[TotalFreeMemory] = (memInfo.cardMemAvailableBytes +
+        memInfo.cardExtMemAvailableBytes) / Ki;
+    freeMemory[LargestFreeBlock] = std::max(memInfo.cardLargestFreeBlockBytes,
+        memInfo.cardExtLargestFreeBlockBytes) / Ki;
+    if (settings().apuSystem_) {
+        freeMemory[TotalFreeMemory] += memInfo.agpMemAvailableBytes / Ki;
+        freeMemory[LargestFreeBlock] += memInfo.agpLargestFreeBlockBytes / Ki;
    }

    return true;
@@ -125,8 +125,7 @@ protected:
        const CALdeviceattribs& calAttr,    //!< CAL device attributes info
        const gslMemInfo&  memInfo,         //!< GSL mem info
        size_t  maxTextureSize,             //!< Maximum texture size supported in HW
-        uint    numComputeRings,            //!< Number of compute rings
-        bool    isVirtualMode               //!< Device is in virtual mode
+        uint    numComputeRings             //!< Number of compute rings
        );
 };

@@ -184,6 +183,32 @@ private:
 class Device : public NullDevice, public CALGSLDevice
 {
 public:
+    class Heap : public amd::EmbeddedObject
+    {
+    public:
+        //! The size of a heap element in bytes
+        static const size_t ElementSize = 4;
+
+        //! The surface format of a heap element
+        static const cmSurfFmt ElementType = CM_SURF_FMT_R32I;
+
+        Heap(): resource_(NULL), baseAddress_(0) {}
+
+        bool create(
+            Device& device      //!< GPU device object
+            );
+
+        //! Gets the GPU resource associated with the global heap
+        const Resource& resource() const { return *resource_; }
+
+        //! Returns the base virtual address of the heap
+        uint64_t baseAddress() const { return baseAddress_; }
+
+    protected:
+        Resource*   resource_;      //!< GPU resource referencing the heap memory
+        uint64_t    baseAddress_;   //!< Virtual heap base address
+    };
+
    //! Locks any access to the virtual GPUs
    class ScopedLockVgpus : public amd::StackObject {
    public:
@@ -377,12 +402,6 @@ public:
    //! Destructor for the physical GPU device
    virtual ~Device();

-    //! Reallocates current global heap
-    bool reallocHeap(
-        size_t  size,           //!< requested size for reallocation
-        bool    remoteAlloc     //!< allocate the new heap in remote memory
-        );
-
    //! Instantiate a new virtual device
    device::VirtualDevice* createVirtualDevice(
        amd::CommandQueue*  queue = NULL
@@ -442,15 +461,10 @@ public:
        ) const;

    //! Gets the GPU resource associated with the global heap
-    const Resource& globalMem() const { return heap_->resource(); }
+    const Resource& globalMem() const { return heap_.resource(); }

    //! Gets the global heap object
-    const Heap* heap() const { return heap_; }
-
-    //! Allocates a heap block from the global heap
-    HeapBlock* allocHeapBlock(
-        size_t size             //!< The heap block size for allocation
-        ) const;
+    const Heap& heap() const { return heap_; }

    //! Gets the memory object for the dummy page
    amd::Memory* dummyPage() const { return dummyPage_; }
@@ -566,16 +580,10 @@ private:
    //! Sends the stall command to all queues
    bool stallQueues();

-    //! Buffer allocation from static heap (no VM mode only)
-    gpu::Memory* createBufferFromHeap(
-        amd::Memory&    owner           //!< Abstraction layer memory object
-        ) const;
-
    //! Buffer allocation
    gpu::Memory* createBuffer(
        amd::Memory&    owner,          //!< Abstraction layer memory object
-        bool            directAccess,   //!< Use direct host memory access
-        bool            bufferAlloc     //!< If TRUE, then don't use heap
+        bool            directAccess    //!< Use direct host memory access
        ) const;

    //! Image allocation
@@ -591,8 +599,7 @@ private:
        );

    amd::Context*   context_;       //!< A dummy context for internal allocations
-    size_t      heapSize_;          //!< The global heap size
-    Heap*       heap_;              //!< GPU heap manager
+    Heap            heap_;          //!< GPU global heap
    amd::Memory*    dummyPage_;     //!< A dummy page for NULL pointer

    amd::Monitor*   lockAsyncOps_;  //!< Lock to serialise all async ops on this device
@@ -1,536 +0,0 @@
-//! Implementation of GPU device memory management
-
-#include "top.hpp"
-#include "thread/thread.hpp"
-#include "thread/monitor.hpp"
-#include "device/device.hpp"
-#include "device/gpu/gpuheap.hpp"
-#include "device/gpu/gpudevice.hpp"
-
-#include <string>
-#include <fstream>
-#include <sstream>
-#include <iostream>
-
-//! Turn this on to enable sanity checks before and after every heap operation.
-#if DEBUG
-#define EXTRA_HEAP_CHECKS   1
-#endif // DEBUG
-
-namespace gpu {
-
-// The GPU heap. Very simple implementation for now.
-Heap::Heap(
-    Device& device)
-    : resource_(NULL)
-    , freeList_(NULL)
-    , busyList_(NULL)
-    , freeSize_(0)
-    , device_(device)
-    , granularity_(Heap::MinGranularity)
-    , lock_("GPU heap lock", true)
-    , virtualMode_(false)
-    , baseAddress_(0)
-{
-}
-
-size_t
-Heap::granularityB() const
-{
-    return granularity_ * Heap::ElementSize;
-}
-
-bool
-Heap::create(size_t totalSize, bool remoteAlloc)
-{
-    Resource::MemoryType    memType;
-    size_t  maxHeight = device_.info().image2DMaxHeight_;
-    size_t  sizeInElements;
-    size_t  npages;
-
-    freeSize_ = totalSize;
-
-    sizeInElements = (totalSize + Heap::ElementSize - 1) / Heap::ElementSize;
-
-    // Calculate best granularity given the size and device characteristics
-    npages = amd::alignUp(sizeInElements, granularity_) / granularity_;
-
-    // Create a new GPU resource
-    resource_ = new Resource(device_, sizeInElements, Heap::ElementType);
-
-    if (resource_ == NULL) {
-        return false;
-    }
-
-    memType = (remoteAlloc) ? Resource::RemoteUSWC : Resource::Local;
-
-    if (!resource_->create(memType, NULL, true)) {
-        return false;
-    }
-
-    // Set up initial free list
-    freeList_ = new HeapBlock(this, npages * granularityB(), 0, NULL, NULL);
-    if (freeList_ == NULL) {
-        return false;
-    }
-
-    guarantee(isSane());
-    return true;
-}
-
-Heap::~Heap()
-{
-    amd::ScopedLock k(lock_);
-
-    guarantee(isSane());
-
-    // Release all heap blocks
-    HeapBlock *walk, *next;
-    walk = busyList_;
-    while (walk) {
-        next = walk->next_;
-        walk->free();
-        walk = next;
-    }
-
-    walk = freeList_;
-    while (walk) {
-        next = walk->next_;
-        delete walk;
-        walk = next;
-    }
-
-    // Release resource
-    delete resource_;
-}
-
-HeapBlock*
-Heap::alloc(size_t size)
-{
-    amd::ScopedLock k(lock_);
-    HeapBlock* walk = freeList_;
-    HeapBlock* best = NULL;
-
-    guarantee(isSane());
-
-    // Round size
-    size = amd::alignUp(size, granularityB());
-
-    // Walk the free list looking for a suitable block (currently best-fit)
-    //! @todo:dgladdin: experiment with switching back to first-fit
-
-    while (walk) {
-        if ((walk->size_ > size) &&
-            (best == NULL || walk->size_ < best->size_)) {
-                best = walk;
-        }
-        else if (walk->size_ == size) {
-            // No need to split, just move to busy list
-            detachBlock(&freeList_, walk);
-            walk->inUse_ = true;
-            insertBlock(&busyList_, walk);
-            guarantee(isSane());
-            freeSize_ -= size;
-            return walk;
-        }
-    walk = walk->next_;
-    }
-
-    if (best != NULL) {
-        // Got one, but need to split it. Keep first part in free list,
-        // put second part into busy list.
-        HeapBlock *newblock = splitBlock(best, size);
-        newblock->inUse_ = true;
-        insertBlock(&busyList_, newblock);
-        guarantee(isSane());
-        freeSize_ -= size;
-        return newblock;
-    }
-
-    // No free block available
-    guarantee(isSane());
-    return NULL;
-}
-
-bool
-Heap::copyTo(Heap* heap)
-{
-    HeapBlock    *walk;
-
-    walk = busyList_;
-    while (walk) {
-        if (walk->getMemory() != NULL) {
-            HeapBlock* hb = heap->alloc(walk->size_);
-            if (hb == NULL) {
-                return false;
-            }
-            hb->setMemory(walk->getMemory());
-
-            walk->destroyViewsMemory();
-            if (!walk->getMemory()->reallocate(hb, &(heap->resource()))) {
-                return false;
-            }
-
-            if (!walk->reallocateViews(hb,
-                    static_cast<size_t>(hb->offset_ - walk->offset_))) {
-                return false;
-            }
-        }
-        walk = walk->next_;
-    }
-
-    return true;
-}
-
-void
-Heap::free(HeapBlock* blk)
-{
-    amd::ScopedLock k(lock_);
-    guarantee(isSane());
-    detachBlock(&busyList_, blk);
-    blk->inUse_ = false;
-    freeSize_ += blk->size_;
-    mergeBlock(&freeList_, blk);
-    guarantee(isSane());
-}
-
-void
-Heap::detachBlock(HeapBlock** list, HeapBlock* blk)
-{
-    // Sanity checks
-    guarantee(isSane());
-
-    if (*list == blk) {
-        *list = blk->next_;
-    }
-
-    if (blk->prev_) {
-       blk->prev_->next_ = blk->next_;
-    }
-    if (blk->next_) {
-        blk->next_->prev_ = blk->prev_;
-    }
-    // no heap sanity check as blk is now floating
-}
-
-void
-Heap::insertBlock(HeapBlock** head, HeapBlock* blk)
-{
-     if (NULL == *head) {
-        *head = blk;
-        blk->prev_ = NULL;
-        blk->next_ = NULL;
-        guarantee(isSane());
-        return;
-    }
-
-    // Find the place to insert it at
-    HeapBlock* walk = *head;
-    while (walk->next_ && walk->next_->offset_ < blk->offset_) {
-        walk = walk->next_;
-    }
-
-    // Insert it
-    if (walk == *head) {
-        if (walk->offset_ >= blk->offset_) {
-            *head = blk;
-            blk->prev_ = NULL;
-            blk->next_ = walk;
-            walk->prev_ = *head;
-            guarantee(isSane());
-            return;
-        }
-    }
-
-    blk->next_ = walk->next_;
-    blk->prev_ = walk;
-    if (walk->next_) {
-        walk->next_->prev_ = blk;
-    }
-    walk->next_ = blk;
-    guarantee(isSane());
-}
-
-HeapBlock*
-Heap::splitBlock(HeapBlock* blk, size_t tailsize)
-{
-    // Sanity checks
-
-    guarantee(isSane());
-    guarantee(blk->size_ > tailsize && "block too small to split as requested");
-    guarantee(!blk->inUse_ && "can't split in-use block");
-
-    // Create a new block
-
-    HeapBlock* nb = new HeapBlock(blk->owner_, tailsize,
-                                  blk->offset_ + blk->size_ - tailsize);
-
-    // Resize the old block
-
-    blk->size_ = blk->size_ - tailsize;
-    return nb;  // no heap sanity check here as the new block hasn't been plugged in yet
-}
-
-//! Join two blocks, transferring the size of the second into the first and deleting
-//! the second. Utility fn for mergeBlock()
-
-static void
-join2Blocks(HeapBlock* first, HeapBlock* second)
-{
-    // Sanity checks
-
-    guarantee(first->size_ > 0 && "first block invalid");
-    guarantee(!first->inUse_ && "can't join  an in-use block");
-    guarantee(second->size_ > 0 && "second block invalid");
-    guarantee(first->offset_ + first->size_ == second->offset_);
-
-    // Do the join
-    first->size_ = first->size_ + second->size_;
-    first->next_ = second->next_;
-    if (second->next_) {
-        second->next_->prev_ = first;
-    }
-    delete second;
-}
-
-//! Insert a block into a list, merging it with adjacent blocks if possible. Must be called
-//! under a lock, cannot be used on in-use blocks or blocks with an associated resource alias.
-
-void
-Heap::mergeBlock(HeapBlock** head, HeapBlock* blk)
-{
-    insertBlock(head, blk);
-
-    // Merge with successor if possible
-    if ((blk->next_ != NULL) &&
-        (blk->offset_ + blk->size_ == blk->next_->offset_)) {
-        join2Blocks(blk, blk->next_);
-    }
-
-    // Merge with predecessor if possible
-    if ((blk->prev_ != NULL) &&
-        (blk->prev_->offset_ + blk->prev_->size_ == blk->offset_)) {
-        join2Blocks(blk->prev_, blk);
-    }
-
-    guarantee(isSane());
-}
-
-//! Sanity check for both types of block (helper function for Heap::isSane())
-
-static bool
-isBlockSane(HeapBlock* b)
-{
-    return (b->owner_ != NULL
-        && (b->next_ == NULL || b->next_->prev_ == b)
-        && (b->prev_ == NULL || b->prev_->next_ == b));
-}
-
-//! Sanity check for an individual free block (helper function for Heap::isSane())
-static bool
-isFreeBlockSane(HeapBlock* b)
-{
-    if (isBlockSane(b) && !b->inUse_) {
-        return true;
-    } else {
-        return false;
-    }
-}
-
-//! Sanity check for an individual busy block (helper function for Heap::isSane())
-static bool
-isBusyBlockSane(HeapBlock* b)
-{
-    if (isBlockSane(b) && b->inUse_) {
-        return true;
-    } else {
-        return false;
-    }
-}
-
-//! Sanity check for the heap.
-
-bool
-Heap::isSane() const
-{
-    // If we got this far, everything is (probably) OK
-#if EXTRA_HEAP_CHECKS
-    HeapBlock* walkFree = freeList_;    // Free list position
-    HeapBlock* walkBusy = busyList_;    // Busy list position
-    size_t offset = 0;                  // Current offset
-
-    // We can have zero lists if Heap allocation fails
-    if (walkFree == NULL && walkBusy == NULL) {
-        return true;
-    }
-
-    // Walk both lists in parallel
-    while (walkFree != NULL || walkBusy != NULL) {
-        if (walkFree != NULL && walkFree->offset_ == offset) {
-            if (!isFreeBlockSane(walkFree)) {
-                return false;
-            }
-            offset += walkFree->size_;
-            walkFree = walkFree->next_;
-        }
-        else if (walkBusy != NULL && walkBusy->offset_ == offset) {
-            if (!isBusyBlockSane(walkBusy)) {
-                return false;
-            }
-            offset += walkBusy->size_;
-            walkBusy = walkBusy->next_;
-        }
-        else {
-            return false;
-        }
-    }
-
-#endif // EXTRA_HEAP_CHECKS
-    return true;
-}
-
-void
-HeapBlock::destroyViewsMemory()
-{
-    if ((parent_ != NULL) && (0 == views_.size())) {
-        memory_->free();
-    }
-    else if (views_.size() != 0) {
-        std::list<HeapBlock*>::const_iterator it;
-        for (it = views_.begin(); it != views_.end(); ++it) {
-            (*it)->destroyViewsMemory();
-        }
-    }
-}
-
-bool
-HeapBlock::reallocateViews(HeapBlock* parent, size_t shift)
-{
-    if (views_.size() != 0) {
-        std::list<HeapBlock*>::const_iterator it;
-
-        // Loop through all views and reallocate them
-        for (it = views_.begin(); it != views_.end(); ++it) {
-            // Get the view HeapBlock
-            HeapBlock* hb = (*it);
-
-            // Readjust the offset
-            hb->offset_ += shift;
-            // Add to the list if we have a new parent
-            if (parent != this) {
-                parent->addView(hb);
-            }
-
-            // Reallocate memory
-            hb->memory_->reallocate(hb, parent->getMemory());
-
-            // Process a view on view if available
-            if (!hb->reallocateViews(hb, shift)) {
-                return false;
-            }
-        }
-
-        // Destroy old list
-        if (parent != this) {
-            views_.clear();
-        }
-    }
-    return true;
-}
-
-//! Destructor. Frees the block if in use and does some final sanity checks.
-HeapBlock::~HeapBlock()
-{
-    if (NULL != owner_) {
-        if (inUse_) {
-            owner_->free(this);
-        }
-    }
-    else {
-        // View destruction
-        if (parent_ != NULL) {
-            assert(((parent_->getMemory() != NULL) && (parent_->getMemory()->owner() != NULL)));
-            amd::ScopedLock lock(parent_->getMemory()->owner()->lockMemoryOps());
-            parent_->removeView(this);
-        }
-    }
-    guarantee(size_ > 0 && "destructor called for zero-size heap block (destructor called twice?)");
-    size_ = 0; // Mark as invalid
-
-    if (views_.size() != 0) {
-        LogError("Can't destroy a resource if we still have views!");
-    }
-}
-
-void
-HeapBlock::free()
-{
-    if (NULL != owner_) {
-        owner_->free(this);
-    }
-    else {
-        // It's a view. Destroy the object
-        delete this;
-    }
-}
-
-VirtualHeap::VirtualHeap(
-    Device& device)
-    : Heap(device)
-{
-    virtualMode_ = true;
-}
-
-bool
-VirtualHeap::create(
-    size_t  totalSize,
-    bool    remoteAlloc)
-{
-    // Create a new GPU resource
-    resource_ = new Resource(device_, 0, Heap::ElementType);
-    if (resource_ == NULL) {
-        return false;
-    }
-
-    if (!resource_->create(Resource::Heap)) {
-        return false;
-    }
-
-    if (!device_.settings().hsail_) {
-        baseAddress_ = resource_->gslResource()->getSurfaceAddress();
-    }
-    return true;
-}
-
-VirtualHeap::~VirtualHeap()
-{
-}
-
-HeapBlock*
-VirtualHeap::alloc(size_t size)
-{
-    assert(false && "Dead branch!");
-    return NULL;
-}
-
-void
-VirtualHeap::free(HeapBlock* blk)
-{
-    assert(false && "Dead branch!");
-}
-
-bool
-VirtualHeap::copyTo(Heap* heap)
-{
-    assert(false && "Dead branch!");
-    return false;
-}
-
-bool
-VirtualHeap::isSane(void) const
-{
-    assert(false && "Dead branch!");
-    return true;
-}
-
-} // namespace gpu
@@ -1,225 +0,0 @@
-//! Declarations for GPU memory management
-
-#ifndef GPUHEAP_HPP_
-#define GPUHEAP_HPP_
-
-#include "top.hpp"
-#include "thread/atomic.hpp"
-#include "device/gpu/gpudefs.hpp"
-
-/*! \addtogroup GPU
- *  @{
- */
-
-//! GPU Device Implementation
-
-namespace gpu {
-
-class Device;
-class Heap;
-class Resource;
-class Memory;
-class VirtualGPU;
-
-//! @todo:dgladdin: The heap list should be singly-linked
-
-//! \brief A block on the GPU heap.
-//!
-//! Note that no code outside of the gpumemory.hpp/.cpp pair should touch this
-//! class directly as it is not thread-safe. In general, this class should be
-//! pretty much a struct and contain as little functionality as possible - just
-//!  a constructor, destructor.
-//!
-//! Any other methods - in particular, anything that talks to CAL - should be no
-//! more than proxies for functionality implemented in Heap, as Heap is aware
-//! of the lock state.
-
-class HeapBlock : public amd::HeapObject
-{
-public:
-    //! Constructor
-    HeapBlock(
-        Heap* owner = NULL,
-        size_t size = 0,
-        size_t offset = 0,
-        HeapBlock* next=NULL,
-        HeapBlock* prev=NULL)
-        : owner_(owner)
-        , size_(size)
-        , offset_(offset)
-        , next_(next)
-        , prev_(prev)
-        , inUse_(false)
-        , parent_(NULL)
-        , memory_(NULL)
-        {}
-
-    //! Destructor does some sanity checks.
-    ~HeapBlock();
-
-    //! Frees a heap block, returning its memory to the owning heap (proxy)
-    void free();
-
-    //! Sets the GPU memory object associated with the heap block
-    void setMemory(Memory* memory) { memory_ = memory; }
-
-    //! Gets the GPU memory object associated with the heap block
-    Memory* getMemory() const { return memory_; }
-
-    //! Adds a heapblock view to the list of views
-    void addView(HeapBlock* hb)
-        { views_.push_back(hb);  hb->parent_ = this; }
-
-    //! Removes a heapblock view from the list of views
-    void removeView(HeapBlock* hb) { views_.remove(hb); }
-
-    //! Destroys all views
-    void destroyViewsMemory();
-
-    //! Creates all new views
-    bool reallocateViews(
-        HeapBlock*  parent,     //!< Parent heap block
-        size_t      shift       //!< The new HeapBlock shift
-        );
-
-    //! Gets the offset
-    size_t offset() const { return offset_; }
-
-    Heap*       owner_;     //!< Heap that owns this block
-    size_t      size_;      //!< Size of the block in bytes
-    size_t      offset_;    //!< Offset of this block in the heap
-    HeapBlock*  next_;      //!< Next block on the list, or NULL
-    HeapBlock*  prev_;      //!< Previous block on the list, or NULL
-    bool        inUse_;     //!< true if the block is in use
-    HeapBlock*  parent_;    //!< The parent heap block for a view
-
-private:
-    //! Disable copy constructor
-    HeapBlock(const HeapBlock&);
-
-    //! Disable assignment
-    HeapBlock& operator=(const HeapBlock&);
-
-    Memory*     memory_;    //!< Memory object associated with the heap block
-    std::list<HeapBlock*>   views_; //!< The list of all allocated views
-};
-
-class Heap : public amd::HeapObject
-{
-public:
-    //! Minimal supported CAL granularity = 256 bytes / ElementSize
-    static const size_t MinGranularity = 64;
-
-    //! The size of a heap element in bytes
-    static const size_t ElementSize = 4;
-
-    //! The type of a heap element in bytes
-    static const cmSurfFmt ElementType = CM_SURF_FMT_R32I;
-
-    Heap(
-        Device& device      //!< GPU device object
-        );
-
-    virtual bool create(
-        size_t  totalSize,  //!< total size of the allocated heap (bytes)
-        bool    remoteAlloc //!< allocate the heap in remote memory
-        );
-
-    //! Heap destructor
-    virtual ~Heap();
-
-    /*!
-     * \brief Allocates memory from a heap (best-fit).
-     * We round up to 4k granularity for alignment.
-     *
-     * \return A pointer to allocated heap block object.
-     */
-    virtual HeapBlock* alloc(
-        size_t size     //! The allocation size
-        );
-
-    //! Release memory back to a heap.
-    virtual void free(HeapBlock* blk);
-
-    //! Copies this heap to another
-    virtual bool copyTo(Heap* heap);
-
-    //! Gets the GPU resource associated with the global heap
-    const Resource& resource() const { return *resource_; }
-
-    //! Read the page size (bytes)
-    size_t granularityB() const;
-
-    //! Read the total free space (bytes)
-    size_t freeSpace() const { return freeSize_; }
-
-    virtual bool isSane(void) const;    //!< Checks heap sanity
-
-    //! Returns true if we have a virtual heap
-    bool isVirtual() const { return virtualMode_; }
-
-    //! Returns the base virtual address of the heap
-    uint64_t baseAddress() const { return baseAddress_; }
-
-private:
-    //! Insert a block into a list. Must be called under a lock.
-    void insertBlock(HeapBlock** list, HeapBlock* node);
-
-    //! Merge a block into a list. Must be called under a lock.
-    void mergeBlock(HeapBlock** list, HeapBlock* node);
-
-    //! Remove a block from a list. Must be called under a lock.
-    void detachBlock(HeapBlock** list, HeapBlock* node);
-
-    //! Split a block into two pieces
-    HeapBlock* splitBlock(HeapBlock* node, size_t size);
-
-protected:
-    Resource*   resource_;      //!< GPU resource referencing the heap memory
-    HeapBlock*  freeList_;      //!< Head block for free list
-    HeapBlock*  busyList_;      //!< Head block for busy list
-    size_t      freeSize_;      //!< total free size of the heap
-    Device&     device_;        //!< Device that owns this heap
-    size_t      granularity_;   //!< Size of an allocation page
-    amd::Monitor    lock_;      //!< Lock to serialise heap accesses
-    bool        virtualMode_;   //!< Virtual mode
-    uint64_t    baseAddress_;   //!< Virtual heap base address
-};
-
-class VirtualHeap : public Heap
-{
-public:
-    VirtualHeap(
-        Device& device      //!< GPU device object
-        );
-
-    virtual bool create(
-        size_t  totalSize,  //!< total size of the allocated heap (bytes)
-        bool    remoteAlloc //!< allocate the heap in remote memory
-        );
-
-    //! Heap destructor
-    virtual ~VirtualHeap();
-
-    /*!
-     * \brief Allocates memory from a heap (best-fit).
-     * We round up to 4k granularity for alignment.
-     *
-     * \return A pointer to allocated heap block object.
-     */
-    virtual HeapBlock* alloc(
-        size_t size     //! The allocation size
-        );
-
-    //! Release memory back to a heap.
-    virtual void free(HeapBlock* blk);
-
-    //! Copies this heap to another
-    virtual bool copyTo(Heap* heap);
-
-    virtual bool isSane(void) const;    //!< Checks heap sanity
-};
-
-} // namespace gpu
-
-#endif // GPUHEAP_HPP_
@@ -824,17 +824,6 @@ Kernel::create(
    // Initialize the kernel parameters
    bool    result = initParameters();

-    if (!dev().heap()->isVirtual()) {
-        amd::option::Options *options = nullProg().getCompilerOptions();
-        // @todo Remove this. This is a hack for no VM mode
-        if (!options->oVariables->EnableDumpKernel) {
-            if (!name().compare(BlitName[KernelBlitManager::BlitCopyImageToBuffer]) ||
-                !name().compare(BlitName[KernelBlitManager::BlitCopyBufferToImage])) {
-                blitKernelHack_ = true;
-            }
-        }
-    }
-
    // Wave limiter needs to be initialized after kernel metadata is parsed
    // Since it depends on it.
    waveLimiter_.enable();
@@ -855,7 +844,6 @@ Kernel::Kernel(
    const Program&      prog,
    const InitData*     initData)
    : NullKernel(name, gpuDev, prog)
-    , blitKernelHack_(false)
    , waveLimiter_(this)
 {
    hwPrivateSize_ = 0;
@@ -1603,10 +1591,6 @@ Kernel::debug(VirtualGPU& gpu) const
 {
    std::fstream    stubWrite;
    address         src = NULL;
-    if (!dev().heap()->isVirtual()) {
-        src  = reinterpret_cast<address>
-            (const_cast<Resource&>(dev().globalMem()).map(&gpu));
-    }

    std::cerr << "--- " << name_ << " ---" << std::endl;
    for (uint i = 0; i < arguments_.size(); ++i) {
@@ -1689,9 +1673,6 @@ Kernel::debug(VirtualGPU& gpu) const
            stubWrite.close();
        }
    }
-    if (!dev().heap()->isVirtual()) {
-        const_cast<Resource&>(dev().globalMem()).unmap(&gpu);
-    }
 }

 bool
@@ -1824,18 +1805,10 @@ Kernel::setArgument(
                    type = ArgumentBuffer;
                }
                else {
-                    if (blitKernelHack_) {
-                        // Bind global buffer to UAV this buffer is bound to
-                        if (!bindResource(gpu, *gpuMem, 0, GlobalBuffer, uavRaw_)) {
-                            return false;
-                        }
-                    }
-                    else {
-                        // Bind global buffer to UAV this buffer is bound to
-                        if (!bindResource(gpu, dev().globalMem(), 0,
-                            GlobalBuffer, uavRaw_)) {
-                            return false;
-                        }
+                    // Bind the global buffer to the UAV that this buffer is bound to
+                    if (!bindResource(gpu, dev().globalMem(), 0,
+                        GlobalBuffer, uavRaw_)) {
+                        return false;
                    }
                }

@@ -1848,11 +1821,9 @@ Kernel::setArgument(

                // Update offset only if we bind HeapBuffer or
                // it's global address space in UAV setup on SI+
-                if (!blitKernelHack_) {
-                    offset += gpuMem->hbOffset();
-                    if (!forceZeroOffset) {
-                        assert((offset != 0) && "Offset 0 with a real allocation!");
-                    }
+                offset += gpuMem->hbOffset();
+                if (!forceZeroOffset) {
+                    assert((offset != 0) && "Offset 0 with a real allocation!");
                }
                gpu.addVmMemory(gpuMem);
            }
@@ -2253,10 +2224,9 @@ Kernel::bindResource(

    gslMemObject gslMem = NULL;
    // Use global address space on SI+ for UAV setup
-    if (((type == ArgumentBuffer) || (type == ArgumentCbID) ||
-         (type == ArgumentUavID) || (type == ArgumentPrintfID)) &&
-        !blitKernelHack_) {
-        gslMem = dev().heap()->resource().gslResource();
+    if ((type == ArgumentBuffer) || (type == ArgumentCbID) ||
+        (type == ArgumentUavID) || (type == ArgumentPrintfID)) {
+        gslMem = dev().heap().resource().gslResource();
    }
    else {
        gslMem = resource.gslResource();
@@ -2803,7 +2773,7 @@ NullKernel::parseArguments(const std::string& metaData, uint* uavRefCount)
        case KernelArg::PointerPrivate:
            // Check if can't use a dedicated UAV,
            // so realloc memory in the heap
-            arg->memory_.realloc_ = isRealloc();
+            arg->memory_.realloc_ = false;
            arg->memory_.uavBuf_ = true;
            break;
        case KernelArg::PointerHwConst:
@@ -450,9 +450,6 @@ public:
    uint  instructionCnt() const { return instructionCnt_; }

 protected:
-    //! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
-    virtual bool isRealloc() const { return false; }
-
    /*! \brief Parses the metadata structure for the kernel,
     *   provided by the OpenCL compiler
     *
@@ -673,9 +670,6 @@ protected:
     */
    bool initConstBuffers();

-    //! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
-    virtual bool isRealloc() const { return !dev().heap()->isVirtual(); }
-
 private:
    //! Disable copy constructor
    Kernel(const Kernel&);
@@ -771,9 +765,6 @@ private:
    uint    hwPrivateSize_;     //!< initial HW private size
    uint    hwLocalSize_;       //!< initial HW local size

-    //! @todo remove the blit kernel hack
-    bool    blitKernelHack_;    //!< No VM hack for kernel blit
-
    WaveLimiterManager waveLimiter_; //!< adaptively control number of waves
 };

@@ -30,39 +30,24 @@ namespace gpu {
 Memory::Memory(
    const Device&   gpuDev,
    amd::Memory&    owner,
-    HeapBlock*      hb,
    size_t          size)
    : device::Memory(owner)
-    , Resource(gpuDev, ((hb) ? hb->size_ : size) / Heap::ElementSize, Heap::ElementType)
-    , hb_(hb)
+    , Resource(gpuDev, size / Device::Heap::ElementSize, Device::Heap::ElementType)
 {
    init();

-    if (NULL != hb_) hb_->setMemory(this);
-
    if (owner.parent() != NULL) {
        flags_ |= SubMemoryObject;
    }
 }

-Memory::Memory(
-    const Device&   gpuDev,
-    HeapBlock&      hb)
-    : device::Memory(hb.size_)
-    , Resource(gpuDev, hb.size_ / Heap::ElementSize, Heap::ElementType)
-    , hb_(&hb)
-{
-    init();
-    hb.setMemory(this);
-}
-
 Memory::Memory(
    const Device&   gpuDev,
    size_t          size)
    : device::Memory(size)
    , Resource(gpuDev,
-        amd::alignUp(size, Heap::ElementSize) / Heap::ElementSize, Heap::ElementType)
-    , hb_(NULL)
+        amd::alignUp(size, Device::Heap::ElementSize) /
+            Device::Heap::ElementSize, Device::Heap::ElementType)
 {
    init();
 }
@@ -75,7 +60,6 @@ Memory::Memory(
    )
    : device::Memory(owner)
    , Resource(gpuDev, width, format)
-    , hb_(NULL)
 {
    init();

@@ -92,7 +76,6 @@ Memory::Memory(
    )
    : device::Memory(size)
    , Resource(gpuDev, width, format)
-    , hb_(NULL)
 {
    init();
 }
@@ -110,7 +93,6 @@ Memory::Memory(
    )
    : device::Memory(owner)
    , Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels)
-    , hb_(NULL)
 {
    init();

@@ -132,7 +114,6 @@ Memory::Memory(
    )
    : device::Memory(size)
    , Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels)
-    , hb_(NULL)
 {
    init();
 }
@@ -197,14 +178,9 @@ Memory::create(
            break;
        case Resource::Remote:
        case Resource::RemoteUSWC:
-            // @todo Enable unconditional optimization for remote memory
-            if ((owner() != NULL &&
-                owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) ||
-                (hb() == NULL)) {
-                if (!cal()->tiled_) {
-                    // Marks memory object for direct GPU access to the host memory
-                    flags_ |= HostMemoryDirectAccess;
-                }
+            if (!cal()->tiled_) {
+                // Marks memory object for direct GPU access to the host memory
+                flags_ |= HostMemoryDirectAccess;
            }
            break;
        case Resource::View: {
@@ -481,8 +457,8 @@ Memory::createInterop(InteropType type)
    else {
        // Allocate Resource object for interop as buffer
        interopMemory_ = new Memory(dev(), size(),
-            amd::alignUp(size(), Heap::ElementSize) / Heap::ElementSize,
-            Heap::ElementType);
+            amd::alignUp(size(), Device::Heap::ElementSize) / Device::Heap::ElementSize,
+            Device::Heap::ElementType);

        // Create the interop object in CAL
        if (NULL == interopMemory_ || !interopMemory_->create(memType, createParams)) {
@@ -502,14 +478,6 @@ Memory::~Memory()
    // Clean VA cache
    dev().removeVACache(this);

-    // Release associated heap block, if any
-    if (hb_) {
-        // Protect heap block from simultaneous release with realloc
-        amd::ScopedLock k(dev().lockAsyncOps());
-        hb_->setMemory(NULL);
-        hb_->free();
-    }
-
    delete interopMemory_;

    // Release associated map target, if any
@@ -531,35 +499,6 @@ Memory::~Memory()
    }
 }

-bool
-Memory::reallocate(HeapBlock* hb, const Resource* parent)
-{
-    Resource::ViewParams params;
-    params.size_        = hb->size_;
-    params.resource_    = parent;
-    params.memory_      = NULL;
-
-    // Check if it's a view reallocation
-    if (NULL != hb->parent_) {
-        // The offset inside the view is unchanged
-        params.offset_ = Resource::offset();
-
-        // Create a new view
-        if (Resource::create(Resource::View, &params)) {
-            hb_ = hb;
-            return true;
-        }
-    }
-    else {
-        params.offset_ = hb->offset_;
-        if (Resource::reallocate(&params)) {
-            hb_ = hb;
-            return true;
-        }
-    }
-    return false;
-}
-
 void
 Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags)
 {
@@ -814,33 +753,13 @@ Memory::createBufferView(amd::Memory& subBufferOwner)
 {
    gpu::Memory*            viewMemory;
    Resource::ViewParams    params;
-    HeapBlock*              hb = NULL;

    size_t  offset = subBufferOwner.getOrigin();
    size_t  size = subBufferOwner.getSize();

-    if (!dev().heap()->isVirtual()) {
-        if (NULL == hb_) {
-            LogError("HeapBlock must be initialized!");
-            return NULL;
-        }
-
-        hb = new HeapBlock(NULL, size, offset + hb_->offset());
-        if (hb == NULL) {
-            LogError("We don't have enough video memory!");
-            return NULL;
-        }
-        amd::ScopedLock lock(owner()->lockMemoryOps());
-        hb_->addView(hb);
-    }
-
    // Create a memory object
-    viewMemory = new gpu::Memory(dev(), subBufferOwner, hb, size);
+    viewMemory = new gpu::Memory(dev(), subBufferOwner, size);
    if (NULL == viewMemory) {
-        if (hb != NULL) {
-            hb->setMemory(NULL);
-            hb->free();
-        }
        return NULL;
    }

@@ -8,7 +8,6 @@
 #include "top.hpp"
 #include "thread/atomic.hpp"
 #include "device/gpu/gpuresource.hpp"
-#include "device/gpu/gpuheap.hpp"
 #include "device/gpu/gpudevice.hpp"
 #include <map>

@@ -27,7 +26,6 @@ class Heap;
 class Resource;
 class Memory;
 class VirtualGPU;
-class HeapBlock;

 //! GPU memory object.
 //  Wrapper that can contain a heap block or an interop buffer/image.
@@ -44,14 +42,8 @@ public:
    Memory(
        const Device&   gpuDev,
        amd::Memory&    owner,
-        HeapBlock*      hb,
        size_t          size = 0);

-    //! Constructor (nonfat version for local scratch mem use)
-    Memory(
-        const Device&   gpuDev,
-        HeapBlock&      hb);
-
    //! Constructor (nonfat version for local scratch mem use without heap block)
    Memory(
        const Device&   gpuDev,
@@ -102,12 +94,6 @@ public:
    //! Default destructor
    ~Memory();

-    //! Reallocates the memory object in the new heap block
-    bool reallocate(
-        HeapBlock*      hb,     //! The new heap block for this memory object
-        const Resource* parent  //! Parent resource for view reallocaiton
-        );
-
    //! Creates the interop memory
    bool createInterop(
        InteropType     type    //!< The interop type
@@ -189,9 +175,6 @@ public:
    //! Sets interop type for this memory object
    void setInteropType(InteropType type) { interopType_ = type; }

-    //! Returns the HeapBlock pointer
-    const HeapBlock* hb() const { return hb_; }
-
    //! Set the owner
    void setOwner(amd::Memory* owner) { owner_ = owner; }

@@ -229,7 +212,6 @@ private:
    InteropType interopType_;   //!< Interop type
    Memory*     interopMemory_; //!< interop memory

-    HeapBlock*  hb_;            //!< Heap Block, or NULL if not in-heap memory
    Memory*     pinnedMemory_;  //!< Memory used as pinned system memory
    const Memory*   parent_;        //!< Parent memory object
 };
@@ -322,7 +322,7 @@ static uint32_t GetHSAILImageOrderType(gslChannelOrder chOrder, cmSurfFmt format
 }

 bool
-Resource::create(MemoryType memType, CreateParams* params, bool heap)
+Resource::create(MemoryType memType, CreateParams* params)
 {
    bool    calRes = false;
    gslMemObject  gslResource = 0;
@@ -382,7 +382,7 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
    }

    // Force remote allocation if it was requested in the settings
-    if (dev().settings().remoteAlloc_ && !heap &&
+    if (dev().settings().remoteAlloc_ &&
        ((memoryType() == Local) ||
         (memoryType() == Persistent))) {
        if (dev().settings().apuSystem_ && dev().settings().viPlus_) {
@@ -515,7 +515,7 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
                if (memoryType() == Local) {
                    cal_.type_ = Persistent;
                }
-                else if (!heap && (memoryType() == Persistent)) {
+                else if (memoryType() == Persistent) {
                    cal_.type_ = RemoteUSWC;
                }
                // Remote cacheable to uncacheable
@@ -553,11 +553,6 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
                reinterpret_cast<const char*>(address_) - tmpHost);

            pinOffset_ = hostMemOffset & 0xff;
-            //!@note GSL has a problem with the defines for flags and
-            //! view creation, so check the restriction here
-            if (!dev().heap()->isVirtual() && (pinOffset_ != 0)) {
-                return false;
-            }

            pinAddress = tmpHost;
            // Align width to avoid GSL useless assert with a view
@@ -629,20 +624,6 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
                calRes = true;
            }

-            // Check if it's a heap allocation
-            if (!dev().heap()->isVirtual()) {
-                if (viewOwner_ == &dev().globalMem()) {
-                    // Allocation directly from the heap
-                    hbOffset_   = static_cast<uint64_t>(view->offset_);
-                }
-                else {
-                    // Allocation from another memory object
-                    hbOffset_   = static_cast<uint64_t>(view->offset_) +
-                        viewOwner_->hbOffset();
-                }
-                hbSize_ = view->size_;
-            }
-
            if (viewOwner_->isMemoryType(Pinned)) {
                address_ = viewOwner_->data() + offset();
            }
@@ -952,11 +933,9 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
    cal_.tiled_ = (GSL_MOA_TILING_LINEAR != tiling) &&
        (GSL_MOA_TILING_LINEAR_GENERAL != tiling);

-    // Get the heap block offset if it's a virtual heap
-    if (dev().heap()->isVirtual()) {
-        hbOffset_ = gslResource->getSurfaceAddress() -
-            dev().heap()->baseAddress();
-    }
+    // Get the heap block offset
+    hbOffset_ = gslResource->getSurfaceAddress() -
+        dev().heap().baseAddress();
    hbSize_ = static_cast<uint64_t>(gslResource->getSurfaceSize());

    if (!dev().settings().use64BitPtr_ &&
@@ -1036,32 +1015,6 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
    return true;
 }

-bool
-Resource::reallocate(CreateParams* params)
-{
-    GslResourceReference*   old;
-    GslResourceReference*   active;
-
-    old = gslRef_;
-    if (!create(memoryType(), params)) {
-        gslRef_ = old;
-        return false;
-    }
-    // Get the new active resource
-    active = gslRef_;
-    gslRef_ = old;
-
-    dev().resCopy(old->gslResource(),
-        active->gslResource(), CAL_MEMCOPY_SYNC);
-
-    // Free all old resources
-    assert(renames_.size() == 0);
-    free();
-
-    gslRef_ = active;
-    return true;
-}
-
 void
 Resource::free()
 {
@@ -1813,10 +1766,8 @@ Resource::setActiveRename(VirtualGPU& gpu, GslResourceReference* rename)
    gslRef_  = rename;
    address_ = rename->cpuAddress_;

-    if (dev().heap()->isVirtual()) {
-        hbOffset_ = rename->gslResource()->getSurfaceAddress() -
-            dev().heap()->baseAddress();
-    }
+    hbOffset_ = rename->gslResource()->getSurfaceAddress() -
+        dev().heap().baseAddress();
 }

 bool
@@ -209,15 +209,6 @@ public:
     */
    virtual bool create(
        MemoryType  memType,        //!< memory type
-        CreateParams*   params = 0, //!< special parameters for resource allocation
-        bool        heap = false    //!< Global heap allocation for not VM mode
-        );
-
-    /*! \brief Reallocates a CAL object, associated with the resource
-     *
-     *  \return True if we succesfully reallocated a CAL resource
-     */
-    bool reallocate(
        CreateParams*   params = 0  //!< special parameters for resource allocation
        );

@@ -50,10 +50,6 @@ Settings::Settings()
    maxRenames_         = 16;
    maxRenameSize_      = 4 * Mi;

-    // The global heap settings
-    heapSize_           = GPU_INITIAL_HEAP_SIZE * Mi;
-    heapSizeGrowth_     = GPU_HEAP_GROWTH_INCREMENT * Mi;
-
    imageSupport_       = false;
    hwLDSSize_          = 0;

@@ -82,8 +82,6 @@ public:
    size_t  stagedXferSize_;    //!< Staged buffer size
    uint    maxRenames_;        //!< Maximum number of possible renames
    uint    maxRenameSize_;     //!< Maximum size for all renames
-    size_t  heapSize_;          //!< The global heap size
-    size_t  heapSizeGrowth_;    //!< The global heap size growth
    uint    hwLDSSize_;         //!< HW local data store size
    uint    maxWorkGroupSize_;  //!< Requested workgroup size for this device
    uint    hostMemDirectAccess_;   //!< Enables direct access to the host memory
@@ -517,10 +517,6 @@ VirtualGPU::create(
            // Fall through ...
        case Settings::BlitEngineCAL:
        case Settings::BlitEngineKernel:
-            if (!dev().heap()->isVirtual()) {
-                blitSetup.disableReadBufferRect_    = true;
-                blitSetup.disableWriteBufferRect_   = true;
-            }
            // use host blit for HW debug
            if (dev().settings().enableHwDebug_) {
                blitSetup.disableCopyImageToBuffer_   = true;
@@ -3166,23 +3162,21 @@ VirtualGPU::profilingCollectResults(CommandBatch* cb, const amd::Event* waitingE
 bool
 VirtualGPU::addVmMemory(const Resource* resource)
 {
-    if (dev().heap()->isVirtual()) {
-        uint*    cnt = &cal_.memCount_;
-        (*cnt)++;
-        // Reallocate array if kernel uses more memory objects
-        if (numVmMems_ < *cnt) {
-            gslMemObject* tmp;
-            tmp = new gslMemObject [*cnt];
-            if (tmp == NULL) {
-                return false;
-            }
-            memcpy(tmp, vmMems_, sizeof(gslMemObject) * numVmMems_);
-            delete [] vmMems_;
-            vmMems_ = tmp;
-            numVmMems_ = *cnt;
+    uint*    cnt = &cal_.memCount_;
+    (*cnt)++;
+    // Reallocate array if kernel uses more memory objects
+    if (numVmMems_ < *cnt) {
+        gslMemObject* tmp;
+        tmp = new gslMemObject [*cnt];
+        if (tmp == NULL) {
+            return false;
        }
-        vmMems_[*cnt - 1] = resource->gslResource();
+        memcpy(tmp, vmMems_, sizeof(gslMemObject) * numVmMems_);
+        delete [] vmMems_;
+        vmMems_ = tmp;
+        numVmMems_ = *cnt;
    }
+    vmMems_[*cnt - 1] = resource->gslResource();

    return true;
 }
@@ -496,7 +496,7 @@ CALGSLDevice::SetupContext(int32 &asic_id)
    getAttribs_int(temp_cs);
    temp_cs->getMemInfo(&m_memInfo, GSL_MEMINFO_BASIC);

-    m_vmMode = temp_cs->getVMMode();
+    assert(temp_cs->getVMMode());

    m_adp->deleteContext(temp_cs);

@@ -1313,38 +1313,6 @@ CALGSLDevice::PerformDMACopy(gslMemObject srcMem, gslMemObject destMem, cmSurfFm
    return true;
 }

-void
-CALGSLDevice::resCopy(gslMemObject srcRes, gslMemObject dstRes, uint32 flags) const
-{
-    assert(m_cs != 0);
-    assert(srcRes != 0);
-    assert(dstRes != 0);
-
-    //! @note: GSL device isn't thread safe
-    amd::ScopedLock k(gslDeviceOps());
-
-    uint64 surfaceSize;
-
-    CopyType type = GetCopyType(srcRes, dstRes, 0, 0, m_allowDMA, 0, surfaceSize, 0, 0);
-
-    if (type == USE_DRMDMA)
-    {
-        m_cs->DMACopy(srcRes, 0, dstRes, 0, surfaceSize, GSL_SYNCUPLOAD_SYNC_WAIT, NULL);
-        m_cs->Flush();
-        Wait(m_cs, GSL_DRMDMA_SYNC_ATI, m_mapDMAQuery);
-    }
-    else if (type == USE_CPDMA)
-    {
-        m_cs->syncUploadRaw(srcRes, 0, dstRes, 0, surfaceSize, 0);
-        m_cs->Flush();
-        Wait(m_cs, GSL_SYNC_ATI, m_mapQuery);
-    }
-    else
-    {
-        assert(0 && "No copy engine is being used");
-    }
-}
-
 #define CPDMA_THRESHOLD 131072

 CopyType
@@ -97,14 +97,10 @@ public:
    const CALdeviceattribs& getAttribs() const { return m_attribs; }
    const gslMemInfo& getMemInfo() const { return m_memInfo; }

-    bool             isVmMode() const { return m_vmMode; };
-
    uint32           getVPUMask() const { return m_vpuMask; }
    bool             canDMA() const { return m_canDMA; }
    gslMemObject     m_srcDRMDMAMem, m_dstDRMDMAMem;    // memory object of flush buffer, used for DRMDMA flush

-    void             resCopy(gslMemObject srcRes, gslMemObject dstRes, uint32 flags) const;
-
    void             PerformAdapterInitialization() const;
    void             PerformFullInitialization() const;

@@ -211,7 +207,6 @@ private:
            uint    m_computeRing           : 1;
            uint    m_usePerVPUAdapterModel : 1;
            uint    m_PerformLazyDeviceInit : 1;
-            uint    m_vmMode                : 1;
            uint    m_isComputeRingIDForced : 1;
        };
    };
@@ -34,7 +34,6 @@ Settings::Settings()
 bool
 Settings::create(bool doublePrecision)
 {
-    largeHostMemAlloc_ = true;
    customHostAllocator_ = true;

    // Enable extensions
@@ -52,12 +52,8 @@ release(cstring, GPU_DEVICE_ORDINAL, "",                                      \
        "Select the device ordinal (comma seperated list of available devices)") \
 release(bool, REMOTE_ALLOC, false,                                            \
        "Use remote memory for the global heap allocation")                   \
-release(int, GPU_INITIAL_HEAP_SIZE, 16,                                       \
-        "Initial size of the GPU heap in MiB")                                \
 release(uint, GPU_MAX_HEAP_SIZE, 100,                                         \
        "Set maximum size of the GPU heap to % of board memory")              \
-release(int, GPU_HEAP_GROWTH_INCREMENT, 8,                                    \
-        "Amount to grow the GPU heap by in MiB")                              \
 release(uint, GPU_STAGING_BUFFER_SIZE, 512,                                   \
        "Size of the GPU staging buffer in KiB")                              \
 release(bool, GPU_DUMP_BLIT_KERNELS, false,                                   \