From bc5a50bf7b783aa4f831516c5ddbb3d6bab2eec1 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 17 Sep 2015 11:24:31 -0400
Subject: [PATCH] P4 to Git Change 1191682 by gandryey@gera-dev-w7 on
2015/09/17 11:14:23
ECR #304775 - Remove EG/NI support
- Remove the heap emulation (non-vm)
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#77 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#31 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#186 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#253 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#118 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#523 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#148 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuheap.cpp#28 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuheap.hpp#16 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#297 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#116 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#122 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#227 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#83 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#329 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#94 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#379 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp#143 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.h#57 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsasettings.cpp#38 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsasettings.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#242 edit
---
rocclr/runtime/device/cpu/cpusettings.cpp | 2 -
rocclr/runtime/device/device.cpp | 1 -
rocclr/runtime/device/device.hpp | 3 +-
rocclr/runtime/device/gpu/gpublit.cpp | 24 +-
rocclr/runtime/device/gpu/gpudevice.cpp | 661 +++++-------------
rocclr/runtime/device/gpu/gpudevice.hpp | 55 +-
rocclr/runtime/device/gpu/gpuheap.cpp | 536 --------------
rocclr/runtime/device/gpu/gpuheap.hpp | 225 ------
rocclr/runtime/device/gpu/gpukernel.cpp | 52 +-
rocclr/runtime/device/gpu/gpukernel.hpp | 9 -
rocclr/runtime/device/gpu/gpumemory.cpp | 99 +--
rocclr/runtime/device/gpu/gpumemory.hpp | 18 -
rocclr/runtime/device/gpu/gpuresource.cpp | 65 +-
rocclr/runtime/device/gpu/gpuresource.hpp | 9 -
rocclr/runtime/device/gpu/gpusettings.cpp | 4 -
rocclr/runtime/device/gpu/gpusettings.hpp | 2 -
rocclr/runtime/device/gpu/gpuvirtual.cpp | 32 +-
.../device/gpu/gslbe/src/rt/GSLDevice.cpp | 34 +-
.../device/gpu/gslbe/src/rt/GSLDevice.h | 5 -
rocclr/runtime/device/hsa/hsasettings.cpp | 1 -
rocclr/runtime/utils/flags.hpp | 4 -
21 files changed, 264 insertions(+), 1577 deletions(-)
delete mode 100644 rocclr/runtime/device/gpu/gpuheap.cpp
delete mode 100644 rocclr/runtime/device/gpu/gpuheap.hpp
diff --git a/rocclr/runtime/device/cpu/cpusettings.cpp b/rocclr/runtime/device/cpu/cpusettings.cpp
index 14d43cdbe6..b7dfdf57de 100644
--- a/rocclr/runtime/device/cpu/cpusettings.cpp
+++ b/rocclr/runtime/device/cpu/cpusettings.cpp
@@ -10,8 +10,6 @@ namespace cpu {
bool
Settings::create()
{
- largeHostMemAlloc_ = true;
-
// This code is temporary until cl_khr_fp64 is unconditional
if (flagIsDefault(CL_KHR_FP64) || CL_KHR_FP64) {
enableExtension(ClKhrFp64);
diff --git a/rocclr/runtime/device/device.cpp b/rocclr/runtime/device/device.cpp
index 4e1ef8a2e8..d9c7a1a2b2 100644
--- a/rocclr/runtime/device/device.cpp
+++ b/rocclr/runtime/device/device.cpp
@@ -517,7 +517,6 @@ Settings::Settings()
extensions_ = 0;
partialDispatch_ = false;
supportRA_ = true;
- largeHostMemAlloc_ = false;
customHostAllocator_ = false;
waitCommand_ = AMD_OCL_WAIT_COMMAND;
supportDepthsRGB_ = false;
diff --git a/rocclr/runtime/device/device.hpp b/rocclr/runtime/device/device.hpp
index bb9d16b368..d2a313b6be 100644
--- a/rocclr/runtime/device/device.hpp
+++ b/rocclr/runtime/device/device.hpp
@@ -577,13 +577,12 @@ public:
struct {
uint partialDispatch_: 1; //!< Enables partial dispatch
uint supportRA_: 1; //!< Support RA channel order format
- uint largeHostMemAlloc_: 1; //!< Allow large host mem allocations (> maxSingleAlloc)
uint waitCommand_: 1; //!< Enables a wait for every submitted command
uint customHostAllocator_: 1;//!< True if device has custom host allocator
// that replaces generic OS allocation routines
uint supportDepthsRGB_: 1; //!< Support DEPTH and sRGB channel order format
uint enableHwDebug_: 1; //!< Enable HW debug support
- uint reserved_: 25;
+ uint reserved_: 26;
};
uint value_;
};
diff --git a/rocclr/runtime/device/gpu/gpublit.cpp b/rocclr/runtime/device/gpu/gpublit.cpp
index e201985e09..ab85396b93 100644
--- a/rocclr/runtime/device/gpu/gpublit.cpp
+++ b/rocclr/runtime/device/gpu/gpublit.cpp
@@ -1955,20 +1955,9 @@ KernelBlitManager::copyBufferRect(
// Fall into the CAL path for rejected transfers
if (setup_.disableCopyBufferRect_ ||
- (gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) ||
- (!dev().heap()->isVirtual() &&
- ((gpuMem(dstMemory).hb() == NULL) || (gpuMem(srcMemory).hb() == NULL)))) {
- // Copy data with CAL (no VM mode only)
- if (gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) {
- result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory,
- srcRectIn, dstRectIn, sizeIn, entire);
- }
-
- if ((!dev().heap()->isVirtual() && ((gpuMem(dstMemory).hb() == NULL) || (gpuMem(srcMemory).hb() == NULL)))
- && !result) {
- result = HostBlitManager::copyBufferRect(srcMemory, dstMemory,
- srcRectIn, dstRectIn, sizeIn, entire);
- }
+ gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) {
+ result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory,
+ srcRectIn, dstRectIn, sizeIn, entire);
if (result) {
synchronize();
@@ -2395,11 +2384,9 @@ KernelBlitManager::copyBuffer(
{
amd::ScopedLock k(lockXferOps_);
bool result = false;
- bool forceCal = !dev().heap()->isVirtual() &&
- ((gpuMem(srcMemory).hb() == NULL) || (gpuMem(dstMemory).hb() == NULL));
- if ((!forceCal && !gpuMem(srcMemory).isHostMemDirectAccess() &&
- !gpuMem(dstMemory).isHostMemDirectAccess())) {
+ if (!gpuMem(srcMemory).isHostMemDirectAccess() &&
+ !gpuMem(dstMemory).isHostMemDirectAccess()) {
uint blitType = BlitCopyBuffer;
size_t dim = 1;
size_t globalWorkOffset[3] = { 0, 0, 0 };
@@ -2489,7 +2476,6 @@ KernelBlitManager::copyBuffer(
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters);
}
else {
- // Copy data with CAL (no VM mode only)
result = DmaBlitManager::copyBuffer(
srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire);
}
diff --git a/rocclr/runtime/device/gpu/gpudevice.cpp b/rocclr/runtime/device/gpu/gpudevice.cpp
index 32bd622b01..e8775b6db7 100644
--- a/rocclr/runtime/device/gpu/gpudevice.cpp
+++ b/rocclr/runtime/device/gpu/gpudevice.cpp
@@ -173,7 +173,7 @@ NullDevice::create(CALtarget target)
calAttr.localRAM = 512;
// Fill the device info structure
- fillDeviceInfo(calAttr, memInfo, 4096, 1, true);
+ fillDeviceInfo(calAttr, memInfo, 4096, 1);
if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) {
// Runtime doesn't know what local size could be on the real board
@@ -225,9 +225,7 @@ void NullDevice::fillDeviceInfo(
const CALdeviceattribs& calAttr,
const gslMemInfo& memInfo,
size_t maxTextureSize,
- uint numComputeRings,
- bool isVirtualMode
- )
+ uint numComputeRings)
{
info_.type_ = CL_DEVICE_TYPE_GPU;
info_.vendorId_ = 0x1002;
@@ -276,56 +274,45 @@ void NullDevice::fillDeviceInfo(
info_.globalMemCacheType_ = CL_NONE;
}
- if (isVirtualMode) {
#if defined(ATI_OS_LINUX)
- info_.globalMemSize_ =
- (static_cast(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
- // globalMemSize is the actual available size for app on Linux
- // Because Linux base driver doesn't support paging
- static_cast(memInfo.cardMemAvailableBytes + memInfo.cardExtMemAvailableBytes) / 100u);
+ info_.globalMemSize_ =
+ (static_cast(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
+ // globalMemSize is the actual available size for app on Linux
+ // Because Linux base driver doesn't support paging
+ static_cast(memInfo.cardMemAvailableBytes + memInfo.cardExtMemAvailableBytes) / 100u);
#else
- info_.globalMemSize_ =
- (static_cast(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
- static_cast(calAttr.localRAM) / 100u) * Mi;
+ info_.globalMemSize_ =
+ (static_cast(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
+ static_cast(calAttr.localRAM) / 100u) * Mi;
#endif
- if (settings().apuSystem_) {
- info_.globalMemSize_ +=
- (static_cast(calAttr.uncachedRemoteRAM) * Mi * 75)/100;
- }
+ if (settings().apuSystem_) {
+ info_.globalMemSize_ +=
+ (static_cast(calAttr.uncachedRemoteRAM) * Mi * 75)/100;
+ }
- // We try to calculate the largest available memory size from
- // the largest available block in either heap. In theory this
- // should be the size we can actually allocate at application
- // start. Note that it may not be a guarantee still as the
- // application progresses.
- info_.maxMemAllocSize_ = std::max(
- cl_ulong(memInfo.cardLargestFreeBlockBytes),
- cl_ulong(memInfo.cardExtLargestFreeBlockBytes));
+ // We try to calculate the largest available memory size from
+ // the largest available block in either heap. In theory this
+ // should be the size we can actually allocate at application
+ // start. Note that it may not be a guarantee still as the
+ // application progresses.
+ info_.maxMemAllocSize_ = std::max(
+ cl_ulong(memInfo.cardLargestFreeBlockBytes),
+ cl_ulong(memInfo.cardExtLargestFreeBlockBytes));
#if defined(ATI_OS_WIN)
- if (settings().apuSystem_) {
- info_.maxMemAllocSize_ = std::max(
- (static_cast(calAttr.uncachedRemoteRAM) * Mi * 75)/100,
- info_.maxMemAllocSize_);
- }
+ if (settings().apuSystem_) {
+ info_.maxMemAllocSize_ = std::max(
+ (static_cast(calAttr.uncachedRemoteRAM) * Mi * 75)/100,
+ info_.maxMemAllocSize_);
+ }
#endif
- info_.maxMemAllocSize_ = cl_ulong(info_.maxMemAllocSize_ *
- std::min(GPU_SINGLE_ALLOC_PERCENT, 100u) / 100u);
+ info_.maxMemAllocSize_ = cl_ulong(info_.maxMemAllocSize_ *
+ std::min(GPU_SINGLE_ALLOC_PERCENT, 100u) / 100u);
- //! \note Force max single allocation size.
- //! 4GB limit for the blit kernels and 64 bit optimizations.
- info_.maxMemAllocSize_ = std::min(info_.maxMemAllocSize_,
- static_cast(settings().maxAllocSize_));
- }
- else {
- uint maxHeapSize = flagIsDefault(GPU_MAX_HEAP_SIZE) ? 50 : GPU_MAX_HEAP_SIZE;
- info_.globalMemSize_ = (std::min(maxHeapSize, 100u)
- * calAttr.localRAM / 100u) * Mi;
-
- uint maxAllocSize = flagIsDefault(GPU_SINGLE_ALLOC_PERCENT) ? 25 : GPU_SINGLE_ALLOC_PERCENT;
- info_.maxMemAllocSize_ = cl_ulong(info_.globalMemSize_ *
- std::min(maxAllocSize, 100u) / 100u);
- }
+ //! \note Force max single allocation size.
+ //! 4GB limit for the blit kernels and 64 bit optimizations.
+ info_.maxMemAllocSize_ = std::min(info_.maxMemAllocSize_,
+ static_cast(settings().maxAllocSize_));
if (info_.maxMemAllocSize_ < cl_ulong(128 * Mi)) {
LogError("We are unable to get a heap large enough to support the OpenCL minimum "\
@@ -377,7 +364,7 @@ void NullDevice::fillDeviceInfo(
info_.imagePitchAlignment_ = 256; // XXX: 256 pixel pitch alignment for now
info_.imageBaseAddressAlignment_ = 256; // XXX: 256 byte base address alignment for now
- info_.bufferFromImageSupport_ = (isVirtualMode) ? CL_TRUE : CL_FALSE;
+ info_.bufferFromImageSupport_ = CL_TRUE;
}
info_.errorCorrectionSupport_ = CL_FALSE;
@@ -404,7 +391,7 @@ void NullDevice::fillDeviceInfo(
::strcpy(info_.name_, hwInfo()->targetName_);
::strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1,
- AMD_BUILD_STRING "%s", (isVirtualMode) ? " (VM)": "");
+ AMD_BUILD_STRING "%s", " (VM)");
info_.profile_ = "FULL_PROFILE";
if (settings().oclVersion_ == OpenCL20) {
@@ -508,6 +495,25 @@ void NullDevice::fillDeviceInfo(
}
}
+bool
+Device::Heap::create(Device& device)
+{
+ // Create a new GPU resource
+ resource_ = new Resource(device, 0, CM_SURF_FMT_R32I);
+ if (resource_ == NULL) {
+ return false;
+ }
+
+ if (!resource_->create(Resource::Heap)) {
+ return false;
+ }
+
+ if (!device.settings().hsail_) {
+ baseAddress_ = resource_->gslResource()->getSurfaceAddress();
+ }
+ return true;
+}
+
void
Device::Engines::create(uint num, gslEngineDescriptor* desc, uint maxNumComputeRings)
{
@@ -670,7 +676,7 @@ Device::Device()
, CALGSLDevice()
, numOfVgpus_(0)
, context_(NULL)
- , heap_(NULL)
+ , heap_()
, dummyPage_(NULL)
, lockAsyncOps_(NULL)
, lockAsyncOpsForInitHeap_(NULL)
@@ -731,11 +737,6 @@ Device::~Device()
dummyPage_->release();
}
- // Destroy global heap
- if (heap_ != NULL) {
- delete heap_;
- }
-
// Destroy resource cache
delete resourceCache_;
@@ -837,26 +838,6 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
size_t resourceCacheSize = settings().resourceCacheSize_;
- // Allocate heap
- heapSize_ = settings().heapSize_;
-
- // Check if BE supports virtual addressing mode
- if (isVmMode()) {
- heap_ = new VirtualHeap(*this);
- gpuSettings->largeHostMemAlloc_ = (NULL != heap_) ? true : false;
- }
-
- // If virtual heap allocation failed, then try static allocation
- if (heap_ == NULL) {
- heap_ = new Heap(*this);
- // Disable resource cache if VM is disable
- resourceCacheSize = 0;
- if (NULL == heap_) {
- return false;
- }
- }
-
-
#ifdef DEBUG
std::stringstream message;
if (settings().remoteAlloc_) {
@@ -865,10 +846,7 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
else {
message << "Using *Local* memory";
}
- if (!heap()->isVirtual()) {
- message << ": " << settings().heapSize_ / Mi << "MB, growth: " << \
- settings().heapSizeGrowth_ / Mi << "MB";
- }
+
message << std::endl;
LogInfo(message.str().c_str());
#endif // DEBUG
@@ -883,8 +861,7 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
// Fill the device info structure
fillDeviceInfo(getAttribs(), getMemInfo(),
static_cast(getMaxTextureSize()),
- engines().numComputeRings(), heap()->isVirtual()
- );
+ engines().numComputeRings());
if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) {
if (NULL == hsaCompiler_) {
@@ -955,7 +932,7 @@ Device::initializeHeapResources()
}
// Complete initialization of the heap and other buffers
- if ((heap_ == NULL) || !heap_->create(heapSize_, settings().remoteAlloc_)) {
+ if (!heap_.create(*this)) {
LogError("Failed GPU heap creation");
return false;
}
@@ -987,7 +964,7 @@ Device::initializeHeapResources()
type = Resource::RemoteUSWC;
}
xferWrite_ = new XferBuffers(*this, type,
- amd::alignUp(settings().stagedXferSize_, heap()->granularityB()));
+ amd::alignUp(settings().stagedXferSize_, 4 * Ki));
if ((xferWrite_ == NULL) || !xferWrite_->create()) {
LogError("Couldn't allocate transfer buffer objects for read");
return false;
@@ -997,7 +974,7 @@ Device::initializeHeapResources()
// Initialize staged read buffers
if (settings().stagedXferRead_) {
xferRead_ = new XferBuffers(*this, Resource::Remote,
- amd::alignUp(settings().stagedXferSize_, heap()->granularityB()));
+ amd::alignUp(settings().stagedXferSize_, 4 * Ki));
if ((xferRead_ == NULL) || !xferRead_->create()) {
LogError("Couldn't allocate transfer buffer objects for write");
return false;
@@ -1086,52 +1063,6 @@ Device::createVirtualDevice(
}
}
-bool
-Device::reallocHeap(size_t size, bool remoteAlloc)
-{
- size_t heapSize = heapSize_ + ((size != 0) ?
- amd::alignUp(size, settings().heapSizeGrowth_) : 0);
- Heap* oldHeap = heap_;
- // Maximum heap limit size = reported size + internal memory
- size_t maxHeapLimit = static_cast(info().globalMemSize_) +
- // an extra 10MB for the alignments of allocations,
- // since the conformance test doesn't expect any
- 10 * Mi;
-
- if ((settings().heapSizeGrowth_ == 0) ||
- // Allow the heap growth up to the global memory limit
- (heapSize_ + size > maxHeapLimit)) {
- return false;
- }
- heapSize = std::min(maxHeapLimit, heapSize);
-
- heap_ = new Heap(*this);
-
- // Make sure we have allocated a new global heap
- if (NULL == heap_) {
- heap_ = oldHeap;
- return false;
- }
-
- if (!heap_->create(heapSize, remoteAlloc)) {
- delete heap_;
- heap_ = oldHeap;
- return false;
- }
-
- // Copy the old heap to the new one
- if (!oldHeap->copyTo(heap_)) {
- delete heap_;
- heap_ = oldHeap;
- return false;
- }
-
- delete oldHeap;
- heapSize_ = heapSize;
-
- return true;
-}
-
device::Program*
Device::createProgram(int oclVer)
{
@@ -1288,65 +1219,6 @@ Device::tearDown()
}
}
-//! @note This funciton must be lock protected from a caller
-HeapBlock*
-Device::allocHeapBlock(size_t size) const
-{
- HeapBlock* hb = NULL;
-
- // Allocate the underlying heap block
- hb = heap_->alloc(size);
-
- // Virtual heap should never fail allocation
- if ((hb == NULL) && (!heap_->isVirtual())) {
- // Queues can't process commands,
- // while the global heap reallocation occurs.
- // So stall all queues and then reallocate the global heap
- ScopedLockVgpus lock(*this);
-
- // Wait for idle
- for (uint idx = 0; idx < vgpus().size(); ++idx) {
- vgpus()[idx]->waitAllEngines();
- }
-
- // Acount memory alignment for the new allocation
- size_t extraSpace = heap_->granularityB();
- if (size >= heap_->freeSpace()) {
- // Required extra space = requested size - free space
- extraSpace += size - heap_->freeSpace();
- }
-
- //! @note the const cast here looks bad, but the device object
- // is a lock protected above. The rest of the code
- // doesn't change the device object.
- // So the const methods can be safly used everywhere else.
- // In general we should avoid changing the device object after initialization
-
- // Try to reallocate the heap with the same memory type
- if (const_cast(this)->reallocHeap(extraSpace, settings().remoteAlloc_)) {
- hb = heap_->alloc(size);
- }
-
- if (hb == NULL) {
- // Use reversed memory type as a temporary storage
- bool remoteAlloc = settings().remoteAlloc_ ^ true;
-
- // Try to reallocate the heap
- if (const_cast(this)->reallocHeap(extraSpace, remoteAlloc)) {
- // Back to the default location of the global heap
- remoteAlloc ^= true;
- if (!const_cast(this)->reallocHeap(0, remoteAlloc)) {
- LogWarning("New memory type for the \
- global heap after reallocation!");
- }
- hb = heap_->alloc(size);
- }
- }
- }
-
- return hb;
-}
-
gpu::Memory*
Device::getGpuMemory(amd::Memory* mem) const
{
@@ -1392,99 +1264,20 @@ Device::createScratchBuffer(size_t size) const
{
Memory* gpuMemory = NULL;
- // Use virtual heap allocation
- if (heap()->isVirtual()) {
- // Create a memory object
- gpuMemory = new gpu::Memory(*this, size);
- if (NULL == gpuMemory || !gpuMemory->create(Resource::Local)) {
- delete gpuMemory;
- gpuMemory = NULL;
- }
- }
- else {
- // We have to lock the heap block allocation,
- // so possible reallocation won't occur twice or
- // another thread could destroy a heap block,
- // while we didn't finish allocation
- amd::ScopedLock k(lockAsyncOps());
-
- HeapBlock* hb = allocHeapBlock(size);
- if (hb != NULL) {
- // wrap it
- gpuMemory = new gpu::Memory(*this, *hb);
-
- // Create resource
- if (NULL != gpuMemory) {
- Resource::ViewParams params;
- params.offset_ = hb->offset_;
- params.size_ = hb->size_;
- params.resource_ = &(globalMem());
- params.memory_ = NULL;
- if (!gpuMemory->create(Resource::View, ¶ms)) {
- delete gpuMemory;
- gpuMemory = NULL;
- }
- }
- }
- }
-
- return gpuMemory;
-}
-
-gpu::Memory*
-Device::createBufferFromHeap(amd::Memory& owner) const
-{
- size_t size = owner.getSize();
- gpu::Memory* gpuMemory;
-
- // We have to lock the heap block allocation,
- // so possible reallocation won't occur twice or
- // another thread could destroy a heap block,
- // while we didn't finish allocation
- amd::ScopedLock k(lockAsyncOps());
-
- HeapBlock* hb = allocHeapBlock(size);
- if (hb == NULL) {
- LogError("We don't have enough video memory!");
- return NULL;
- }
-
// Create a memory object
- gpuMemory = new gpu::Memory(*this, owner, hb);
- if (NULL == gpuMemory) {
- hb->setMemory(NULL);
- hb->free();
- return NULL;
- }
-
- Resource::ViewParams params;
- params.owner_ = &owner;
- params.offset_ = hb->offset_;
- params.size_ = hb->size_;
- params.resource_ = &(globalMem());
- params.memory_ = NULL;
-
- if (!gpuMemory->create(Resource::View, ¶ms)) {
+ gpuMemory = new gpu::Memory(*this, size);
+ if (NULL == gpuMemory || !gpuMemory->create(Resource::Local)) {
delete gpuMemory;
- return NULL;
+ gpuMemory = NULL;
}
- // Check if owner is interop memory
- if (owner.isInterop()) {
- if (!gpuMemory->createInterop(Memory::InteropHwEmulation)) {
- LogError("HW interop creation failed!");
- delete gpuMemory;
- return NULL;
- }
- }
return gpuMemory;
}
gpu::Memory*
Device::createBuffer(
amd::Memory& owner,
- bool directAccess,
- bool bufferAlloc) const
+ bool directAccess) const
{
size_t size = owner.getSize();
gpu::Memory* gpuMemory;
@@ -1504,39 +1297,7 @@ Device::createBuffer(
return NULL;
}
- if (!heap()->isVirtual()) {
- bool uhpAlloc =
- (owner.parent()->getMemFlags() & CL_MEM_USE_HOST_PTR) ? true : false;
-
- if (owner.parent()->getType() != CL_MEM_OBJECT_IMAGE1D_BUFFER) {
- //! \note This extra line is necessary to make sure that subbuffer
- //! allocation is a synch operation,
- //! due to a possible realloc of heap(no VM) or parent(UHP)
- amd::ScopedLock k(lockAsyncOps());
-
- //! @note: For now make sure the parent is allocated in the global heap
- //! or if it's the UHP optimization for prepinned memory
- if (((gpuParent->hb() == NULL) || uhpAlloc) &&
- !owner.parent()->reallocedDeviceMemory(this)) {
- if (reallocMemory(*owner.parent())) {
- gpuParent = getGpuMemory(owner.parent());
- }
- else {
- LogError("Can't reallocate the owner object for subbuffer allocation");
- return NULL;
- }
- }
-
- return gpuParent->createBufferView(owner);
- }
- else {
- gpuParent = getGpuMemory(owner.parent()->parent());
- return gpuParent->createBufferView(*owner.parent()->parent());
- }
- }
- else {
- return gpuParent->createBufferView(owner);
- }
+ return gpuParent->createBufferView(owner);
}
Resource::MemoryType type = (owner.forceSysMemAlloc() || (owner.getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) ?
@@ -1550,138 +1311,123 @@ Device::createBuffer(
}
// Use direct access if it's possible
- if (bufferAlloc || (type == Resource::Remote)) {
- bool forceHeapAlloc = false;
- bool remoteAlloc = false;
- // Internal means VirtualDevice!=NULL
- bool internalAlloc = ((owner.getMemFlags() & CL_MEM_USE_HOST_PTR) &&
- (owner.getVirtualDevice() != NULL)) ? true : false;
+ bool remoteAlloc = false;
+ // Internal means VirtualDevice!=NULL
+ bool internalAlloc = ((owner.getMemFlags() & CL_MEM_USE_HOST_PTR) &&
+ (owner.getVirtualDevice() != NULL)) ? true : false;
- // Create a memory object
- gpuMemory = new gpu::Buffer(*this, owner, owner.getSize());
- if (NULL == gpuMemory) {
- return NULL;
- }
+ // Create a memory object
+ gpuMemory = new gpu::Buffer(*this, owner, owner.getSize());
+ if (NULL == gpuMemory) {
+ return NULL;
+ }
- // Check if owner is interop memory
- if (owner.isInterop()) {
- result = gpuMemory->createInterop(Memory::InteropDirectAccess);
- }
- else if (owner.getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
- // Attempt to allocate from persistent heap
- result = gpuMemory->create(Resource::Persistent);
- }
- else if (directAccess || (type == Resource::Remote)) {
- // Check for system memory allocations
- if ((owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR))
- || (settings().remoteAlloc_)) {
- // Allocate remote memory if AHP allocation and context has just 1 device
- if ((owner.getMemFlags() & CL_MEM_ALLOC_HOST_PTR) &&
- (owner.getContext().devices().size() == 1)) {
- if (owner.getMemFlags() & (CL_MEM_READ_ONLY |
- CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
- // GPU will be reading from this host memory buffer,
- // so assume Host write into it
- type = Resource::RemoteUSWC;
- remoteAlloc = true;
- }
+ // Check if owner is interop memory
+ if (owner.isInterop()) {
+ result = gpuMemory->createInterop(Memory::InteropDirectAccess);
+ }
+ else if (owner.getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
+ // Attempt to allocate from persistent heap
+ result = gpuMemory->create(Resource::Persistent);
+ }
+ else if (directAccess || (type == Resource::Remote)) {
+ // Check for system memory allocations
+ if ((owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR))
+ || (settings().remoteAlloc_)) {
+ // Allocate remote memory if AHP allocation and context has just 1 device
+ if ((owner.getMemFlags() & CL_MEM_ALLOC_HOST_PTR) &&
+ (owner.getContext().devices().size() == 1)) {
+ if (owner.getMemFlags() & (CL_MEM_READ_ONLY |
+ CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
+ // GPU will be reading from this host memory buffer,
+ // so assume Host write into it
+ type = Resource::RemoteUSWC;
+ remoteAlloc = true;
}
- // Make sure owner has a valid hostmem pointer and it's not COPY
- if (!remoteAlloc && (owner.getHostMem() != NULL)) {
- Resource::PinnedParams params;
- params.owner_ = &owner;
- params.gpu_ =
- reinterpret_cast(owner.getVirtualDevice());
+ }
+ // Make sure owner has a valid hostmem pointer and it's not COPY
+ if (!remoteAlloc && (owner.getHostMem() != NULL)) {
+ Resource::PinnedParams params;
+ params.owner_ = &owner;
+ params.gpu_ =
+ reinterpret_cast(owner.getVirtualDevice());
- params.hostMemRef_ = owner.getHostMemRef();
- params.size_ = owner.getHostMemRef()->size();
- if (0 == params.size_) {
- params.size_ = owner.getSize();
- }
- // Create memory object
- result = gpuMemory->create(Resource::Pinned, ¶ms);
+ params.hostMemRef_ = owner.getHostMemRef();
+ params.size_ = owner.getHostMemRef()->size();
+ if (0 == params.size_) {
+ params.size_ = owner.getSize();
+ }
+ // Create memory object
+ result = gpuMemory->create(Resource::Pinned, ¶ms);
- // If direct access failed
- if (!result) {
- // and VM off, then force a heap allocation
- if (!heap()->isVirtual()) {
- // Internal pinning doesn't need a heap allocation
- if (!internalAlloc) {
- forceHeapAlloc = true;
- }
- }
- // Don't use cached allocation
- // if size is biger than max single alloc
- if (owner.getSize() > info().maxMemAllocSize_) {
- delete gpuMemory;
- return NULL;
- }
+ // If direct access failed
+ if (!result) {
+ // Don't use cached allocation
+ // if size is biger than max single alloc
+ if (owner.getSize() > info().maxMemAllocSize_) {
+ delete gpuMemory;
+ return NULL;
}
}
}
}
+ }
- if (!result && !forceHeapAlloc &&
- // Make sure it's not internal alloc
- !internalAlloc) {
- Resource::CreateParams params;
- params.owner_ = &owner;
- params.gpu_ = static_cast(owner.getVirtualDevice());
+ if (!result &&
+ // Make sure it's not internal alloc
+ !internalAlloc) {
+ Resource::CreateParams params;
+ params.owner_ = &owner;
+ params.gpu_ = static_cast(owner.getVirtualDevice());
- // Create memory object
- result = gpuMemory->create(type, ¶ms);
+ // Create memory object
+ result = gpuMemory->create(type, ¶ms);
- // If allocation was successful
- if (result) {
- // Initialize if the memory is a pipe object
- if (owner.getType() == CL_MEM_OBJECT_PIPE) {
- // Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
- // Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
- size_t pipeInit[3] = {0 , 0, owner.asPipe()->getMaxNumPackets()};
- gpuMemory->writeRawData(*xferQueue_, sizeof(pipeInit), pipeInit, true);
+ // If allocation was successful
+ if (result) {
+ // Initialize if the memory is a pipe object
+ if (owner.getType() == CL_MEM_OBJECT_PIPE) {
+ // Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
+ // Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
+ size_t pipeInit[3] = {0 , 0, owner.asPipe()->getMaxNumPackets()};
+ gpuMemory->writeRawData(*xferQueue_, sizeof(pipeInit), pipeInit, true);
+ }
+ // If memory has direct access from host, then get CPU address
+ if (gpuMemory->isHostMemDirectAccess() &&
+ (type != Resource::ExternalPhysical)) {
+ void* address = gpuMemory->map(NULL);
+ if (address != NULL) {
+ // Copy saved memory
+ if (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) {
+ memcpy(address, owner.getHostMem(), owner.getSize());
+ }
+ // It should be safe to change the host memory pointer,
+ // because it's lock protected from the upper caller
+ owner.setHostMem(address);
}
- // If memory has direct access from host, then get CPU address
- if (gpuMemory->isHostMemDirectAccess() &&
- (type != Resource::ExternalPhysical)) {
- void* address = gpuMemory->map(NULL);
- if (address != NULL) {
- // Copy saved memory
- if (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) {
- memcpy(address, owner.getHostMem(), owner.getSize());
- }
- // It should be safe to change the host memory pointer,
- // because it's lock protected from the upper caller
- owner.setHostMem(address);
- }
- else {
- result = false;
- }
- }
- // An optimization for CHP. Copy memory and destroy sysmem allocation
- else if ((gpuMemory->memoryType() != Resource::Pinned) &&
- (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
- (owner.getContext().devices().size() == 1)) {
- amd::Coord3D origin(0, 0, 0);
- amd::Coord3D region(owner.getSize());
- static const bool Entire = true;
- if (xferMgr().writeBuffer(owner.getHostMem(),
- *gpuMemory, origin, region, Entire)) {
- // Clear CHP memory
- owner.setHostMem(NULL);
- }
+ else {
+ result = false;
+ }
+ }
+ // An optimization for CHP. Copy memory and destroy sysmem allocation
+ else if ((gpuMemory->memoryType() != Resource::Pinned) &&
+ (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
+ (owner.getContext().devices().size() == 1)) {
+ amd::Coord3D origin(0, 0, 0);
+ amd::Coord3D region(owner.getSize());
+ static const bool Entire = true;
+ if (xferMgr().writeBuffer(owner.getHostMem(),
+ *gpuMemory, origin, region, Entire)) {
+ // Clear CHP memory
+ owner.setHostMem(NULL);
}
}
- }
-
- if (!result && !forceHeapAlloc) {
- delete gpuMemory;
- return NULL;
}
}
if (!result) {
- assert(!heap()->isVirtual() && "Can't have static heap allocation with VM");
- gpuMemory = createBufferFromHeap(owner);
+ delete gpuMemory;
+ return NULL;
}
return gpuMemory;
@@ -1703,10 +1449,10 @@ Device::createImage(amd::Memory& owner, bool directAccess) const
}
// Create a view on the specified device
gpuImage = (gpu::Memory*)createView(owner, *devParent);
- if (heap()->isVirtual() && (NULL != gpuImage) && (gpuImage->owner() != NULL)) {
+ if ((NULL != gpuImage) && (gpuImage->owner() != NULL)) {
gpuImage->owner()->setHostMem((address)(owner.parent()->getHostMem()) + gpuImage->owner()->getOrigin());
}
- return gpuImage ;
+ return gpuImage;
}
gpuImage = new gpu::Image(*this, owner,
@@ -1778,11 +1524,11 @@ Device::createImage(amd::Memory& owner, bool directAccess) const
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
// Ignore copy for image1D_buffer, since it was already done for buffer
- if (heap()->isVirtual() && imageBuffer) {
+ if (imageBuffer) {
// Clear CHP memory
owner.setHostMem(NULL);
}
- else if (!imageBuffer) {
+ else {
amd::Coord3D origin(0, 0, 0);
static const bool Entire = true;
if (xferMgr().writeImage(owner.getHostMem(),
@@ -1809,25 +1555,12 @@ Device::createMemory(
amd::Memory& owner) const
{
bool directAccess = false;
- bool bufferAlloc = false;
gpu::Memory* memory = NULL;
- if (heap()->isVirtual()) {
- bufferAlloc = true;
- }
- //!@todo Remove this code when VM is always on.
- // Use zero-copy transfers for sysmem allocations or persistent memory
- else {
- if (owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR |
- CL_MEM_USE_HOST_PTR)) {
- bufferAlloc = true;
- }
- }
-
if (owner.asBuffer()) {
directAccess = (settings().hostMemDirectAccess_ & Settings::HostMemBuffer)
? true : false;
- memory = createBuffer(owner, directAccess, bufferAlloc);
+ memory = createBuffer(owner, directAccess);
}
else if (owner.asImage()) {
directAccess = (settings().hostMemDirectAccess_ & Settings::HostMemImage)
@@ -1878,7 +1611,6 @@ bool
Device::reallocMemory(amd::Memory& owner) const
{
bool directAccess = false;
- bool bufferAlloc = heap()->isVirtual();
// For now we have to serialize reallocation code
amd::ScopedLock lk(*lockAsyncOps_);
@@ -1889,35 +1621,18 @@ Device::reallocMemory(amd::Memory& owner) const
if (gpuMemory == NULL) {
return false;
}
- if (gpuMemory->hb() != NULL) {
+
+ if (gpuMemory->pinOffset() == 0) {
return true;
}
-
- if (bufferAlloc) {
- if (gpuMemory->pinOffset() == 0) {
- return true;
- }
- else if (NULL != owner.parent()) {
- if (!reallocMemory(*owner.parent())) {
- return false;
- }
+ else if (NULL != owner.parent()) {
+ if (!reallocMemory(*owner.parent())) {
+ return false;
}
}
if (owner.asBuffer()) {
- // Disable remote allocation if no VM
- if ((gpuMemory != NULL) &&
- ((gpuMemory->memoryType() == Resource::Remote) ||
- (gpuMemory->memoryType() == Resource::RemoteUSWC)) && !bufferAlloc) {
- // Make sure we don't have a stale memory in VA cache before reallocation
- // of system memory.
- // \note: the app must unmap() memory before kernel launch
- removeVACache(gpuMemory);
- static const bool forceAllocHostMem = true;
- static const bool forceCopy = true;
- owner.allocHostMemory(owner.getHostMem(), forceAllocHostMem, forceCopy);
- }
- gpuMemory = createBuffer(owner, directAccess, bufferAlloc);
+ gpuMemory = createBuffer(owner, directAccess);
}
else if (owner.asImage()) {
return true;
@@ -2113,24 +1828,18 @@ Device::globalFreeMemory(size_t* freeMemory) const
if (!(const_cast(this)->initializeHeapResources())) {
return false;
}
- if (heap()->isVirtual()) {
- gslMemInfo memInfo = {0};
- gslCtx()->getMemInfo(&memInfo, GSL_MEMINFO_BASIC);
- // Fill free memory info
- freeMemory[TotalFreeMemory] = (memInfo.cardMemAvailableBytes +
- memInfo.cardExtMemAvailableBytes) / Ki;
- freeMemory[LargestFreeBlock] = std::max(memInfo.cardLargestFreeBlockBytes,
- memInfo.cardExtLargestFreeBlockBytes) / Ki;
- if (settings().apuSystem_) {
- freeMemory[TotalFreeMemory] += memInfo.agpMemAvailableBytes / Ki;
- freeMemory[LargestFreeBlock] += memInfo.agpLargestFreeBlockBytes / Ki;
- }
- }
- else {
- freeMemory[TotalFreeMemory] = static_cast((info().globalMemSize_ -
- static_cast(heapSize_) + heap()->freeSpace()) / Ki);
- freeMemory[LargestFreeBlock] = freeMemory[TotalFreeMemory];
+ gslMemInfo memInfo = {0};
+ gslCtx()->getMemInfo(&memInfo, GSL_MEMINFO_BASIC);
+
+ // Fill free memory info
+ freeMemory[TotalFreeMemory] = (memInfo.cardMemAvailableBytes +
+ memInfo.cardExtMemAvailableBytes) / Ki;
+ freeMemory[LargestFreeBlock] = std::max(memInfo.cardLargestFreeBlockBytes,
+ memInfo.cardExtLargestFreeBlockBytes) / Ki;
+ if (settings().apuSystem_) {
+ freeMemory[TotalFreeMemory] += memInfo.agpMemAvailableBytes / Ki;
+ freeMemory[LargestFreeBlock] += memInfo.agpLargestFreeBlockBytes / Ki;
}
return true;
diff --git a/rocclr/runtime/device/gpu/gpudevice.hpp b/rocclr/runtime/device/gpu/gpudevice.hpp
index 4072ed9670..045ee2b40c 100644
--- a/rocclr/runtime/device/gpu/gpudevice.hpp
+++ b/rocclr/runtime/device/gpu/gpudevice.hpp
@@ -125,8 +125,7 @@ protected:
const CALdeviceattribs& calAttr, //!< CAL device attributes info
const gslMemInfo& memInfo, //!< GSL mem info
size_t maxTextureSize, //!< Maximum texture size supported in HW
- uint numComputeRings, //!< Number of compute rings
- bool isVirtualMode //!< Device is in virtual mode
+ uint numComputeRings //!< Number of compute rings
);
};
@@ -184,6 +183,32 @@ private:
class Device : public NullDevice, public CALGSLDevice
{
public:
+ class Heap : public amd::EmbeddedObject
+ {
+ public:
+ //! The size of a heap element in bytes
+ static const size_t ElementSize = 4;
+
+ //! The type of a heap element in bytes
+ static const cmSurfFmt ElementType = CM_SURF_FMT_R32I;
+
+ Heap(): resource_(NULL), baseAddress_(0) {}
+
+ bool create(
+ Device& device //!< GPU device object
+ );
+
+ //! Gets the GPU resource associated with the global heap
+ const Resource& resource() const { return *resource_; }
+
+ //! Returns the base virtual address of the heap
+ uint64_t baseAddress() const { return baseAddress_; }
+
+ protected:
+ Resource* resource_; //!< GPU resource referencing the heap memory
+ uint64_t baseAddress_; //!< Virtual heap base address
+ };
+
//! Locks any access to the virtual GPUs
class ScopedLockVgpus : public amd::StackObject {
public:
@@ -377,12 +402,6 @@ public:
//! Destructor for the physical GPU device
virtual ~Device();
- //! Reallocates current global heap
- bool reallocHeap(
- size_t size, //!< requested size for reallocation
- bool remoteAlloc //!< allocate the new heap in remote memory
- );
-
//! Instantiate a new virtual device
device::VirtualDevice* createVirtualDevice(
amd::CommandQueue* queue = NULL
@@ -442,15 +461,10 @@ public:
) const;
//! Gets the GPU resource associated with the global heap
- const Resource& globalMem() const { return heap_->resource(); }
+ const Resource& globalMem() const { return heap_.resource(); }
//! Gets the global heap object
- const Heap* heap() const { return heap_; }
-
- //! Allocates a heap block from the global heap
- HeapBlock* allocHeapBlock(
- size_t size //!< The heap block size for allocation
- ) const;
+ const Heap& heap() const { return heap_; }
//! Gets the memory object for the dummy page
amd::Memory* dummyPage() const { return dummyPage_; }
@@ -566,16 +580,10 @@ private:
//! Sends the stall command to all queues
bool stallQueues();
- //! Buffer allocation from static heap (no VM mode only)
- gpu::Memory* createBufferFromHeap(
- amd::Memory& owner //!< Abstraction layer memory object
- ) const;
-
//! Buffer allocation
gpu::Memory* createBuffer(
amd::Memory& owner, //!< Abstraction layer memory object
- bool directAccess, //!< Use direct host memory access
- bool bufferAlloc //!< If TRUE, then don't use heap
+ bool directAccess //!< Use direct host memory access
) const;
//! Image allocation
@@ -591,8 +599,7 @@ private:
);
amd::Context* context_; //!< A dummy context for internal allocations
- size_t heapSize_; //!< The global heap size
- Heap* heap_; //!< GPU heap manager
+ Heap heap_; //!< GPU global heap
amd::Memory* dummyPage_; //!< A dummy page for NULL pointer
amd::Monitor* lockAsyncOps_; //!< Lock to serialise all async ops on this device
diff --git a/rocclr/runtime/device/gpu/gpuheap.cpp b/rocclr/runtime/device/gpu/gpuheap.cpp
deleted file mode 100644
index 28cc32ed29..0000000000
--- a/rocclr/runtime/device/gpu/gpuheap.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
-//! Implementation of GPU device memory management
-
-#include "top.hpp"
-#include "thread/thread.hpp"
-#include "thread/monitor.hpp"
-#include "device/device.hpp"
-#include "device/gpu/gpuheap.hpp"
-#include "device/gpu/gpudevice.hpp"
-
-#include
-#include
-#include
-#include
-
-//! Turn this on to enable sanity checks before and after every heap operation.
-#if DEBUG
-#define EXTRA_HEAP_CHECKS 1
-#endif // DEBUG
-
-namespace gpu {
-
-// The GPU heap. Very simple implementation for now.
-Heap::Heap(
- Device& device)
- : resource_(NULL)
- , freeList_(NULL)
- , busyList_(NULL)
- , freeSize_(0)
- , device_(device)
- , granularity_(Heap::MinGranularity)
- , lock_("GPU heap lock", true)
- , virtualMode_(false)
- , baseAddress_(0)
-{
-}
-
-size_t
-Heap::granularityB() const
-{
- return granularity_ * Heap::ElementSize;
-}
-
-bool
-Heap::create(size_t totalSize, bool remoteAlloc)
-{
- Resource::MemoryType memType;
- size_t maxHeight = device_.info().image2DMaxHeight_;
- size_t sizeInElements;
- size_t npages;
-
- freeSize_ = totalSize;
-
- sizeInElements = (totalSize + Heap::ElementSize - 1) / Heap::ElementSize;
-
- // Calculate best granularity given the size and device characteristics
- npages = amd::alignUp(sizeInElements, granularity_) / granularity_;
-
- // Create a new GPU resource
- resource_ = new Resource(device_, sizeInElements, Heap::ElementType);
-
- if (resource_ == NULL) {
- return false;
- }
-
- memType = (remoteAlloc) ? Resource::RemoteUSWC : Resource::Local;
-
- if (!resource_->create(memType, NULL, true)) {
- return false;
- }
-
- // Set up initial free list
- freeList_ = new HeapBlock(this, npages * granularityB(), 0, NULL, NULL);
- if (freeList_ == NULL) {
- return false;
- }
-
- guarantee(isSane());
- return true;
-}
-
-Heap::~Heap()
-{
- amd::ScopedLock k(lock_);
-
- guarantee(isSane());
-
- // Release all heap blocks
- HeapBlock *walk, *next;
- walk = busyList_;
- while (walk) {
- next = walk->next_;
- walk->free();
- walk = next;
- }
-
- walk = freeList_;
- while (walk) {
- next = walk->next_;
- delete walk;
- walk = next;
- }
-
- // Release resource
- delete resource_;
-}
-
-HeapBlock*
-Heap::alloc(size_t size)
-{
- amd::ScopedLock k(lock_);
- HeapBlock* walk = freeList_;
- HeapBlock* best = NULL;
-
- guarantee(isSane());
-
- // Round size
- size = amd::alignUp(size, granularityB());
-
- // Walk the free list looking for a suitable block (currently best-fit)
- //! @todo:dgladdin: experiment with switching back to first-fit
-
- while (walk) {
- if ((walk->size_ > size) &&
- (best == NULL || walk->size_ < best->size_)) {
- best = walk;
- }
- else if (walk->size_ == size) {
- // No need to split, just move to busy list
- detachBlock(&freeList_, walk);
- walk->inUse_ = true;
- insertBlock(&busyList_, walk);
- guarantee(isSane());
- freeSize_ -= size;
- return walk;
- }
- walk = walk->next_;
- }
-
- if (best != NULL) {
- // Got one, but need to split it. Keep first part in free list,
- // put second part into busy list.
- HeapBlock *newblock = splitBlock(best, size);
- newblock->inUse_ = true;
- insertBlock(&busyList_, newblock);
- guarantee(isSane());
- freeSize_ -= size;
- return newblock;
- }
-
- // No free block available
- guarantee(isSane());
- return NULL;
-}
-
-bool
-Heap::copyTo(Heap* heap)
-{
- HeapBlock *walk;
-
- walk = busyList_;
- while (walk) {
- if (walk->getMemory() != NULL) {
- HeapBlock* hb = heap->alloc(walk->size_);
- if (hb == NULL) {
- return false;
- }
- hb->setMemory(walk->getMemory());
-
- walk->destroyViewsMemory();
- if (!walk->getMemory()->reallocate(hb, &(heap->resource()))) {
- return false;
- }
-
- if (!walk->reallocateViews(hb,
- static_cast(hb->offset_ - walk->offset_))) {
- return false;
- }
- }
- walk = walk->next_;
- }
-
- return true;
-}
-
-void
-Heap::free(HeapBlock* blk)
-{
- amd::ScopedLock k(lock_);
- guarantee(isSane());
- detachBlock(&busyList_, blk);
- blk->inUse_ = false;
- freeSize_ += blk->size_;
- mergeBlock(&freeList_, blk);
- guarantee(isSane());
-}
-
-void
-Heap::detachBlock(HeapBlock** list, HeapBlock* blk)
-{
- // Sanity checks
- guarantee(isSane());
-
- if (*list == blk) {
- *list = blk->next_;
- }
-
- if (blk->prev_) {
- blk->prev_->next_ = blk->next_;
- }
- if (blk->next_) {
- blk->next_->prev_ = blk->prev_;
- }
- // no heap sanity check as blk is now floating
-}
-
-void
-Heap::insertBlock(HeapBlock** head, HeapBlock* blk)
-{
- if (NULL == *head) {
- *head = blk;
- blk->prev_ = NULL;
- blk->next_ = NULL;
- guarantee(isSane());
- return;
- }
-
- // Find the place to insert it at
- HeapBlock* walk = *head;
- while (walk->next_ && walk->next_->offset_ < blk->offset_) {
- walk = walk->next_;
- }
-
- // Insert it
- if (walk == *head) {
- if (walk->offset_ >= blk->offset_) {
- *head = blk;
- blk->prev_ = NULL;
- blk->next_ = walk;
- walk->prev_ = *head;
- guarantee(isSane());
- return;
- }
- }
-
- blk->next_ = walk->next_;
- blk->prev_ = walk;
- if (walk->next_) {
- walk->next_->prev_ = blk;
- }
- walk->next_ = blk;
- guarantee(isSane());
-}
-
-HeapBlock*
-Heap::splitBlock(HeapBlock* blk, size_t tailsize)
-{
- // Sanity checks
-
- guarantee(isSane());
- guarantee(blk->size_ > tailsize && "block too small to split as requested");
- guarantee(!blk->inUse_ && "can't split in-use block");
-
- // Create a new block
-
- HeapBlock* nb = new HeapBlock(blk->owner_, tailsize,
- blk->offset_ + blk->size_ - tailsize);
-
- // Resize the old block
-
- blk->size_ = blk->size_ - tailsize;
- return nb; // no heap sanity check here as the new block hasn't been plugged in yet
-}
-
-//! Join two blocks, transferring the size of the second into the first and deleting
-//! the second. Utility fn for mergeBlock()
-
-static void
-join2Blocks(HeapBlock* first, HeapBlock* second)
-{
- // Sanity checks
-
- guarantee(first->size_ > 0 && "first block invalid");
- guarantee(!first->inUse_ && "can't join an in-use block");
- guarantee(second->size_ > 0 && "second block invalid");
- guarantee(first->offset_ + first->size_ == second->offset_);
-
- // Do the join
- first->size_ = first->size_ + second->size_;
- first->next_ = second->next_;
- if (second->next_) {
- second->next_->prev_ = first;
- }
- delete second;
-}
-
-//! Insert a block into a list, merging it with adjacent blocks if possible. Must be called
-//! under a lock, cannot be used on in-use blocks or blocks with an associated resource alias.
-
-void
-Heap::mergeBlock(HeapBlock** head, HeapBlock* blk)
-{
- insertBlock(head, blk);
-
- // Merge with successor if possible
- if ((blk->next_ != NULL) &&
- (blk->offset_ + blk->size_ == blk->next_->offset_)) {
- join2Blocks(blk, blk->next_);
- }
-
- // Merge with predecessor if possible
- if ((blk->prev_ != NULL) &&
- (blk->prev_->offset_ + blk->prev_->size_ == blk->offset_)) {
- join2Blocks(blk->prev_, blk);
- }
-
- guarantee(isSane());
-}
-
-//! Sanity check for both types of block (helper function for Heap::isSane())
-
-static bool
-isBlockSane(HeapBlock* b)
-{
- return (b->owner_ != NULL
- && (b->next_ == NULL || b->next_->prev_ == b)
- && (b->prev_ == NULL || b->prev_->next_ == b));
-}
-
-//! Sanity check for an individual free block (helper function for Heap::isSane())
-static bool
-isFreeBlockSane(HeapBlock* b)
-{
- if (isBlockSane(b) && !b->inUse_) {
- return true;
- } else {
- return false;
- }
-}
-
-//! Sanity check for an individual busy block (helper function for Heap::isSane())
-static bool
-isBusyBlockSane(HeapBlock* b)
-{
- if (isBlockSane(b) && b->inUse_) {
- return true;
- } else {
- return false;
- }
-}
-
-//! Sanity check for the heap.
-
-bool
-Heap::isSane() const
-{
- // If we got this far, everything is (probably) OK
-#if EXTRA_HEAP_CHECKS
- HeapBlock* walkFree = freeList_; // Free list position
- HeapBlock* walkBusy = busyList_; // Busy list position
- size_t offset = 0; // Current offset
-
- // We can have zero lists if Heap allocation fails
- if (walkFree == NULL && walkBusy == NULL) {
- return true;
- }
-
- // Walk both lists in parallel
- while (walkFree != NULL || walkBusy != NULL) {
- if (walkFree != NULL && walkFree->offset_ == offset) {
- if (!isFreeBlockSane(walkFree)) {
- return false;
- }
- offset += walkFree->size_;
- walkFree = walkFree->next_;
- }
- else if (walkBusy != NULL && walkBusy->offset_ == offset) {
- if (!isBusyBlockSane(walkBusy)) {
- return false;
- }
- offset += walkBusy->size_;
- walkBusy = walkBusy->next_;
- }
- else {
- return false;
- }
- }
-
-#endif // EXTRA_HEAP_CHECKS
- return true;
-}
-
-void
-HeapBlock::destroyViewsMemory()
-{
- if ((parent_ != NULL) && (0 == views_.size())) {
- memory_->free();
- }
- else if (views_.size() != 0) {
- std::list::const_iterator it;
- for (it = views_.begin(); it != views_.end(); ++it) {
- (*it)->destroyViewsMemory();
- }
- }
-}
-
-bool
-HeapBlock::reallocateViews(HeapBlock* parent, size_t shift)
-{
- if (views_.size() != 0) {
- std::list::const_iterator it;
-
- // Loop through all views and reallocate them
- for (it = views_.begin(); it != views_.end(); ++it) {
- // Get the view HeapBlock
- HeapBlock* hb = (*it);
-
- // Readjust the offset
- hb->offset_ += shift;
- // Add to the list if we have a new parent
- if (parent != this) {
- parent->addView(hb);
- }
-
- // Reallocate memory
- hb->memory_->reallocate(hb, parent->getMemory());
-
- // Process a view on view if available
- if (!hb->reallocateViews(hb, shift)) {
- return false;
- }
- }
-
- // Destroy old list
- if (parent != this) {
- views_.clear();
- }
- }
- return true;
-}
-
-//! Destructor. Frees the block if in use and does some final sanity checks.
-HeapBlock::~HeapBlock()
-{
- if (NULL != owner_) {
- if (inUse_) {
- owner_->free(this);
- }
- }
- else {
- // View destruction
- if (parent_ != NULL) {
- assert(((parent_->getMemory() != NULL) && (parent_->getMemory()->owner() != NULL)));
- amd::ScopedLock lock(parent_->getMemory()->owner()->lockMemoryOps());
- parent_->removeView(this);
- }
- }
- guarantee(size_ > 0 && "destructor called for zero-size heap block (destructor called twice?)");
- size_ = 0; // Mark as invalid
-
- if (views_.size() != 0) {
- LogError("Can't destroy a resource if we still have views!");
- }
-}
-
-void
-HeapBlock::free()
-{
- if (NULL != owner_) {
- owner_->free(this);
- }
- else {
- // It's a view. Destroy the object
- delete this;
- }
-}
-
-VirtualHeap::VirtualHeap(
- Device& device)
- : Heap(device)
-{
- virtualMode_ = true;
-}
-
-bool
-VirtualHeap::create(
- size_t totalSize,
- bool remoteAlloc)
-{
- // Create a new GPU resource
- resource_ = new Resource(device_, 0, Heap::ElementType);
- if (resource_ == NULL) {
- return false;
- }
-
- if (!resource_->create(Resource::Heap)) {
- return false;
- }
-
- if (!device_.settings().hsail_) {
- baseAddress_ = resource_->gslResource()->getSurfaceAddress();
- }
- return true;
-}
-
-VirtualHeap::~VirtualHeap()
-{
-}
-
-HeapBlock*
-VirtualHeap::alloc(size_t size)
-{
- assert(false && "Dead branch!");
- return NULL;
-}
-
-void
-VirtualHeap::free(HeapBlock* blk)
-{
- assert(false && "Dead branch!");
-}
-
-bool
-VirtualHeap::copyTo(Heap* heap)
-{
- assert(false && "Dead branch!");
- return false;
-}
-
-bool
-VirtualHeap::isSane(void) const
-{
- assert(false && "Dead branch!");
- return true;
-}
-
-} // namespace gpu
diff --git a/rocclr/runtime/device/gpu/gpuheap.hpp b/rocclr/runtime/device/gpu/gpuheap.hpp
deleted file mode 100644
index b38f316446..0000000000
--- a/rocclr/runtime/device/gpu/gpuheap.hpp
+++ /dev/null
@@ -1,225 +0,0 @@
-//! Declarations for GPU memory management
-
-#ifndef GPUHEAP_HPP_
-#define GPUHEAP_HPP_
-
-#include "top.hpp"
-#include "thread/atomic.hpp"
-#include "device/gpu/gpudefs.hpp"
-
-/*! \addtogroup GPU
- * @{
- */
-
-//! GPU Device Implementation
-
-namespace gpu {
-
-class Device;
-class Heap;
-class Resource;
-class Memory;
-class VirtualGPU;
-
-//! @todo:dgladdin: The heap list should be singly-linked
-
-//! \brief A block on the GPU heap.
-//!
-//! Note that no code outside of the gpumemory.hpp/.cpp pair should touch this
-//! class directly as it is not thread-safe. In general, this class should be
-//! pretty much a struct and contain as little functionality as possible - just
-//! a constructor, destructor.
-//!
-//! Any other methods - in particular, anything that talks to CAL - should be no
-//! more than proxies for functionality implemented in Heap, as Heap is aware
-//! of the lock state.
-
-class HeapBlock : public amd::HeapObject
-{
-public:
- //! Constructor
- HeapBlock(
- Heap* owner = NULL,
- size_t size = 0,
- size_t offset = 0,
- HeapBlock* next=NULL,
- HeapBlock* prev=NULL)
- : owner_(owner)
- , size_(size)
- , offset_(offset)
- , next_(next)
- , prev_(prev)
- , inUse_(false)
- , parent_(NULL)
- , memory_(NULL)
- {}
-
- //! Destructor does some sanity checks.
- ~HeapBlock();
-
- //! Frees a heap block, returning its memory to the owning heap (proxy)
- void free();
-
- //! Sets the GPU memory object associated with the heap block
- void setMemory(Memory* memory) { memory_ = memory; }
-
- //! Gets the GPU memory object associated with the heap block
- Memory* getMemory() const { return memory_; }
-
- //! Adds a heapblock view to the list of views
- void addView(HeapBlock* hb)
- { views_.push_back(hb); hb->parent_ = this; }
-
- //! Removes a heapblock view from the list of views
- void removeView(HeapBlock* hb) { views_.remove(hb); }
-
- //! Destroys all views
- void destroyViewsMemory();
-
- //! Creates all new views
- bool reallocateViews(
- HeapBlock* parent, //!< Parent heap block
- size_t shift //!< The new HeapBlock shift
- );
-
- //! Gets the offset
- size_t offset() const { return offset_; }
-
- Heap* owner_; //!< Heap that owns this block
- size_t size_; //!< Size of the block in bytes
- size_t offset_; //!< Offset of this block in the heap
- HeapBlock* next_; //!< Next block on the list, or NULL
- HeapBlock* prev_; //!< Previous block on the list, or NULL
- bool inUse_; //!< true if the block is in use
- HeapBlock* parent_; //!< The parent heap block for a view
-
-private:
- //! Disable copy constructor
- HeapBlock(const HeapBlock&);
-
- //! Disable assignment
- HeapBlock& operator=(const HeapBlock&);
-
- Memory* memory_; //!< Memory object associated with the heap block
- std::list views_; //!< The list of all allocated views
-};
-
-class Heap : public amd::HeapObject
-{
-public:
- //! Minimal supported CAL granularity = 256 bytes / ElementSize
- static const size_t MinGranularity = 64;
-
- //! The size of a heap element in bytes
- static const size_t ElementSize = 4;
-
- //! The type of a heap element in bytes
- static const cmSurfFmt ElementType = CM_SURF_FMT_R32I;
-
- Heap(
- Device& device //!< GPU device object
- );
-
- virtual bool create(
- size_t totalSize, //!< total size of the allocated heap (bytes)
- bool remoteAlloc //!< allocate the heap in remote memory
- );
-
- //! Heap destructor
- virtual ~Heap();
-
- /*!
- * \brief Allocates memory from a heap (best-fit).
- * We round up to 4k granularity for alignment.
- *
- * \return A pointer to allocated heap block object.
- */
- virtual HeapBlock* alloc(
- size_t size //! The allocation size
- );
-
- //! Release memory back to a heap.
- virtual void free(HeapBlock* blk);
-
- //! Copies this heap to another
- virtual bool copyTo(Heap* heap);
-
- //! Gets the GPU resource associated with the global heap
- const Resource& resource() const { return *resource_; }
-
- //! Read the page size (bytes)
- size_t granularityB() const;
-
- //! Read the total free space (bytes)
- size_t freeSpace() const { return freeSize_; }
-
- virtual bool isSane(void) const; //!< Checks heap sanity
-
- //! Returns true if we have a virtual heap
- bool isVirtual() const { return virtualMode_; }
-
- //! Returns the base virtual address of the heap
- uint64_t baseAddress() const { return baseAddress_; }
-
-private:
- //! Insert a block into a list. Must be called under a lock.
- void insertBlock(HeapBlock** list, HeapBlock* node);
-
- //! Merge a block into a list. Must be called under a lock.
- void mergeBlock(HeapBlock** list, HeapBlock* node);
-
- //! Remove a block from a list. Must be called under a lock.
- void detachBlock(HeapBlock** list, HeapBlock* node);
-
- //! Split a block into two pieces
- HeapBlock* splitBlock(HeapBlock* node, size_t size);
-
-protected:
- Resource* resource_; //!< GPU resource referencing the heap memory
- HeapBlock* freeList_; //!< Head block for free list
- HeapBlock* busyList_; //!< Head block for busy list
- size_t freeSize_; //!< total free size of the heap
- Device& device_; //!< Device that owns this heap
- size_t granularity_; //!< Size of an allocation page
- amd::Monitor lock_; //!< Lock to serialise heap accesses
- bool virtualMode_; //!< Virtual mode
- uint64_t baseAddress_; //!< Virtual heap base address
-};
-
-class VirtualHeap : public Heap
-{
-public:
- VirtualHeap(
- Device& device //!< GPU device object
- );
-
- virtual bool create(
- size_t totalSize, //!< total size of the allocated heap (bytes)
- bool remoteAlloc //!< allocate the heap in remote memory
- );
-
- //! Heap destructor
- virtual ~VirtualHeap();
-
- /*!
- * \brief Allocates memory from a heap (best-fit).
- * We round up to 4k granularity for alignment.
- *
- * \return A pointer to allocated heap block object.
- */
- virtual HeapBlock* alloc(
- size_t size //! The allocation size
- );
-
- //! Release memory back to a heap.
- virtual void free(HeapBlock* blk);
-
- //! Copies this heap to another
- virtual bool copyTo(Heap* heap);
-
- virtual bool isSane(void) const; //!< Checks heap sanity
-};
-
-} // namespace gpu
-
-#endif // GPUHEAP_HPP_
diff --git a/rocclr/runtime/device/gpu/gpukernel.cpp b/rocclr/runtime/device/gpu/gpukernel.cpp
index 0ffabf5468..8f511311e7 100644
--- a/rocclr/runtime/device/gpu/gpukernel.cpp
+++ b/rocclr/runtime/device/gpu/gpukernel.cpp
@@ -824,17 +824,6 @@ Kernel::create(
// Initialize the kernel parameters
bool result = initParameters();
- if (!dev().heap()->isVirtual()) {
- amd::option::Options *options = nullProg().getCompilerOptions();
- // @todo Remove this. This is a hack for no VM mode
- if (!options->oVariables->EnableDumpKernel) {
- if (!name().compare(BlitName[KernelBlitManager::BlitCopyImageToBuffer]) ||
- !name().compare(BlitName[KernelBlitManager::BlitCopyBufferToImage])) {
- blitKernelHack_ = true;
- }
- }
- }
-
// Wave limiter needs to be initialized after kernel metadata is parsed
// Since it depends on it.
waveLimiter_.enable();
@@ -855,7 +844,6 @@ Kernel::Kernel(
const Program& prog,
const InitData* initData)
: NullKernel(name, gpuDev, prog)
- , blitKernelHack_(false)
, waveLimiter_(this)
{
hwPrivateSize_ = 0;
@@ -1603,10 +1591,6 @@ Kernel::debug(VirtualGPU& gpu) const
{
std::fstream stubWrite;
address src = NULL;
- if (!dev().heap()->isVirtual()) {
- src = reinterpret_cast
- (const_cast(dev().globalMem()).map(&gpu));
- }
std::cerr << "--- " << name_ << " ---" << std::endl;
for (uint i = 0; i < arguments_.size(); ++i) {
@@ -1689,9 +1673,6 @@ Kernel::debug(VirtualGPU& gpu) const
stubWrite.close();
}
}
- if (!dev().heap()->isVirtual()) {
- const_cast(dev().globalMem()).unmap(&gpu);
- }
}
bool
@@ -1824,18 +1805,10 @@ Kernel::setArgument(
type = ArgumentBuffer;
}
else {
- if (blitKernelHack_) {
- // Bind global buffer to UAV this buffer is bound to
- if (!bindResource(gpu, *gpuMem, 0, GlobalBuffer, uavRaw_)) {
- return false;
- }
- }
- else {
- // Bind global buffer to UAV this buffer is bound to
- if (!bindResource(gpu, dev().globalMem(), 0,
- GlobalBuffer, uavRaw_)) {
- return false;
- }
+ // Bind global buffer to UAV this buffer is bound to
+ if (!bindResource(gpu, dev().globalMem(), 0,
+ GlobalBuffer, uavRaw_)) {
+ return false;
}
}
@@ -1848,11 +1821,9 @@ Kernel::setArgument(
// Update offset only if we bind HeapBuffer or
// it's global address space in UAV setup on SI+
- if (!blitKernelHack_) {
- offset += gpuMem->hbOffset();
- if (!forceZeroOffset) {
- assert((offset != 0) && "Offset 0 with a real allocation!");
- }
+ offset += gpuMem->hbOffset();
+ if (!forceZeroOffset) {
+ assert((offset != 0) && "Offset 0 with a real allocation!");
}
gpu.addVmMemory(gpuMem);
}
@@ -2253,10 +2224,9 @@ Kernel::bindResource(
gslMemObject gslMem = NULL;
// Use global address space on SI+ for UAV setup
- if (((type == ArgumentBuffer) || (type == ArgumentCbID) ||
- (type == ArgumentUavID) || (type == ArgumentPrintfID)) &&
- !blitKernelHack_) {
- gslMem = dev().heap()->resource().gslResource();
+ if ((type == ArgumentBuffer) || (type == ArgumentCbID) ||
+ (type == ArgumentUavID) || (type == ArgumentPrintfID)) {
+ gslMem = dev().heap().resource().gslResource();
}
else {
gslMem = resource.gslResource();
@@ -2803,7 +2773,7 @@ NullKernel::parseArguments(const std::string& metaData, uint* uavRefCount)
case KernelArg::PointerPrivate:
// Check if can't use a dedicated UAV,
// so realloc memory in the heap
- arg->memory_.realloc_ = isRealloc();
+ arg->memory_.realloc_ = false;
arg->memory_.uavBuf_ = true;
break;
case KernelArg::PointerHwConst:
diff --git a/rocclr/runtime/device/gpu/gpukernel.hpp b/rocclr/runtime/device/gpu/gpukernel.hpp
index b46242ec2d..c89b9e1589 100644
--- a/rocclr/runtime/device/gpu/gpukernel.hpp
+++ b/rocclr/runtime/device/gpu/gpukernel.hpp
@@ -450,9 +450,6 @@ public:
uint instructionCnt() const { return instructionCnt_; }
protected:
- //! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
- virtual bool isRealloc() const { return false; }
-
/*! \brief Parses the metadata structure for the kernel,
* provided by the OpenCL compiler
*
@@ -673,9 +670,6 @@ protected:
*/
bool initConstBuffers();
- //! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
- virtual bool isRealloc() const { return !dev().heap()->isVirtual(); }
-
private:
//! Disable copy constructor
Kernel(const Kernel&);
@@ -771,9 +765,6 @@ private:
uint hwPrivateSize_; //!< initial HW private size
uint hwLocalSize_; //!< initial HW local size
- //! @todo remove the blit kernel hack
- bool blitKernelHack_; //!< No VM hack for kernel blit
-
WaveLimiterManager waveLimiter_; //!< adaptively control number of waves
};
diff --git a/rocclr/runtime/device/gpu/gpumemory.cpp b/rocclr/runtime/device/gpu/gpumemory.cpp
index 3dacc145fc..0534ffe201 100644
--- a/rocclr/runtime/device/gpu/gpumemory.cpp
+++ b/rocclr/runtime/device/gpu/gpumemory.cpp
@@ -30,39 +30,24 @@ namespace gpu {
Memory::Memory(
const Device& gpuDev,
amd::Memory& owner,
- HeapBlock* hb,
size_t size)
: device::Memory(owner)
- , Resource(gpuDev, ((hb) ? hb->size_ : size) / Heap::ElementSize, Heap::ElementType)
- , hb_(hb)
+ , Resource(gpuDev, size / Device::Heap::ElementSize, Device::Heap::ElementType)
{
init();
- if (NULL != hb_) hb_->setMemory(this);
-
if (owner.parent() != NULL) {
flags_ |= SubMemoryObject;
}
}
-Memory::Memory(
- const Device& gpuDev,
- HeapBlock& hb)
- : device::Memory(hb.size_)
- , Resource(gpuDev, hb.size_ / Heap::ElementSize, Heap::ElementType)
- , hb_(&hb)
-{
- init();
- hb.setMemory(this);
-}
-
Memory::Memory(
const Device& gpuDev,
size_t size)
: device::Memory(size)
, Resource(gpuDev,
- amd::alignUp(size, Heap::ElementSize) / Heap::ElementSize, Heap::ElementType)
- , hb_(NULL)
+ amd::alignUp(size, Device::Heap::ElementSize) /
+ Device::Heap::ElementSize, Device::Heap::ElementType)
{
init();
}
@@ -75,7 +60,6 @@ Memory::Memory(
)
: device::Memory(owner)
, Resource(gpuDev, width, format)
- , hb_(NULL)
{
init();
@@ -92,7 +76,6 @@ Memory::Memory(
)
: device::Memory(size)
, Resource(gpuDev, width, format)
- , hb_(NULL)
{
init();
}
@@ -110,7 +93,6 @@ Memory::Memory(
)
: device::Memory(owner)
, Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels)
- , hb_(NULL)
{
init();
@@ -132,7 +114,6 @@ Memory::Memory(
)
: device::Memory(size)
, Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels)
- , hb_(NULL)
{
init();
}
@@ -197,14 +178,9 @@ Memory::create(
break;
case Resource::Remote:
case Resource::RemoteUSWC:
- // @todo Enable unconditional optimization for remote memory
- if ((owner() != NULL &&
- owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) ||
- (hb() == NULL)) {
- if (!cal()->tiled_) {
- // Marks memory object for direct GPU access to the host memory
- flags_ |= HostMemoryDirectAccess;
- }
+ if (!cal()->tiled_) {
+ // Marks memory object for direct GPU access to the host memory
+ flags_ |= HostMemoryDirectAccess;
}
break;
case Resource::View: {
@@ -481,8 +457,8 @@ Memory::createInterop(InteropType type)
else {
// Allocate Resource object for interop as buffer
interopMemory_ = new Memory(dev(), size(),
- amd::alignUp(size(), Heap::ElementSize) / Heap::ElementSize,
- Heap::ElementType);
+ amd::alignUp(size(), Device::Heap::ElementSize) / Device::Heap::ElementSize,
+ Device::Heap::ElementType);
// Create the interop object in CAL
if (NULL == interopMemory_ || !interopMemory_->create(memType, createParams)) {
@@ -502,14 +478,6 @@ Memory::~Memory()
// Clean VA cache
dev().removeVACache(this);
- // Release associated heap block, if any
- if (hb_) {
- // Protect heap block from simultaneous release with realloc
- amd::ScopedLock k(dev().lockAsyncOps());
- hb_->setMemory(NULL);
- hb_->free();
- }
-
delete interopMemory_;
// Release associated map target, if any
@@ -531,35 +499,6 @@ Memory::~Memory()
}
}
-bool
-Memory::reallocate(HeapBlock* hb, const Resource* parent)
-{
- Resource::ViewParams params;
- params.size_ = hb->size_;
- params.resource_ = parent;
- params.memory_ = NULL;
-
- // Check if it's a view reallocation
- if (NULL != hb->parent_) {
- // The offset inside the view is unchanged
- params.offset_ = Resource::offset();
-
- // Create a new view
- if (Resource::create(Resource::View, ¶ms)) {
- hb_ = hb;
- return true;
- }
- }
- else {
- params.offset_ = hb->offset_;
- if (Resource::reallocate(¶ms)) {
- hb_ = hb;
- return true;
- }
- }
- return false;
-}
-
void
Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags)
{
@@ -814,33 +753,13 @@ Memory::createBufferView(amd::Memory& subBufferOwner)
{
gpu::Memory* viewMemory;
Resource::ViewParams params;
- HeapBlock* hb = NULL;
size_t offset = subBufferOwner.getOrigin();
size_t size = subBufferOwner.getSize();
- if (!dev().heap()->isVirtual()) {
- if (NULL == hb_) {
- LogError("HeapBlock must be initialized!");
- return NULL;
- }
-
- hb = new HeapBlock(NULL, size, offset + hb_->offset());
- if (hb == NULL) {
- LogError("We don't have enough video memory!");
- return NULL;
- }
- amd::ScopedLock lock(owner()->lockMemoryOps());
- hb_->addView(hb);
- }
-
// Create a memory object
- viewMemory = new gpu::Memory(dev(), subBufferOwner, hb, size);
+ viewMemory = new gpu::Memory(dev(), subBufferOwner, size);
if (NULL == viewMemory) {
- if (hb != NULL) {
- hb->setMemory(NULL);
- hb->free();
- }
return NULL;
}
diff --git a/rocclr/runtime/device/gpu/gpumemory.hpp b/rocclr/runtime/device/gpu/gpumemory.hpp
index c6ccb4b23e..503ca42a34 100644
--- a/rocclr/runtime/device/gpu/gpumemory.hpp
+++ b/rocclr/runtime/device/gpu/gpumemory.hpp
@@ -8,7 +8,6 @@
#include "top.hpp"
#include "thread/atomic.hpp"
#include "device/gpu/gpuresource.hpp"
-#include "device/gpu/gpuheap.hpp"
#include "device/gpu/gpudevice.hpp"
#include