P4 to Git Change 1191682 by gandryey@gera-dev-w7 on 2015/09/17 11:14:23

ECR #304775 - Remove EG/NI support
	- Remove the heap emulation (non-vm)

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#77 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#31 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#186 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#253 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#118 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#523 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#148 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuheap.cpp#28 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuheap.hpp#16 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#297 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#116 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#122 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#227 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#83 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#329 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#94 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#379 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.cpp#143 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLDevice.h#57 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa/hsasettings.cpp#38 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsasettings.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#242 edit
此提交包含在:
foreman
2015-09-17 11:24:31 -04:00
父節點 7f9a18c1b0
當前提交 bc5a50bf7b
共有 21 個檔案被更改,包括 264 行新增和 1577 行刪除
-2
查看文件
@@ -10,8 +10,6 @@ namespace cpu {
bool
Settings::create()
{
largeHostMemAlloc_ = true;
// This code is temporary until cl_khr_fp64 is unconditional
if (flagIsDefault(CL_KHR_FP64) || CL_KHR_FP64) {
enableExtension(ClKhrFp64);
-1
查看文件
@@ -517,7 +517,6 @@ Settings::Settings()
extensions_ = 0;
partialDispatch_ = false;
supportRA_ = true;
largeHostMemAlloc_ = false;
customHostAllocator_ = false;
waitCommand_ = AMD_OCL_WAIT_COMMAND;
supportDepthsRGB_ = false;
+1 -2
查看文件
@@ -577,13 +577,12 @@ public:
struct {
uint partialDispatch_: 1; //!< Enables partial dispatch
uint supportRA_: 1; //!< Support RA channel order format
uint largeHostMemAlloc_: 1; //!< Allow large host mem allocations (> maxSingleAlloc)
uint waitCommand_: 1; //!< Enables a wait for every submitted command
uint customHostAllocator_: 1;//!< True if device has custom host allocator
// that replaces generic OS allocation routines
uint supportDepthsRGB_: 1; //!< Support DEPTH and sRGB channel order format
uint enableHwDebug_: 1; //!< Enable HW debug support
uint reserved_: 25;
uint reserved_: 26;
};
uint value_;
};
+5 -19
查看文件
@@ -1955,20 +1955,9 @@ KernelBlitManager::copyBufferRect(
// Fall into the CAL path for rejected transfers
if (setup_.disableCopyBufferRect_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) ||
(!dev().heap()->isVirtual() &&
((gpuMem(dstMemory).hb() == NULL) || (gpuMem(srcMemory).hb() == NULL)))) {
// Copy data with CAL (no VM mode only)
if (gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) {
result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory,
srcRectIn, dstRectIn, sizeIn, entire);
}
if ((!dev().heap()->isVirtual() && ((gpuMem(dstMemory).hb() == NULL) || (gpuMem(srcMemory).hb() == NULL)))
&& !result) {
result = HostBlitManager::copyBufferRect(srcMemory, dstMemory,
srcRectIn, dstRectIn, sizeIn, entire);
}
gpuMem(srcMemory).isHostMemDirectAccess() || gpuMem(dstMemory).isHostMemDirectAccess()) {
result = DmaBlitManager::copyBufferRect(srcMemory, dstMemory,
srcRectIn, dstRectIn, sizeIn, entire);
if (result) {
synchronize();
@@ -2395,11 +2384,9 @@ KernelBlitManager::copyBuffer(
{
amd::ScopedLock k(lockXferOps_);
bool result = false;
bool forceCal = !dev().heap()->isVirtual() &&
((gpuMem(srcMemory).hb() == NULL) || (gpuMem(dstMemory).hb() == NULL));
if ((!forceCal && !gpuMem(srcMemory).isHostMemDirectAccess() &&
!gpuMem(dstMemory).isHostMemDirectAccess())) {
if (!gpuMem(srcMemory).isHostMemDirectAccess() &&
!gpuMem(dstMemory).isHostMemDirectAccess()) {
uint blitType = BlitCopyBuffer;
size_t dim = 1;
size_t globalWorkOffset[3] = { 0, 0, 0 };
@@ -2489,7 +2476,6 @@ KernelBlitManager::copyBuffer(
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters);
}
else {
// Copy data with CAL (no VM mode only)
result = DmaBlitManager::copyBuffer(
srcMemory, dstMemory, srcOrigin, dstOrigin, sizeIn, entire);
}
+185 -476
查看文件
@@ -173,7 +173,7 @@ NullDevice::create(CALtarget target)
calAttr.localRAM = 512;
// Fill the device info structure
fillDeviceInfo(calAttr, memInfo, 4096, 1, true);
fillDeviceInfo(calAttr, memInfo, 4096, 1);
if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) {
// Runtime doesn't know what local size could be on the real board
@@ -225,9 +225,7 @@ void NullDevice::fillDeviceInfo(
const CALdeviceattribs& calAttr,
const gslMemInfo& memInfo,
size_t maxTextureSize,
uint numComputeRings,
bool isVirtualMode
)
uint numComputeRings)
{
info_.type_ = CL_DEVICE_TYPE_GPU;
info_.vendorId_ = 0x1002;
@@ -276,56 +274,45 @@ void NullDevice::fillDeviceInfo(
info_.globalMemCacheType_ = CL_NONE;
}
if (isVirtualMode) {
#if defined(ATI_OS_LINUX)
info_.globalMemSize_ =
(static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
// globalMemSize is the actual available size for app on Linux
// Because Linux base driver doesn't support paging
static_cast<cl_ulong>(memInfo.cardMemAvailableBytes + memInfo.cardExtMemAvailableBytes) / 100u);
info_.globalMemSize_ =
(static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
// globalMemSize is the actual available size for app on Linux
// Because Linux base driver doesn't support paging
static_cast<cl_ulong>(memInfo.cardMemAvailableBytes + memInfo.cardExtMemAvailableBytes) / 100u);
#else
info_.globalMemSize_ =
(static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
static_cast<cl_ulong>(calAttr.localRAM) / 100u) * Mi;
info_.globalMemSize_ =
(static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
static_cast<cl_ulong>(calAttr.localRAM) / 100u) * Mi;
#endif
if (settings().apuSystem_) {
info_.globalMemSize_ +=
(static_cast<cl_ulong>(calAttr.uncachedRemoteRAM) * Mi * 75)/100;
}
if (settings().apuSystem_) {
info_.globalMemSize_ +=
(static_cast<cl_ulong>(calAttr.uncachedRemoteRAM) * Mi * 75)/100;
}
// We try to calculate the largest available memory size from
// the largest available block in either heap. In theory this
// should be the size we can actually allocate at application
// start. Note that it may not be a guarantee still as the
// application progresses.
info_.maxMemAllocSize_ = std::max(
cl_ulong(memInfo.cardLargestFreeBlockBytes),
cl_ulong(memInfo.cardExtLargestFreeBlockBytes));
// We try to calculate the largest available memory size from
// the largest available block in either heap. In theory this
// should be the size we can actually allocate at application
// start. Note that it may not be a guarantee still as the
// application progresses.
info_.maxMemAllocSize_ = std::max(
cl_ulong(memInfo.cardLargestFreeBlockBytes),
cl_ulong(memInfo.cardExtLargestFreeBlockBytes));
#if defined(ATI_OS_WIN)
if (settings().apuSystem_) {
info_.maxMemAllocSize_ = std::max(
(static_cast<cl_ulong>(calAttr.uncachedRemoteRAM) * Mi * 75)/100,
info_.maxMemAllocSize_);
}
if (settings().apuSystem_) {
info_.maxMemAllocSize_ = std::max(
(static_cast<cl_ulong>(calAttr.uncachedRemoteRAM) * Mi * 75)/100,
info_.maxMemAllocSize_);
}
#endif
info_.maxMemAllocSize_ = cl_ulong(info_.maxMemAllocSize_ *
std::min(GPU_SINGLE_ALLOC_PERCENT, 100u) / 100u);
info_.maxMemAllocSize_ = cl_ulong(info_.maxMemAllocSize_ *
std::min(GPU_SINGLE_ALLOC_PERCENT, 100u) / 100u);
//! \note Force max single allocation size.
//! 4GB limit for the blit kernels and 64 bit optimizations.
info_.maxMemAllocSize_ = std::min(info_.maxMemAllocSize_,
static_cast<cl_ulong>(settings().maxAllocSize_));
}
else {
uint maxHeapSize = flagIsDefault(GPU_MAX_HEAP_SIZE) ? 50 : GPU_MAX_HEAP_SIZE;
info_.globalMemSize_ = (std::min(maxHeapSize, 100u)
* calAttr.localRAM / 100u) * Mi;
uint maxAllocSize = flagIsDefault(GPU_SINGLE_ALLOC_PERCENT) ? 25 : GPU_SINGLE_ALLOC_PERCENT;
info_.maxMemAllocSize_ = cl_ulong(info_.globalMemSize_ *
std::min(maxAllocSize, 100u) / 100u);
}
//! \note Force max single allocation size.
//! 4GB limit for the blit kernels and 64 bit optimizations.
info_.maxMemAllocSize_ = std::min(info_.maxMemAllocSize_,
static_cast<cl_ulong>(settings().maxAllocSize_));
if (info_.maxMemAllocSize_ < cl_ulong(128 * Mi)) {
LogError("We are unable to get a heap large enough to support the OpenCL minimum "\
@@ -377,7 +364,7 @@ void NullDevice::fillDeviceInfo(
info_.imagePitchAlignment_ = 256; // XXX: 256 pixel pitch alignment for now
info_.imageBaseAddressAlignment_ = 256; // XXX: 256 byte base address alignment for now
info_.bufferFromImageSupport_ = (isVirtualMode) ? CL_TRUE : CL_FALSE;
info_.bufferFromImageSupport_ = CL_TRUE;
}
info_.errorCorrectionSupport_ = CL_FALSE;
@@ -404,7 +391,7 @@ void NullDevice::fillDeviceInfo(
::strcpy(info_.name_, hwInfo()->targetName_);
::strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1,
AMD_BUILD_STRING "%s", (isVirtualMode) ? " (VM)": "");
AMD_BUILD_STRING "%s", " (VM)");
info_.profile_ = "FULL_PROFILE";
if (settings().oclVersion_ == OpenCL20) {
@@ -508,6 +495,25 @@ void NullDevice::fillDeviceInfo(
}
}
//! Creates the GPU resource that backs the global heap.
//!
//! \param device GPU device object the heap resource is created for
//! \return true if the resource object was allocated and successfully created;
//!         false otherwise. On failure the heap object keeps no resource.
bool
Device::Heap::create(Device& device)
{
    // Create a new GPU resource
    resource_ = new Resource(device, 0, CM_SURF_FMT_R32I);
    if (resource_ == NULL) {
        return false;
    }
    if (!resource_->create(Resource::Heap)) {
        // Release the resource object that could not be created, so it is
        // neither leaked nor left behind as a stale, unusable pointer
        delete resource_;
        resource_ = NULL;
        return false;
    }
    // The surface address is recorded for non-HSAIL devices only;
    // otherwise baseAddress_ keeps its previous value
    if (!device.settings().hsail_) {
        baseAddress_ = resource_->gslResource()->getSurfaceAddress();
    }
    return true;
}
void
Device::Engines::create(uint num, gslEngineDescriptor* desc, uint maxNumComputeRings)
{
@@ -670,7 +676,7 @@ Device::Device()
, CALGSLDevice()
, numOfVgpus_(0)
, context_(NULL)
, heap_(NULL)
, heap_()
, dummyPage_(NULL)
, lockAsyncOps_(NULL)
, lockAsyncOpsForInitHeap_(NULL)
@@ -731,11 +737,6 @@ Device::~Device()
dummyPage_->release();
}
// Destroy global heap
if (heap_ != NULL) {
delete heap_;
}
// Destroy resource cache
delete resourceCache_;
@@ -837,26 +838,6 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
size_t resourceCacheSize = settings().resourceCacheSize_;
// Allocate heap
heapSize_ = settings().heapSize_;
// Check if BE supports virtual addressing mode
if (isVmMode()) {
heap_ = new VirtualHeap(*this);
gpuSettings->largeHostMemAlloc_ = (NULL != heap_) ? true : false;
}
// If virtual heap allocation failed, then try static allocation
if (heap_ == NULL) {
heap_ = new Heap(*this);
// Disable resource cache if VM is disable
resourceCacheSize = 0;
if (NULL == heap_) {
return false;
}
}
#ifdef DEBUG
std::stringstream message;
if (settings().remoteAlloc_) {
@@ -865,10 +846,7 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
else {
message << "Using *Local* memory";
}
if (!heap()->isVirtual()) {
message << ": " << settings().heapSize_ / Mi << "MB, growth: " << \
settings().heapSizeGrowth_ / Mi << "MB";
}
message << std::endl;
LogInfo(message.str().c_str());
#endif // DEBUG
@@ -883,8 +861,7 @@ Device::create(CALuint ordinal, CALuint numOfDevices)
// Fill the device info structure
fillDeviceInfo(getAttribs(), getMemInfo(),
static_cast<size_t>(getMaxTextureSize()),
engines().numComputeRings(), heap()->isVirtual()
);
engines().numComputeRings());
if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) {
if (NULL == hsaCompiler_) {
@@ -955,7 +932,7 @@ Device::initializeHeapResources()
}
// Complete initialization of the heap and other buffers
if ((heap_ == NULL) || !heap_->create(heapSize_, settings().remoteAlloc_)) {
if (!heap_.create(*this)) {
LogError("Failed GPU heap creation");
return false;
}
@@ -987,7 +964,7 @@ Device::initializeHeapResources()
type = Resource::RemoteUSWC;
}
xferWrite_ = new XferBuffers(*this, type,
amd::alignUp(settings().stagedXferSize_, heap()->granularityB()));
amd::alignUp(settings().stagedXferSize_, 4 * Ki));
if ((xferWrite_ == NULL) || !xferWrite_->create()) {
LogError("Couldn't allocate transfer buffer objects for read");
return false;
@@ -997,7 +974,7 @@ Device::initializeHeapResources()
// Initialize staged read buffers
if (settings().stagedXferRead_) {
xferRead_ = new XferBuffers(*this, Resource::Remote,
amd::alignUp(settings().stagedXferSize_, heap()->granularityB()));
amd::alignUp(settings().stagedXferSize_, 4 * Ki));
if ((xferRead_ == NULL) || !xferRead_->create()) {
LogError("Couldn't allocate transfer buffer objects for write");
return false;
@@ -1086,52 +1063,6 @@ Device::createVirtualDevice(
}
}
bool
Device::reallocHeap(size_t size, bool remoteAlloc)
{
size_t heapSize = heapSize_ + ((size != 0) ?
amd::alignUp(size, settings().heapSizeGrowth_) : 0);
Heap* oldHeap = heap_;
// Maximum heap limit size = reported size + internal memory
size_t maxHeapLimit = static_cast<size_t>(info().globalMemSize_) +
// an extra 10MB for the alignments of allocations,
// since the conformance test doesn't expect any
10 * Mi;
if ((settings().heapSizeGrowth_ == 0) ||
// Allow the heap growth up to the global memory limit
(heapSize_ + size > maxHeapLimit)) {
return false;
}
heapSize = std::min(maxHeapLimit, heapSize);
heap_ = new Heap(*this);
// Make sure we have allocated a new global heap
if (NULL == heap_) {
heap_ = oldHeap;
return false;
}
if (!heap_->create(heapSize, remoteAlloc)) {
delete heap_;
heap_ = oldHeap;
return false;
}
// Copy the old heap to the new one
if (!oldHeap->copyTo(heap_)) {
delete heap_;
heap_ = oldHeap;
return false;
}
delete oldHeap;
heapSize_ = heapSize;
return true;
}
device::Program*
Device::createProgram(int oclVer)
{
@@ -1288,65 +1219,6 @@ Device::tearDown()
}
}
//! Allocates a heap block of the requested size from the global heap.
//! If the first attempt fails and the heap is not virtual, all virtual GPUs
//! are stalled, the heap is reallocated with extra space and the allocation
//! is retried. Returns NULL if no block could be allocated.
//! @note This function must be lock protected from a caller
HeapBlock*
Device::allocHeapBlock(size_t size) const
{
HeapBlock* hb = NULL;
// Allocate the underlying heap block
hb = heap_->alloc(size);
// Virtual heap should never fail allocation
if ((hb == NULL) && (!heap_->isVirtual())) {
// Queues can't process commands,
// while the global heap reallocation occurs.
// So stall all queues and then reallocate the global heap
ScopedLockVgpus lock(*this);
// Wait for idle
for (uint idx = 0; idx < vgpus().size(); ++idx) {
vgpus()[idx]->waitAllEngines();
}
// Account for memory alignment of the new allocation
size_t extraSpace = heap_->granularityB();
if (size >= heap_->freeSpace()) {
// Required extra space = requested size - free space
extraSpace += size - heap_->freeSpace();
}
//! @note the const cast here looks bad, but the device object
// is lock protected above. The rest of the code
// doesn't change the device object.
// So the const methods can be safely used everywhere else.
// In general we should avoid changing the device object after initialization
// Try to reallocate the heap with the same memory type
if (const_cast<Device*>(this)->reallocHeap(extraSpace, settings().remoteAlloc_)) {
hb = heap_->alloc(size);
}
if (hb == NULL) {
// Use reversed memory type as a temporary storage
bool remoteAlloc = settings().remoteAlloc_ ^ true;
// Try to reallocate the heap
if (const_cast<Device*>(this)->reallocHeap(extraSpace, remoteAlloc)) {
// Back to the default location of the global heap
remoteAlloc ^= true;
// A failure here is only logged: the heap stays in the reversed memory type
if (!const_cast<Device*>(this)->reallocHeap(0, remoteAlloc)) {
LogWarning("New memory type for the \
global heap after reallocation!");
}
hb = heap_->alloc(size);
}
}
}
return hb;
}
gpu::Memory*
Device::getGpuMemory(amd::Memory* mem) const
{
@@ -1392,99 +1264,20 @@ Device::createScratchBuffer(size_t size) const
{
Memory* gpuMemory = NULL;
// Use virtual heap allocation
if (heap()->isVirtual()) {
// Create a memory object
gpuMemory = new gpu::Memory(*this, size);
if (NULL == gpuMemory || !gpuMemory->create(Resource::Local)) {
delete gpuMemory;
gpuMemory = NULL;
}
}
else {
// We have to lock the heap block allocation,
// so possible reallocation won't occur twice or
// another thread could destroy a heap block,
// while we didn't finish allocation
amd::ScopedLock k(lockAsyncOps());
HeapBlock* hb = allocHeapBlock(size);
if (hb != NULL) {
// wrap it
gpuMemory = new gpu::Memory(*this, *hb);
// Create resource
if (NULL != gpuMemory) {
Resource::ViewParams params;
params.offset_ = hb->offset_;
params.size_ = hb->size_;
params.resource_ = &(globalMem());
params.memory_ = NULL;
if (!gpuMemory->create(Resource::View, &params)) {
delete gpuMemory;
gpuMemory = NULL;
}
}
}
}
return gpuMemory;
}
gpu::Memory*
Device::createBufferFromHeap(amd::Memory& owner) const
{
size_t size = owner.getSize();
gpu::Memory* gpuMemory;
// We have to lock the heap block allocation,
// so possible reallocation won't occur twice or
// another thread could destroy a heap block,
// while we didn't finish allocation
amd::ScopedLock k(lockAsyncOps());
HeapBlock* hb = allocHeapBlock(size);
if (hb == NULL) {
LogError("We don't have enough video memory!");
return NULL;
}
// Create a memory object
gpuMemory = new gpu::Memory(*this, owner, hb);
if (NULL == gpuMemory) {
hb->setMemory(NULL);
hb->free();
return NULL;
}
Resource::ViewParams params;
params.owner_ = &owner;
params.offset_ = hb->offset_;
params.size_ = hb->size_;
params.resource_ = &(globalMem());
params.memory_ = NULL;
if (!gpuMemory->create(Resource::View, &params)) {
gpuMemory = new gpu::Memory(*this, size);
if (NULL == gpuMemory || !gpuMemory->create(Resource::Local)) {
delete gpuMemory;
return NULL;
gpuMemory = NULL;
}
// Check if owner is interop memory
if (owner.isInterop()) {
if (!gpuMemory->createInterop(Memory::InteropHwEmulation)) {
LogError("HW interop creation failed!");
delete gpuMemory;
return NULL;
}
}
return gpuMemory;
}
gpu::Memory*
Device::createBuffer(
amd::Memory& owner,
bool directAccess,
bool bufferAlloc) const
bool directAccess) const
{
size_t size = owner.getSize();
gpu::Memory* gpuMemory;
@@ -1504,39 +1297,7 @@ Device::createBuffer(
return NULL;
}
if (!heap()->isVirtual()) {
bool uhpAlloc =
(owner.parent()->getMemFlags() & CL_MEM_USE_HOST_PTR) ? true : false;
if (owner.parent()->getType() != CL_MEM_OBJECT_IMAGE1D_BUFFER) {
//! \note This extra line is necessary to make sure that subbuffer
//! allocation is a synch operation,
//! due to a possible realloc of heap(no VM) or parent(UHP)
amd::ScopedLock k(lockAsyncOps());
//! @note: For now make sure the parent is allocated in the global heap
//! or if it's the UHP optimization for prepinned memory
if (((gpuParent->hb() == NULL) || uhpAlloc) &&
!owner.parent()->reallocedDeviceMemory(this)) {
if (reallocMemory(*owner.parent())) {
gpuParent = getGpuMemory(owner.parent());
}
else {
LogError("Can't reallocate the owner object for subbuffer allocation");
return NULL;
}
}
return gpuParent->createBufferView(owner);
}
else {
gpuParent = getGpuMemory(owner.parent()->parent());
return gpuParent->createBufferView(*owner.parent()->parent());
}
}
else {
return gpuParent->createBufferView(owner);
}
return gpuParent->createBufferView(owner);
}
Resource::MemoryType type = (owner.forceSysMemAlloc() || (owner.getMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) ?
@@ -1550,138 +1311,123 @@ Device::createBuffer(
}
// Use direct access if it's possible
if (bufferAlloc || (type == Resource::Remote)) {
bool forceHeapAlloc = false;
bool remoteAlloc = false;
// Internal means VirtualDevice!=NULL
bool internalAlloc = ((owner.getMemFlags() & CL_MEM_USE_HOST_PTR) &&
(owner.getVirtualDevice() != NULL)) ? true : false;
bool remoteAlloc = false;
// Internal means VirtualDevice!=NULL
bool internalAlloc = ((owner.getMemFlags() & CL_MEM_USE_HOST_PTR) &&
(owner.getVirtualDevice() != NULL)) ? true : false;
// Create a memory object
gpuMemory = new gpu::Buffer(*this, owner, owner.getSize());
if (NULL == gpuMemory) {
return NULL;
}
// Create a memory object
gpuMemory = new gpu::Buffer(*this, owner, owner.getSize());
if (NULL == gpuMemory) {
return NULL;
}
// Check if owner is interop memory
if (owner.isInterop()) {
result = gpuMemory->createInterop(Memory::InteropDirectAccess);
}
else if (owner.getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
// Attempt to allocate from persistent heap
result = gpuMemory->create(Resource::Persistent);
}
else if (directAccess || (type == Resource::Remote)) {
// Check for system memory allocations
if ((owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR))
|| (settings().remoteAlloc_)) {
// Allocate remote memory if AHP allocation and context has just 1 device
if ((owner.getMemFlags() & CL_MEM_ALLOC_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
if (owner.getMemFlags() & (CL_MEM_READ_ONLY |
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
// GPU will be reading from this host memory buffer,
// so assume Host write into it
type = Resource::RemoteUSWC;
remoteAlloc = true;
}
// Check if owner is interop memory
if (owner.isInterop()) {
result = gpuMemory->createInterop(Memory::InteropDirectAccess);
}
else if (owner.getMemFlags() & CL_MEM_USE_PERSISTENT_MEM_AMD) {
// Attempt to allocate from persistent heap
result = gpuMemory->create(Resource::Persistent);
}
else if (directAccess || (type == Resource::Remote)) {
// Check for system memory allocations
if ((owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR))
|| (settings().remoteAlloc_)) {
// Allocate remote memory if AHP allocation and context has just 1 device
if ((owner.getMemFlags() & CL_MEM_ALLOC_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
if (owner.getMemFlags() & (CL_MEM_READ_ONLY |
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) {
// GPU will be reading from this host memory buffer,
// so assume Host write into it
type = Resource::RemoteUSWC;
remoteAlloc = true;
}
// Make sure owner has a valid hostmem pointer and it's not COPY
if (!remoteAlloc && (owner.getHostMem() != NULL)) {
Resource::PinnedParams params;
params.owner_ = &owner;
params.gpu_ =
reinterpret_cast<VirtualGPU*>(owner.getVirtualDevice());
}
// Make sure owner has a valid hostmem pointer and it's not COPY
if (!remoteAlloc && (owner.getHostMem() != NULL)) {
Resource::PinnedParams params;
params.owner_ = &owner;
params.gpu_ =
reinterpret_cast<VirtualGPU*>(owner.getVirtualDevice());
params.hostMemRef_ = owner.getHostMemRef();
params.size_ = owner.getHostMemRef()->size();
if (0 == params.size_) {
params.size_ = owner.getSize();
}
// Create memory object
result = gpuMemory->create(Resource::Pinned, &params);
params.hostMemRef_ = owner.getHostMemRef();
params.size_ = owner.getHostMemRef()->size();
if (0 == params.size_) {
params.size_ = owner.getSize();
}
// Create memory object
result = gpuMemory->create(Resource::Pinned, &params);
// If direct access failed
if (!result) {
// and VM off, then force a heap allocation
if (!heap()->isVirtual()) {
// Internal pinning doesn't need a heap allocation
if (!internalAlloc) {
forceHeapAlloc = true;
}
}
// Don't use cached allocation
// if size is biger than max single alloc
if (owner.getSize() > info().maxMemAllocSize_) {
delete gpuMemory;
return NULL;
}
// If direct access failed
if (!result) {
// Don't use cached allocation
// if size is biger than max single alloc
if (owner.getSize() > info().maxMemAllocSize_) {
delete gpuMemory;
return NULL;
}
}
}
}
}
if (!result && !forceHeapAlloc &&
// Make sure it's not internal alloc
!internalAlloc) {
Resource::CreateParams params;
params.owner_ = &owner;
params.gpu_ = static_cast<VirtualGPU*>(owner.getVirtualDevice());
if (!result &&
// Make sure it's not internal alloc
!internalAlloc) {
Resource::CreateParams params;
params.owner_ = &owner;
params.gpu_ = static_cast<VirtualGPU*>(owner.getVirtualDevice());
// Create memory object
result = gpuMemory->create(type, &params);
// Create memory object
result = gpuMemory->create(type, &params);
// If allocation was successful
if (result) {
// Initialize if the memory is a pipe object
if (owner.getType() == CL_MEM_OBJECT_PIPE) {
// Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
// Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
size_t pipeInit[3] = {0 , 0, owner.asPipe()->getMaxNumPackets()};
gpuMemory->writeRawData(*xferQueue_, sizeof(pipeInit), pipeInit, true);
// If allocation was successful
if (result) {
// Initialize if the memory is a pipe object
if (owner.getType() == CL_MEM_OBJECT_PIPE) {
// Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
// Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
size_t pipeInit[3] = {0 , 0, owner.asPipe()->getMaxNumPackets()};
gpuMemory->writeRawData(*xferQueue_, sizeof(pipeInit), pipeInit, true);
}
// If memory has direct access from host, then get CPU address
if (gpuMemory->isHostMemDirectAccess() &&
(type != Resource::ExternalPhysical)) {
void* address = gpuMemory->map(NULL);
if (address != NULL) {
// Copy saved memory
if (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) {
memcpy(address, owner.getHostMem(), owner.getSize());
}
// It should be safe to change the host memory pointer,
// because it's lock protected from the upper caller
owner.setHostMem(address);
}
// If memory has direct access from host, then get CPU address
if (gpuMemory->isHostMemDirectAccess() &&
(type != Resource::ExternalPhysical)) {
void* address = gpuMemory->map(NULL);
if (address != NULL) {
// Copy saved memory
if (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) {
memcpy(address, owner.getHostMem(), owner.getSize());
}
// It should be safe to change the host memory pointer,
// because it's lock protected from the upper caller
owner.setHostMem(address);
}
else {
result = false;
}
}
// An optimization for CHP. Copy memory and destroy sysmem allocation
else if ((gpuMemory->memoryType() != Resource::Pinned) &&
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
amd::Coord3D origin(0, 0, 0);
amd::Coord3D region(owner.getSize());
static const bool Entire = true;
if (xferMgr().writeBuffer(owner.getHostMem(),
*gpuMemory, origin, region, Entire)) {
// Clear CHP memory
owner.setHostMem(NULL);
}
else {
result = false;
}
}
// An optimization for CHP. Copy memory and destroy sysmem allocation
else if ((gpuMemory->memoryType() != Resource::Pinned) &&
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
amd::Coord3D origin(0, 0, 0);
amd::Coord3D region(owner.getSize());
static const bool Entire = true;
if (xferMgr().writeBuffer(owner.getHostMem(),
*gpuMemory, origin, region, Entire)) {
// Clear CHP memory
owner.setHostMem(NULL);
}
}
}
if (!result && !forceHeapAlloc) {
delete gpuMemory;
return NULL;
}
}
if (!result) {
assert(!heap()->isVirtual() && "Can't have static heap allocation with VM");
gpuMemory = createBufferFromHeap(owner);
delete gpuMemory;
return NULL;
}
return gpuMemory;
@@ -1703,10 +1449,10 @@ Device::createImage(amd::Memory& owner, bool directAccess) const
}
// Create a view on the specified device
gpuImage = (gpu::Memory*)createView(owner, *devParent);
if (heap()->isVirtual() && (NULL != gpuImage) && (gpuImage->owner() != NULL)) {
if ((NULL != gpuImage) && (gpuImage->owner() != NULL)) {
gpuImage->owner()->setHostMem((address)(owner.parent()->getHostMem()) + gpuImage->owner()->getOrigin());
}
return gpuImage ;
return gpuImage;
}
gpuImage = new gpu::Image(*this, owner,
@@ -1778,11 +1524,11 @@ Device::createImage(amd::Memory& owner, bool directAccess) const
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
// Ignore copy for image1D_buffer, since it was already done for buffer
if (heap()->isVirtual() && imageBuffer) {
if (imageBuffer) {
// Clear CHP memory
owner.setHostMem(NULL);
}
else if (!imageBuffer) {
else {
amd::Coord3D origin(0, 0, 0);
static const bool Entire = true;
if (xferMgr().writeImage(owner.getHostMem(),
@@ -1809,25 +1555,12 @@ Device::createMemory(
amd::Memory& owner) const
{
bool directAccess = false;
bool bufferAlloc = false;
gpu::Memory* memory = NULL;
if (heap()->isVirtual()) {
bufferAlloc = true;
}
//!@todo Remove this code when VM is always on.
// Use zero-copy transfers for sysmem allocations or persistent memory
else {
if (owner.getMemFlags() & (CL_MEM_ALLOC_HOST_PTR |
CL_MEM_USE_HOST_PTR)) {
bufferAlloc = true;
}
}
if (owner.asBuffer()) {
directAccess = (settings().hostMemDirectAccess_ & Settings::HostMemBuffer)
? true : false;
memory = createBuffer(owner, directAccess, bufferAlloc);
memory = createBuffer(owner, directAccess);
}
else if (owner.asImage()) {
directAccess = (settings().hostMemDirectAccess_ & Settings::HostMemImage)
@@ -1878,7 +1611,6 @@ bool
Device::reallocMemory(amd::Memory& owner) const
{
bool directAccess = false;
bool bufferAlloc = heap()->isVirtual();
// For now we have to serialize reallocation code
amd::ScopedLock lk(*lockAsyncOps_);
@@ -1889,35 +1621,18 @@ Device::reallocMemory(amd::Memory& owner) const
if (gpuMemory == NULL) {
return false;
}
if (gpuMemory->hb() != NULL) {
if (gpuMemory->pinOffset() == 0) {
return true;
}
if (bufferAlloc) {
if (gpuMemory->pinOffset() == 0) {
return true;
}
else if (NULL != owner.parent()) {
if (!reallocMemory(*owner.parent())) {
return false;
}
else if (NULL != owner.parent()) {
if (!reallocMemory(*owner.parent())) {
return false;
}
}
if (owner.asBuffer()) {
// Disable remote allocation if no VM
if ((gpuMemory != NULL) &&
((gpuMemory->memoryType() == Resource::Remote) ||
(gpuMemory->memoryType() == Resource::RemoteUSWC)) && !bufferAlloc) {
// Make sure we don't have a stale memory in VA cache before reallocation
// of system memory.
// \note: the app must unmap() memory before kernel launch
removeVACache(gpuMemory);
static const bool forceAllocHostMem = true;
static const bool forceCopy = true;
owner.allocHostMemory(owner.getHostMem(), forceAllocHostMem, forceCopy);
}
gpuMemory = createBuffer(owner, directAccess, bufferAlloc);
gpuMemory = createBuffer(owner, directAccess);
}
else if (owner.asImage()) {
return true;
@@ -2113,24 +1828,18 @@ Device::globalFreeMemory(size_t* freeMemory) const
if (!(const_cast<Device*>(this)->initializeHeapResources())) {
return false;
}
if (heap()->isVirtual()) {
gslMemInfo memInfo = {0};
gslCtx()->getMemInfo(&memInfo, GSL_MEMINFO_BASIC);
// Fill free memory info
freeMemory[TotalFreeMemory] = (memInfo.cardMemAvailableBytes +
memInfo.cardExtMemAvailableBytes) / Ki;
freeMemory[LargestFreeBlock] = std::max(memInfo.cardLargestFreeBlockBytes,
memInfo.cardExtLargestFreeBlockBytes) / Ki;
if (settings().apuSystem_) {
freeMemory[TotalFreeMemory] += memInfo.agpMemAvailableBytes / Ki;
freeMemory[LargestFreeBlock] += memInfo.agpLargestFreeBlockBytes / Ki;
}
}
else {
freeMemory[TotalFreeMemory] = static_cast<size_t>((info().globalMemSize_ -
static_cast<cl_ulong>(heapSize_) + heap()->freeSpace()) / Ki);
freeMemory[LargestFreeBlock] = freeMemory[TotalFreeMemory];
gslMemInfo memInfo = {0};
gslCtx()->getMemInfo(&memInfo, GSL_MEMINFO_BASIC);
// Fill free memory info
freeMemory[TotalFreeMemory] = (memInfo.cardMemAvailableBytes +
memInfo.cardExtMemAvailableBytes) / Ki;
freeMemory[LargestFreeBlock] = std::max(memInfo.cardLargestFreeBlockBytes,
memInfo.cardExtLargestFreeBlockBytes) / Ki;
if (settings().apuSystem_) {
freeMemory[TotalFreeMemory] += memInfo.agpMemAvailableBytes / Ki;
freeMemory[LargestFreeBlock] += memInfo.agpLargestFreeBlockBytes / Ki;
}
return true;
+31 -24
查看文件
@@ -125,8 +125,7 @@ protected:
const CALdeviceattribs& calAttr, //!< CAL device attributes info
const gslMemInfo& memInfo, //!< GSL mem info
size_t maxTextureSize, //!< Maximum texture size supported in HW
uint numComputeRings, //!< Number of compute rings
bool isVirtualMode //!< Device is in virtual mode
uint numComputeRings //!< Number of compute rings
);
};
@@ -184,6 +183,32 @@ private:
class Device : public NullDevice, public CALGSLDevice
{
public:
//! \brief Lightweight descriptor of the device global heap.
//!
//! Holds a pointer to the GPU resource that backs the heap and the heap's
//! base virtual address. Both members start out as NULL/0 in the constructor.
class Heap : public amd::EmbeddedObject
{
public:
//! The size of a heap element in bytes
static const size_t ElementSize = 4;
//! The surface format of a single heap element
static const cmSurfFmt ElementType = CM_SURF_FMT_R32I;
//! Constructs an empty descriptor (no resource, base address 0)
Heap(): resource_(NULL), baseAddress_(0) {}
//! Creates the heap for the given device.
//! NOTE(review): the definition is not visible here; presumably it
//! allocates resource_ and fills in baseAddress_ - confirm in gpudevice.cpp
bool create(
Device& device //!< GPU device object
);
//! Gets the GPU resource associated with the global heap.
//! Dereferences resource_ unconditionally, so resource_ must be non-NULL.
const Resource& resource() const { return *resource_; }
//! Returns the base virtual address of the heap
uint64_t baseAddress() const { return baseAddress_; }
protected:
Resource* resource_; //!< GPU resource referencing the heap memory
uint64_t baseAddress_; //!< Virtual heap base address
};
//! Locks any access to the virtual GPUs
class ScopedLockVgpus : public amd::StackObject {
public:
@@ -377,12 +402,6 @@ public:
//! Destructor for the physical GPU device
virtual ~Device();
//! Reallocates current global heap
bool reallocHeap(
size_t size, //!< requested size for reallocation
bool remoteAlloc //!< allocate the new heap in remote memory
);
//! Instantiate a new virtual device
device::VirtualDevice* createVirtualDevice(
amd::CommandQueue* queue = NULL
@@ -442,15 +461,10 @@ public:
) const;
//! Gets the GPU resource associated with the global heap
const Resource& globalMem() const { return heap_->resource(); }
const Resource& globalMem() const { return heap_.resource(); }
//! Gets the global heap object
const Heap* heap() const { return heap_; }
//! Allocates a heap block from the global heap
HeapBlock* allocHeapBlock(
size_t size //!< The heap block size for allocation
) const;
const Heap& heap() const { return heap_; }
//! Gets the memory object for the dummy page
amd::Memory* dummyPage() const { return dummyPage_; }
@@ -566,16 +580,10 @@ private:
//! Sends the stall command to all queues
bool stallQueues();
//! Buffer allocation from static heap (no VM mode only)
gpu::Memory* createBufferFromHeap(
amd::Memory& owner //!< Abstraction layer memory object
) const;
//! Buffer allocation
gpu::Memory* createBuffer(
amd::Memory& owner, //!< Abstraction layer memory object
bool directAccess, //!< Use direct host memory access
bool bufferAlloc //!< If TRUE, then don't use heap
bool directAccess //!< Use direct host memory access
) const;
//! Image allocation
@@ -591,8 +599,7 @@ private:
);
amd::Context* context_; //!< A dummy context for internal allocations
size_t heapSize_; //!< The global heap size
Heap* heap_; //!< GPU heap manager
Heap heap_; //!< GPU global heap
amd::Memory* dummyPage_; //!< A dummy page for NULL pointer
amd::Monitor* lockAsyncOps_; //!< Lock to serialise all async ops on this device
-536
查看文件
@@ -1,536 +0,0 @@
//! Implementation of GPU device memory management
#include "top.hpp"
#include "thread/thread.hpp"
#include "thread/monitor.hpp"
#include "device/device.hpp"
#include "device/gpu/gpuheap.hpp"
#include "device/gpu/gpudevice.hpp"
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
//! Turn this on to enable sanity checks before and after every heap operation.
#if DEBUG
#define EXTRA_HEAP_CHECKS 1
#endif // DEBUG
namespace gpu {
//! Constructs an empty, non-virtual GPU heap bound to \a device.
//! No GPU resource and no block lists exist until create() is called.
Heap::Heap(Device& device)
    : resource_(NULL),
      freeList_(NULL),
      busyList_(NULL),
      freeSize_(0),
      device_(device),
      granularity_(Heap::MinGranularity),
      lock_("GPU heap lock", true),
      virtualMode_(false),
      baseAddress_(0)
{
}
//! Returns the allocation page size in bytes (page size in elements
//! multiplied by the size of one element).
size_t
Heap::granularityB() const
{
    const size_t pageBytes = Heap::ElementSize * granularity_;
    return pageBytes;
}
//! Creates the GPU resource backing the heap and the initial free list.
//!
//! The requested size is rounded up to a whole number of allocation pages and
//! that rounded size is used consistently for the resource, the initial free
//! block and the free-space counter. Previously the free block was rounded up
//! while the resource and freeSize_ used the unrounded size, so for a size
//! that is not a multiple of granularityB() the free list extended past the
//! end of the resource and freeSize_ could wrap below zero in alloc().
//! For page-aligned sizes the behaviour is unchanged.
//!
//! \param totalSize   requested heap size in bytes
//! \param remoteAlloc allocate in remote (USWC) memory instead of local memory
//! \return true on success, false if any allocation failed
bool
Heap::create(size_t totalSize, bool remoteAlloc)
{
    // Size in heap elements, rounded up
    const size_t sizeInElements =
        (totalSize + Heap::ElementSize - 1) / Heap::ElementSize;

    // Number of allocation pages needed to cover the request
    const size_t npages =
        amd::alignUp(sizeInElements, granularity_) / granularity_;

    // Page-rounded heap size, in elements and in bytes
    const size_t heapElements = npages * granularity_;
    const size_t heapBytes = npages * granularityB();

    // Keep the counter in step with the free list created below
    freeSize_ = heapBytes;

    // Create a new GPU resource covering every page of the heap
    resource_ = new Resource(device_, heapElements, Heap::ElementType);
    if (resource_ == NULL) {
        return false;
    }

    const Resource::MemoryType memType =
        (remoteAlloc) ? Resource::RemoteUSWC : Resource::Local;
    if (!resource_->create(memType, NULL, true)) {
        // resource_ stays owned by the heap and is released in ~Heap()
        return false;
    }

    // Set up initial free list: a single block spanning the whole heap
    freeList_ = new HeapBlock(this, heapBytes, 0, NULL, NULL);
    if (freeList_ == NULL) {
        return false;
    }

    guarantee(isSane());
    return true;
}
//! Destroys the heap: releases every busy block, deletes every free block
//! and finally deletes the backing GPU resource.
Heap::~Heap()
{
    amd::ScopedLock k(lock_);
    guarantee(isSane());

    // Busy blocks are released through HeapBlock::free(); the successor is
    // saved first because free() unlinks the block from the busy list.
    for (HeapBlock* blk = busyList_; blk != NULL; ) {
        HeapBlock* following = blk->next_;
        blk->free();
        blk = following;
    }

    // Everything left lives on the free list and is simply deleted
    for (HeapBlock* blk = freeList_; blk != NULL; ) {
        HeapBlock* following = blk->next_;
        delete blk;
        blk = following;
    }

    // Release resource
    delete resource_;
}
//! Allocates a block of at least \a size bytes from the heap (best-fit).
//!
//! The size is rounded up to the page size (granularityB()). An exactly
//! matching free block is moved to the busy list as is; otherwise the
//! smallest larger free block is split and its tail is returned.
//! \return the allocated block, or NULL if no free block is large enough
HeapBlock*
Heap::alloc(size_t size)
{
    amd::ScopedLock k(lock_);
    guarantee(isSane());

    // Round size
    size = amd::alignUp(size, granularityB());

    // Scan the free list (currently best-fit)
    //! @todo:dgladdin: experiment with switching back to first-fit
    HeapBlock* best = NULL;
    for (HeapBlock* cur = freeList_; cur != NULL; cur = cur->next_) {
        if (cur->size_ == size) {
            // Perfect fit: no split needed, just move it to the busy list
            detachBlock(&freeList_, cur);
            cur->inUse_ = true;
            insertBlock(&busyList_, cur);
            guarantee(isSane());
            freeSize_ -= size;
            return cur;
        }
        if ((cur->size_ > size) &&
            ((best == NULL) || (cur->size_ < best->size_))) {
            best = cur;
        }
    }

    if (best == NULL) {
        // No free block available
        guarantee(isSane());
        return NULL;
    }

    // Split the candidate: the head stays on the free list,
    // the tail becomes the new busy block.
    HeapBlock* tail = splitBlock(best, size);
    tail->inUse_ = true;
    insertBlock(&busyList_, tail);
    guarantee(isSane());
    freeSize_ -= size;
    return tail;
}
//! Moves every busy block that has a memory object attached into \a heap.
//!
//! For each such block a block of the same size is allocated in the target
//! heap, the memory object is re-pointed at it, and the block's views are
//! reallocated with the resulting offset shift.
//! \return false as soon as an allocation or reallocation fails
bool
Heap::copyTo(Heap* heap)
{
    for (HeapBlock* src = busyList_; src != NULL; src = src->next_) {
        Memory* mem = src->getMemory();
        if (mem == NULL) {
            // Nothing attached to this block, nothing to move
            continue;
        }

        HeapBlock* dst = heap->alloc(src->size_);
        if (dst == NULL) {
            return false;
        }
        dst->setMemory(mem);

        // Views must drop their memory before the parent is reallocated
        src->destroyViewsMemory();
        if (!mem->reallocate(dst, &(heap->resource()))) {
            return false;
        }

        const size_t shift = static_cast<size_t>(dst->offset_ - src->offset_);
        if (!src->reallocateViews(dst, shift)) {
            return false;
        }
    }
    return true;
}
//! Returns a busy block to the heap.
//!
//! The block is unlinked from the busy list, accounted as free space and
//! merged into the free list. mergeBlock() may delete \a blk when it is
//! joined with its predecessor, so \a blk is not touched afterwards.
void
Heap::free(HeapBlock* blk)
{
    amd::ScopedLock k(lock_);
    guarantee(isSane());

    detachBlock(&busyList_, blk);
    freeSize_ += blk->size_;
    blk->inUse_ = false;
    mergeBlock(&freeList_, blk);

    guarantee(isSane());
}
//! Unlinks \a blk from the doubly-linked list whose head is \a *list.
//! The block's own next_/prev_ pointers are left untouched.
void
Heap::detachBlock(HeapBlock** list, HeapBlock* blk)
{
    // Sanity checks
    guarantee(isSane());

    HeapBlock* const before = blk->prev_;
    HeapBlock* const after = blk->next_;

    if (blk == *list) {
        *list = after;
    }
    if (before != NULL) {
        before->next_ = after;
    }
    if (after != NULL) {
        after->prev_ = before;
    }
    // no heap sanity check as blk is now floating
}
//! Links \a blk into the list headed by \a *head, positioned by offset_.
void
Heap::insertBlock(HeapBlock** head, HeapBlock* blk)
{
    HeapBlock* const first = *head;

    // Empty list: the block becomes its only element
    if (first == NULL) {
        blk->prev_ = NULL;
        blk->next_ = NULL;
        *head = blk;
        guarantee(isSane());
        return;
    }

    // Advance while the successor still starts before the new block
    HeapBlock* pos = first;
    while ((pos->next_ != NULL) && (pos->next_->offset_ < blk->offset_)) {
        pos = pos->next_;
    }

    // The new block goes in front of the current head
    if ((pos == first) && (first->offset_ >= blk->offset_)) {
        blk->prev_ = NULL;
        blk->next_ = first;
        first->prev_ = blk;
        *head = blk;
        guarantee(isSane());
        return;
    }

    // General case: link the block right after pos
    HeapBlock* const after = pos->next_;
    blk->next_ = after;
    blk->prev_ = pos;
    if (after != NULL) {
        after->prev_ = blk;
    }
    pos->next_ = blk;
    guarantee(isSane());
}
//! Cuts \a tailsize bytes off the end of the free block \a blk.
//!
//! \a blk shrinks in place and keeps its position in its list; the returned
//! block describes the cut-off tail and is not linked into any list yet.
HeapBlock*
Heap::splitBlock(HeapBlock* blk, size_t tailsize)
{
    // Sanity checks
    guarantee(isSane());
    guarantee(blk->size_ > tailsize && "block too small to split as requested");
    guarantee(!blk->inUse_ && "can't split in-use block");

    // Size that remains in the original block after the split
    const size_t headSize = blk->size_ - tailsize;

    // The tail starts right behind the remaining head
    HeapBlock* tail = new HeapBlock(blk->owner_, tailsize,
                                    blk->offset_ + headSize);

    // Resize the old block
    blk->size_ = headSize;

    // no heap sanity check here as the new block hasn't been plugged in yet
    return tail;
}
//! Absorbs \a second into \a first: the size of \a second is added to
//! \a first, \a second is unlinked and then deleted. The blocks must be
//! adjacent (\a second starts where \a first ends). Helper for mergeBlock().
static void
join2Blocks(HeapBlock* first, HeapBlock* second)
{
    // Sanity checks
    guarantee(first->size_ > 0 && "first block invalid");
    guarantee(!first->inUse_ && "can't join an in-use block");
    guarantee(second->size_ > 0 && "second block invalid");
    guarantee(first->offset_ + first->size_ == second->offset_);

    // Grow the first block over the second one
    first->size_ += second->size_;

    // Bypass the second block in the list
    HeapBlock* const after = second->next_;
    first->next_ = after;
    if (after != NULL) {
        after->prev_ = first;
    }

    delete second;
}
//! Inserts \a blk into the list and coalesces it with adjacent neighbours.
//! Must be called under a lock; cannot be used on in-use blocks or blocks
//! with an associated resource alias.
//! \note When \a blk is merged into its predecessor it is deleted, so the
//!       caller must not use \a blk after this call.
void
Heap::mergeBlock(HeapBlock** head, HeapBlock* blk)
{
    insertBlock(head, blk);

    // Swallow the successor if it starts exactly where blk ends
    HeapBlock* const after = blk->next_;
    if ((after != NULL) && (blk->offset_ + blk->size_ == after->offset_)) {
        join2Blocks(blk, after);
    }

    // Let the predecessor swallow blk if blk starts where it ends
    HeapBlock* const before = blk->prev_;
    if ((before != NULL) &&
        (before->offset_ + before->size_ == blk->offset_)) {
        join2Blocks(before, blk);
    }

    guarantee(isSane());
}
//! Checks the invariants common to free and busy blocks: the block has an
//! owner and both neighbours (if any) link back to it.
//! Helper function for Heap::isSane().
static bool
isBlockSane(HeapBlock* b)
{
    if (b->owner_ == NULL) {
        return false;
    }
    if ((b->next_ != NULL) && (b->next_->prev_ != b)) {
        return false;
    }
    if ((b->prev_ != NULL) && (b->prev_->next_ != b)) {
        return false;
    }
    return true;
}
//! A free block is sane when it is correctly linked and not marked in use.
//! Helper function for Heap::isSane().
static bool
isFreeBlockSane(HeapBlock* b)
{
    return isBlockSane(b) && !b->inUse_;
}
//! A busy block is sane when it is correctly linked and marked in use.
//! Helper function for Heap::isSane().
static bool
isBusyBlockSane(HeapBlock* b)
{
    return isBlockSane(b) && b->inUse_;
}
//! Sanity check for the heap.
//!
//! With EXTRA_HEAP_CHECKS enabled, walks the free and busy lists in parallel
//! and verifies that together they tile the heap contiguously from offset 0
//! and that every block passes its per-block check. Without it, always
//! reports the heap as sane.
bool
Heap::isSane() const
{
#if EXTRA_HEAP_CHECKS
    HeapBlock* freeIt = freeList_;   // Free list position
    HeapBlock* busyIt = busyList_;   // Busy list position
    size_t expected = 0;             // Offset the next block must start at

    // Two empty lists (e.g. heap allocation failed) skip the loop entirely
    while ((freeIt != NULL) || (busyIt != NULL)) {
        if ((freeIt != NULL) && (freeIt->offset_ == expected)) {
            if (!isFreeBlockSane(freeIt)) {
                return false;
            }
            expected += freeIt->size_;
            freeIt = freeIt->next_;
            continue;
        }
        if ((busyIt != NULL) && (busyIt->offset_ == expected)) {
            if (!isBusyBlockSane(busyIt)) {
                return false;
            }
            expected += busyIt->size_;
            busyIt = busyIt->next_;
            continue;
        }
        // Neither list continues at the expected offset: gap or overlap
        return false;
    }
#endif // EXTRA_HEAP_CHECKS
    // If we got this far, everything is (probably) OK
    return true;
}
void
HeapBlock::destroyViewsMemory()
{
if ((parent_ != NULL) && (0 == views_.size())) {
memory_->free();
}
else if (views_.size() != 0) {
std::list<HeapBlock*>::const_iterator it;
for (it = views_.begin(); it != views_.end(); ++it) {
(*it)->destroyViewsMemory();
}
}
}
//! Re-creates all views of this block after the block has been moved.
//!
//! Every view gets its offset shifted by \a shift, is registered with
//! \a parent (when \a parent is a different block), has its memory object
//! reallocated against the parent's memory and then processes its own views
//! recursively.
//!
//! The result of Memory::reallocate() used to be ignored here, so a failed
//! view reallocation was reported as success. It is now propagated in the
//! same way Heap::copyTo() and the recursive call already report failures.
//!
//! \param parent the heap block that owns the views from now on
//! \param shift  byte shift applied to every view offset
//! \return false if any view (or nested view) failed to reallocate
bool
HeapBlock::reallocateViews(HeapBlock* parent, size_t shift)
{
    if (views_.size() != 0) {
        const bool newParent = (parent != this);
        std::list<HeapBlock*>::const_iterator it;

        // Loop through all views and reallocate them
        for (it = views_.begin(); it != views_.end(); ++it) {
            // Get the view HeapBlock
            HeapBlock* hb = (*it);

            // Readjust the offset
            hb->offset_ += shift;

            // Add to the list if we have a new parent
            if (newParent) {
                parent->addView(hb);
            }

            // Reallocate memory and report a failure to the caller
            if (!hb->memory_->reallocate(hb, parent->getMemory())) {
                return false;
            }

            // Process a view on view if available
            if (!hb->reallocateViews(hb, shift)) {
                return false;
            }
        }

        // Destroy old list: the views now belong to the new parent
        if (newParent) {
            views_.clear();
        }
    }
    return true;
}
//! Destructor. Frees the block if in use and does some final sanity checks.
//!
//! Two kinds of blocks are handled:
//!  - heap-owned blocks (owner_ != NULL): an in-use block is handed back to
//!    its heap first;
//!  - views (owner_ == NULL): the view is removed from its parent's view list
//!    under the memory-ops lock of the parent's owner.
//!
//! NOTE(review): for an in-use block owner_->free(this) ends up in
//! Heap::mergeBlock(), which can 'delete' the block when it is joined with
//! its predecessor, and otherwise leaves it linked in the free list. Directly
//! deleting an in-use block therefore looks unsafe; the visible callers use
//! HeapBlock::free() instead - confirm no caller deletes busy blocks.
HeapBlock::~HeapBlock()
{
if (NULL != owner_) {
// Heap-owned block: give it back to the heap if it is still allocated
if (inUse_) {
owner_->free(this);
}
}
else {
// View destruction
if (parent_ != NULL) {
// The parent must still have a memory object with a valid owner,
// because its lock protects the parent's view list
assert(((parent_->getMemory() != NULL) && (parent_->getMemory()->owner() != NULL)));
amd::ScopedLock lock(parent_->getMemory()->owner()->lockMemoryOps());
parent_->removeView(this);
}
}
// A zero size marks an already destroyed block (see below)
guarantee(size_ > 0 && "destructor called for zero-size heap block (destructor called twice?)");
size_ = 0; // Mark as invalid
// Views still attached at this point would be left with a dangling parent
if (views_.size() != 0) {
LogError("Can't destroy a resource if we still have views!");
}
}
//! Releases the block: a view (no owning heap) destroys itself, a heap-owned
//! block is returned to its heap.
void
HeapBlock::free()
{
    if (NULL == owner_) {
        // It's a view. Destroy the object
        delete this;
        return;
    }
    owner_->free(this);
}
//! Constructs a heap for \a device and marks it as operating in virtual mode.
VirtualHeap::VirtualHeap(Device& device) : Heap(device)
{
    // The base class starts out non-virtual; flip the mode flag here
    virtualMode_ = true;
}
//! Creates the zero-sized heap resource of type Resource::Heap.
//!
//! Neither \a totalSize nor \a remoteAlloc is used. Unless the device runs
//! in HSAIL mode, the surface address of the resource is recorded as the
//! heap base address.
//! \return false if the resource could not be allocated or created
bool
VirtualHeap::create(size_t totalSize, bool remoteAlloc)
{
    // Create a new GPU resource
    resource_ = new Resource(device_, 0, Heap::ElementType);
    if ((resource_ == NULL) || !resource_->create(Resource::Heap)) {
        return false;
    }

    const bool hsail = device_.settings().hsail_;
    if (!hsail) {
        baseAddress_ = resource_->gslResource()->getSurfaceAddress();
    }
    return true;
}
//! Destructor. VirtualHeap declares no data members of its own, so all
//! cleanup is done by the base class destructor.
VirtualHeap::~VirtualHeap()
{
}
//! Block allocation is not expected to be used on a virtual heap:
//! asserts and returns NULL.
HeapBlock*
VirtualHeap::alloc(size_t size)
{
assert(false && "Dead branch!");
return NULL;
}
//! Block release is not expected to be used on a virtual heap: asserts
//! and otherwise does nothing.
void
VirtualHeap::free(HeapBlock* blk)
{
assert(false && "Dead branch!");
}
//! Heap-to-heap copy is not expected to be used on a virtual heap:
//! asserts and reports failure.
bool
VirtualHeap::copyTo(Heap* heap)
{
assert(false && "Dead branch!");
return false;
}
//! Block-list sanity checking is not expected to be used on a virtual heap:
//! asserts and reports the heap as sane.
bool
VirtualHeap::isSane(void) const
{
assert(false && "Dead branch!");
return true;
}
} // namespace gpu
-225
查看文件
@@ -1,225 +0,0 @@
//! Declarations for GPU memory management
#ifndef GPUHEAP_HPP_
#define GPUHEAP_HPP_
#include "top.hpp"
#include "thread/atomic.hpp"
#include "device/gpu/gpudefs.hpp"
/*! \addtogroup GPU
* @{
*/
//! GPU Device Implementation
namespace gpu {
class Device;
class Heap;
class Resource;
class Memory;
class VirtualGPU;
//! @todo:dgladdin: The heap list should be singly-linked
//! \brief A block on the GPU heap.
//!
//! A block is either owned by a Heap (owner_ != NULL) and linked into that
//! heap's free or busy list, or it is a view (owner_ == NULL) that hangs off
//! a parent block through the parent's view list.
//!
//! Note that no code outside of the gpumemory.hpp/.cpp pair should touch this
//! class directly as it is not thread-safe. In general, this class should be
//! pretty much a struct and contain as little functionality as possible - just
//! a constructor, destructor.
//!
//! Any other methods - in particular, anything that talks to CAL - should be no
//! more than proxies for functionality implemented in Heap, as Heap is aware
//! of the lock state.
class HeapBlock : public amd::HeapObject
{
public:
//! Constructor. The block starts out not in use, without a parent and
//! without an associated memory object.
HeapBlock(
Heap* owner = NULL, //!< Owning heap, or NULL for a view
size_t size = 0, //!< Size of the block in bytes
size_t offset = 0, //!< Offset of the block in the heap
HeapBlock* next=NULL, //!< Next block on the list
HeapBlock* prev=NULL) //!< Previous block on the list
: owner_(owner)
, size_(size)
, offset_(offset)
, next_(next)
, prev_(prev)
, inUse_(false)
, parent_(NULL)
, memory_(NULL)
{}
//! Destructor does some sanity checks.
~HeapBlock();
//! Frees a heap block, returning its memory to the owning heap (proxy).
//! A view (no owning heap) deletes itself instead.
void free();
//! Sets the GPU memory object associated with the heap block
void setMemory(Memory* memory) { memory_ = memory; }
//! Gets the GPU memory object associated with the heap block
Memory* getMemory() const { return memory_; }
//! Adds a heapblock view to the list of views and makes this block its parent
void addView(HeapBlock* hb)
{ views_.push_back(hb); hb->parent_ = this; }
//! Removes a heapblock view from the list of views.
//! The parent_ pointer of the removed view is not reset.
void removeView(HeapBlock* hb) { views_.remove(hb); }
//! Frees the memory of all leaf views below this block
void destroyViewsMemory();
//! Re-creates all views after the block has been moved
bool reallocateViews(
HeapBlock* parent, //!< Parent heap block
size_t shift //!< The new HeapBlock shift
);
//! Gets the offset
size_t offset() const { return offset_; }
Heap* owner_; //!< Heap that owns this block
size_t size_; //!< Size of the block in bytes
size_t offset_; //!< Offset of this block in the heap
HeapBlock* next_; //!< Next block on the list, or NULL
HeapBlock* prev_; //!< Previous block on the list, or NULL
bool inUse_; //!< true if the block is in use
HeapBlock* parent_; //!< The parent heap block for a view
private:
//! Disable copy constructor
HeapBlock(const HeapBlock&);
//! Disable assignment
HeapBlock& operator=(const HeapBlock&);
Memory* memory_; //!< Memory object associated with the heap block
std::list<HeapBlock*> views_; //!< The list of all allocated views
};
//! \brief The GPU global heap (non-VM emulation).
//!
//! Manages one GPU resource that is carved into HeapBlock objects. Blocks are
//! kept in two doubly-linked lists, one for free and one for busy blocks;
//! alloc() and free() serialise access through lock_.
class Heap : public amd::HeapObject
{
public:
//! Minimal supported CAL granularity = 256 bytes / ElementSize
static const size_t MinGranularity = 64;
//! The size of a heap element in bytes
static const size_t ElementSize = 4;
//! The surface format of a single heap element
static const cmSurfFmt ElementType = CM_SURF_FMT_R32I;
//! Constructs an empty heap; create() must be called before use
Heap(
Device& device //!< GPU device object
);
//! Creates the backing GPU resource and the initial free list
virtual bool create(
size_t totalSize, //!< total size of the allocated heap (bytes)
bool remoteAlloc //!< allocate the heap in remote memory
);
//! Heap destructor
virtual ~Heap();
/*!
* \brief Allocates memory from a heap (best-fit).
* The size is rounded up to the heap page size (see granularityB()).
*
* \return A pointer to allocated heap block object,
* or NULL if no free block is large enough.
*/
virtual HeapBlock* alloc(
size_t size //! The allocation size
);
//! Release memory back to a heap. The block may be merged into a
//! neighbouring free block and deleted, so it must not be used afterwards.
virtual void free(HeapBlock* blk);
//! Copies this heap to another: moves every busy block with an attached
//! memory object into the given heap. Does not take lock_ of this heap.
virtual bool copyTo(Heap* heap);
//! Gets the GPU resource associated with the global heap
const Resource& resource() const { return *resource_; }
//! Read the page size (bytes)
size_t granularityB() const;
//! Read the total free space (bytes)
size_t freeSpace() const { return freeSize_; }
virtual bool isSane(void) const; //!< Checks heap sanity
//! Returns true if we have a virtual heap
bool isVirtual() const { return virtualMode_; }
//! Returns the base virtual address of the heap
uint64_t baseAddress() const { return baseAddress_; }
private:
//! Insert a block into a list. Must be called under a lock.
void insertBlock(HeapBlock** list, HeapBlock* node);
//! Merge a block into a list. Must be called under a lock.
void mergeBlock(HeapBlock** list, HeapBlock* node);
//! Remove a block from a list. Must be called under a lock.
void detachBlock(HeapBlock** list, HeapBlock* node);
//! Split a block into two pieces; returns the new, still unlinked tail block
HeapBlock* splitBlock(HeapBlock* node, size_t size);
protected:
Resource* resource_; //!< GPU resource referencing the heap memory
HeapBlock* freeList_; //!< Head block for free list
HeapBlock* busyList_; //!< Head block for busy list
size_t freeSize_; //!< total free size of the heap
Device& device_; //!< Device that owns this heap
size_t granularity_; //!< Size of an allocation page (in heap elements)
amd::Monitor lock_; //!< Lock to serialise heap accesses
bool virtualMode_; //!< Virtual mode
uint64_t baseAddress_; //!< Virtual heap base address
};
//! \brief Heap used when the device runs in virtual (VM) mode.
//!
//! Only create() does real work; the block management entry points
//! (alloc, free, copyTo, isSane) are dead branches that assert.
class VirtualHeap : public Heap
{
public:
//! Constructs the heap and marks it as virtual
VirtualHeap(
Device& device //!< GPU device object
);
//! Creates the heap resource; both parameters are ignored
virtual bool create(
size_t totalSize, //!< total size of the allocated heap (bytes)
bool remoteAlloc //!< allocate the heap in remote memory
);
//! Heap destructor
virtual ~VirtualHeap();
/*!
* \brief Not supported on a virtual heap (dead branch).
*
* \return Always NULL.
*/
virtual HeapBlock* alloc(
size_t size //! The allocation size
);
//! Not supported on a virtual heap (dead branch).
virtual void free(HeapBlock* blk);
//! Not supported on a virtual heap (dead branch); always returns false.
virtual bool copyTo(Heap* heap);
virtual bool isSane(void) const; //!< Dead branch; always returns true
};
} // namespace gpu
#endif // GPUHEAP_HPP_
+11 -41
查看文件
@@ -824,17 +824,6 @@ Kernel::create(
// Initialize the kernel parameters
bool result = initParameters();
if (!dev().heap()->isVirtual()) {
amd::option::Options *options = nullProg().getCompilerOptions();
// @todo Remove this. This is a hack for no VM mode
if (!options->oVariables->EnableDumpKernel) {
if (!name().compare(BlitName[KernelBlitManager::BlitCopyImageToBuffer]) ||
!name().compare(BlitName[KernelBlitManager::BlitCopyBufferToImage])) {
blitKernelHack_ = true;
}
}
}
// Wave limiter needs to be initialized after kernel metadata is parsed
// Since it depends on it.
waveLimiter_.enable();
@@ -855,7 +844,6 @@ Kernel::Kernel(
const Program& prog,
const InitData* initData)
: NullKernel(name, gpuDev, prog)
, blitKernelHack_(false)
, waveLimiter_(this)
{
hwPrivateSize_ = 0;
@@ -1603,10 +1591,6 @@ Kernel::debug(VirtualGPU& gpu) const
{
std::fstream stubWrite;
address src = NULL;
if (!dev().heap()->isVirtual()) {
src = reinterpret_cast<address>
(const_cast<Resource&>(dev().globalMem()).map(&gpu));
}
std::cerr << "--- " << name_ << " ---" << std::endl;
for (uint i = 0; i < arguments_.size(); ++i) {
@@ -1689,9 +1673,6 @@ Kernel::debug(VirtualGPU& gpu) const
stubWrite.close();
}
}
if (!dev().heap()->isVirtual()) {
const_cast<Resource&>(dev().globalMem()).unmap(&gpu);
}
}
bool
@@ -1824,18 +1805,10 @@ Kernel::setArgument(
type = ArgumentBuffer;
}
else {
if (blitKernelHack_) {
// Bind global buffer to UAV this buffer is bound to
if (!bindResource(gpu, *gpuMem, 0, GlobalBuffer, uavRaw_)) {
return false;
}
}
else {
// Bind global buffer to UAV this buffer is bound to
if (!bindResource(gpu, dev().globalMem(), 0,
GlobalBuffer, uavRaw_)) {
return false;
}
// Bind global buffer to UAV this buffer is bound to
if (!bindResource(gpu, dev().globalMem(), 0,
GlobalBuffer, uavRaw_)) {
return false;
}
}
@@ -1848,11 +1821,9 @@ Kernel::setArgument(
// Update offset only if we bind HeapBuffer or
// it's global address space in UAV setup on SI+
if (!blitKernelHack_) {
offset += gpuMem->hbOffset();
if (!forceZeroOffset) {
assert((offset != 0) && "Offset 0 with a real allocation!");
}
offset += gpuMem->hbOffset();
if (!forceZeroOffset) {
assert((offset != 0) && "Offset 0 with a real allocation!");
}
gpu.addVmMemory(gpuMem);
}
@@ -2253,10 +2224,9 @@ Kernel::bindResource(
gslMemObject gslMem = NULL;
// Use global address space on SI+ for UAV setup
if (((type == ArgumentBuffer) || (type == ArgumentCbID) ||
(type == ArgumentUavID) || (type == ArgumentPrintfID)) &&
!blitKernelHack_) {
gslMem = dev().heap()->resource().gslResource();
if ((type == ArgumentBuffer) || (type == ArgumentCbID) ||
(type == ArgumentUavID) || (type == ArgumentPrintfID)) {
gslMem = dev().heap().resource().gslResource();
}
else {
gslMem = resource.gslResource();
@@ -2803,7 +2773,7 @@ NullKernel::parseArguments(const std::string& metaData, uint* uavRefCount)
case KernelArg::PointerPrivate:
// Check if can't use a dedicated UAV,
// so realloc memory in the heap
arg->memory_.realloc_ = isRealloc();
arg->memory_.realloc_ = false;
arg->memory_.uavBuf_ = true;
break;
case KernelArg::PointerHwConst:
-9
查看文件
@@ -450,9 +450,6 @@ public:
uint instructionCnt() const { return instructionCnt_; }
protected:
//! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
virtual bool isRealloc() const { return false; }
/*! \brief Parses the metadata structure for the kernel,
* provided by the OpenCL compiler
*
@@ -673,9 +670,6 @@ protected:
*/
bool initConstBuffers();
//! Returns TRUE if memory should be reallocated, returns FALSE always for NullDevice
virtual bool isRealloc() const { return !dev().heap()->isVirtual(); }
private:
//! Disable copy constructor
Kernel(const Kernel&);
@@ -771,9 +765,6 @@ private:
uint hwPrivateSize_; //!< initial HW private size
uint hwLocalSize_; //!< initial HW local size
//! @todo remove the blit kernel hack
bool blitKernelHack_; //!< No VM hack for kernel blit
WaveLimiterManager waveLimiter_; //!< adaptively control number of waves
};
+9 -90
查看文件
@@ -30,39 +30,24 @@ namespace gpu {
Memory::Memory(
const Device& gpuDev,
amd::Memory& owner,
HeapBlock* hb,
size_t size)
: device::Memory(owner)
, Resource(gpuDev, ((hb) ? hb->size_ : size) / Heap::ElementSize, Heap::ElementType)
, hb_(hb)
, Resource(gpuDev, size / Device::Heap::ElementSize, Device::Heap::ElementType)
{
init();
if (NULL != hb_) hb_->setMemory(this);
if (owner.parent() != NULL) {
flags_ |= SubMemoryObject;
}
}
Memory::Memory(
const Device& gpuDev,
HeapBlock& hb)
: device::Memory(hb.size_)
, Resource(gpuDev, hb.size_ / Heap::ElementSize, Heap::ElementType)
, hb_(&hb)
{
init();
hb.setMemory(this);
}
Memory::Memory(
const Device& gpuDev,
size_t size)
: device::Memory(size)
, Resource(gpuDev,
amd::alignUp(size, Heap::ElementSize) / Heap::ElementSize, Heap::ElementType)
, hb_(NULL)
amd::alignUp(size, Device::Heap::ElementSize) /
Device::Heap::ElementSize, Device::Heap::ElementType)
{
init();
}
@@ -75,7 +60,6 @@ Memory::Memory(
)
: device::Memory(owner)
, Resource(gpuDev, width, format)
, hb_(NULL)
{
init();
@@ -92,7 +76,6 @@ Memory::Memory(
)
: device::Memory(size)
, Resource(gpuDev, width, format)
, hb_(NULL)
{
init();
}
@@ -110,7 +93,6 @@ Memory::Memory(
)
: device::Memory(owner)
, Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels)
, hb_(NULL)
{
init();
@@ -132,7 +114,6 @@ Memory::Memory(
)
: device::Memory(size)
, Resource(gpuDev, width, height, depth, format, chOrder, imageType, mipLevels)
, hb_(NULL)
{
init();
}
@@ -197,14 +178,9 @@ Memory::create(
break;
case Resource::Remote:
case Resource::RemoteUSWC:
// @todo Enable unconditional optimization for remote memory
if ((owner() != NULL &&
owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR) ||
(hb() == NULL)) {
if (!cal()->tiled_) {
// Marks memory object for direct GPU access to the host memory
flags_ |= HostMemoryDirectAccess;
}
if (!cal()->tiled_) {
// Marks memory object for direct GPU access to the host memory
flags_ |= HostMemoryDirectAccess;
}
break;
case Resource::View: {
@@ -481,8 +457,8 @@ Memory::createInterop(InteropType type)
else {
// Allocate Resource object for interop as buffer
interopMemory_ = new Memory(dev(), size(),
amd::alignUp(size(), Heap::ElementSize) / Heap::ElementSize,
Heap::ElementType);
amd::alignUp(size(), Device::Heap::ElementSize) / Device::Heap::ElementSize,
Device::Heap::ElementType);
// Create the interop object in CAL
if (NULL == interopMemory_ || !interopMemory_->create(memType, createParams)) {
@@ -502,14 +478,6 @@ Memory::~Memory()
// Clean VA cache
dev().removeVACache(this);
// Release associated heap block, if any
if (hb_) {
// Protect heap block from simultaneous release with realloc
amd::ScopedLock k(dev().lockAsyncOps());
hb_->setMemory(NULL);
hb_->free();
}
delete interopMemory_;
// Release associated map target, if any
@@ -531,35 +499,6 @@ Memory::~Memory()
}
}
//! Re-creates this memory object inside the heap block \a hb.
//!
//! A view block (one with a parent) is re-created as a view at its current
//! offset inside \a parent; any other block is reallocated at the offset of
//! the new heap block. The heap block pointer is updated only on success.
//! \return true if the underlying resource was (re)created
bool
Memory::reallocate(HeapBlock* hb, const Resource* parent)
{
    Resource::ViewParams params;
    params.size_ = hb->size_;
    params.resource_ = parent;
    params.memory_ = NULL;

    bool moved;
    if (hb->parent_ != NULL) {
        // View reallocation: the offset inside the view is unchanged
        params.offset_ = Resource::offset();
        moved = Resource::create(Resource::View, &params);
    }
    else {
        params.offset_ = hb->offset_;
        moved = Resource::reallocate(&params);
    }

    if (moved) {
        hb_ = hb;
    }
    return moved;
}
void
Memory::syncCacheFromHost(VirtualGPU& gpu, device::Memory::SyncFlags syncFlags)
{
@@ -814,33 +753,13 @@ Memory::createBufferView(amd::Memory& subBufferOwner)
{
gpu::Memory* viewMemory;
Resource::ViewParams params;
HeapBlock* hb = NULL;
size_t offset = subBufferOwner.getOrigin();
size_t size = subBufferOwner.getSize();
if (!dev().heap()->isVirtual()) {
if (NULL == hb_) {
LogError("HeapBlock must be initialized!");
return NULL;
}
hb = new HeapBlock(NULL, size, offset + hb_->offset());
if (hb == NULL) {
LogError("We don't have enough video memory!");
return NULL;
}
amd::ScopedLock lock(owner()->lockMemoryOps());
hb_->addView(hb);
}
// Create a memory object
viewMemory = new gpu::Memory(dev(), subBufferOwner, hb, size);
viewMemory = new gpu::Memory(dev(), subBufferOwner, size);
if (NULL == viewMemory) {
if (hb != NULL) {
hb->setMemory(NULL);
hb->free();
}
return NULL;
}
-18
查看文件
@@ -8,7 +8,6 @@
#include "top.hpp"
#include "thread/atomic.hpp"
#include "device/gpu/gpuresource.hpp"
#include "device/gpu/gpuheap.hpp"
#include "device/gpu/gpudevice.hpp"
#include <map>
@@ -27,7 +26,6 @@ class Heap;
class Resource;
class Memory;
class VirtualGPU;
class HeapBlock;
//! GPU memory object.
// Wrapper that can contain a heap block or an interop buffer/image.
@@ -44,14 +42,8 @@ public:
Memory(
const Device& gpuDev,
amd::Memory& owner,
HeapBlock* hb,
size_t size = 0);
//! Constructor (nonfat version for local scratch mem use)
Memory(
const Device& gpuDev,
HeapBlock& hb);
//! Constructor (nonfat version for local scratch mem use without heap block)
Memory(
const Device& gpuDev,
@@ -102,12 +94,6 @@ public:
//! Default destructor
~Memory();
//! Reallocates the memory object in the new heap block
bool reallocate(
HeapBlock* hb, //! The new heap block for this memory object
const Resource* parent //! Parent resource for view reallocaiton
);
//! Creates the interop memory
bool createInterop(
InteropType type //!< The interop type
@@ -189,9 +175,6 @@ public:
//! Sets interop type for this memory object
void setInteropType(InteropType type) { interopType_ = type; }
//! Returns the HeapBlock pointer
const HeapBlock* hb() const { return hb_; }
//! Set the owner
void setOwner(amd::Memory* owner) { owner_ = owner; }
@@ -229,7 +212,6 @@ private:
InteropType interopType_; //!< Interop type
Memory* interopMemory_; //!< interop memory
HeapBlock* hb_; //!< Heap Block, or NULL if not in-heap memory
Memory* pinnedMemory_; //!< Memory used as pinned system memory
const Memory* parent_; //!< Parent memory object
};
+8 -57
查看文件
@@ -322,7 +322,7 @@ static uint32_t GetHSAILImageOrderType(gslChannelOrder chOrder, cmSurfFmt format
}
bool
Resource::create(MemoryType memType, CreateParams* params, bool heap)
Resource::create(MemoryType memType, CreateParams* params)
{
bool calRes = false;
gslMemObject gslResource = 0;
@@ -382,7 +382,7 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
}
// Force remote allocation if it was requested in the settings
if (dev().settings().remoteAlloc_ && !heap &&
if (dev().settings().remoteAlloc_ &&
((memoryType() == Local) ||
(memoryType() == Persistent))) {
if (dev().settings().apuSystem_ && dev().settings().viPlus_) {
@@ -515,7 +515,7 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
if (memoryType() == Local) {
cal_.type_ = Persistent;
}
else if (!heap && (memoryType() == Persistent)) {
else if (memoryType() == Persistent) {
cal_.type_ = RemoteUSWC;
}
// Remote cacheable to uncacheable
@@ -553,11 +553,6 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
reinterpret_cast<const char*>(address_) - tmpHost);
pinOffset_ = hostMemOffset & 0xff;
//!@note GSL has a problem with the defines for flags and
//! view creation, so check the restriction here
if (!dev().heap()->isVirtual() && (pinOffset_ != 0)) {
return false;
}
pinAddress = tmpHost;
// Align width to avoid GSL useless assert with a view
@@ -629,20 +624,6 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
calRes = true;
}
// Check if it's a heap allocation
if (!dev().heap()->isVirtual()) {
if (viewOwner_ == &dev().globalMem()) {
// Allocation directly from the heap
hbOffset_ = static_cast<uint64_t>(view->offset_);
}
else {
// Allocation from another memory object
hbOffset_ = static_cast<uint64_t>(view->offset_) +
viewOwner_->hbOffset();
}
hbSize_ = view->size_;
}
if (viewOwner_->isMemoryType(Pinned)) {
address_ = viewOwner_->data() + offset();
}
@@ -952,11 +933,9 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
cal_.tiled_ = (GSL_MOA_TILING_LINEAR != tiling) &&
(GSL_MOA_TILING_LINEAR_GENERAL != tiling);
// Get the heap block offset if it's a virtual heap
if (dev().heap()->isVirtual()) {
hbOffset_ = gslResource->getSurfaceAddress() -
dev().heap()->baseAddress();
}
// Get the heap block offset
hbOffset_ = gslResource->getSurfaceAddress() -
dev().heap().baseAddress();
hbSize_ = static_cast<uint64_t>(gslResource->getSurfaceSize());
if (!dev().settings().use64BitPtr_ &&
@@ -1036,32 +1015,6 @@ Resource::create(MemoryType memType, CreateParams* params, bool heap)
return true;
}
//! Reallocates the GSL object behind this resource and copies the old
//! contents into the new allocation.
//!
//! \param params creation parameters forwarded to create()
//! \return false if the new allocation failed (the previous reference is
//!         restored in that case), true otherwise
bool
Resource::reallocate(CreateParams* params)
{
GslResourceReference* old;
GslResourceReference* active;
// Remember the current reference so it can be restored or released later
old = gslRef_;
if (!create(memoryType(), params)) {
// Creation failed: put the previous reference back
gslRef_ = old;
return false;
}
// Get the new active resource
// NOTE(review): relies on create() having replaced gslRef_ - confirm
active = gslRef_;
// Temporarily point back at the old reference for the copy and for free()
gslRef_ = old;
dev().resCopy(old->gslResource(),
active->gslResource(), CAL_MEMCOPY_SYNC);
// Free all old resources
assert(renames_.size() == 0);
free();
// Switch over to the new allocation
gslRef_ = active;
return true;
}
void
Resource::free()
{
@@ -1813,10 +1766,8 @@ Resource::setActiveRename(VirtualGPU& gpu, GslResourceReference* rename)
gslRef_ = rename;
address_ = rename->cpuAddress_;
if (dev().heap()->isVirtual()) {
hbOffset_ = rename->gslResource()->getSurfaceAddress() -
dev().heap()->baseAddress();
}
hbOffset_ = rename->gslResource()->getSurfaceAddress() -
dev().heap().baseAddress();
}
bool
-9
查看文件
@@ -209,15 +209,6 @@ public:
*/
virtual bool create(
MemoryType memType, //!< memory type
CreateParams* params = 0, //!< special parameters for resource allocation
bool heap = false //!< Global heap allocation for not VM mode
);
/*! \brief Reallocates a CAL object, associated with the resource
*
* \return True if we successfully reallocated a CAL resource
*/
bool reallocate(
CreateParams* params = 0 //!< special parameters for resource allocation
);
-4
查看文件
@@ -50,10 +50,6 @@ Settings::Settings()
maxRenames_ = 16;
maxRenameSize_ = 4 * Mi;
// The global heap settings
heapSize_ = GPU_INITIAL_HEAP_SIZE * Mi;
heapSizeGrowth_ = GPU_HEAP_GROWTH_INCREMENT * Mi;
imageSupport_ = false;
hwLDSSize_ = 0;
-2
查看文件
@@ -82,8 +82,6 @@ public:
size_t stagedXferSize_; //!< Staged buffer size
uint maxRenames_; //!< Maximum number of possible renames
uint maxRenameSize_; //!< Maximum size for all renames
size_t heapSize_; //!< The global heap size
size_t heapSizeGrowth_; //!< The global heap size growth
uint hwLDSSize_; //!< HW local data store size
uint maxWorkGroupSize_; //!< Requested workgroup size for this device
uint hostMemDirectAccess_; //!< Enables direct access to the host memory
+13 -19
查看文件
@@ -517,10 +517,6 @@ VirtualGPU::create(
// Fall through ...
case Settings::BlitEngineCAL:
case Settings::BlitEngineKernel:
if (!dev().heap()->isVirtual()) {
blitSetup.disableReadBufferRect_ = true;
blitSetup.disableWriteBufferRect_ = true;
}
// use host blit for HW debug
if (dev().settings().enableHwDebug_) {
blitSetup.disableCopyImageToBuffer_ = true;
@@ -3166,23 +3162,21 @@ VirtualGPU::profilingCollectResults(CommandBatch* cb, const amd::Event* waitingE
//! Increments cal_.memCount_ and stores resource->gslResource() in
//! vmMems_ at index (cal_.memCount_ - 1). When the new count exceeds
//! numVmMems_, a larger array is allocated, the first numVmMems_ entries
//! are copied over with memcpy and the previous array is deleted.
//!
//! \return false only on the (tmp == NULL) path, true otherwise
//!
//! NOTE(review): this span reads like a diff rendering whose +/- markers
//! were lost. The body appears twice in interleaved form (once under the
//! dev().heap()->isVirtual() guard, once without it) and the braces do not
//! balance as shown. Confirm against the real gpuvirtual.cpp.
//!
//! NOTE(review): a plain new[] expression normally reports failure by
//! throwing rather than returning NULL, so the NULL check is presumably
//! only reachable in a build without exceptions. Verify the build flags.
bool
VirtualGPU::addVmMemory(const Resource* resource)
{
if (dev().heap()->isVirtual()) {
uint* cnt = &cal_.memCount_;
(*cnt)++;
// Reallocate array if kernel uses more memory objects
if (numVmMems_ < *cnt) {
gslMemObject* tmp;
tmp = new gslMemObject [*cnt];
if (tmp == NULL) {
return false;
}
// Only the numVmMems_ entries of the previous array are carried over
memcpy(tmp, vmMems_, sizeof(gslMemObject) * numVmMems_);
delete [] vmMems_;
vmMems_ = tmp;
numVmMems_ = *cnt;
uint* cnt = &cal_.memCount_;
(*cnt)++;
// Reallocate array if kernel uses more memory objects
if (numVmMems_ < *cnt) {
gslMemObject* tmp;
tmp = new gslMemObject [*cnt];
if (tmp == NULL) {
return false;
}
vmMems_[*cnt - 1] = resource->gslResource();
// Only the numVmMems_ entries of the previous array are carried over
memcpy(tmp, vmMems_, sizeof(gslMemObject) * numVmMems_);
delete [] vmMems_;
vmMems_ = tmp;
numVmMems_ = *cnt;
}
// The newly counted slot receives the GSL memory object of the resource
vmMems_[*cnt - 1] = resource->gslResource();
return true;
}
+1 -33
查看文件
@@ -496,7 +496,7 @@ CALGSLDevice::SetupContext(int32 &asic_id)
getAttribs_int(temp_cs);
temp_cs->getMemInfo(&m_memInfo, GSL_MEMINFO_BASIC);
m_vmMode = temp_cs->getVMMode();
assert(temp_cs->getVMMode());
m_adp->deleteContext(temp_cs);
@@ -1313,38 +1313,6 @@ CALGSLDevice::PerformDMACopy(gslMemObject srcMem, gslMemObject destMem, cmSurfFm
return true;
}
/*! \brief Copies one GSL memory object into another and waits for completion
 *
 *  The copy path is whatever GetCopyType() returns for the pair: USE_DRMDMA
 *  goes through DMACopy(), USE_CPDMA through syncUploadRaw(). In both cases
 *  the context is flushed and the matching query is waited on before
 *  returning. Any other result trips an assert.
 *
 *  \param srcRes source memory object, must not be 0
 *  \param dstRes destination memory object, must not be 0
 *  \param flags  not referenced in the body
 */
void
CALGSLDevice::resCopy(gslMemObject srcRes, gslMemObject dstRes, uint32 flags) const
{
    assert(m_cs != 0);
    assert(srcRes != 0);
    assert(dstRes != 0);

    //! @note: GSL device isn't thread safe
    amd::ScopedLock lock(gslDeviceOps());

    // Handed to GetCopyType() and then used as the copy size below;
    // presumably filled in by that call - TODO confirm
    uint64 copySize;
    const CopyType engine =
        GetCopyType(srcRes, dstRes, 0, 0, m_allowDMA, 0, copySize, 0, 0);

    switch (engine) {
    case USE_DRMDMA:
        m_cs->DMACopy(srcRes, 0, dstRes, 0, copySize, GSL_SYNCUPLOAD_SYNC_WAIT, NULL);
        m_cs->Flush();
        Wait(m_cs, GSL_DRMDMA_SYNC_ATI, m_mapDMAQuery);
        break;

    case USE_CPDMA:
        m_cs->syncUploadRaw(srcRes, 0, dstRes, 0, copySize, 0);
        m_cs->Flush();
        Wait(m_cs, GSL_SYNC_ATI, m_mapQuery);
        break;

    default:
        assert(0 && "No copy engine is being used");
        break;
    }
}
#define CPDMA_THRESHOLD 131072
CopyType
-5
查看文件
@@ -97,14 +97,10 @@ public:
const CALdeviceattribs& getAttribs() const { return m_attribs; }
const gslMemInfo& getMemInfo() const { return m_memInfo; }
bool isVmMode() const { return m_vmMode; };
uint32 getVPUMask() const { return m_vpuMask; }
bool canDMA() const { return m_canDMA; }
gslMemObject m_srcDRMDMAMem, m_dstDRMDMAMem; // memory object of flush buffer, used for DRMDMA flush
void resCopy(gslMemObject srcRes, gslMemObject dstRes, uint32 flags) const;
void PerformAdapterInitialization() const;
void PerformFullInitialization() const;
@@ -211,7 +207,6 @@ private:
uint m_computeRing : 1;
uint m_usePerVPUAdapterModel : 1;
uint m_PerformLazyDeviceInit : 1;
uint m_vmMode : 1;
uint m_isComputeRingIDForced : 1;
};
};
-1
查看文件
@@ -34,7 +34,6 @@ Settings::Settings()
bool
Settings::create(bool doublePrecision)
{
largeHostMemAlloc_ = true;
customHostAllocator_ = true;
// Enable extensions
-4
查看文件
@@ -52,12 +52,8 @@ release(cstring, GPU_DEVICE_ORDINAL, "", \
"Select the device ordinal (comma seperated list of available devices)") \
release(bool, REMOTE_ALLOC, false, \
"Use remote memory for the global heap allocation") \
release(int, GPU_INITIAL_HEAP_SIZE, 16, \
"Initial size of the GPU heap in MiB") \
release(uint, GPU_MAX_HEAP_SIZE, 100, \
"Set maximum size of the GPU heap to % of board memory") \
release(int, GPU_HEAP_GROWTH_INCREMENT, 8, \
"Amount to grow the GPU heap by in MiB") \
release(uint, GPU_STAGING_BUFFER_SIZE, 512, \
"Size of the GPU staging buffer in KiB") \
release(bool, GPU_DUMP_BLIT_KERNELS, false, \