Capture more memory allocation types with the 2MB allocator.

TensorFlow was running out of VRAM because allocations made through
legacy memory APIs were being padded up in size.  These allocations are
now served by the fragment allocator to improve VRAM utilization.

Change-Id: Ic680fff576a0434b3b17a4c91746da44e09957fa


[ROCm/ROCR-Runtime commit: 4f299a9909]
This commit is contained in:
Sean Keely
2017-10-07 05:27:10 -05:00
parent e5a242acf5
commit 93e58b9100
@@ -171,15 +171,19 @@ hsa_status_t MemoryRegion::Allocate(size_t& size, AllocateFlags alloc_flags, voi
// Only allow using the suballocator for ordinary VRAM.
if (IsLocalMemory()) {
bool useSubAlloc = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
useSubAlloc &= (alloc_flags == AllocateRestrict);
bool subAllocEnabled = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
// Avoid modifying executable or queue allocations.
bool useSubAlloc = subAllocEnabled;
useSubAlloc &= ((alloc_flags & (~AllocateRestrict)) == 0);
useSubAlloc &= (size <= fragment_allocator_.max_alloc());
if (useSubAlloc) {
*address = fragment_allocator_.alloc(size);
return HSA_STATUS_SUCCESS;
}
// Pad up larger VRAM allocations.
size = AlignUp(size, fragment_allocator_.max_alloc());
if (subAllocEnabled) {
// Pad up larger VRAM allocations.
size = AlignUp(size, fragment_allocator_.max_alloc());
}
}
// Allocate memory.