From 93e58b91002467008da58ac01a02cd0d06f62f6e Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Sat, 7 Oct 2017 05:27:10 -0500 Subject: [PATCH] Capture more memory allocation types with the 2MB allocator. TensorFlow was running out of VRAM due to padding up allocations from legacy memory APIs. These allocations have been added to the fragment allocator to improve VRAM utilization. Change-Id: Ic680fff576a0434b3b17a4c91746da44e09957fa [ROCm/ROCR-Runtime commit: 4f299a99090c0bc814848af9a460c93b471a8ec4] --- .../hsa-runtime/core/runtime/amd_memory_region.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index e72150f764..a38689443b 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -171,15 +171,19 @@ hsa_status_t MemoryRegion::Allocate(size_t& size, AllocateFlags alloc_flags, voi // Only allow using the suballocator for ordinary VRAM. if (IsLocalMemory()) { - bool useSubAlloc = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc(); - useSubAlloc &= (alloc_flags == AllocateRestrict); + bool subAllocEnabled = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc(); + // Avoid modifying executable or queue allocations. + bool useSubAlloc = subAllocEnabled; + useSubAlloc &= ((alloc_flags & (~AllocateRestrict)) == 0); useSubAlloc &= (size <= fragment_allocator_.max_alloc()); if (useSubAlloc) { *address = fragment_allocator_.alloc(size); return HSA_STATUS_SUCCESS; } - // Pad up larger VRAM allocations. - size = AlignUp(size, fragment_allocator_.max_alloc()); + if (subAllocEnabled) { + // Pad up larger VRAM allocations. + size = AlignUp(size, fragment_allocator_.max_alloc()); + } } // Allocate memory.