Capture more memory allocation types with the 2MB allocator.

TensorFlow was running out of VRAM because allocations made through the legacy memory APIs were not sub-allocated and were instead padded up to a multiple of the fragment allocator's maximum allocation size. These allocations are now served by the fragment allocator to improve VRAM utilization. Change-Id: Ic680fff576a0434b3b17a4c91746da44e09957fa [ROCm/ROCR-Runtime commit: 4f299a9909]
2017-10-07 05:27:10 -05:00
commit 93e58b9100
@@ -171,15 +171,19 @@ hsa_status_t MemoryRegion::Allocate(size_t& size, AllocateFlags alloc_flags, voi

  // Only allow using the suballocator for ordinary VRAM.
  if (IsLocalMemory()) {
-    bool useSubAlloc = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
-    useSubAlloc &= (alloc_flags == AllocateRestrict);
+    bool subAllocEnabled = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
+    // Avoid modifying executable or queue allocations.
+    bool useSubAlloc = subAllocEnabled;
+    useSubAlloc &= ((alloc_flags & (~AllocateRestrict)) == 0);
    useSubAlloc &= (size <= fragment_allocator_.max_alloc());
    if (useSubAlloc) {
      *address = fragment_allocator_.alloc(size);
      return HSA_STATUS_SUCCESS;
    }
-    // Pad up larger VRAM allocations.
-    size = AlignUp(size, fragment_allocator_.max_alloc());
+    if (subAllocEnabled) {
+      // Pad up larger VRAM allocations.
+      size = AlignUp(size, fragment_allocator_.max_alloc());
+    }
  }

  // Allocate memory.