From 93e58b91002467008da58ac01a02cd0d06f62f6e Mon Sep 17 00:00:00 2001
From: Sean Keely <Sean.Keely@amd.com>
Date: Sat, 7 Oct 2017 05:27:10 -0500
Subject: [PATCH] Capture more memory allocation types with the 2MB allocator.

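TensorFlow was running out of VRAM because allocations made through the
legacy memory APIs bypassed the fragment allocator and were instead
padded up to a multiple of its maximum allocation size.  These
allocations are now served by the fragment allocator to improve VRAM
utilization.  Larger allocations are also no longer padded when the
fragment allocator is disabled.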

Change-Id: Ic680fff576a0434b3b17a4c91746da44e09957fa


[ROCm/ROCR-Runtime commit: 4f299a99090c0bc814848af9a460c93b471a8ec4]
---
 .../hsa-runtime/core/runtime/amd_memory_region.cpp   | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp
index e72150f764..a38689443b 100644
--- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp
+++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp
@@ -171,15 +171,19 @@ hsa_status_t MemoryRegion::Allocate(size_t& size, AllocateFlags alloc_flags, voi
 
   // Only allow using the suballocator for ordinary VRAM.
   if (IsLocalMemory()) {
-    bool useSubAlloc = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
-    useSubAlloc &= (alloc_flags == AllocateRestrict);
+    bool subAllocEnabled = !core::Runtime::runtime_singleton_->flag().disable_fragment_alloc();
+    // Avoid modifying executable or queue allocations.
+    bool useSubAlloc = subAllocEnabled;
+    useSubAlloc &= ((alloc_flags & (~AllocateRestrict)) == 0);
     useSubAlloc &= (size <= fragment_allocator_.max_alloc());
     if (useSubAlloc) {
       *address = fragment_allocator_.alloc(size);
       return HSA_STATUS_SUCCESS;
     }
-    // Pad up larger VRAM allocations.
-    size = AlignUp(size, fragment_allocator_.max_alloc());
+    if (subAllocEnabled) {
+      // Pad up larger VRAM allocations.
+      size = AlignUp(size, fragment_allocator_.max_alloc());
+    }
   }
 
   // Allocate memory.