Unify APU and dGPU initial queue scratch allocation.

Both support dynamic scratch allocation, so there is no reason to preemptively allocate on APUs.

Change-Id: I22eaec01a83a091ee9dc1f594a1a9106e8dd81fc
2018-11-20 21:38:03 -06:00
@@ -910,20 +910,13 @@ hsa_status_t GpuAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type,
  // Allocate scratch memory
  ScratchInfo scratch;
  if (private_segment_size == UINT_MAX) {
-    private_segment_size = (profile_ == HSA_PROFILE_BASE) ? 0 : scratch_per_thread_;
-  }
-
-  if (private_segment_size > 262128) {
-    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
-  }
-
-  scratch.size_per_thread = AlignUp(private_segment_size, 16);
-  if (scratch.size_per_thread > 262128) {
-    return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
+    private_segment_size = 0;
  }
+  scratch.size_per_thread = private_segment_size;

  const uint32_t num_cu = properties_.NumFComputeCores / properties_.NumSIMDPerCU;
-  scratch.size = scratch.size_per_thread * 32 * 64 * num_cu;
+  scratch.size =
+      scratch.size_per_thread * properties_.MaxSlotsScratchCU * properties_.WaveFrontSize * num_cu;
  scratch.queue_base = nullptr;
  scratch.queue_process_offset = 0;