Unify APU and dGPU initial queue scratch allocation.
Both support dynamic scratch allocation, so there is no reason to preemptively allocate on APUs.

Change-Id: I22eaec01a83a091ee9dc1f594a1a9106e8dd81fc
This commit is contained in:
@@ -910,20 +910,13 @@ hsa_status_t GpuAgent::QueueCreate(size_t size, hsa_queue_type32_t queue_type,
|
||||
// Allocate scratch memory
|
||||
ScratchInfo scratch;
|
||||
if (private_segment_size == UINT_MAX) {
|
||||
private_segment_size = (profile_ == HSA_PROFILE_BASE) ? 0 : scratch_per_thread_;
|
||||
}
|
||||
|
||||
if (private_segment_size > 262128) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
scratch.size_per_thread = AlignUp(private_segment_size, 16);
|
||||
if (scratch.size_per_thread > 262128) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
private_segment_size = 0;
|
||||
}
|
||||
scratch.size_per_thread = private_segment_size;
|
||||
|
||||
const uint32_t num_cu = properties_.NumFComputeCores / properties_.NumSIMDPerCU;
|
||||
scratch.size = scratch.size_per_thread * 32 * 64 * num_cu;
|
||||
scratch.size =
|
||||
scratch.size_per_thread * properties_.MaxSlotsScratchCU * properties_.WaveFrontSize * num_cu;
|
||||
scratch.queue_base = nullptr;
|
||||
scratch.queue_process_offset = 0;
|
||||
|
||||
|
||||
مرجع در شماره جدید
Block a user