From cedc3e80a8a1f2e87243d1b7b77bafa293acbc2d Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Thu, 24 Feb 2022 23:49:09 -0600 Subject: [PATCH] Do not bump up total scratch size for large cached allocations. HW does not ignore low bits of the scratch wave count and will stride beyond the end of the allocation if the wave count is ever indivisible by SE count. Rather than returning the allocation size for cached large scratch allocations, use the requested scratch size in scratch setup. Scratch cache will retain the cached allocation's size. Change-Id: I0129ddc99a8940d01d8fbcd0b02d5061f31f456d --- runtime/hsa-runtime/core/inc/scratch_cache.h | 2 -- runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runtime/hsa-runtime/core/inc/scratch_cache.h b/runtime/hsa-runtime/core/inc/scratch_cache.h index 1e079babb7..6fa6a19af3 100644 --- a/runtime/hsa-runtime/core/inc/scratch_cache.h +++ b/runtime/hsa-runtime/core/inc/scratch_cache.h @@ -135,7 +135,6 @@ class ScratchCache { if (it->second.isFree()) { it->second.alloc(); info.queue_base = it->second.base; - info.size = it->first; info.scratch_node = it; available_bytes -= it->first; return true; @@ -155,7 +154,6 @@ class ScratchCache { } it->second.free(); available_bytes += it->first; - assert(it->first == info.size && "Scratch cache size mismatch."); } bool trim(bool trim_nodes_in_use) { diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 2981205a76..6b89400f46 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -1328,6 +1328,8 @@ void AqlQueue::InitScratchSRD() { uint32_t num_waves = queue_scratch_.size / (tmpring_size.bits.WAVESIZE * 1024); tmpring_size.bits.WAVES = std::min(num_waves, max_scratch_waves); amd_queue_.compute_tmpring_size = tmpring_size.u32All; + assert((tmpring_size.bits.WAVES % agent_props.NumShaderBanks == 0) && + "Invalid scratch wave count. Must be divisible by #SEs."); return; }