Do not bump up total scratch size for large cached allocations.
The hardware does not ignore the low bits of the scratch wave count, so it will stride beyond the end of the allocation whenever the wave count is not divisible by the SE count. For large cached scratch allocations, scratch setup therefore uses the requested scratch size instead of the cached allocation's size. The scratch cache still retains the cached allocation's size.

Change-Id: I0129ddc99a8940d01d8fbcd0b02d5061f31f456d
Этот коммит содержится в:
@@ -135,7 +135,6 @@ class ScratchCache {
|
||||
if (it->second.isFree()) {
|
||||
it->second.alloc();
|
||||
info.queue_base = it->second.base;
|
||||
info.size = it->first;
|
||||
info.scratch_node = it;
|
||||
available_bytes -= it->first;
|
||||
return true;
|
||||
@@ -155,7 +154,6 @@ class ScratchCache {
|
||||
}
|
||||
it->second.free();
|
||||
available_bytes += it->first;
|
||||
assert(it->first == info.size && "Scratch cache size mismatch.");
|
||||
}
|
||||
|
||||
bool trim(bool trim_nodes_in_use) {
|
||||
|
||||
@@ -1328,6 +1328,8 @@ void AqlQueue::InitScratchSRD() {
|
||||
uint32_t num_waves = queue_scratch_.size / (tmpring_size.bits.WAVESIZE * 1024);
|
||||
tmpring_size.bits.WAVES = std::min(num_waves, max_scratch_waves);
|
||||
amd_queue_.compute_tmpring_size = tmpring_size.u32All;
|
||||
assert((tmpring_size.bits.WAVES % agent_props.NumShaderBanks == 0) &&
|
||||
"Invalid scratch wave count. Must be divisible by #SEs.");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user