Do not bump up total scratch size for large cached allocations.

HW does not ignore the low bits of the scratch wave count and will
stride beyond the end of the allocation if the wave count is
ever not evenly divisible by the SE count.  Rather than returning the
cached allocation's size for large cached scratch allocations, use the
requested scratch size in scratch setup.  The scratch cache will still
retain the cached allocation's size.

Change-Id: I0129ddc99a8940d01d8fbcd0b02d5061f31f456d
Этот коммит содержится в:
Sean Keely
2022-02-24 23:49:09 -06:00
родитель a496adafaa
Коммит cedc3e80a8
2 изменённых файла: 2 добавления и 2 удаления
-2
Просмотреть файл
@@ -135,7 +135,6 @@ class ScratchCache {
if (it->second.isFree()) {
it->second.alloc();
info.queue_base = it->second.base;
info.size = it->first;
info.scratch_node = it;
available_bytes -= it->first;
return true;
@@ -155,7 +154,6 @@ class ScratchCache {
}
it->second.free();
available_bytes += it->first;
assert(it->first == info.size && "Scratch cache size mismatch.");
}
bool trim(bool trim_nodes_in_use) {
+2
Просмотреть файл
@@ -1328,6 +1328,8 @@ void AqlQueue::InitScratchSRD() {
uint32_t num_waves = queue_scratch_.size / (tmpring_size.bits.WAVESIZE * 1024);
tmpring_size.bits.WAVES = std::min(num_waves, max_scratch_waves);
amd_queue_.compute_tmpring_size = tmpring_size.u32All;
assert((tmpring_size.bits.WAVES % agent_props.NumShaderBanks == 0) &&
"Invalid scratch wave count. Must be divisible by #SEs.");
return;
}