SWDEV-513197 - Improve launch perf for Device Heap kernels

- If any kernel uses the device heap, the launch needs to be preceded by an
  init kernel. Save on the extra barrier packet launch/flush between the
  init heap kernel and the user kernel.

Change-Id: I8ebc6246188200e5f673dc464bc76a53bcb8b7c6


[ROCm/clr commit: ca530c660b]
Dieser Commit ist enthalten in:
Saleel Kudchadker
2025-02-27 18:11:42 +00:00
Ursprung efd1f3f012
Commit ef505c7cd8
2 geänderte Dateien mit 6 neuen und 6 gelöschten Zeilen
@@ -2537,9 +2537,10 @@ bool KernelBlitManager::initHeap(device::Memory* heap_to_initialize, device::Mem
setArgument(kernels_[blitType], 2, sizeof(uint), &heap_size);
setArgument(kernels_[blitType], 3, sizeof(uint), &number_of_initial_blocks);
address parameters = captureArguments(kernels_[blitType]);
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters, nullptr);
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters,
nullptr, 0, nullptr, nullptr, true);
releaseArguments(parameters);
gpu().releaseGpuMemoryFence();
gpu().Barriers().WaitCurrent();
return result;
}
@@ -851,10 +851,9 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
WriteAqlArgAt(const_cast<address>(params), mem, sizeof(void*), it->second);
}
ClPrint(amd::LOG_INFO, amd::LOG_KERN,
"Arg%d: %s %s = val:%s", i, desc.typeName_.c_str(), desc.name_.c_str(),
(desc.size_ == 4) ? std::to_string(*reinterpret_cast<const int*>(srcArgPtr)).c_str() :
(desc.size_ == 8) ? std::to_string(*reinterpret_cast<const long long*>(srcArgPtr)).c_str()
: "");
"Arg%d: %s %s = val:0x%lx", i, desc.typeName_.c_str(), desc.name_.c_str(),
(desc.size_ == 4) ? *reinterpret_cast<const int*>(srcArgPtr) :
(desc.size_ == 8) ? *reinterpret_cast<const long long*>(srcArgPtr) : 0LL);
}
else if (desc.type_ == T_SAMPLER) {
uint32_t index = desc.info_.arrayIndex_;