SWDEV-513197 - Improve launch perf for Device Heap kernels
- If any kernel uses the device heap, its launch needs to be preceded by an
init kernel. Save on the extra barrier packet launch/flush between the
init heap kernel and the user kernel.
Change-Id: I8ebc6246188200e5f673dc464bc76a53bcb8b7c6
[ROCm/clr commit: ca530c660b]
Dieser Commit ist enthalten in:
@@ -2537,9 +2537,10 @@ bool KernelBlitManager::initHeap(device::Memory* heap_to_initialize, device::Mem
|
||||
setArgument(kernels_[blitType], 2, sizeof(uint), &heap_size);
|
||||
setArgument(kernels_[blitType], 3, sizeof(uint), &number_of_initial_blocks);
|
||||
address parameters = captureArguments(kernels_[blitType]);
|
||||
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters, nullptr);
|
||||
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters,
|
||||
nullptr, 0, nullptr, nullptr, true);
|
||||
releaseArguments(parameters);
|
||||
gpu().releaseGpuMemoryFence();
|
||||
gpu().Barriers().WaitCurrent();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -851,10 +851,9 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
|
||||
WriteAqlArgAt(const_cast<address>(params), mem, sizeof(void*), it->second);
|
||||
}
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_KERN,
|
||||
"Arg%d: %s %s = val:%s", i, desc.typeName_.c_str(), desc.name_.c_str(),
|
||||
(desc.size_ == 4) ? std::to_string(*reinterpret_cast<const int*>(srcArgPtr)).c_str() :
|
||||
(desc.size_ == 8) ? std::to_string(*reinterpret_cast<const long long*>(srcArgPtr)).c_str()
|
||||
: "");
|
||||
"Arg%d: %s %s = val:0x%lx", i, desc.typeName_.c_str(), desc.name_.c_str(),
|
||||
(desc.size_ == 4) ? *reinterpret_cast<const int*>(srcArgPtr) :
|
||||
(desc.size_ == 8) ? *reinterpret_cast<const long long*>(srcArgPtr) : 0LL);
|
||||
}
|
||||
else if (desc.type_ == T_SAMPLER) {
|
||||
uint32_t index = desc.info_.arrayIndex_;
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren