From ef505c7cd8b2e0359be93ebbc3d438c237dda3d0 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Thu, 27 Feb 2025 18:11:42 +0000 Subject: [PATCH] SWDEV-513197 - Improve launch perf for Device Heap kernels - If any kernel uses device heap, the launch needs to be preceded by an init kernel. Save on the extra barrier packet launch/flush between the init heap kernel and user kernel Change-Id: I8ebc6246188200e5f673dc464bc76a53bcb8b7c6 [ROCm/clr commit: ca530c660ba4a8f3d64648a17c262c58b5e24b7a] --- projects/clr/rocclr/device/rocm/rocblit.cpp | 5 +++-- projects/clr/rocclr/device/rocm/rocvirtual.cpp | 7 +++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index 2ccd922244..765790c378 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -2537,9 +2537,10 @@ bool KernelBlitManager::initHeap(device::Memory* heap_to_initialize, device::Mem setArgument(kernels_[blitType], 2, sizeof(uint), &heap_size); setArgument(kernels_[blitType], 3, sizeof(uint), &number_of_initial_blocks); address parameters = captureArguments(kernels_[blitType]); - result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters, nullptr); + result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters, + nullptr, 0, nullptr, nullptr, true); releaseArguments(parameters); - gpu().releaseGpuMemoryFence(); + gpu().Barriers().WaitCurrent(); return result; } diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index cd517ca864..9c06aad581 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -851,10 +851,9 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para WriteAqlArgAt(const_cast
(params), mem, sizeof(void*), it->second); } ClPrint(amd::LOG_INFO, amd::LOG_KERN, - "Arg%d: %s %s = val:%s", i, desc.typeName_.c_str(), desc.name_.c_str(), - (desc.size_ == 4) ? std::to_string(*reinterpret_cast(srcArgPtr)).c_str() : - (desc.size_ == 8) ? std::to_string(*reinterpret_cast(srcArgPtr)).c_str() - : ""); + "Arg%d: %s %s = val:0x%lx", i, desc.typeName_.c_str(), desc.name_.c_str(), + (desc.size_ == 4) ? *reinterpret_cast(srcArgPtr) : + (desc.size_ == 8) ? *reinterpret_cast(srcArgPtr) : 0LL); } else if (desc.type_ == T_SAMPLER) { uint32_t index = desc.info_.arrayIndex_;