From 4aa52155ee9dd5a3df3995575f0ec8fd991bf37f Mon Sep 17 00:00:00 2001 From: Jaydeep Patel Date: Thu, 17 Oct 2024 08:56:31 +0000 Subject: [PATCH] SWDEV-491149 - OCL does not need to update scratch because, unlike HIP with hipDeviceSetLimit, it has no API to change the stack size. For HIP, the update should happen only if the compiler reports use of the stack size. Change-Id: Ic781bcac6fcf586da39ec4aafd4809da3652ede3 --- rocclr/device/pal/paldevice.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index bb46dac684..79487a5f6a 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -2293,7 +2293,12 @@ bool Device::validateKernel(const amd::Kernel& kernel, const device::VirtualDevi // Find the number of scratch registers used in the kernel const device::Kernel* devKernel = kernel.getDeviceKernel(*this); uint32_t regNum = static_cast(devKernel->workGroupInfo()->scratchRegs_); - regNum = std::max(static_cast(stack_size_) / sizeof(uint32_t), regNum); + // OCL has no API to set a dynamic stack size (HIP has hipDeviceSetLimit), so OCL never needs + // to refresh the value here. Even for HIP, the update should happen only if the compiler + // reports use of the stack size. + if (IS_HIP && (devKernel->workGroupInfo()->usedStackSize_ & 0x1) == 0x1) { + regNum = std::max(static_cast(stack_size_) / sizeof(uint32_t), regNum); + } const VirtualGPU* vgpu = static_cast(vdev); if (!allocScratch(regNum, vgpu, devKernel->workGroupInfo()->usedVGPRs_)) {