diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index bb46dac684..79487a5f6a 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -2293,7 +2293,12 @@ bool Device::validateKernel(const amd::Kernel& kernel, const device::VirtualDevi // Find the number of scratch registers used in the kernel const device::Kernel* devKernel = kernel.getDeviceKernel(*this); uint32_t regNum = static_cast(devKernel->workGroupInfo()->scratchRegs_); - regNum = std::max(static_cast(stack_size_) / sizeof(uint32_t), regNum); + // OpenCL has no API for setting a dynamic stack size (HIP uses hipDeviceSetLimit), so + // there is no need to refresh the value here for OpenCL. Even for HIP, the value + // should be updated only if the compiler reports that the kernel uses the stack. + if (IS_HIP && (devKernel->workGroupInfo()->usedStackSize_ & 0x1) == 0x1) { + regNum = std::max(static_cast(stack_size_) / sizeof(uint32_t), regNum); + } const VirtualGPU* vgpu = static_cast(vdev); if (!allocScratch(regNum, vgpu, devKernel->workGroupInfo()->usedVGPRs_)) {