From 7461a5b46fe4f4b4dd70a1bfbc76036d3c39f31d Mon Sep 17 00:00:00 2001 From: German Date: Tue, 9 Jan 2024 13:15:17 -0500 Subject: [PATCH] SWDEV-438532 - Enable wave limit for HSAIL Luxmark still uses HSAIL path and one subtest can benefit from the wave limit. Change-Id: I16c94e09cd6e2afd6341cb76bf2e9ab7b7713214 [ROCm/clr commit: dec1158d04d5d20bb91ada1a46b495312f0b831b] --- projects/clr/rocclr/device/pal/palvirtual.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index f3131e611c..44b06436d6 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -2661,7 +2661,13 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, } dispatchParam.pCpuAqlCode = hsaKernel.cpuAqlCode(); dispatchParam.hsaQueueVa = hsaQueueMem_->vmAddress(); - dispatchParam.wavesPerSh = 0; + if (!hsaKernel.prog().isLC() && hsaKernel.workGroupInfo()->wavesPerSimdHint_ != 0) { + constexpr uint32_t kWavesPerSimdLimit = 4; + dispatchParam.wavesPerSh = kWavesPerSimdLimit * + dev().info().cuPerShaderArray_ * dev().info().simdPerCU_; + } else { + dispatchParam.wavesPerSh = 0; + } dispatchParam.useAtc = dev().settings().svmFineGrainSystem_ ? true : false; dispatchParam.kernargSegmentSize = hsaKernel.argsBufferSize(); dispatchParam.aqlPacketIndex = aql_index;