diff --git a/projects/clr/hipamd/src/hip_platform.cpp b/projects/clr/hipamd/src/hip_platform.cpp index ea095adf81..ae50e4ce09 100644 --- a/projects/clr/hipamd/src/hip_platform.cpp +++ b/projects/clr/hipamd/src/hip_platform.cpp @@ -404,8 +404,14 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16); GprWaves = std::min(VgprWaves, SgprWaves); } - uint32_t simdPerCU = (device.isa().versionMajor() <= 9) ? device.info().simdPerCU_ - : (wrkGrpInfo->isWGPMode_ ? 4 : 2); + + // The table contains SIMD per CU, not per WGP, so when WGP mode is set on kernel metadata, + // multiply the number of SIMDs by 2, to account for 2CUs in 1 WGP. + uint32_t simdPerCU = device.isa().simdPerCU(); + if (wrkGrpInfo->isWGPMode_) { + simdPerCU *= 2; + } + const size_t alu_occupancy = simdPerCU * std::min(MaxWavesPerSimd, GprWaves); const int alu_limited_threads = alu_occupancy * wrkGrpInfo->wavefrontSize_;