From 5dff283f4eb60ecc1739b346e4c04d0d25bc3752 Mon Sep 17 00:00:00 2001 From: "Jayaprakash, Karthik" Date: Mon, 12 May 2025 11:09:03 -0400 Subject: [PATCH] SWDEV-526855 - Modify the SIMDPerCU calculation for gfx1250/1. (#275) [ROCm/clr commit: acb1f7e8d5e16ed9eca5b56dafc42e3c10811b52] --- projects/clr/hipamd/src/hip_platform.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/projects/clr/hipamd/src/hip_platform.cpp b/projects/clr/hipamd/src/hip_platform.cpp index ea095adf81..ae50e4ce09 100644 --- a/projects/clr/hipamd/src/hip_platform.cpp +++ b/projects/clr/hipamd/src/hip_platform.cpp @@ -404,8 +404,14 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16); GprWaves = std::min(VgprWaves, SgprWaves); } - uint32_t simdPerCU = (device.isa().versionMajor() <= 9) ? device.info().simdPerCU_ - : (wrkGrpInfo->isWGPMode_ ? 4 : 2); + + // The table contains SIMD per CU, not per WGP, so when WGP mode is set on kernel metadata, + // multiply the number of SIMDs by 2, to account for 2CUs in 1 WGP. + uint32_t simdPerCU = device.isa().simdPerCU(); + if (wrkGrpInfo->isWGPMode_) { + simdPerCU *= 2; + } + const size_t alu_occupancy = simdPerCU * std::min(MaxWavesPerSimd, GprWaves); const int alu_limited_threads = alu_occupancy * wrkGrpInfo->wavefrontSize_;