SWDEV-526855 - Modify the SIMDPerCU calculation for gfx1250/1. (#275)

[ROCm/clr commit: acb1f7e8d5]
Dieser Commit ist enthalten in:
Jayaprakash, Karthik
2025-05-12 11:09:03 -04:00
committet von GitHub
Ursprung 3d862af4df
Commit 5dff283f4e
+8 -2
Datei anzeigen
@@ -404,8 +404,14 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
GprWaves = std::min(VgprWaves, SgprWaves);
}
uint32_t simdPerCU = (device.isa().versionMajor() <= 9) ? device.info().simdPerCU_
: (wrkGrpInfo->isWGPMode_ ? 4 : 2);
// The table contains SIMDs per CU, not per WGP, so when WGP mode is set in the kernel metadata,
// multiply the number of SIMDs by 2 to account for the 2 CUs in 1 WGP.
uint32_t simdPerCU = device.isa().simdPerCU();
if (wrkGrpInfo->isWGPMode_) {
simdPerCU *= 2;
}
const size_t alu_occupancy = simdPerCU * std::min(MaxWavesPerSimd, GprWaves);
const int alu_limited_threads = alu_occupancy * wrkGrpInfo->wavefrontSize_;