From ddff87b4138dfd419eab3fa419246df8f6a015a5 Mon Sep 17 00:00:00 2001
From: Sourabh Betigeri
Date: Mon, 3 May 2021 11:26:08 -0700
Subject: [PATCH] SWDEV-281034 - Occupancy calculations for gfx10.3 adjustments based on available VGPRs, SIMDs per CU and number of waves per SIMD.

Change-Id: I0e34decebc9f730fe0e0c3c3258f301de5748e72
---
 hipamd/rocclr/hip_platform.cpp | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/hipamd/rocclr/hip_platform.cpp b/hipamd/rocclr/hip_platform.cpp
index 01390fd9e3..f99f4a30e5 100755
--- a/hipamd/rocclr/hip_platform.cpp
+++ b/hipamd/rocclr/hip_platform.cpp
@@ -346,10 +346,34 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
     }
   }
   // Find wave occupancy per CU => simd_per_cu * GPR usage
-  constexpr size_t MaxWavesPerSimd = 8;  // Limited by SPI 32 per CU, hence 8 per SIMD
+  size_t MaxWavesPerSimd;
+
+  if (device.isa().versionMajor() <= 9) {
+    MaxWavesPerSimd = 8;  // Limited by SPI 32 per CU, hence 8 per SIMD
+  } else {
+    MaxWavesPerSimd = 16;
+  }
   size_t VgprWaves = MaxWavesPerSimd;
-  if (wrkGrpInfo->usedVGPRs_ > 0) {
-    VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4);
+  size_t maxVGPRs;
+  uint32_t VgprGranularity;
+  if (device.isa().versionMajor() <= 9) {
+    if (device.isa().versionMajor() == 9 &&
+        device.isa().versionMinor() == 0 &&
+        device.isa().versionStepping() == 10) {
+      maxVGPRs = 512;
+      VgprGranularity = 8;
+    }
+    else {
+      maxVGPRs = 256;
+      VgprGranularity = 4;
+    }
+  }
+  else {
+    maxVGPRs = 1024;
+    VgprGranularity = 8;
+  }
+  if (wrkGrpInfo->usedVGPRs_ > 0) {  // usedVGPRs_ feeds the divisor below, so guard on it
+    VgprWaves = maxVGPRs / amd::alignUp(wrkGrpInfo->usedVGPRs_, VgprGranularity);
   }

   size_t GprWaves = VgprWaves;