SWDEV-281034 - Adjust occupancy calculations for gfx10.3 based on available VGPRs, SIMDs per CU, and number of waves per SIMD.
Change-Id: I0e34decebc9f730fe0e0c3c3258f301de5748e72
Этот коммит содержится в:
коммит произвёл
Sourabh Betigeri
родитель
20eff6c298
Коммит
ddff87b413
@@ -346,10 +346,34 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
}
|
||||
}
|
||||
// Find wave occupancy per CU => simd_per_cu * GPR usage
|
||||
constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD
|
||||
size_t MaxWavesPerSimd;
|
||||
|
||||
if (device.isa().versionMajor() <= 9) {
|
||||
MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD
|
||||
} else {
|
||||
MaxWavesPerSimd = 16;
|
||||
}
|
||||
size_t VgprWaves = MaxWavesPerSimd;
|
||||
if (wrkGrpInfo->usedVGPRs_ > 0) {
|
||||
VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4);
|
||||
size_t maxVGPRs;
|
||||
uint32_t VgprGranularity;
|
||||
if (device.isa().versionMajor() <= 9) {
|
||||
if (device.isa().versionMajor() == 9 &&
|
||||
device.isa().versionMinor() == 0 &&
|
||||
device.isa().versionStepping() == 10) {
|
||||
maxVGPRs = 512;
|
||||
VgprGranularity = 8;
|
||||
}
|
||||
else {
|
||||
maxVGPRs = 256;
|
||||
VgprGranularity = 4;
|
||||
}
|
||||
}
|
||||
else {
|
||||
maxVGPRs = 1024;
|
||||
VgprGranularity = 8;
|
||||
}
|
||||
if (wrkGrpInfo->usedSGPRs_ > 0) {
|
||||
VgprWaves = maxVGPRs / amd::alignUp(wrkGrpInfo->usedVGPRs_, VgprGranularity);
|
||||
}
|
||||
|
||||
size_t GprWaves = VgprWaves;
|
||||
|
||||
Ссылка в новой задаче
Block a user