SWDEV-373173 - Correct max VGPRs and VGPR granularity in occupancy calculation for different ASICs

Change-Id: I6f595428ed31733cdc04e65e7c3e383f454ab72b


[ROCm/clr commit: 2b50636809]
Este commit está contenido en:
Anusha GodavarthySurya
2023-01-17 04:16:40 +00:00
cometido por Anusha Godavarthy Surya
padre 339bb6a7de
commit 6ae2298d06
Se han modificado 3 ficheros con 66 adiciones y 1 borrados
+5
Ver fichero
@@ -613,6 +613,11 @@ struct Info : public amd::EmbeddedObject {
size_t virtualMemAllocGranularity_; //!< virtual memory allocation size/addr granularity
uint32_t driverNodeId_;
//! Number of Physical SGPRs per SIMD
uint32_t sgprsPerSimd_;
//! Number of VGPRs per SIMD
uint32_t vgprsPerSimd_;
uint32_t vgprAllocGranularity_;
};
//! Device settings
+5 -1
Ver fichero
@@ -632,7 +632,11 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
}
}
info_.virtualMemoryManagement_ = true;
info_.virtualMemAllocGranularity_ = static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity);
info_.virtualMemAllocGranularity_ =
static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity);
info_.vgprAllocGranularity_ = palProp.gfxipProperties.shaderCore.vgprAllocGranularity;
info_.vgprsPerSimd_ = palProp.gfxipProperties.shaderCore.vgprsPerSimd;
info_.sgprsPerSimd_ = palProp.gfxipProperties.shaderCore.sgprsPerSimd;
}
Device::XferBuffers::~XferBuffers() {
@@ -1648,7 +1648,63 @@ bool Device::populateOCLDeviceConstants() {
info_.globalCUMask_ = {};
info_.virtualMemoryManagement_ = false;
// Select the per-SIMD VGPR count and the VGPR allocation granularity from the
// ISA version (major.minor.stepping). Every path through this switch assigns
// both fields, so later consumers never see them unassigned.
switch (isa().versionMajor()) {
  case (11):
    if (isa().versionMinor() == 0) {
      switch (isa().versionStepping()) {
        case (0):
        case (1):
          info_.vgprAllocGranularity_ = 24;
          info_.vgprsPerSimd_ = 1536;
          break;
        case (2):
        case (3):
        default:
          info_.vgprAllocGranularity_ = 16;
          info_.vgprsPerSimd_ = 1024;
          break;
      }
    } else {
      // Previously any gfx11 minor version other than 0 left both fields
      // unassigned. Use the same values as the gfx11.0 default stepping.
      // NOTE(review): confirm these values for gfx11 minor versions != 0.
      info_.vgprAllocGranularity_ = 16;
      info_.vgprsPerSimd_ = 1024;
    }
    break;
  case (10):
    switch (isa().versionMinor()) {
      case (0):
      case (1):
        info_.vgprAllocGranularity_ = 8;
        info_.vgprsPerSimd_ = 1024;
        break;
      case (3):
      default:
        info_.vgprAllocGranularity_ = 16;
        info_.vgprsPerSimd_ = 1024;
        break;
    }
    break;
  case (9):
    // Versions 9.0.10 and 9.4.0 get the larger register file; every other
    // major-9 version uses the smaller one.
    if ((isa().versionMinor() == 0 && isa().versionStepping() == 10) ||
        (isa().versionMinor() == 4 && isa().versionStepping() == 0)) {
      info_.vgprAllocGranularity_ = 8;
      info_.vgprsPerSimd_ = 512;
    } else {
      info_.vgprAllocGranularity_ = 4;
      info_.vgprsPerSimd_ = 256;
    }
    break;
  default:
    // For gfx<=8. This branch is also taken by any major version that is not
    // listed above.
    info_.vgprAllocGranularity_ = 4;
    info_.vgprsPerSimd_ = 256;
    break;
}

// Number of physical SGPRs per SIMD, chosen by major ISA version only.
if (isa().versionMajor() < 8) {
  info_.sgprsPerSimd_ = 512;
} else if (isa().versionMajor() < 10) {
  info_.sgprsPerSimd_ = 800;
} else {
  // gfx10+ does not share SGPRs between waves, so the maximum uint32_t value
  // is stored instead of a real register count.
  info_.sgprsPerSimd_ = std::numeric_limits<uint32_t>::max();
}
return true;
}