SWDEV-373173 - Correct max VGPRs and VGPR granularity in occupancy calculation for different ASICs
Change-Id: I6f595428ed31733cdc04e65e7c3e383f454ab72b
[ROCm/clr commit: 2b50636809]
Este commit está contenido en:
cometido por
Anusha Godavarthy Surya
padre
339bb6a7de
commit
6ae2298d06
@@ -613,6 +613,11 @@ struct Info : public amd::EmbeddedObject {
|
||||
size_t virtualMemAllocGranularity_; //!< virtual memory allocation size/addr granularity
|
||||
|
||||
uint32_t driverNodeId_;
|
||||
//! Number of Physical SGPRs per SIMD
|
||||
uint32_t sgprsPerSimd_;
|
||||
//! Number of VGPRs per SIMD
|
||||
uint32_t vgprsPerSimd_;
|
||||
uint32_t vgprAllocGranularity_;
|
||||
};
|
||||
|
||||
//! Device settings
|
||||
|
||||
@@ -632,7 +632,11 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
}
|
||||
}
|
||||
info_.virtualMemoryManagement_ = true;
|
||||
info_.virtualMemAllocGranularity_ = static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity);
|
||||
info_.virtualMemAllocGranularity_ =
|
||||
static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity);
|
||||
info_.vgprAllocGranularity_ = palProp.gfxipProperties.shaderCore.vgprAllocGranularity;
|
||||
info_.vgprsPerSimd_ = palProp.gfxipProperties.shaderCore.vgprsPerSimd;
|
||||
info_.sgprsPerSimd_ = palProp.gfxipProperties.shaderCore.sgprsPerSimd;
|
||||
}
|
||||
|
||||
Device::XferBuffers::~XferBuffers() {
|
||||
|
||||
@@ -1648,7 +1648,63 @@ bool Device::populateOCLDeviceConstants() {
|
||||
|
||||
info_.globalCUMask_ = {};
|
||||
info_.virtualMemoryManagement_ = false;
|
||||
switch (isa().versionMajor()) {
|
||||
case (11):
|
||||
if (isa().versionMinor() == 0) {
|
||||
switch (isa().versionStepping()) {
|
||||
case (0):
|
||||
case (1):
|
||||
info_.vgprAllocGranularity_ = 24;
|
||||
info_.vgprsPerSimd_ = 1536;
|
||||
break;
|
||||
case (2):
|
||||
case (3):
|
||||
default:
|
||||
info_.vgprAllocGranularity_ = 16;
|
||||
info_.vgprsPerSimd_ = 1024;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case (10):
|
||||
switch (isa().versionMinor()) {
|
||||
case (0):
|
||||
case (1):
|
||||
info_.vgprAllocGranularity_ = 8;
|
||||
info_.vgprsPerSimd_ = 1024;
|
||||
break;
|
||||
case (3):
|
||||
default:
|
||||
info_.vgprAllocGranularity_ = 16;
|
||||
info_.vgprsPerSimd_ = 1024;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case (9):
|
||||
if ((isa().versionMinor() == 0 && isa().versionStepping() == 10) ||
|
||||
(isa().versionMinor() == 4 && isa().versionStepping() == 0)) {
|
||||
info_.vgprAllocGranularity_ = 8;
|
||||
info_.vgprsPerSimd_ = 512;
|
||||
} else {
|
||||
info_.vgprAllocGranularity_ = 4;
|
||||
info_.vgprsPerSimd_ = 256;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// For gfx<=8 (this default also catches any major version other than 9, 10 and 11)
|
||||
info_.vgprAllocGranularity_ = 4;
|
||||
info_.vgprsPerSimd_ = 256;
|
||||
break;
|
||||
}
|
||||
|
||||
if (isa().versionMajor() < 8) {
|
||||
info_.sgprsPerSimd_ = 512;
|
||||
} else if (isa().versionMajor() < 10) {
|
||||
info_.sgprsPerSimd_ = 800;
|
||||
} else {
|
||||
info_.sgprsPerSimd_ =
|
||||
std::numeric_limits<uint32_t>::max(); // gfx10+ does not share SGPRs between waves
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
Referencia en una nueva incidencia
Block a user