diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index fe276be2f4..64d6746598 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -613,6 +613,11 @@ struct Info : public amd::EmbeddedObject { size_t virtualMemAllocGranularity_; //!< virtual memory allocation size/addr granularity uint32_t driverNodeId_; + //! Number of Physical SGPRs per SIMD + uint32_t sgprsPerSimd_; + //! Number of VGPRs per SIMD + uint32_t vgprsPerSimd_; + uint32_t vgprAllocGranularity_; }; //! Device settings diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index ae3df532bc..c82546267a 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -632,7 +632,11 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, } } info_.virtualMemoryManagement_ = true; - info_.virtualMemAllocGranularity_ = static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity); + info_.virtualMemAllocGranularity_ = + static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity); + info_.vgprAllocGranularity_ = palProp.gfxipProperties.shaderCore.vgprAllocGranularity; + info_.vgprsPerSimd_ = palProp.gfxipProperties.shaderCore.vgprsPerSimd; + info_.sgprsPerSimd_ = palProp.gfxipProperties.shaderCore.sgprsPerSimd; } Device::XferBuffers::~XferBuffers() { diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 9a9849d56a..39e8dce18a 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -1648,7 +1648,63 @@ bool Device::populateOCLDeviceConstants() { info_.globalCUMask_ = {}; info_.virtualMemoryManagement_ = false; + switch (isa().versionMajor()) { + case (11): + if (isa().versionMinor() == 0) { + switch (isa().versionStepping()) { + case (0): + case (1): + info_.vgprAllocGranularity_ = 24; 
+ info_.vgprsPerSimd_ = 1536; + break; + case (2): + case (3): + default: + info_.vgprAllocGranularity_ = 16; + info_.vgprsPerSimd_ = 1024; + break; + } + } + break; + case (10): + switch (isa().versionMinor()) { + case (0): + case (1): + info_.vgprAllocGranularity_ = 8; + info_.vgprsPerSimd_ = 1024; + break; + case (3): + default: + info_.vgprAllocGranularity_ = 16; + info_.vgprsPerSimd_ = 1024; + break; + } + break; + case (9): + if ((isa().versionMinor() == 0 && isa().versionStepping() == 10) || + (isa().versionMinor() == 4 && isa().versionStepping() == 0)) { + info_.vgprAllocGranularity_ = 8; + info_.vgprsPerSimd_ = 512; + } else { + info_.vgprAllocGranularity_ = 4; + info_.vgprsPerSimd_ = 256; + } + break; + default: + // For gfx<=8 + info_.vgprAllocGranularity_ = 4; + info_.vgprsPerSimd_ = 256; + break; + } + if (isa().versionMajor() < 8) { + info_.sgprsPerSimd_ = 512; + } else if (isa().versionMajor() < 10) { + info_.sgprsPerSimd_ = 800; + } else { + info_.sgprsPerSimd_ = + std::numeric_limits<uint32_t>::max(); // gfx10+ does not share SGPRs between waves + } return true; }