From 083b8ee0b41c074875e18cb37a03386edca2ab11 Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 22 Sep 2017 11:05:41 -0400
Subject: [PATCH] P4 to Git Change 1462147 by asalmanp@asalmanp-ocl-stg on
2017/09/22 10:49:57
SWDEV-132899 - [OCL][GFX10] Add support for GFX10
Adjusting WaveFrontSize for Null Devices based on the gfxip (the WaveFrontSize is 32 for gfxip10)
ReviewBoardURL = http://ocltc.amd.com/reviews/r/13486/
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#63 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#37 edit
[ROCm/clr commit: cb4585939dd4c104ca0f494dceb29071b98fe25a]
---
projects/clr/rocclr/runtime/device/pal/paldevice.cpp | 6 ++++--
projects/clr/rocclr/runtime/device/pal/palkernel.cpp | 8 +++-----
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
index 768c25bec0..2e448255d9 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
@@ -229,6 +229,8 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
// Runtime doesn't know what local size could be on the real board
info_.maxGlobalVariableSize_ = static_cast<size_t>(512 * Mi);
+ info_.wavefrontWidth_ = (ipLevel >= Pal::GfxIpLevel::GfxIp10) ? 32 : 64;
+
return true;
}
@@ -1796,7 +1798,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) {
uint sb = vgpu->hwRing();
static const uint WaveSizeLimit = ((1 << 21) - 256);
const uint threadSizeLimit =
- WaveSizeLimit / properties().gfxipProperties.shaderCore.wavefrontSize;
+ WaveSizeLimit / info().wavefrontWidth_;
if (regNum > threadSizeLimit) {
LogError("Requested private memory is bigger than HW supports!");
regNum = threadSizeLimit;
@@ -1819,7 +1821,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) {
uint32_t numTotalCUs = info().maxComputeUnits_;
uint32_t numMaxWaves = settings().numScratchWavesPerCu_ * numTotalCUs;
scratchBuf->size_ =
- static_cast<uint64_t>(properties().gfxipProperties.shaderCore.wavefrontSize) *
+ static_cast<uint64_t>(info().wavefrontWidth_) *
scratchBuf->regNum_ * numMaxWaves * sizeof(uint32_t);
scratchBuf->size_ = std::min(scratchBuf->size_, info().maxMemAllocSize_);
scratchBuf->size_ = std::min(scratchBuf->size_, uint64_t(3 * Gi));
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index 526e5199a0..a99b9bb93f 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -406,7 +406,7 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
workGroupInfo_.availableVGPRs_ =
dev().properties().gfxipProperties.shaderCore.numAvailableVgprs;
workGroupInfo_.preferredSizeMultiple_ = workGroupInfo_.wavefrontPerSIMD_ =
- dev().properties().gfxipProperties.shaderCore.wavefrontSize;
+ dev().info().wavefrontWidth_;
} else {
workGroupInfo_.availableLDSSize_ = 64 * Ki;
workGroupInfo_.availableSGPRs_ = 104;
@@ -652,8 +652,7 @@ bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
}
// Copy wavefront size
- workGroupInfo_.wavefrontSize_ =
- prog().isNull() ? 64 : dev().properties().gfxipProperties.shaderCore.wavefrontSize;
+ workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
// Find total workgroup size
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *
@@ -1533,8 +1532,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
}
// Copy wavefront size
- workGroupInfo_.wavefrontSize_ =
- prog().isNull() ? 64 : dev().properties().gfxipProperties.shaderCore.wavefrontSize;
+ workGroupInfo_.wavefrontSize_ = dev().info().wavefrontWidth_;
// Find total workgroup size
if (workGroupInfo_.compileSize_[0] != 0) {
workGroupInfo_.size_ = workGroupInfo_.compileSize_[0] * workGroupInfo_.compileSize_[1] *