From e80fefab9635fe81ff9e23ad75d75babc17071ff Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 14 Jun 2019 18:36:26 -0400
Subject: [PATCH] P4 to Git Change 1838371 by gandryey@gera-win10 on 2019/06/14
18:28:24
SWDEV-79445 - OCL generic changes and code clean-up
Align the scratch buffer size to match the HW requirement. The HSAIL path already gets this alignment in SC, but the LC path has no such alignment, so the runtime applies it.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#79 edit
[ROCm/clr commit: 0225412522c7a956e67001e056e7cb60699140ea]
---
projects/clr/rocclr/runtime/device/pal/palkernel.cpp | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index 7a4823ddaa..de15c52a93 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -29,8 +29,13 @@ namespace pal {
void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
const uint32_t groupSegmentSize, const uint16_t numSGPRs,
const uint16_t numVGPRs) {
- workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint);
- workGroupInfo_.privateMemSize_ = privateSegmentSize;
+ workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint32_t);
+ // Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave
+ constexpr uint32_t ScratchRegAlignment = 256;
+ workGroupInfo_.scratchRegs_ =
+ amd::alignUp((workGroupInfo_.scratchRegs_ * dev().info().wavefrontWidth_),
+ ScratchRegAlignment) / dev().info().wavefrontWidth_;
+ workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
workGroupInfo_.localMemSize_ = workGroupInfo_.usedLDSSize_ = groupSegmentSize;
workGroupInfo_.usedSGPRs_ = numSGPRs;
workGroupInfo_.usedStackSize_ = 0;