P4 to Git Change 1838371 by gandryey@gera-win10 on 2019/06/14 18:28:24

SWDEV-79445 - OCL generic changes and code clean-up
	Align the scratch buffer size to match the HW requirement. The HSAIL path has an alignment in SC, but the LC path doesn't have any.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#79 edit


[ROCm/clr commit: 0225412522]
Bu işleme şunda yer alıyor:
foreman
2019-06-14 18:36:26 -04:00
ebeveyn 980e5279bc
işleme e80fefab96
+7 -2
Dosyayı Görüntüle
@@ -29,8 +29,13 @@ namespace pal {
void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
const uint32_t groupSegmentSize, const uint16_t numSGPRs,
const uint16_t numVGPRs) {
workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint);
workGroupInfo_.privateMemSize_ = privateSegmentSize;
workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint32_t);
// Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave
constexpr uint32_t ScratchRegAlignment = 256;
workGroupInfo_.scratchRegs_ =
amd::alignUp((workGroupInfo_.scratchRegs_ * dev().info().wavefrontWidth_),
ScratchRegAlignment) / dev().info().wavefrontWidth_;
workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
workGroupInfo_.localMemSize_ = workGroupInfo_.usedLDSSize_ = groupSegmentSize;
workGroupInfo_.usedSGPRs_ = numSGPRs;
workGroupInfo_.usedStackSize_ = 0;