P4 to Git Change 1838371 by gandryey@gera-win10 on 2019/06/14 18:28:24
SWDEV-79445 - OCL generic changes and code clean-up
Align the scratch buffer size to match the HW requirement. The HSAIL path already gets this alignment from SC, but the LC path has none.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#79 edit
[ROCm/clr commit: 0225412522]
Bu işleme şunda yer alıyor:
@@ -29,8 +29,13 @@ namespace pal {
|
||||
void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
|
||||
const uint32_t groupSegmentSize, const uint16_t numSGPRs,
|
||||
const uint16_t numVGPRs) {
|
||||
workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint);
|
||||
workGroupInfo_.privateMemSize_ = privateSegmentSize;
|
||||
workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint32_t);
|
||||
// Make sure runtime matches HW alignment, which is 256 scratch regs (DWORDs) per wave
|
||||
constexpr uint32_t ScratchRegAlignment = 256;
|
||||
workGroupInfo_.scratchRegs_ =
|
||||
amd::alignUp((workGroupInfo_.scratchRegs_ * dev().info().wavefrontWidth_),
|
||||
ScratchRegAlignment) / dev().info().wavefrontWidth_;
|
||||
workGroupInfo_.privateMemSize_ = workGroupInfo_.scratchRegs_ * sizeof(uint32_t);
|
||||
workGroupInfo_.localMemSize_ = workGroupInfo_.usedLDSSize_ = groupSegmentSize;
|
||||
workGroupInfo_.usedSGPRs_ = numSGPRs;
|
||||
workGroupInfo_.usedStackSize_ = 0;
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle