SWDEV-475938 - Update dynamic stack in submit kernel internal.

Change-Id: I816bf9cfe8aaac5486ff3b719dbdc4f4d6134e01
This commit is contained in:
Jaydeep Patel
2024-07-26 17:58:58 +00:00
committed by Jaydeepkumar Patel
parent e6a5c81221
commit 9c90bc43a5
3 changed files with 13 additions and 4 deletions
+1 -1
View File
@@ -909,7 +909,7 @@ bool Device::UpdateStackSize(uint64_t stackSize) {
if (stackSize > kStackSize) {
return false;
}
stack_size_ = stackSize;
stack_size_ = amd::alignUp(stackSize, 16);
return true;
}
+2 -1
View File
@@ -2277,7 +2277,8 @@ bool Device::validateKernel(const amd::Kernel& kernel, const device::VirtualDevi
bool coop_groups) {
// Find the number of scratch registers used in the kernel
const device::Kernel* devKernel = kernel.getDeviceKernel(*this);
uint regNum = static_cast<uint>(devKernel->workGroupInfo()->scratchRegs_);
uint32_t regNum = static_cast<uint32_t>(devKernel->workGroupInfo()->scratchRegs_);
regNum = std::max<uint32_t>(static_cast<uint32_t>(stack_size_) / sizeof(uint32_t), regNum);
const VirtualGPU* vgpu = static_cast<const VirtualGPU*>(vdev);
if (!allocScratch(regNum, vgpu, devKernel->workGroupInfo()->usedVGPRs_)) {
+10 -2
View File
@@ -2685,16 +2685,24 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes,
LogError("Couldn't load kernel arguments");
return false;
}
// The dynamic call stack size is taken into account when the private segment size and
// scratch regs are calculated in LightningKernel::postLoad(). Because postLoad() is not
// called during hipModuleLaunchKernel (unlike hipLaunchKernel/hipLaunchKernelGGL), the
// updated value is passed to the dispatch packet here instead.
size_t privateMemSize = hsaKernel.spillSegSize();
if ((hsaKernel.workGroupInfo()->usedStackSize_ & 0x1) == 0x1) {
privateMemSize = std::max<uint32_t>(static_cast<uint32_t>(device().StackSize()),
hsaKernel.workGroupInfo()->scratchRegs_ * sizeof(uint32_t)) ;
}
// Set up the dispatch information
Pal::DispatchAqlParams dispatchParam = {};
dispatchParam.pAqlPacket = aqlPkt;
if (hsaKernel.workGroupInfo()->scratchRegs_ > 0) {
if (privateMemSize > 0) {
const Device::ScratchBuffer* scratch = dev().scratch(hwRing());
dispatchParam.scratchAddr = scratch->memObj_->vmAddress();
dispatchParam.scratchSize = scratch->size_;
dispatchParam.scratchOffset = scratch->offset_;
dispatchParam.workitemPrivateSegmentSize = hsaKernel.spillSegSize();
dispatchParam.workitemPrivateSegmentSize = privateMemSize;
}
dispatchParam.pCpuAqlCode = hsaKernel.cpuAqlCode();
dispatchParam.hsaQueueVa = hsaQueueMem_->vmAddress();