diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp index 6abfd86ba6..f35ac18b7a 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp @@ -420,7 +420,6 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) { return false; } - assert((akc->workitem_private_segment_byte_size & 3) == 0 && "Scratch must be DWORD aligned"); workGroupInfo_.scratchRegs_ = amd::alignUp(akc->workitem_private_segment_byte_size, 16) / sizeof(uint); workGroupInfo_.privateMemSize_ = akc->workitem_private_segment_byte_size; diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp index 7ffc144c8b..d62cabb436 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp @@ -161,7 +161,7 @@ class HSAILKernel : public device::Kernel { size_t argsBufferSize() const { return cpuAqlCode_->kernarg_segment_byte_size; } //! Returns spill reg size per workitem - int spillSegSize() const { return cpuAqlCode_->workitem_private_segment_byte_size; } + int spillSegSize() const { return amd::alignUp(cpuAqlCode_->workitem_private_segment_byte_size, sizeof(uint32_t)); } //! Returns TRUE if kernel uses dynamic parallelism bool dynamicParallelism() const { return (flags_.dynamicParallelism_) ? true : false; }