diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp index f181218413..a800bf2839 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp @@ -958,7 +958,8 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments( size_t offset; switch (it.info_.oclObject_) { case amd::KernelParameterDescriptor::HiddenNone: - //WriteAqlArgAt(aqlArgBuf, &zero, it.size_, it.offset_); + // void* zero = 0; + // WriteAqlArgAt(const_cast
(parameters), &zero, it.size_, it.offset_); break; case amd::KernelParameterDescriptor::HiddenGlobalOffsetX: offset = sizes.offset()[0]; @@ -1001,8 +1002,10 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments( } // Load all kernel arguments - WriteAqlArgAt(aqlArgBuf, parameters, signature.paramsSize(), 0); - assert(argsBufferSize() == amd::alignUp(signature.paramsSize(), 16) && + WriteAqlArgAt(aqlArgBuf, parameters, argsBufferSize(), 0); + // Note: In a case of structs the size won't match, + // since HSAIL compiler expects a reference... + assert(argsBufferSize() <= signature.paramsSize() && "A mismatch of sizes of arguments between compiler and runtime!"); //hsa_kernel_dispatch_packet_t disp; diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index 7c45951176..d441dc981b 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -3115,7 +3115,8 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p else if (desc.type_ == T_VOID) { if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ReferenceObject) { // Copy the current structure into CB1 - size_t gpuPtr = static_cast(cb(1)->UploadDataToHw(params, desc.size_)); + size_t gpuPtr = static_cast(cb(1)->UploadDataToHw( + params + desc.offset_, desc.size_)); // Then use a pointer in aqlArgBuffer to CB1 const auto it = hsaKernel.patch().find(desc.offset_); // Patch the GPU VA address in the original arguments diff --git a/projects/clr/rocclr/runtime/platform/kernel.cpp b/projects/clr/rocclr/runtime/platform/kernel.cpp index a4616033aa..d710170fe8 100644 --- a/projects/clr/rocclr/runtime/platform/kernel.cpp +++ b/projects/clr/rocclr/runtime/platform/kernel.cpp @@ -295,7 +295,8 @@ KernelSignature::KernelSignature(const std::vector& p lastSize = alignUp(lastSize, sizeof(uint64_t)); } paramsSize_ = params[last].offset_ + lastSize; - paramsSize_ = alignUp(paramsSize_, sizeof(intptr_t)); + // 16 bytes is the current HW alignment for the arguments + paramsSize_ = alignUp(paramsSize_, 16); } } } // namespace amd