From 4e580ef2a023b27e4155697af9252259fb06bfdf Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 13 Jun 2018 12:01:00 -0400 Subject: [PATCH] P4 to Git Change 1567789 by gandryey@gera-w8 on 2018/06/13 11:45:56 SWDEV-79445 - OCL generic changes and code clean-up Following CL#1567428. Fix "pass by reference" logic: - Make sure we add the offset to the struct location - Adjust assert condition Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#54 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#108 edit ... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#31 edit [ROCm/clr commit: ec029f4a400feb011e8c18b4bf97c89a5f4e53f1] --- projects/clr/rocclr/runtime/device/pal/palkernel.cpp | 9 ++++++--- projects/clr/rocclr/runtime/device/pal/palvirtual.cpp | 3 ++- projects/clr/rocclr/runtime/platform/kernel.cpp | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp index f181218413..a800bf2839 100644 --- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp @@ -958,7 +958,8 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments( size_t offset; switch (it.info_.oclObject_) { case amd::KernelParameterDescriptor::HiddenNone: - //WriteAqlArgAt(aqlArgBuf, &zero, it.size_, it.offset_); + // void* zero = 0; + // WriteAqlArgAt(const_cast
<address>(parameters), &zero, it.size_, it.offset_); break; case amd::KernelParameterDescriptor::HiddenGlobalOffsetX: offset = sizes.offset()[0]; @@ -1001,8 +1002,10 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments( } // Load all kernel arguments - WriteAqlArgAt(aqlArgBuf, parameters, signature.paramsSize(), 0); - assert(argsBufferSize() == amd::alignUp(signature.paramsSize(), 16) && + WriteAqlArgAt(aqlArgBuf, parameters, argsBufferSize(), 0); + // Note: In a case of structs the size won't match, + // since HSAIL compiler expects a reference... + assert(argsBufferSize() <= signature.paramsSize() && "A mismatch of sizes of arguments between compiler and runtime!"); //hsa_kernel_dispatch_packet_t disp; diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index 7c45951176..d441dc981b 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -3115,7 +3115,8 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p else if (desc.type_ == T_VOID) { if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ReferenceObject) { // Copy the current structure into CB1 - size_t gpuPtr = static_cast<size_t>(cb(1)->UploadDataToHw(params, desc.size_)); + size_t gpuPtr = static_cast<size_t>(cb(1)->UploadDataToHw( + params + desc.offset_, desc.size_)); // Then use a pointer in aqlArgBuffer to CB1 const auto it = hsaKernel.patch().find(desc.offset_); // Patch the GPU VA address in the original arguments diff --git a/projects/clr/rocclr/runtime/platform/kernel.cpp b/projects/clr/rocclr/runtime/platform/kernel.cpp index a4616033aa..d710170fe8 100644 --- a/projects/clr/rocclr/runtime/platform/kernel.cpp +++ b/projects/clr/rocclr/runtime/platform/kernel.cpp @@ -295,7 +295,8 @@ KernelSignature::KernelSignature(const std::vector& p lastSize = alignUp(lastSize, sizeof(uint64_t)); } paramsSize_ = params[last].offset_ + lastSize; - 
paramsSize_ = alignUp(paramsSize_, sizeof(intptr_t)); + // 16 bytes is the current HW alignment for the arguments + paramsSize_ = alignUp(paramsSize_, 16); } } } // namespace amd