From 4e580ef2a023b27e4155697af9252259fb06bfdf Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 13 Jun 2018 12:01:00 -0400
Subject: [PATCH] P4 to Git Change 1567789 by gandryey@gera-w8 on 2018/06/13
11:45:56
SWDEV-79445 - OCL generic changes and code clean-up
Following CL#1567428. Fix "pass by reference" logic:
- Make sure we add the offset to the struct location
- Adjust assert condition
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#54 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#108 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#31 edit
[ROCm/clr commit: ec029f4a400feb011e8c18b4bf97c89a5f4e53f1]
---
projects/clr/rocclr/runtime/device/pal/palkernel.cpp | 9 ++++++---
projects/clr/rocclr/runtime/device/pal/palvirtual.cpp | 3 ++-
projects/clr/rocclr/runtime/platform/kernel.cpp | 3 ++-
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index f181218413..a800bf2839 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -958,7 +958,8 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
size_t offset;
switch (it.info_.oclObject_) {
case amd::KernelParameterDescriptor::HiddenNone:
- //WriteAqlArgAt(aqlArgBuf, &zero, it.size_, it.offset_);
+ // void* zero = 0;
+ // WriteAqlArgAt(const_cast<address>(parameters), &zero, it.size_, it.offset_);
break;
case amd::KernelParameterDescriptor::HiddenGlobalOffsetX:
offset = sizes.offset()[0];
@@ -1001,8 +1002,10 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
}
// Load all kernel arguments
- WriteAqlArgAt(aqlArgBuf, parameters, signature.paramsSize(), 0);
- assert(argsBufferSize() == amd::alignUp(signature.paramsSize(), 16) &&
+ WriteAqlArgAt(aqlArgBuf, parameters, argsBufferSize(), 0);
+ // Note: In a case of structs the size won't match,
+ // since HSAIL compiler expects a reference...
+ assert(argsBufferSize() <= signature.paramsSize() &&
"A mismatch of sizes of arguments between compiler and runtime!");
//hsa_kernel_dispatch_packet_t disp;
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 7c45951176..d441dc981b 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -3115,7 +3115,8 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
else if (desc.type_ == T_VOID) {
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ReferenceObject) {
// Copy the current structure into CB1
- size_t gpuPtr = static_cast<size_t>(cb(1)->UploadDataToHw(params, desc.size_));
+ size_t gpuPtr = static_cast<size_t>(cb(1)->UploadDataToHw(
+ params + desc.offset_, desc.size_));
// Then use a pointer in aqlArgBuffer to CB1
const auto it = hsaKernel.patch().find(desc.offset_);
// Patch the GPU VA address in the original arguments
diff --git a/projects/clr/rocclr/runtime/platform/kernel.cpp b/projects/clr/rocclr/runtime/platform/kernel.cpp
index a4616033aa..d710170fe8 100644
--- a/projects/clr/rocclr/runtime/platform/kernel.cpp
+++ b/projects/clr/rocclr/runtime/platform/kernel.cpp
@@ -295,7 +295,8 @@ KernelSignature::KernelSignature(const std::vector& p
lastSize = alignUp(lastSize, sizeof(uint64_t));
}
paramsSize_ = params[last].offset_ + lastSize;
- paramsSize_ = alignUp(paramsSize_, sizeof(intptr_t));
+ // 16 bytes is the current HW alignment for the arguments
+ paramsSize_ = alignUp(paramsSize_, 16);
}
}
} // namespace amd