P4 to Git Change 1567789 by gandryey@gera-w8 on 2018/06/13 11:45:56
SWDEV-79445 - OCL generic changes and code clean-up
Follow-up to CL#1567428. Fix the "pass by reference" logic:
- Make sure the argument offset is added to the struct location before upload
- Adjust the assert condition on the argument buffer size
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#54 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#108 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#31 edit
[ROCm/clr commit: ec029f4a40]
This commit is contained in:
@@ -958,7 +958,8 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
|
||||
size_t offset;
|
||||
switch (it.info_.oclObject_) {
|
||||
case amd::KernelParameterDescriptor::HiddenNone:
|
||||
- //WriteAqlArgAt(aqlArgBuf, &zero, it.size_, it.offset_);
|
||||
+ // void* zero = 0;
|
||||
+ // WriteAqlArgAt(const_cast<address>(parameters), &zero, it.size_, it.offset_);
|
||||
break;
|
||||
case amd::KernelParameterDescriptor::HiddenGlobalOffsetX:
|
||||
offset = sizes.offset()[0];
|
||||
@@ -1001,8 +1002,10 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
|
||||
}
|
||||
|
||||
// Load all kernel arguments
|
||||
- WriteAqlArgAt(aqlArgBuf, parameters, signature.paramsSize(), 0);
|
||||
- assert(argsBufferSize() == amd::alignUp(signature.paramsSize(), 16) &&
|
||||
+ WriteAqlArgAt(aqlArgBuf, parameters, argsBufferSize(), 0);
|
||||
+ // Note: In a case of structs the size won't match,
|
||||
+ // since HSAIL compiler expects a reference...
|
||||
+ assert(argsBufferSize() <= signature.paramsSize() &&
|
||||
"A mismatch of sizes of arguments between compiler and runtime!");
|
||||
|
||||
//hsa_kernel_dispatch_packet_t disp;
|
||||
|
||||
@@ -3115,7 +3115,8 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
else if (desc.type_ == T_VOID) {
|
||||
if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ReferenceObject) {
|
||||
// Copy the current structure into CB1
|
||||
- size_t gpuPtr = static_cast<size_t>(cb(1)->UploadDataToHw(params, desc.size_));
|
||||
+ size_t gpuPtr = static_cast<size_t>(cb(1)->UploadDataToHw(
|
||||
+ params + desc.offset_, desc.size_));
|
||||
// Then use a pointer in aqlArgBuffer to CB1
|
||||
const auto it = hsaKernel.patch().find(desc.offset_);
|
||||
// Patch the GPU VA address in the original arguments
|
||||
|
||||
@@ -295,7 +295,8 @@ KernelSignature::KernelSignature(const std::vector<KernelParameterDescriptor>& p
|
||||
lastSize = alignUp(lastSize, sizeof(uint64_t));
|
||||
}
|
||||
paramsSize_ = params[last].offset_ + lastSize;
|
||||
- paramsSize_ = alignUp(paramsSize_, sizeof(intptr_t));
|
||||
+ // 16 bytes is the current HW alignment for the arguments
|
||||
+ paramsSize_ = alignUp(paramsSize_, 16);
|
||||
}
|
||||
}
|
||||
} // namespace amd
|
||||
|
||||
Reference in a new issue
Block a user