P4 to Git Change 1150348 by rayxiao@alit_opencl_rayxiao on 2015/05/13 10:49:22

EPR #396242 - Fix for the CPU device argument alignment bug.

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/edg/src/amd_ocl_attribute.c#24 edit
... //depot/stg/opencl/drivers/opencl/compiler/edg/src/cmd_line.c#86 edit
... //depot/stg/opencl/drivers/opencl/compiler/edg/src/il.c#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#64 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpukernel.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#1 add
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#65 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#231 edit
... //depot/stg/opencl/drivers/opencl/tools/runocl/options.c#11 edit
This commit is contained in:
foreman
2015-05-13 12:01:50 -04:00
parent ca22504f20
commit 70aabc5325
6 changed files with 462 additions and 10 deletions
+18 -4
View file
@@ -14,6 +14,7 @@
#include "thread/thread.hpp"
#include "os/os.hpp"
#include "utils/util.hpp"
#include "utils/options.hpp"
#include <amdocl/cl_kernel.h>
@@ -302,7 +303,6 @@ NDRangeKernelBatch::patchParameters(
size_t alignment = cpuKernel.getArgAlignment(i);
effectiveOffset = amd::alignUp(effectiveOffset, std::min(alignment, size_t(16)));
param = params + effectiveOffset;
if (desc.size_ == 0) {
// __local memory parameter
localMemPtr = amd::alignUp(localMemPtr, sizeof(cl_long16));
@@ -362,10 +362,24 @@ NDRangeKernelBatch::patchParameters(
*reinterpret_cast<uint32_t*>(param) = (uint32_t)samplerArg->state();
}
else {
::memcpy(param, cmdParam, desc.size_);
//Using HCtoDCmap
HCtoDCmap arg_map = cpuKernel.getHCtoDCmap(i);
unsigned int arg_offset = effectiveOffset;
int err_code = 0;
int inStruct = 0;
int sys_64bit = LP64_SWITCH(0, 1); // Mapping only required for 32 bit targets
if (CPU_USE_ALIGNMENT_MAP == 0 && !sys_64bit) {
effectiveOffset += arg_map.copy_params(param, cmdParam, arg_offset, err_code, inStruct);
if (err_code) {
return false;
}
prmSize = arg_map.dc_size;
}
else {
::memcpy(param, cmdParam, desc.size_);
}
}
effectiveOffset += cpuKernel.getArgSize(i);
effectiveOffset += prmSize;
}
localMemPtr = amd::alignUp(localMemPtr, sizeof(cl_long16));