Files
rocm-systems/rocclr/runtime/device/cpu/cpuprogram.cpp
T
foreman 465c1c0287 P4 to Git Change 1398097 by lmoriche@lmoriche_opencl_dev2 on 2017/04/13 13:01:56
SWDEV-102733 - [OCL-LC-ROCm] Cmake build Write CMakeLists.txt to enable building with and without the DK environment
	- Change the coding convention of the runtime files. Use Google's Style (https://google.github.io/styleguide/cppguide.html).

Affected files ...

... //depot/stg/opencl/drivers/opencl/.clang-format#1 add
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_agent_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#53 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_counter.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#7 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#61 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_event.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#53 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd_amd.h#18 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#24 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.h#4 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.h#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_object.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#41 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sampler.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_semaphore_amd.h#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.h#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blit.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blit.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#66 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#280 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#96 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpufeat.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpukernel.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cputables.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#209 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#284 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#156 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudefs.hpp#147 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#567 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#163 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#318 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#131 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#50 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#44 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#232 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#69 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#238 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#37 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#350 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#98 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputrap.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#410 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#140 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsched.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltrap.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/mesa_glinterop.h#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdefs.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#64 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocregisters.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#78 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#83 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/counter.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/interop.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#127 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#100 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/object.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/object.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#86 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/threadtrace.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/atomic.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/top.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/concurrent.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#271 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/util.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/versions.hpp#2150 edit
2017-04-13 13:56:38 -04:00

1278 lines
39 KiB
C++

//
// Copyright 2011 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/cpu/cpuprogram.hpp"
#include "device/cpu/cpudevice.hpp"
#include "device/cpu/cpukernel.hpp"
#include "platform/program.hpp"
#include "utils/options.hpp"
#include "os/os.hpp"
#include <algorithm>
#include <functional>
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#if defined(_WIN32)
#include <windows.h>
#endif
// amdrt.o
#if defined(WITH_ONLINE_COMPILER) && !defined(_LP64) && !defined(ATI_ARCH_ARM)
#include "amdrt.inc"
#endif
// CLC_IN_PROCESS_CHANGE
extern int openclFrontEnd(const char* cmdline, std::string*, std::string* typInfo = NULL);
namespace cpu {
// Returns true when |type| is one of the single-element value types
// (char, short, int, long, float, double or pointer); false for every
// other clk_value_type_t value.
static inline bool isScalar(clk_value_type_t type) {
  return type == T_CHAR || type == T_SHORT || type == T_INT || type == T_LONG ||
      type == T_FLOAT || type == T_DOUBLE || type == T_POINTER;
}
// Translates the address space recorded in a parameter descriptor into the
// matching CL_KERNEL_ARG_ADDRESS_* value. Any space other than local,
// constant or global is reported as private.
static cl_kernel_arg_address_qualifier getParamAddressQualifier(
    const clk_parameter_descriptor_t* desc) {
  cl_kernel_arg_address_qualifier result = CL_KERNEL_ARG_ADDRESS_PRIVATE;
  switch (desc->space) {
    case A_LOCAL:
      result = CL_KERNEL_ARG_ADDRESS_LOCAL;
      break;
    case A_CONSTANT:
      result = CL_KERNEL_ARG_ADDRESS_CONSTANT;
      break;
    case A_GLOBAL:
      result = CL_KERNEL_ARG_ADDRESS_GLOBAL;
      break;
    default:
      break;
  }
  return result;
}
// Builds the CL_KERNEL_ARG_TYPE_* bit set for a parameter descriptor.
//
// A descriptor flagged Q_PIPE reports CL_KERNEL_ARG_TYPE_PIPE alone; every
// other qualifier is discarded in that case. Otherwise const (set either by
// the constant address space or by Q_CONST), restrict and volatile are OR-ed
// together.
static cl_kernel_arg_type_qualifier getParamTypeQualifier(const clk_parameter_descriptor_t* desc) {
  if ((desc->qualifier & Q_PIPE) != 0) {
    return CL_KERNEL_ARG_TYPE_PIPE;
  }

  cl_kernel_arg_type_qualifier bits = CL_KERNEL_ARG_TYPE_NONE;
  const bool isConst = (desc->space == A_CONSTANT) || ((desc->qualifier & Q_CONST) != 0);
  if (isConst) {
    bits |= CL_KERNEL_ARG_TYPE_CONST;
  }
  if ((desc->qualifier & Q_RESTRICT) != 0) {
    bits |= CL_KERNEL_ARG_TYPE_RESTRICT;
  }
  if ((desc->qualifier & Q_VOLATILE) != 0) {
    bits |= CL_KERNEL_ARG_TYPE_VOLATILE;
  }
  return bits;
}
// Maps the Q_READ / Q_WRITE bits of a parameter descriptor onto the
// corresponding CL_KERNEL_ARG_ACCESS_* value. When neither bit pattern
// matches, CL_KERNEL_ARG_ACCESS_NONE is returned.
static cl_kernel_arg_access_qualifier getParamAccessQualifier(
    const clk_parameter_descriptor_t* desc) {
  const uint rwBits = desc->qualifier & (Q_READ | Q_WRITE);
  cl_kernel_arg_access_qualifier result = CL_KERNEL_ARG_ACCESS_NONE;
  switch (rwBits) {
    case (Q_READ | Q_WRITE):
      result = CL_KERNEL_ARG_ACCESS_READ_WRITE;
      break;
    case Q_WRITE:
      result = CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
      break;
    case Q_READ:
      result = CL_KERNEL_ARG_ACCESS_READ_ONLY;
      break;
    default:
      break;
  }
  return result;
}
// Returns the size in bytes of a non-struct kernel argument of the given type.
//
// cpuLayer   selects between the two layouts this file computes: when true a
//            local-address argument occupies sizeof(void*) and a sampler
//            sizeof(uint32_t); when false they occupy 0 and sizeof(cl_sampler).
// type       value type of the argument.
// qualifier  address qualifier; CL_KERNEL_ARG_ADDRESS_LOCAL overrides |type|.
//
// An unrecognised type triggers ShouldNotReachHere() and yields 0.
static size_t getScalarParamSize(bool cpuLayer, const clk_value_type_t type,
                                 cl_kernel_arg_address_qualifier qualifier) {
  // Local arguments are sized independently of their declared type.
  if (qualifier == CL_KERNEL_ARG_ADDRESS_LOCAL) {
    return cpuLayer ? sizeof(void*) : 0;
  }

  switch (type) {
    case T_CHAR:
      return 1;

    case T_CHAR2:
    case T_SHORT:
      return 2;

    case T_CHAR3:
    case T_CHAR4:
    case T_SHORT2:
    case T_INT:
    case T_FLOAT:
      return 4;

    case T_SAMPLER:
      return cpuLayer ? sizeof(uint32_t) : sizeof(cl_sampler);

    case T_CHAR8:
    case T_SHORT3:
    case T_SHORT4:
    case T_INT2:
    case T_FLOAT2:
    case T_LONG:
    case T_DOUBLE:
      return 8;

    case T_CHAR16:
    case T_SHORT8:
    case T_INT3:
    case T_INT4:
    case T_FLOAT3:
    case T_FLOAT4:
    case T_LONG2:
    case T_DOUBLE2:
      return 16;

    case T_SHORT16:
    case T_INT8:
    case T_FLOAT8:
    case T_LONG3:
    case T_LONG4:
    case T_DOUBLE3:
    case T_DOUBLE4:
      return 32;

    case T_INT16:
    case T_FLOAT16:
    case T_LONG8:
    case T_DOUBLE8:
      return 64;

    case T_LONG16:
    case T_DOUBLE16:
      return 128;

    case T_POINTER:
    case T_VOID:
      return sizeof(void*);

    default:
      ShouldNotReachHere();
      break;
  }
  return 0;
}
// Computes the size and alignment of the kernel argument whose description
// starts at desc[index], recursing into nested aggregates.
//
// cpuLayer   forwarded to getScalarParamSize(); on non-Windows builds it also
//            selects the alignment used for a T_LONG struct member.
// desc       descriptor array being walked.
// index      position of the descriptor to measure.
// qualifier  address qualifier forwarded to getScalarParamSize().
// alignment  out: alignment computed for the measured element.
// index_out  out: index of the first descriptor after the measured element
//            (for an aggregate, the one after its terminating T_VOID).
//
// Returns the element size in bytes.
static size_t getParamSizeImpl(bool cpuLayer, const clk_parameter_descriptor_t* desc,
unsigned index, cl_kernel_arg_address_qualifier qualifier,
size_t* alignment, unsigned* index_out) {
size_t size = 0;
// T_STRUCT and T_PAD both open an aggregate whose members follow and which
// is closed by a T_VOID descriptor.
if (desc[index].type == T_STRUCT || desc[index].type == T_PAD) {
size_t maxAlignment = 0;
size_t structSize = 0;
size_t structAlignment = 0;
// Step past the opening descriptor to the first member.
index++;
while (desc[index].type != T_VOID) {
size_t elementAlignment = 0;
size_t elementSize =
getParamSizeImpl(cpuLayer, desc, index, qualifier, &elementAlignment, index_out);
#if defined(_WIN32)
maxAlignment = std::max(maxAlignment, elementAlignment);
#else
// In Linux, the alignment of long field is 4 for GCC,
// but it is 8 on LLVM side
// NOTE(review): this branch is taken on every non-Windows build, not only
// Linux; LP64_SWITCH(4, 8) presumably picks 4 on 32-bit and 8 on LP64 - confirm.
if (desc[index].type == T_LONG)
structAlignment = cpuLayer ? LP64_SWITCH(4, 8) : 8;
else
structAlignment = std::max(maxAlignment, elementAlignment);
maxAlignment = std::max(maxAlignment, structAlignment);
#endif
// The recursive call reported where the next member starts.
index = *index_out;
// Place the member at its own alignment, capped at 16 bytes. Note that the
// member's reported alignment is used here, not the adjusted structAlignment.
structSize = amd::alignUp(structSize, std::min(elementAlignment, size_t(16))) + elementSize;
}
// Skip the T_VOID that closes the aggregate.
*index_out = index + 1;
*alignment = maxAlignment;
// Pad the aggregate out to its largest member alignment (capped at 16).
size = amd::alignUp(structSize, std::min(maxAlignment, size_t(16)));
} else {
size = getScalarParamSize(cpuLayer, desc[index].type, qualifier);
// double and long get fixed alignments; every other type is aligned to its
// own size.
if (desc[index].type == T_DOUBLE) {
#if defined(_WIN32)
*alignment = 8;
#else
*alignment = LP64_SWITCH(4, 8);
#endif
} else if (desc[index].type == T_LONG) {
*alignment = 8;
} else {
*alignment = size;
}
*index_out = index + 1;
}
return size;
}
// Returns the size in bytes of the kernel argument described by |desc| and
// stores its alignment in |*alignment|. Thin wrapper around
// getParamSizeImpl() that starts at the first descriptor and discards the
// "next descriptor" index.
size_t getParamSize(bool cpuLayer, const clk_parameter_descriptor_t* desc,
                    cl_kernel_arg_address_qualifier qualifier, size_t* alignment) {
  unsigned nextIndex = 0;  // required by the helper; not needed by callers
  const size_t bytes = getParamSizeImpl(cpuLayer, desc, 0, qualifier, alignment, &nextIndex);
  return bytes;
}
static unsigned getNumTypeDescs(const clk_parameter_descriptor_t* desc) {
int numStruct = 0;
unsigned i;
for (i = 0; desc[i].type != T_VOID || numStruct > 0; ++i) {
if (desc[i].type == T_STRUCT || desc[i].type == T_PAD) numStruct++;
if (desc[i].type == T_VOID) numStruct--;
}
return i + 1;
}
// Skips any leading T_STRUCT descriptors and returns the type of the first
// descriptor that is not T_STRUCT.
static clk_value_type_t getFirstScalarType(const clk_parameter_descriptor_t* desc) {
  const clk_parameter_descriptor_t* cursor = desc;
  while (cursor->type == T_STRUCT) {
    ++cursor;
  }
  return cursor->type;
}
// Returns the value type of the argument starting at |desc|.
//
// desc_out   out: pointer to the first descriptor after this argument.
// type_name  out: the name field of the argument's last descriptor.
//
// For a struct argument the first non-struct member type is returned, which
// preserves the historical behaviour.
static const clk_value_type_t getParamType(const clk_parameter_descriptor_t* desc,
                                           const clk_parameter_descriptor_t** desc_out,
                                           const char** type_name) {
  const unsigned count = getNumTypeDescs(desc);
  const clk_parameter_descriptor_t* const pastEnd = desc + count;
  *desc_out = pastEnd;
  *type_name = (pastEnd - 1)->name;
  return getFirstScalarType(desc);
}
// Builds the amd::KernelParameterDescriptor for the argument starting at
// |desc|.
//
// cpuLayer   forwarded to getParamSize().
// offset_in  running byte offset before this argument; the result's offset_
//            is this value rounded up to the argument's alignment.
// desc_out   out: first descriptor after this argument.
//
// An argument whose computed size is 0 is aligned as if it were a cl_mem
// handle; all alignments are capped at 16 bytes.
static amd::KernelParameterDescriptor getParam(bool cpuLayer,
                                               const clk_parameter_descriptor_t* desc,
                                               size_t offset_in,
                                               const clk_parameter_descriptor_t** desc_out) {
  amd::KernelParameterDescriptor result;
  result.name_ = desc->name;
  result.type_ = getParamType(desc, desc_out, &(result.typeName_));
  result.addressQualifier_ = getParamAddressQualifier(desc);
  result.typeQualifier_ = getParamTypeQualifier(desc);
  result.accessQualifier_ = getParamAccessQualifier(desc);

  size_t alignment;
  result.size_ = getParamSize(cpuLayer, desc, result.addressQualifier_, &alignment);

  const size_t boundary = (result.size_ == 0) ? sizeof(cl_mem) : alignment;
  result.offset_ = amd::alignUp(offset_in, std::min(boundary, size_t(16)));
  return result;
}
static bool setKernelInfoCallback(std::string symbol, const void* value, void* data) {
cpu::Program* program = reinterpret_cast<cpu::Program*>(data);
device::Program::kernels_t& kernels = program->kernels();
const char __OpenCL_[] = "__OpenCL_";
const char _kernel[] = "_stub";
const char _data[] = "_metadata";
const char _nature[] = "_nature";
const size_t offset = sizeof(__OpenCL_) - 1;
if (symbol.compare(0, offset, __OpenCL_) != 0) {
return false;
}
size_t suffixPos = symbol.rfind('_');
if (suffixPos == std::string::npos) {
return false;
}
std::string name = symbol.substr(offset, suffixPos - offset);
cpu::Kernel* kernel = reinterpret_cast<cpu::Kernel*>(kernels[name]);
if (NULL == kernel) {
kernel = new Kernel(name);
kernels[name] = kernel;
}
if (symbol.compare(suffixPos, sizeof(_kernel) - 1, _kernel) == 0) {
kernel->setEntryPoint(value);
return true;
} else if (symbol.compare(suffixPos, sizeof(_data) - 1, _data) == 0) {
device::Kernel::parameters_t params;
size_t* recordPtr = (size_t*)value;
size_t* recordEnd = recordPtr + (*recordPtr) / sizeof(size_t);
++recordPtr; // skip struct_length
kernel->setLocalMemSize(*recordPtr++);
kernel->setPreferredSizeMultiple(1);
kernel->setUniformWorkGroupSize(
program->getCompilerOptions()->oVariables->UniformWorkGroupSize);
kernel->setReqdWorkGroupSize(recordPtr[0], recordPtr[1], recordPtr[2]);
recordPtr += 3;
kernel->setWorkGroupSizeHint(recordPtr[0], recordPtr[1], recordPtr[2]);
recordPtr += 3;
const clk_parameter_descriptor_t* desc =
reinterpret_cast<const clk_parameter_descriptor_t*>(recordPtr);
size_t offset = 0;
while (desc->type != T_VOID) {
const clk_parameter_descriptor_t* next_desc = NULL;
amd::KernelParameterDescriptor param = getParam(false, desc, offset, &next_desc);
size_t cpuSize, cpuAlignment;
cpuSize = getParamSize(true, desc, param.addressQualifier_, &cpuAlignment);
kernel->addArg(cpuSize, cpuAlignment);
// Init for HCtoDCmap
unsigned int init_offset = 0;
unsigned int align = 0;
int inStruct = 0;
int end_index = 0;
HCtoDCmap* map_p = new HCtoDCmap(desc, align, 0, init_offset);
map_p->dc_size = map_p->compute_map(desc, map_p->hc_alignment, map_p->dc_alignment,
init_offset, inStruct, end_index);
map_p->align_map(map_p->hc_alignment, map_p->dc_alignment, map_p->hc_size, map_p->dc_size,
inStruct);
if (CPU_USE_ALIGNMENT_MAP == 0) {
kernel->addHCtoDCmap(map_p);
if (map_p->internal_field_map != NULL) {
kernel->addInternalMap(map_p->internal_field_map);
}
} else {
delete (map_p);
}
// End of HCtoDCmap
desc = next_desc;
params.push_back(param);
size_t size = param.size_ == 0 ? sizeof(cl_mem) : param.size_;
#if defined(USE_NATIVE_ABI)
size = amd::alignUp(size, sizeof(size_t));
#endif // USE_NATIVE_ABI
offset = param.offset_ + size;
}
// retrieve vector type hint metadata
const clk_parameter_descriptor_t* vth_desc = NULL;
getParam(false, desc, offset, &vth_desc);
const size_t* vthPtr = reinterpret_cast<const size_t*>(vth_desc);
if (vthPtr < recordEnd && *vthPtr != 0) {
const char* vecTypeHint = reinterpret_cast<const char*>(*vthPtr);
kernel->setVecTypeHint(vecTypeHint);
}
if (kernel->createSignature(params)) {
return true;
}
} else if (symbol.compare(suffixPos, sizeof(_nature) - 1, _nature) == 0) {
uint32_t* recordPtr = (uint32_t*)value;
kernel->nature_ = (uint)recordPtr[0];
kernel->privateSize_ = (uint)recordPtr[1];
return true;
}
return false;
}
// C-string adapter for setKernelInfoCallback(): wraps |symbol| in a
// std::string and forwards the remaining arguments unchanged.
static bool setKernelInfoCallbackCStr(const char* symbol, const void* value, void* data) {
  return setKernelInfoCallback(std::string(symbol), value, data);
}
static bool setSymbolsCallback(std::string symbol, const void* value, void* data) {
device::ClBinary* clbinary = (device::ClBinary*)data;
const char __OpenCL_[] = "__OpenCL_";
const char _stub[] = "_stub";
const char _kernel[] = "_kernel";
const char _data[] = "_metadata";
const size_t offset = sizeof(__OpenCL_) - 1;
if (symbol.compare(0, offset, __OpenCL_) != 0) {
return false;
}
size_t suffixPos = symbol.rfind('_');
if (suffixPos == std::string::npos) {
return false;
}
if ((symbol.compare(suffixPos, sizeof(_stub) - 1, _stub) == 0) ||
(symbol.compare(suffixPos, sizeof(_kernel) - 1, _kernel) == 0) ||
(symbol.compare(suffixPos, sizeof(_data) - 1, _data) == 0)) {
return clbinary->elfOut()->addSymbol(amd::OclElf::DLL, const_cast<char*>(symbol.c_str()), 0,
false);
}
return false;
}
// C-string adapter for setSymbolsCallback(): wraps |symbol| in a std::string
// and forwards the remaining arguments unchanged.
static bool setSymbolsCallbackCStr(const char* symbol, const void* value, void* data) {
  return setSymbolsCallback(std::string(symbol), value, data);
}
// Some helper functions to simplify testing the disassembler
// Bundle of everything the disassembly symbol callback needs, passed to it
// through the opaque |data| pointer.
struct DisasData {
  std::stringstream* asmstream;  // receives the disassembly text
  aclJITObjectImage image;       // JIT object image being disassembled
  aclCompiler* compiler;         // compiler instance used for the disassembly

  DisasData(std::stringstream* stream, aclJITObjectImage im, aclCompiler* cmpl)
      : asmstream(stream), image(im), compiler(cmpl) {}
};
#if defined(LEGACY_COMPLIB)
static bool disasSymbolsCallback(std::string symbol, const void* value, void* data) {
DisasData* disasData = (DisasData*)data;
std::stringstream& asmstream = *(disasData->asmstream);
aclJITObjectImage image = disasData->image;
aclCompiler* compiler = disasData->compiler;
const char __OpenCL_[] = "__OpenCL_";
const char _stub[] = "_stub";
const char _kernel[] = "_kernel";
const char _data[] = "_metadata";
const size_t offset = sizeof(__OpenCL_) - 1;
if (symbol.compare(0, offset, __OpenCL_) != 0) {
return false;
}
size_t suffixPos = symbol.rfind('_');
if (suffixPos == std::string::npos) {
return false;
}
if ((symbol.compare(suffixPos, sizeof(_stub) - 1, _stub) == 0) ||
(symbol.compare(suffixPos, sizeof(_kernel) - 1, _kernel) == 0)) {
acl_error err = ACL_SUCCESS;
char* kernelDisas = aclJITObjectImageDisassembleKernel(compiler, image, symbol.c_str(), &err);
if (err != ACL_SUCCESS) {
LogWarning("aclJITObjectImageDisassembleKernel failed");
return false;
}
asmstream << kernelDisas;
free(kernelDisas);
}
return false;
}
// C-string adapter for disasSymbolsCallback(): wraps |symbol| in a
// std::string and forwards the remaining arguments unchanged.
static bool disasSymbolsCallbackCStr(const char* symbol, const void* value, void* data) {
  return disasSymbolsCallback(std::string(symbol), value, data);
}
#endif
// Compiles the intermediate binary held in llvmBinary_ (SPIR-V, SPIR or LLVM IR)
// to x86 ISA through the compiler library.
//
// With the JIT enabled (options->oVariables->UseJIT) the ISA is wrapped in a JIT
// object image which is registered with setJITBinary() and stored in the CL binary.
// Otherwise the ISA is written to the temporary file dllFileName_ and that file
// is stored in the CL binary.
//
// \param options  build options; also selects AVX, BIF 3.0, asm dump and JIT modes
// \return true on success; false on any failure or when the runtime is built
//         without WITH_ONLINE_COMPILER
bool Program::compileBinaryToISA(amd::option::Options* options) {
  const bool has_avx = !options->oVariables->DisableAVX && device().hasAVXInstructions();
  const bool has_fma4 = device().hasFMA4Instructions();
#if defined(WITH_ONLINE_COMPILER)
  std::string tempName = amd::Os::getTempFileName();
  dllFileName_ = tempName + "dbg" + "." IF(IS_WINDOWS, "dll", "so");

  acl_error err = ACL_SUCCESS;
  aclTargetInfo aclinfo = info(has_avx ?
                                       /*has_fma4 ? "Bulldozer" :*/
                                   "Corei7_AVX"
                                       : "Athlon64");

  aclBinaryOptions binOpts = {0};
  binOpts.struct_size = sizeof(binOpts);
  binOpts.elfclass = aclinfo.arch_id == aclX64 ? ELFCLASS64 : ELFCLASS32;
  binOpts.bitness = ELFDATA2LSB;
  binOpts.alloc = &::malloc;
  binOpts.dealloc = &::free;

  aclBinary* bin = aclBinaryInit(sizeof(aclBinary), &aclinfo, &binOpts, &err);
  if (err != ACL_SUCCESS) {
    buildLog_ += "Internal error: Setting up input OpenCL binary failed!\n";
    LogWarning("aclBinaryInit failed");
    return false;
  }

  // Pick the input section/type. "--spirv" must be tested before "--spir",
  // because the latter is a prefix of the former.
  aclSections_0_8 spirFlag;
  _acl_type_enum_0_8 aclTypeBinaryUsed;
  if (std::string::npos != options->clcOptions.find("--spirv") ||
      elfSectionType_ == amd::OclElf::SPIRV) {
    spirFlag = aclSPIRV;
    aclTypeBinaryUsed = ACL_TYPE_SPIRV_BINARY;
  } else if (std::string::npos != options->clcOptions.find("--spir") ||
             elfSectionType_ == amd::OclElf::SPIR) {
    spirFlag = aclSPIR;
    aclTypeBinaryUsed = ACL_TYPE_SPIR_BINARY;
  } else {
    spirFlag = aclLLVMIR;
    aclTypeBinaryUsed = ACL_TYPE_LLVMIR_BINARY;
  }

  if (ACL_SUCCESS !=
      aclInsertSection(compiler(), bin, llvmBinary_.data(), llvmBinary_.size(), spirFlag)) {
    LogWarning("aclInsertSection failed");
    aclBinaryFini(bin);
    return false;
  }

  // temporary solution to synchronize buildNo between runtime and complib
  // until we move runtime inside complib
  ((amd::option::Options*)bin->options)->setBuildNo(options->getBuildNo());

  err = aclCompile(compiler(), bin, options->origOptionStr.c_str(), aclTypeBinaryUsed, ACL_TYPE_ISA,
                   NULL);
  buildLog_ += aclGetCompilerLog(compiler());
  if (err != ACL_SUCCESS) {
    LogWarning("aclCompile failed");
    aclBinaryFini(bin);
    return false;
  }

  if (options->oVariables->BinBIF30) {
    if (!createBIFBinary(bin)) {
      aclBinaryFini(bin);
      return false;
    }
  }

  if (options->oVariables->BinAS && !options->oVariables->UseJIT) {
    size_t len = 0;
    const char* asmtext =
        static_cast<const char*>(aclExtractSection(compiler(), bin, &len, aclCODEGEN, &err));
    if (err != ACL_SUCCESS) {
      LogWarning("aclExtractSection failed");
      aclBinaryFini(bin);
      return false;
    }
    // Store the Asm text in ASTEXT section unless the JIT is used
    if (!clBinary()->storeX86Asm(asmtext, len)) {
      buildLog_ += "Internal Error: Storing X86 ASM failed!\n";
      // Release the compiler binary on this failure path as on all the others.
      aclBinaryFini(bin);
      return false;
    }
  }

  size_t len = 0;
  const void* isa = aclExtractSection(compiler(), bin, &len, aclTEXT, &err);
  if (err != ACL_SUCCESS) {
    LogWarning("aclExtractSection failed");
    aclBinaryFini(bin);
    return false;
  }

  if (options->oVariables->UseJIT) {
    // printf("Using the jit!\n");
    aclJITObjectImage objectImage = aclJITObjectImageCreate(compiler(), isa, len, bin, &err);
    if (err != ACL_SUCCESS) {
      LogWarning("aclJITObjectImageCreate failed");
      aclBinaryFini(bin);
      return false;
    }
    err = aclJITObjectImageFinalize(compiler(), objectImage);
    if (err != ACL_SUCCESS) {
      // NOTE(review): objectImage is not released on this path; confirm whether
      // aclJITObjectImageDestroy should be called here.
      LogWarning("aclJITObjectImageFinalize failed");
      aclBinaryFini(bin);
      return false;
    }
    setJITBinary(objectImage);
    aclBinaryFini(bin);

    // Store the object image binary in the CL binary;
    if (!clBinary()->storeX86JIT(*this)) {
      buildLog_ += "Internal Error: Storing X86 DLL failed!\n";
      return false;
    }

#if 0
    // Debug stuff. Try and disassemble all kernels and stubs
    std::stringstream asmtext;
    DisasData disasData(&asmtext, objectImage, compiler());
    err = aclJITObjectImageIterateSymbols(compiler(), objectImage,
                                          disasSymbolsCallbackCStr,
                                          &disasData);
    if (err != ACL_SUCCESS) {
      LogWarning("aclJITObjectImageIterateSymbols failed");
      return false;
    }
    printf("DisasSize: %d\nDisas: %s\n", (int)asmtext.str().size(),
           asmtext.str().c_str());
#endif
    return true;
  }

  // Non-JIT path: dump the ISA into the temporary shared-library file. The ISA
  // buffer was extracted from bin, so bin is released only after the write.
  std::fstream f;
  f.open(dllFileName_.c_str(), std::fstream::out | std::fstream::binary);
  f.write(static_cast<const char*>(isa), len);
  f.close();
  aclBinaryFini(bin);

  if (f.fail() || f.bad()) {
    buildLog_ += "Internal error: fail to create an internal file!\n";
    return false;
  }

  // Store the dll binary in the CL binary;
  if (!clBinary()->storeX86(*this, dllFileName_)) {
    buildLog_ += "Internal Error: Storing X86 DLL failed!\n";
    return false;
  }
  return true;
#else   // WITH_ONLINE_COMPILER
  return false;
#endif  // WITH_ONLINE_COMPILER
}
// Prepares this program for a build: runs the base-class initialization, records
// the per-build info for the "cpu" target, initializes the CL binary from the
// options and sets up the output ELF (optionally dumped to a ".bin" file when the
// DUMP_BIF flag is set).
//
// \return false if the base initialization or the output ELF setup fails.
bool Program::initBuild(amd::option::Options* options) {
  const bool baseReady = this->::device::Program::initBuild(options);
  if (!baseReady) {
    return false;
  }

  options->setPerBuildInfo("cpu", clBinary()->getEncryptCode(), false);

  /*
    -f[no-]bin-source : control .source
    -f[no-]bin-llvmir : control .llvmir
    -f[no-]bin-amdil  : control .amdil
    -f[no-]bin-exe    : control .text
    Default: -fno-bin-source -fbin-llvmir -fno-bin-amdil -fbin-exe
  */
  // Elf Binary setup
  clBinary()->init(options);

  // An empty name means "no dump file"; setElfOut then receives NULL.
  std::string dumpName;
  if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
    dumpName = options->getDumpFileName(".bin");
  }
  const char* dumpPath = dumpName.empty() ? NULL : dumpName.c_str();

  if (clBinary()->setElfOut(LP64_SWITCH(ELFCLASS32, ELFCLASS64), dumpPath)) {
    return true;
  }

  LogError("setup elfout for CPU failed");
  return false;
}
// Finishes a build: resets the output and input ELF objects of the CL binary,
// drops the stored binary when the build failed, and then delegates to the
// base-class finiBuild, whose result is returned.
bool Program::finiBuild(bool isBuildGood) {
  clBinary()->resetElfOut();
  clBinary()->resetElfIn();

  const bool buildFailed = !isBuildGood;
  if (buildFailed) {
    // Prevent the encrypted binary from leaking out
    clBinary()->setBinary(NULL, 0);
  }

  return this->::device::Program::finiBuild(isBuildGood);
}
// Compiles OpenCL C source to an intermediate binary through the compiler library.
//
// The embedded headers are first written below the temporary directory (which is
// then added as an include path), the option string is assembled, and the result
// extracted from the aclLLVMIR section is kept in llvmBinary_. Depending on the
// CL binary settings, the source and the IR are also added to the output ELF.
//
// \param sourceCode          OpenCL C source text
// \param headers             contents of the embedded headers
// \param headerIncludeNames  include name of each header, parallel to `headers`
// \param options             build options
// \return true on success; false on any failure or when the runtime is built
//         without WITH_ONLINE_COMPILER
//
// NOTE(review): the header files and directories created here are not removed
// by this function.
bool Program::compileImpl(const std::string& sourceCode,
                          const std::vector<const std::string*>& headers,
                          const char** headerIncludeNames, amd::option::Options* options) {
#if defined(WITH_ONLINE_COMPILER)
  std::string tempFolder = amd::Os::getTempPath();

  for (size_t i = 0; i < headers.size(); ++i) {
    std::string headerPath = tempFolder;
    std::string headerIncludeName(headerIncludeNames[i]);
    // replace / in path with current os's file separator
    if (amd::Os::fileSeparator() != '/') {
      for (std::string::iterator it = headerIncludeName.begin(), end = headerIncludeName.end();
           it != end; ++it) {
        if (*it == '/') *it = amd::Os::fileSeparator();
      }
    }
    // Split "dir/name" into the directory part (appended to the temp path)
    // and the bare file name.
    size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
    if (pos != std::string::npos) {
      headerPath += amd::Os::fileSeparator();
      headerPath += headerIncludeName.substr(0, pos);
      headerIncludeName = headerIncludeName.substr(pos + 1);
    }
    // Report failures at run time: an assert alone is compiled out of release
    // builds, which would let the compile continue with missing headers.
    if (!amd::Os::pathExists(headerPath) && !amd::Os::createPath(headerPath)) {
      buildLog_ += "Internal error: failed creating path!\n";
      LogWarning("failed creating path!");
      return false;
    }
    std::string headerFullName = headerPath + amd::Os::fileSeparator() + headerIncludeName;
    // A fresh stream per header, so no error state carries over between files.
    std::fstream f(headerFullName.c_str(), std::fstream::out);
    f.write(headers[i]->c_str(), headers[i]->length());
    f.close();
    if (f.fail()) {
      buildLog_ += "Internal error: failed creating header file!\n";
      LogWarning("failed creating header file!");
      return false;
    }
  }

  acl_error err = ACL_SUCCESS;
  aclTargetInfo aclinfo = info();

  aclBinaryOptions binOpts = {0};
  binOpts.struct_size = sizeof(binOpts);
  binOpts.elfclass = aclinfo.arch_id == aclX64 ? ELFCLASS64 : ELFCLASS32;
  binOpts.bitness = ELFDATA2LSB;
  binOpts.alloc = &::malloc;
  binOpts.dealloc = &::free;

  aclBinary* bin = aclBinaryInit(sizeof(aclBinary), &aclinfo, &binOpts, &err);
  if (err != ACL_SUCCESS) {
    buildLog_ += "Internal error: Setting up input OpenCL binary failed!\n";
    LogWarning("aclBinaryInit failed");
    return false;
  }

  if (ACL_SUCCESS !=
      aclInsertSection(compiler(), bin, sourceCode.c_str(), sourceCode.size(), aclSOURCE)) {
    LogWarning("aclInsertSection failed");
    aclBinaryFini(bin);
    return false;
  }

  // temporary solution to synchronize buildNo between runtime and complib
  // until we move runtime inside complib
  ((amd::option::Options*)bin->options)->setBuildNo(options->getBuildNo());

  std::stringstream opts;
  std::string token;
  opts << options->origOptionStr.c_str();

  // Supply a default language standard when the caller did not pass one.
  if (options->origOptionStr.find("-cl-std=CL") == std::string::npos) {
    switch (OPENCL_MAJOR * 100 + OPENCL_MINOR * 10) {
      case 100:
        opts << " -cl-std=CL1.0";
        break;
      case 110:
        opts << " -cl-std=CL1.1";
        break;
      case 200:
      default:
      case 120:
        opts << " -cl-std=CL1.2";
        break;
    }
  }

  // Add only for CL2.0 and later
  bool spirFlag = false;
  if (options->oVariables->CLStd[2] >= '2') {
    opts << " -D"
         << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE=" << device().info().maxGlobalVariableSize_;
    spirFlag = true;
  }

  // FIXME: Should we prefix everything with -Wf,?
  std::istringstream iss(options->clcOptions);
  while (getline(iss, token, ' ')) {
    if (!token.empty()) {
      // Check if this is a -D option
      if (token.compare("-D") == 0) {
        // It is, skip payload
        getline(iss, token, ' ');
        continue;
      }
      opts << " -Wf," << token;
    }
  }

  if (!headers.empty()) {
    opts << " -I" << tempFolder;
  }

  if (device().info().imageSupport_) {
    opts << " -D__IMAGE_SUPPORT__=1";
  }

  if (device().hasFMA4Instructions()) {
    opts << " -DFP_FAST_FMA=1 -DFP_FAST_FMAF=1";
  }

  // Define one macro per extension listed in the device info.
  iss.clear();
  iss.str(device().info().extensions_);
  while (getline(iss, token, ' ')) {
    if (!token.empty()) {
      opts << " -D" << token << "=1";
    }
  }

  // Strip every occurrence of -fno-bin-llvmir from the option string.
  static const char kNoBinLlvmir[] = "-fno-bin-llvmir";
  std::string newOpt = opts.str();
  size_t pos = newOpt.find(kNoBinLlvmir);
  while (pos != std::string::npos) {
    newOpt.erase(pos, sizeof(kNoBinLlvmir) - 1);
    pos = newOpt.find(kNoBinLlvmir);
  }

  err = aclCompile(compiler(), bin, newOpt.c_str(), ACL_TYPE_OPENCL,
                   spirFlag ? ACL_TYPE_SPIR_BINARY : ACL_TYPE_LLVMIR_BINARY, NULL);
  buildLog_ += aclGetCompilerLog(compiler());
  if (err != ACL_SUCCESS) {
    LogWarning("aclCompile failed");
    aclBinaryFini(bin);
    return false;
  }

  size_t size = 0;
  const void* llvmir = aclExtractSection(compiler(), bin, &size, aclLLVMIR, &err);
  if (err != ACL_SUCCESS) {
    LogWarning("aclExtractSection failed");
    aclBinaryFini(bin);
    return false;
  }

  // Copy the IR out before the compiler binary that owns it is released.
  llvmBinary_.assign(reinterpret_cast<const char*>(llvmir), size);
  elfSectionType_ = amd::OclElf::LLVMIR;
  aclBinaryFini(bin);

  if (clBinary()->saveSOURCE()) {
    clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, sourceCode.data(), sourceCode.length());
  }
  if (clBinary()->saveLLVMIR()) {
    clBinary()->elfOut()->addSection(amd::OclElf::LLVMIR, llvmBinary_.data(), llvmBinary_.size(),
                                     false);
    // store the original compile options
    clBinary()->storeCompileOptions(compileOptions_);
  }
  return true;
#else   // WITH_ONLINE_COMPILER
  return false;
#endif
}
// Makes the compiled code of this program available and registers its symbols.
//
// JIT mode (options->oVariables->UseJIT): walks the symbols of the JIT object
// image twice (kernel info, then CL binary symbols) and records the total size
// of the global variables.
// Otherwise: loads the shared library named by dllFileName_ into handle_, walks
// its symbols for kernel info and, when addElfSymbols is set, walks them again to
// add them to the CL binary.
//
// \return true when every step succeeded, false otherwise.
bool Program::loadDllCode(amd::option::Options* options, bool addElfSymbols) {
  if (options->oVariables->UseJIT) {
    aclJITObjectImage jitImage = getJITBinary();

    acl_error status =
        aclJITObjectImageIterateSymbols(compiler(), jitImage, setKernelInfoCallbackCStr, this);
    if (status != ACL_SUCCESS) {
      LogWarning("aclJITObjectImageIterateSymbols failed");
      return false;
    }

    status =
        aclJITObjectImageIterateSymbols(compiler(), jitImage, setSymbolsCallbackCStr, clBinary());
    if (status != ACL_SUCCESS) {
      LogWarning("aclJITObjectImageIterateSymbols failed");
      return false;
    }

    size_t globalsSize = aclJITObjectImageGetGlobalsSize(compiler(), jitImage, &status);
    if (status != ACL_SUCCESS) {
      LogWarning("aclJITObjectImageGetGlobalsSize failed");
      return false;
    }

    setGlobalVariableTotalSize(globalsSize);
    return true;
  }

// Check if we have a URI
#if defined(_WIN32)
  // Error dialogs are suppressed while the library is being loaded; the
  // previous error mode is restored right after.
  UINT prevMode = ::SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX);
  handle_ = ::LoadLibraryEx(dllFileName_.c_str(), NULL, DONT_RESOLVE_DLL_REFERENCES);
  ::SetErrorMode(prevMode);
#else
  handle_ = amd::Os::loadLibrary(dllFileName_.c_str());
#endif

  if (!handle_) {
    return false;
  }

  if (!amd::Os::iterateSymbols(handle_, setKernelInfoCallback, this)) {
    return false;
  }

  // Add cpu symbols into elf
  if (addElfSymbols && !amd::Os::iterateSymbols(handle_, setSymbolsCallback, clBinary())) {
    return false;
  }

  return true;
}
// Links (builds) this single program into an executable.
//
// If no LLVM binary is present, the OpenCL binary is used instead: a previously
// stored JIT image or shared library is loaded directly when available; otherwise
// the IR stored in the binary is loaded for recompilation. Any IR is then compiled
// to ISA, the resulting code is loaded and the output OpenCL binary is created.
//
// \return true on success; false on any failure or when the runtime is built
//         without WITH_ONLINE_COMPILER
bool Program::linkImpl(amd::option::Options* options) {
#if defined(WITH_ONLINE_COMPILER)
  // If we don't have LLVM binary then attempt to use OCL binary
  if (llvmBinary_.empty()) {
    // Load ISA
    // For elf format, setup elfIn() and this elfIn() will be released
    // at the end of build by finiBuild().
    if (!clBinary()->setElfIn(LP64_SWITCH(ELFCLASS32, ELFCLASS64))) {
      buildLog_ += "Internal error: Setting up input OpenCL binary failed!\n";
      LogError("Setting up input binary failed");
      return false;
    }

    if (options->oVariables->UseJIT) {
      bool hasJITBinary = false;
      if (!clBinary()->loadX86JIT(*this, hasJITBinary)) {
        return false;
      } else if (hasJITBinary) {
        // UseJIT is set, so loadDllCode takes its JIT path: it registers the
        // kernel info and symbols of the JIT image and records the globals size.
        return loadDllCode(options);
      }
      // Fall-through to recompile
    } else {
      // Trying to load DLL that was generated by out-process as/ld before
      bool hasDLL = false;
      bool loadSuccess = clBinary()->loadX86(*this, dllFileName_, hasDLL);
      if (!loadSuccess) {
        buildLog_ += "Error: loading a kernel from OpenCL binary failed!\n";
        return false;
      } else if (hasDLL) {
        if (loadDllCode(options)) {
          // No rebuild and use the original binary. Release any new binary if there is.
          clBinary()->restoreOrigBinary();
          return true;
        }
      }
      // Fall-through to recompile
    }

    // Need to try recompile, check to see if LLVM IR is present
    if (clBinary()->loadLlvmBinary(llvmBinary_, elfSectionType_) &&
        clBinary()->isRecompilable(llvmBinary_, amd::OclElf::CPU_PLATFORM)) {
      // Copy both .source and .llvmir into the elfout_
      char* section;
      size_t sz;
      if (clBinary()->saveSOURCE() &&
          clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, &section, &sz)) {
        if ((section != NULL) && (sz > 0)) {
          clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
        }
      }
      if (clBinary()->saveLLVMIR()) {
        clBinary()->elfOut()->addSection(elfSectionType_, llvmBinary_.data(), llvmBinary_.size(),
                                         false);
      }
    }
    // We failed kernels loading (wrong ASIC?)
    else {
      buildLog_ += "Error: Runtime failed to load kernels from OCL binary!\n";
      LogError(buildLog_.c_str());
      return false;
    }
  }

  // Do we have llvm binary?
  if (!llvmBinary_.empty()) {
    // Compile llvm binary to x86 source code
    if (!compileBinaryToISA(options)) {
      LogError("We failed to compile LLVMIR binary to ASM text!");
      return false;
    }
  }

  setType(TYPE_EXECUTABLE);

  /////////////////////////////////////////////////////////////
  ////////////////  check, there is a good place to finish elf objects
  //////////////////////////////////////////////////////////////

  // Load dll executable
  if (loadDllCode(options, clBinary()->saveISA())) {
    if (!createBinary(options)) {
      buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
      return false;
    }
    return true;
  }

  buildLog_ += "Internal Error: loading shared library failed!\n";
#endif  // WITH_ONLINE_COMPILER
  return false;
}
// Links several compiled input programs into this program.
//
// The intermediate binary of every input is collected (loading it from the
// input's OpenCL binary when it is not in memory), each one is wrapped in a
// compiler-library binary, and all of them are linked into the first. The result
// is kept in llvmBinary_. When createLibrary is set, the build stops after
// creating the library binary; otherwise the result is compiled to ISA, loaded
// and the executable OpenCL binary is created.
//
// \param inputPrograms  programs to link; each is expected to be a cpu::Program
// \param options        build options
// \param createLibrary  build an OpenCL library instead of an executable
// \return true on success; false on any failure or when the runtime is built
//         without WITH_ONLINE_COMPILER
//
// NOTE(review): elfSectionType_ of this program is not assigned anywhere in this
// function although it is used when adding the linked section below; confirm it
// already matches the linked result.
bool Program::linkImpl(const std::vector<device::Program*>& inputPrograms,
                       amd::option::Options* options, bool createLibrary) {
#if defined(WITH_ONLINE_COMPILER)
  std::vector<std::string*> llvmBinaries(inputPrograms.size());
  std::vector<amd::OclElf::oclElfSections> elfSectionType(inputPrograms.size());

  for (size_t i = 0; i < inputPrograms.size(); ++i) {
    Program* program = static_cast<Program*>(inputPrograms[i]);

    if (program->llvmBinary_.empty()) {
      if (program->clBinary() == NULL) {
        buildLog_ += "Internal error: Input program not compiled!\n";
        LogError("Loading compiled input object failed");
        return false;
      }

      // If we don't have LLVM binary then attempt to use OCL binary
      // Load ISA
      // For elf format, setup elfIn() and this elfIn() will be released
      // at the end of build by finiBuild().
      if (!program->clBinary()->setElfIn(LP64_SWITCH(ELFCLASS32, ELFCLASS64))) {
        buildLog_ +=
            "Internal error: Setting up input OpenCL binary"
            " failed!\n";
        LogError("Setting up input binary failed");
        return false;
      }

      // Need to try recompile, check to see if LLVM IR is present
      if (program->clBinary()->loadLlvmBinary(program->llvmBinary_, program->elfSectionType_) &&
          program->clBinary()->isRecompilable(program->llvmBinary_, amd::OclElf::CPU_PLATFORM)) {
// Copy both .source and .llvmir into the elfout_
#if 0
                // TODO: copy source into .source section of elfout_
                char *section;
                size_t sz;

                if (clBinary()->saveSOURCE() &&
                    clBinary()->elfIn()->getSection(amd::OclElf::SOURCE, &section, &sz)) {
                    if ((section != NULL) && (sz > 0)) {
                        clBinary()->elfOut()->addSection(amd::OclElf::SOURCE, section, sz);
                    }
                }
#endif
      }
      // We failed kernels loading (wrong ASIC?)
      else {
        buildLog_ +=
            "Error: Runtime failed to load kernels from OCL "
            "binary!\n";
        LogError(buildLog_.c_str());
        return false;
      }
    }

    llvmBinaries[i] = &program->llvmBinary_;
    elfSectionType[i] = program->elfSectionType_;
  }

  acl_error err = ACL_SUCCESS;
  aclTargetInfo aclinfo = info();

  aclBinaryOptions binOpts = {0};
  binOpts.struct_size = sizeof(binOpts);
  binOpts.elfclass = aclinfo.arch_id == aclX64 ? ELFCLASS64 : ELFCLASS32;
  binOpts.bitness = ELFDATA2LSB;
  binOpts.alloc = &::malloc;
  binOpts.dealloc = &::free;

  // One compiler-library binary per input; slots stay NULL until initialized.
  std::vector<aclBinary*> libs(llvmBinaries.size(), NULL);
  for (size_t i = 0; i < libs.size(); ++i) {
    libs[i] = aclBinaryInit(sizeof(aclBinary), &aclinfo, &binOpts, &err);
    if (err != ACL_SUCCESS) {
      buildLog_ += "Internal error: Setting up input OpenCL binary failed!\n";
      LogWarning("aclBinaryInit failed");
      break;
    }

    _bif_sections_enum_0_8 aclTypeUsed;
    if (elfSectionType[i] == amd::OclElf::SPIRV) {
      aclTypeUsed = aclSPIRV;
    } else if (elfSectionType[i] == amd::OclElf::SPIR) {
      aclTypeUsed = aclSPIR;
    } else {
      aclTypeUsed = aclLLVMIR;
    }
    err = aclInsertSection(compiler(), libs[i], llvmBinaries[i]->data(), llvmBinaries[i]->size(),
                           aclTypeUsed);
    if (err != ACL_SUCCESS) {
      LogWarning("aclInsertSection failed");
      break;
    }

    // temporary solution to synchronize buildNo between runtime and complib
    // until we move runtime inside complib
    ((amd::option::Options*)libs[i]->options)->setBuildNo(options->getBuildNo());
  }

  if (libs.size() > 0 && err == ACL_SUCCESS) {
    // Single-pass loop so that a failure can `break` straight to the cleanup.
    do {
      unsigned int numLibs = libs.size() - 1;
      if (numLibs > 0) {
        err = aclLink(compiler(), libs[0], libs.size() - 1, &libs[1], ACL_TYPE_LLVMIR_BINARY,
                      "-create-library", NULL);

        buildLog_ += aclGetCompilerLog(compiler());

        if (err != ACL_SUCCESS) {
          LogWarning("aclLink failed");
          break;
        }
      }

      // With a single input nothing was linked, so the section keeps the type
      // of that input; otherwise the aclLLVMIR section is read.
      size_t size = 0;
      _bif_sections_enum_0_8 aclTypeUsed;
      if (elfSectionType[0] == amd::OclElf::SPIRV && numLibs == 0) {
        aclTypeUsed = aclSPIRV;
      } else if (elfSectionType[0] == amd::OclElf::SPIR && numLibs == 0) {
        aclTypeUsed = aclSPIR;
      } else {
        aclTypeUsed = aclLLVMIR;
      }
      const void* llvmir = aclExtractSection(compiler(), libs[0], &size, aclTypeUsed, &err);
      if (err != ACL_SUCCESS) {
        LogWarning("aclExtractSection failed");
        break;
      }

      llvmBinary_.assign(reinterpret_cast<const char*>(llvmir), size);
    } while (0);
  }

  // Release every binary that was actually created. Slots after an early
  // break above are still NULL and are skipped.
  for (size_t i = 0; i < libs.size(); ++i) {
    if (libs[i] != NULL) {
      aclBinaryFini(libs[i]);
    }
  }

  if (err != ACL_SUCCESS) {
    buildLog_ += "Error: linking llvm modules failed!\n";
    return false;
  }

  if (clBinary()->saveLLVMIR()) {
    clBinary()->elfOut()->addSection(elfSectionType_, llvmBinary_.data(), llvmBinary_.size(),
                                     false);
    // store the original link options
    clBinary()->storeLinkOptions(linkOptions_);
    clBinary()->storeCompileOptions(compileOptions_);
  }

  // skip the rest if we are building an opencl library
  if (createLibrary) {
    setType(TYPE_LIBRARY);
    if (!createBinary(options)) {
      buildLog_ += "Internal error: creating OpenCL binary failed\n";
      return false;
    }
    return true;
  }

  // Compile llvm binary to x86 source code
  if (!compileBinaryToISA(options)) {
    LogError("We failed to compile LLVMIR binary to ASM text!");
    return false;
  }

  setType(TYPE_EXECUTABLE);

  /////////////////////////////////////////////////////////////
  ////////////////  check, there is a good place to finish elf objects
  //////////////////////////////////////////////////////////////

  // Load dll executable
  if (loadDllCode(options, clBinary()->saveISA())) {
    if (!createBinary(options)) {
      buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
      return false;
    }
    return true;
  }

  buildLog_ += "Internal Error: loading shared library failed!\n";
#endif  // WITH_ONLINE_COMPILER
  return false;
}
// Creates the ClBinary object for this program's device if there is none yet.
// Returns true when clBinary_ is set on exit.
bool Program::initClBinary() {
  if (clBinary_ != NULL) {
    // Already created; nothing to do.
    return true;
  }

  clBinary_ = new ClBinary(device());
  return clBinary_ != NULL;
}
// Destroys the ClBinary object, if any, and clears the pointer.
void Program::releaseClBinary() {
  if (clBinary_ == NULL) {
    return;
  }

  delete clBinary_;
  clBinary_ = NULL;
}
// Creates the ELF image of the OpenCL binary for the current program type.
// Nothing is done (and true is returned) when the BinBIF30 option is set.
// On failure a message is appended to the build log and false is returned.
bool Program::createBinary(amd::option::Options* options) {
  if (options->oVariables->BinBIF30) {
    return true;
  }

  const bool created = clBinary()->createElfBinary(options->oVariables->BinEncrypt, type());
  if (created) {
    return true;
  }

  buildLog_ += "Internal Error: creating OpenCL binary failed!\n";
  LogError("Failed to create ELF binary image!");
  return false;
}
// Queries the compiler library for the target info of the given chip and caches
// it in info_. The architecture is "x86" or "x86-64", selected by LP64_SWITCH.
//
// \param str  chip name; a NULL or empty string selects "Generic"
// \return reference to the cached info_. A failed query is only logged, so the
//         returned value is whatever aclGetTargetInfo produced.
const aclTargetInfo& Program::info(const char* str) {
  acl_error err = ACL_SUCCESS;
  // Treat NULL like the empty string instead of forwarding a NULL chip name.
  const char* chip = (str == NULL || str[0] == '\0') ? "Generic" : str;
  info_ = aclGetTargetInfo(LP64_SWITCH("x86", "x86-64"), chip, &err);
  if (err != ACL_SUCCESS) {
    LogWarning("aclGetTargetInfo failed");
  }
  return info_;
}
// Releases everything the program owns: the JIT object image, the temporary
// source file, the loaded shared library together with its file on disk, and
// (in online-compiler builds) the ClBinary object.
Program::~Program() {
  if (getJITBinary() != NULL) {
    aclJITObjectImageDestroy(compiler(), getJITBinary());
  }
  if (!sourceFileName_.empty()) {
    amd::Os::unlink(sourceFileName_.c_str());
  }
  if (handle_ != NULL) {
    amd::Os::unloadLibrary(handle_);
    amd::Os::unlink(dllFileName_);
#ifdef _WIN32
    // Also remove the file whose name is the library name with its extension
    // stripped. The name is cut with std::string rather than a fixed-size
    // buffer, so any path length is handled.
    const std::string::size_type dot = dllFileName_.rfind('.');
    if (dot != std::string::npos) {
      amd::Os::unlink(dllFileName_.substr(0, dot));
    }
#endif  // _WIN32
  }
#if defined(WITH_ONLINE_COMPILER)
  releaseClBinary();
#endif
}
} // namespace cpu