Files
rocm-systems/rocclr/runtime/device/cpu/cpucommand.cpp
T
foreman 465c1c0287 P4 to Git Change 1398097 by lmoriche@lmoriche_opencl_dev2 on 2017/04/13 13:01:56
SWDEV-102733 - [OCL-LC-ROCm] Cmake build Write CMakeLists.txt to enable building with and without the DK environment
	- Change the coding convention of the runtime files. Use Google's Style (https://google.github.io/styleguide/cppguide.html).

Affected files ...

... //depot/stg/opencl/drivers/opencl/.clang-format#1 add
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_agent_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#53 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_counter.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#7 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#61 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_event.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#53 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd_amd.h#18 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#24 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.h#4 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.h#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_object.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#41 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sampler.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_semaphore_amd.h#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.h#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blit.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blit.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#66 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#280 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#96 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpufeat.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpukernel.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cputables.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#209 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#284 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#156 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudefs.hpp#147 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#567 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#163 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#318 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#131 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#50 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#44 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#232 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#69 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#238 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#37 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#350 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#98 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputrap.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#410 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#140 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsched.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltrap.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/mesa_glinterop.h#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdefs.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#64 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocregisters.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#78 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#83 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/counter.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/interop.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#127 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#100 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/object.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/object.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#86 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/threadtrace.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/atomic.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/top.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/concurrent.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#271 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/util.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/versions.hpp#2150 edit
2017-04-13 13:56:38 -04:00

610 lines
20 KiB
C++

//
// Copyright 2010 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/cpu/cpucommand.hpp"
#include "device/cpu/cpubuiltins.hpp"
#include "device/cpu/cpudevice.hpp"
#include "device/cpu/cputables.hpp"
#include "platform/command.hpp"
#include "platform/commandqueue.hpp"
#include "platform/program.hpp"
#include "platform/kernel.hpp"
#include "platform/sampler.hpp"
#include "thread/thread.hpp"
#include "os/os.hpp"
#include "utils/util.hpp"
#include "utils/options.hpp"
#include <amdocl/cl_kernel.h>
#include <algorithm>
namespace cpu {
// Stack size requested for every CPU worker thread: the worker's own stack
// (CPU_WORKER_THREAD_STACK_SIZE) plus (CPU_MAX_WORKGROUP_SIZE + 1) slots of
// CLK_PRIVATE_MEMORY_SIZE bytes each.
// NOTE(review): presumably one slot per work-item plus one spare -- confirm.
#define CPU_WORKER_THREAD_TOTAL_STACK_SIZE \
(CPU_WORKER_THREAD_STACK_SIZE + CLK_PRIVATE_MEMORY_SIZE * (CPU_MAX_WORKGROUP_SIZE + 1))
// Builds a CPU worker thread for `device`.
//
// The thread is created with CPU_WORKER_THREAD_TOTAL_STACK_SIZE bytes of stack.
// A local data buffer of (device local memory size + __CPU_SCRATCH_SIZE) bytes,
// aligned to sizeof(cl_long16), is allocated here and released by the
// destructor. On Linux builds with NUMA_SUPPORT the device's NUMA mask, when
// present, is passed to numa_bind().
WorkerThread::WorkerThread(const cpu::Device& device)
    : Thread("CPU Worker Thread", CPU_WORKER_THREAD_TOTAL_STACK_SIZE),
      queueLock_("WorkerThread::queueLock"),
      waitingOp_(0),
      terminated_(false) {
  localDataSize_ = (size_t)device.info().localMemSize_;

  // Scratch area and local memory share a single aligned allocation.
  const size_t storageBytes = localDataSize_ + __CPU_SCRATCH_SIZE;
  localDataStorage_ = (address)amd::AlignedMemory::allocate(storageBytes, sizeof(cl_long16));

#if defined(__linux__) && defined(NUMA_SUPPORT)
  if (const nodemask_t* const numaMask = device.getNumaMask()) {
    numa_bind(numaMask);
  }
#endif
}
// Frees the local data buffer allocated by the constructor.
// The guarantee below rejects destruction from the worker thread itself.
WorkerThread::~WorkerThread() {
guarantee(Thread::current() != this && "thread suicide!");
amd::AlignedMemory::deallocate(localDataStorage_);
}
// Flags the worker as terminated. When called from a thread other than the
// worker itself, keeps flushing and yielding until the worker's state reaches
// Thread::FINISHED. Always returns true.
bool WorkerThread::terminate() {
  terminated_ = true;

  // The worker cannot wait for its own completion.
  if (Thread::current() == this) {
    return true;
  }

  // FIXME_lmoriche: fix termination handshake
  for (; state() < Thread::FINISHED; amd::Os::yield()) {
    flush();
  }
  return true;
}
// Posts `op` to this worker. Spins (yielding the CPU) until no operation is
// pending, clones `op` into the worker's operation slot, and then bumps the
// pending-operation counter.
void WorkerThread::enqueue(Operation& op) {
  for (;;) {
    if (waitingOp_ == 0) {
      break;
    }
    amd::Os::yield();
  }

  op.clone(operation());
  ++waitingOp_;
}
// Main body of the worker thread: computes the base of the work-item stack
// area, then repeatedly waits for a pending operation and runs it, until the
// worker is flagged as terminated.
void WorkerThread::loop() {
// The work-item stacks begin CPU_WORKER_THREAD_STACK_SIZE bytes below
// stackBase(), rounded down to a CLK_PRIVATE_MEMORY_SIZE boundary.
baseWorkItemsStack_ =
amd::alignDown(stackBase() - CPU_WORKER_THREAD_STACK_SIZE, CLK_PRIVATE_MEMORY_SIZE);
#if defined(WIN32)
// NOTE(review): presumably commits the stack pages up front -- confirm against
// amd::Os::touchStackPages.
amd::Os::touchStackPages(baseWorkItemsStack_, amd::Os::currentStackPtr());
#endif // WINDOWS
Operation* op = operation();
// queueLock_ is held for the whole loop and released only on exit.
queueLock_.lock();
while (true) {
// Wait until an operation is pending or termination was requested.
while (waitingOp_ == 0) {
if (terminated_) {
break;
}
queueLock_.wait();
}
// Termination wins even when an operation is still pending.
if (terminated_) {
break;
}
op->command().setStatus(CL_RUNNING);
op->execute();
op->cleanup();
// Mark the operation slot as free again; enqueue() spins on this counter.
--waitingOp_;
}
queueLock_.unlock();
}
void NativeFn::execute() {
cl_int status = static_cast<amd::NativeFnCommand&>(command()).invoke();
command().setStatus(status);
}
// Intentionally empty: installed in place of the barrier builtin when a
// work-group contains a single work-item.
static void nop() {}
// Specialization layer over NDRangeKernelBatch selected by execution nature
// (single work-item, with barrier, without barrier, work-group level) so that
// the NATURE comparisons below are compile-time constants.
// NDRangeKernelBatch::execute() casts `this` to one of these instantiations.
template <NDRangeKernelBatch::ExecutionNature NATURE,
NDRangeKernelBatch::ExecutionOrder ORDER = NDRangeKernelBatch::ORDER_DEFAULT>
class NDRangeKernelBatchMode : public NDRangeKernelBatch {
private:
// Runs one work-group using the strategy implied by NATURE and by the
// number of work-items currently set on `wg`.
void executeWorkGroup(WorkGroup& wg) {
if (NATURE == NATURE_WG_LEVEL_EXEC) {
wg.executeWorkItem();
} else if ((NATURE == NATURE_1_WORK_ITEM) || (wg.getNumWorkItems() == 1)) {
// A single work-item needs no fiber ring.
wg.executeWorkItem();
} else {
// Link the base work-item to the worker's main fiber before running the group.
wg.getBaseWorkItem()->setNext(&wg.getWorkerThread().mainFiber());
if (NATURE == NATURE_WITHOUT_BARRIER) {
wg.executeWithoutBarrier();
} else { // NATURE == NATURE_WITH_BARRIER
wg.executeWithBarrier();
}
}
// Yield at the end of each workgroup to avoid starving GPU device
amd::Os::yield();
}
public:
// Executes every work-group id this batch obtains from getNextOperationId(),
// advancing the base work-item's group id by the distance between
// consecutive ids.
void executeMode(WorkGroup& wg) {
const amd::NDRange& offset = static_cast<amd::NDRangeKernelCommand&>(command_).sizes().offset();
WorkItem* workItem0 = wg.getBaseWorkItem();
// Local copy of the builtins table; only initialized on the single
// work-item paths below, where its barrier entry is replaced by nop.
clk_builtins_t tableTask;
size_t prevOpId = 0, opId = (size_t)-1;
if (NATURE == NATURE_1_WORK_ITEM) {
tableTask = Builtins::dispatchTable_;
// If local size == 1 then barrier() becomes a nop.
tableTask.barrier_ptr = (void (*)(cl_mem_fence_flags))nop;
workItem0->infoBlock().builtins = &tableTask;
workItem0->setNext(&wg.getWorkerThread().mainFiber());
}
while (getNextOperationId(opId)) {
workItem0->incrementGroupId(groupIds_, offset, opId - prevOpId);
// Recompute the work-item count from the info block's local_size, which
// incrementGroupId() has just updated for this group.
uint workDims = workItem0->infoBlock().work_dim;
size_t numWorkItems = workItem0->infoBlock().local_size[0] *
(workDims >= 2 ? workItem0->infoBlock().local_size[1] : 1) *
(workDims >= 3 ? workItem0->infoBlock().local_size[2] : 1);
wg.setNumWorkItems(numWorkItems);
if (numWorkItems == 1) {
// Same barrier-to-nop substitution as above, applied per group.
tableTask = Builtins::dispatchTable_;
tableTask.barrier_ptr = (void (*)(cl_mem_fence_flags))nop;
workItem0->infoBlock().builtins = &tableTask;
workItem0->setNext(&wg.getWorkerThread().mainFiber());
executeWorkGroup(wg);
// Put the real barrier back into the local table for later groups.
tableTask.barrier_ptr = &WorkItem::barrier;
} else {
executeWorkGroup(wg);
}
prevOpId = opId;
}
//#define DISABLE_TASK_STEALING
// The trailing "&& 0" keeps the task-stealing path below compiled out
// regardless of DISABLE_TASK_STEALING.
#if !defined(DISABLE_TASK_STEALING) && 0
size_t maxId = numCores_;
size_t stolenId = coreId_ + 1;
NDRangeKernelBatch* workingBatch = this;
size_t numStolenIds = 1;
const size_t maxStealingSize = 3;
const size_t minAdaptiveStealingDiff = numCores_ * maxStealingSize;
while (true) {
for (; stolenId < maxId; ++stolenId) {
WorkerThread* worker = virtualDevice_.getWorkerThread(stolenId);
// Handles the case where there are fewer operations than worker threads.
if (worker->isOperationValid()) {
workingBatch = static_cast<NDRangeKernelBatch*>(worker->operation());
numStolenIds = workingBatch->getNextOperationIds(opId, numStolenIds);
if (numStolenIds > 0) {
do {
for (size_t i = 0; i < numStolenIds; ++i) {
workItem0->setGroupId(groupIds_, offset, opId);
executeWorkGroup(wg);
opId += numCores_;
}
// adaptive stealing
if (numWorkGroups_ - opId > minAdaptiveStealingDiff) {
numStolenIds = maxStealingSize;
} else {
while (workingBatch->getNextOperationId(opId)) {
workItem0->setGroupId(groupIds_, offset, opId);
executeWorkGroup(wg);
}
break;
}
numStolenIds = workingBatch->getNextOperationIds(opId, numStolenIds);
} while (numStolenIds > 0);
}
numStolenIds = 1;
}
} // for (stolenId..maxId)
if (stolenId == coreId_) {
break;
}
// Wrap around: scan the workers below this core's id next.
stolenId = 0;
maxId = coreId_;
} // while (true)
#endif
}
};
// Claims the next work-group id for this batch.
//
// Returns false without touching `opId` when the batch counter has already
// reached numWorkGroups_. Otherwise advances the counter by numCores_ via
// amd::AtomicOperation::add, stores the value that call returns in `opId`,
// and reports whether that id is still below numWorkGroups_.
inline bool NDRangeKernelBatch::getNextOperationId(size_t& opId) {
  const bool exhausted = currentOpId_ >= numWorkGroups_;
  if (!exhausted) {
    opId = amd::AtomicOperation::add(numCores_, &currentOpId_);
    return opId < numWorkGroups_;
  }
  return false;
}
// Claims up to `count` work-group ids (spaced numCores_ apart) in one step.
//
// Returns 0 when the batch is already exhausted or the claimed starting id is
// out of range. Otherwise `opId` receives the value returned by
// amd::AtomicOperation::add and the return value is the number of ids granted,
// trimmed when the claimed span runs past numWorkGroups_.
inline size_t NDRangeKernelBatch::getNextOperationIds(size_t& opId, size_t count) {
  size_t claimedSpan = numCores_ * count;

  if (currentOpId_ >= numWorkGroups_) {
    return 0;
  }

  opId = amd::AtomicOperation::add(claimedSpan, &currentOpId_);

  const size_t totalGroups = numWorkGroups_;
  if (opId >= totalGroups) {
    return 0;
  }

  // End of the claimed span; trim the grant when it overshoots the range.
  const size_t spanEnd = claimedSpan + opId;
  if (spanEnd >= (totalGroups + numCores_)) {
    count -= (spanEnd - totalGroups) / numCores_;
  }
  return count;
}
// Process the parameters, allocate LDS.
//
// Copies the kernel arguments of the current command into `params`, laid out
// with the device-side alignment reported by `cpuKernel`, translating each
// argument on the way:
//   - __local arguments (descriptor size 0) are carved out of the local memory
//     area starting at `localMemPtr`;
//   - pointer arguments become the host pointer of the memory object (or the
//     image implementation struct for images, or the raw SVM pointer);
//   - sampler arguments become their 32-bit state;
//   - everything else is copied by value (through the HCtoDCmap on 32-bit
//     targets when CPU_USE_ALIGNMENT_MAP is 0).
//
// `localMemPtr` is advanced past every __local argument and left aligned to
// sizeof(cl_long16); `localMemLimit` is the end of the usable local memory and
// `localMemSize` the amount the kernel itself still needs after the arguments.
//
// Returns false if an argument cannot be translated. For local-memory
// exhaustion and for a memory object without host memory, the command
// exception is set to CL_MEM_OBJECT_ALLOCATION_FAILURE first.
bool NDRangeKernelBatch::patchParameters(const cpu::Kernel& cpuKernel, address params,
                                         address& localMemPtr, const address localMemLimit,
                                         size_t localMemSize) const {
  amd::NDRangeKernelCommand& command = static_cast<amd::NDRangeKernelCommand&>(command_);
  const amd::Device& device = command.queue()->device();
  const amd::Kernel& kernel = command.kernel();
  const amd::KernelSignature& signature = kernel.signature();
  const amd::KernelParameters& kernelParam = kernel.parameters();
  const_address cmdParams = command.parameters();
  unsigned effectiveOffset = 0;

  // DD -- on CPU device, real effective offset is NATIVELY aligned
  // Here all source arguments are in place, so we're safe just iterating
  for (size_t i = 0; i < signature.numParameters(); ++i) {
    const amd::KernelParameterDescriptor& desc = signature.at(i);
    const void* cmdParam = cmdParams + desc.offset_;
    void* param;
    size_t prmSize = cpuKernel.getArgSize(i);

    // Align i'th parameter on multiple of its size. Parameter size is power of 2.
    size_t alignment = cpuKernel.getArgAlignment(i);
    effectiveOffset = amd::alignUp(effectiveOffset, std::min(alignment, size_t(16)));
    param = params + effectiveOffset;

    if (desc.size_ == 0) {
      // __local memory parameter
      localMemPtr = amd::alignUp(localMemPtr, sizeof(cl_long16));
      const size_t length = *static_cast<const size_t*>(cmdParam);
      *static_cast<void**>(param) = localMemPtr;
      // Compare the requested size against the remaining capacity instead of
      // comparing advanced pointers: the length comes from the argument data,
      // and "localMemPtr + length" could wrap around and slip past a pointer
      // comparison.
      if (localMemPtr > localMemLimit ||
          length > static_cast<size_t>(localMemLimit - localMemPtr)) {
        command.setException(CL_MEM_OBJECT_ALLOCATION_FAILURE);
        return false;
      }
      localMemPtr += length;
    } else if (desc.type_ == T_POINTER) {
      // __global memory parameter
      cl_mem_object_type pointer_type = CL_MEM_OBJECT_BUFFER;
      if (kernelParam.boundToSvmPointer(device, cmdParams, i)) {
        *reinterpret_cast<void**>(param) = *reinterpret_cast<void* const*>(cmdParam);
      } else {
        void* hostMemPtr = NULL;
        amd::Memory* memArg = *reinterpret_cast<amd::Memory* const*>(cmdParam);
        if (memArg != NULL) {
          hostMemPtr = memArg->getHostMem();
          if (hostMemPtr == NULL) {
            command.setException(CL_MEM_OBJECT_ALLOCATION_FAILURE);
            return false;
          }
          pointer_type = memArg->getType();
        }
        // For images on CPU devices, pass "struct {int4 p0; int4 p1}".
        // That allows an obvious implementation for
        // __amdil_get_image[23]d_params[01].
        // That makes the rest of the .bc implementation for
        // images relatively straight forward.
        if (pointer_type == CL_MEM_OBJECT_IMAGE1D || pointer_type == CL_MEM_OBJECT_IMAGE2D ||
            pointer_type == CL_MEM_OBJECT_IMAGE3D || pointer_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
            pointer_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||
            pointer_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
          // pointer_type is only an image type when memArg is non-NULL.
          amd::Image::Impl& impl = memArg->asImage()->getImpl();
          impl.reserved_ = hostMemPtr;
          *reinterpret_cast<void**>(param) = (void*)&impl;
        } else {
          *reinterpret_cast<void**>(param) = hostMemPtr;
        }
      }
    } else if (desc.type_ == T_SAMPLER) {
      // Switch from an Amd::Sampler to the 32bit integer
      // variable that is a clk_sampler.
      amd::Sampler* samplerArg = *reinterpret_cast<amd::Sampler* const*>(cmdParam);
      *reinterpret_cast<uint32_t*>(param) = (uint32_t)samplerArg->state();
    } else {
      // Using HCtoDCmap
      HCtoDCmap arg_map = cpuKernel.getHCtoDCmap(i);
      unsigned int arg_offset = effectiveOffset;
      int err_code = 0;
      int inStruct = 0;
      int sys_64bit = LP64_SWITCH(0, 1);  // Mapping only required for 32 bit targets
      if (CPU_USE_ALIGNMENT_MAP == 0 && !sys_64bit) {
        effectiveOffset += arg_map.copy_params(param, cmdParam, arg_offset, err_code, inStruct);
        if (err_code) {
          return false;
        }
        prmSize = arg_map.dc_size;
      } else {
        ::memcpy(param, cmdParam, desc.size_);
      }
    }
    effectiveOffset += prmSize;
  }

  // Finally make sure the kernel's own local memory fits behind the arguments,
  // again checking the size against the space left rather than adding to the
  // pointer first.
  localMemPtr = amd::alignUp(localMemPtr, sizeof(cl_long16));
  if (localMemPtr > localMemLimit ||
      localMemSize > static_cast<size_t>(localMemLimit - localMemPtr)) {
    command.setException(CL_MEM_OBJECT_ALLOCATION_FAILURE);
    return false;
  }
  return true;
}
void NDRangeKernelBatch::execute() {
amd::NDRangeKernelCommand& command = static_cast<amd::NDRangeKernelCommand&>(command_);
const cpu::Kernel& kernel =
static_cast<const cpu::Kernel&>(*command.kernel().getDeviceKernel(command.queue()->device()));
WorkerThread& thread = *WorkerThread::current();
const size_t numWorkItems = command.sizes().local().product();
address params = thread.baseWorkItemsStack();
address baseLocalMemPtr = thread.localDataStorage();
address patchedLocalMemPtr = thread.localDataStorage() + __CPU_SCRATCH_SIZE;
if (!patchParameters(kernel, params, patchedLocalMemPtr,
patchedLocalMemPtr + thread.localDataSize(),
kernel.workGroupInfo()->localMemSize_)) {
return;
}
WorkItem* workItem0 =
::new ((WorkItem*)params - 1) WorkItem(command.sizes(), baseLocalMemPtr, patchedLocalMemPtr);
WorkGroup wg(command, kernel, thread, params, workItem0, numWorkItems);
if (numWorkItems == 1) {
static_cast<NDRangeKernelBatchMode<NATURE_1_WORK_ITEM>*>(this)->executeMode(wg);
} else if (kernel.hasBarrier()) {
static_cast<NDRangeKernelBatchMode<NATURE_WITH_BARRIER>*>(this)->executeMode(wg);
} else {
static_cast<NDRangeKernelBatchMode<NATURE_WITHOUT_BARRIER>*>(this)->executeMode(wg);
}
}
void WorkGroup::executeWorkItem() {
callKernel((kernelentrypoint_t)kernel_.getEntryPoint(), workItem0_->nativeStackPtr());
}
// Executes a work-group whose kernel may call barrier().
//
// The first work-item runs directly. If it reaches a barrier, control comes
// back to the saved main-fiber context below, and each such re-entry clones
// the previous work-item into the next CLK_PRIVATE_MEMORY_SIZE slot, advances
// its local id, and links it into a ring of fibers. If no barrier is hit, the
// remaining work-items are run sequentially through callKernelRange().
// NOTE(review): assumes mainFiber().save() behaves like setjmp (true on the
// initial call, false when resumed through a swap/restore) -- confirm.
void WorkGroup::executeWithBarrier() {
kernelentrypoint_t entryPoint = (kernelentrypoint_t)kernel_.getEntryPoint();
workingFiber_ = workItem0_;
address workGroupStackPtr = workItem0_->nativeStackPtr();
// Save the current stack context in case we execute a barrier.
// threadCounter is volatile so its value survives re-entry through the saved
// context; it counts how many times execution has passed this point.
volatile size_t threadCounter = 0;
bool barrier = !thread_.mainFiber().save();
size_t tid = threadCounter++;
// Work-item `tid` lives tid private-memory slots below the base work-item.
WorkItem* workItem = (WorkItem*)((char*)workItem0_ - tid * CLK_PRIVATE_MEMORY_SIZE);
if (barrier) {
// Re-entered because the previous work-item hit a barrier: create the next
// work-item as a copy of the previous one and advance its local id.
WorkItem* prev = (WorkItem*)((char*)workItem + CLK_PRIVATE_MEMORY_SIZE);
WINDOWS_ONLY(amd::Os::touchStackPages((address)(workItem + 1), (address)prev));
::memcpy(workItem, prev, sizeof(WorkItem));
clk_thread_info_block_t& tib = workItem->infoBlock();
++tib.local_id[0];
if (unlikely(tib.local_id[0] >= tib.local_size[0])) {
//
// Compiling for Windows 64bit (only in release) introduces a bug,
// which uses the same register for saving threadCounter and the
// 0 value. Therefore "tib.local_id[i] = 0" was actually translated
// to "tib.local_id[0] = threadCounter". To avoid this issue, and
// still be able to store a 0 into tib.local_id[i], we trick the
// compiler, by using the value in tib.local_id[3], which is always
// initialized to 0.
//
tib.local_id[0] = tib.local_id[3];
++tib.local_id[1];
if (unlikely(tib.local_id[1] >= tib.local_size[1])) {
tib.local_id[1] = tib.local_id[3];
++tib.local_id[2];
}
}
// Link the previous workitem to this one.
prev->setNext(workItem);
// If this is the last workitem, complete the ring.
if (tid >= numWorkItems_ - 1) {
workItem->setNext(workItem0_);
}
}
// Execute thread0
address workItemStackPtr = workItem->nativeStackPtr();
callKernelProtectedReturn(entryPoint, workItemStackPtr);
// Check if thread0 executed a barrier()
if (threadCounter > 1) {
// A work-item has just run to completion inside the fiber ring: advance the
// working fiber and recover the finished work-item's index from its address.
workItem = (WorkItem*)workingFiber_;
workingFiber_ = workingFiber_->next();
tid = ((address)workItem0_ - (address)workItem) / CLK_PRIVATE_MEMORY_SIZE;
if (tid == (numWorkItems_ - 1)) {
// If we get here, we are done!
return;
}
if (workItem->next() == &thread_.mainFiber()) {
// Detected a deadlock
command_.setException(CL_INVALID_KERNEL);
return;
}
// Schedule the next workitem.
workItem->next()->restore();
ShouldNotReachHere();
}
// Execute thread1...threadN
callKernelRange(entryPoint, workItemStackPtr, workItem->infoBlock());
}
void WorkGroup::executeWithoutBarrier() {
kernelentrypoint_t entryPoint = (kernelentrypoint_t)kernel_.getEntryPoint();
address workItemStackPtr = workItem0_->nativeStackPtr();
// Execute thread0
callKernel(entryPoint, workItemStackPtr);
// Execute thread1...threadN
callKernelRange(entryPoint, workItemStackPtr, workItem0_->infoBlock());
}
// Runs the kernel once for every local id that follows the one currently
// stored in `tib`, stepping the id like an odometer over the three dimensions
// (dimension 0 fastest). Returns once the id wraps in the last dimension,
// leaving all three components reset to 0.
void WorkGroup::callKernelRange(kernelentrypoint_t entryPoint, address stackPtr,
                                clk_thread_info_block_t& tib) {
  const int kDims = 3;
  for (;;) {
    // Advance to the next local id, carrying into the higher dimensions.
    bool wrappedAll = true;
    for (int d = 0; d < kDims; ++d) {
      ++tib.local_id[d];
      if (tib.local_id[d] < tib.local_size[d]) {
        wrappedAll = false;
        break;
      }
      tib.local_id[d] = 0;
    }
    if (wrappedAll) {
      return;
    }
    callKernel(entryPoint, stackPtr);
  }
}
// Initializes the work-item's thread info block from the launch geometry.
//
// Dimensions used by the launch take their offset/global/local sizes from
// `sizes`; the remaining entries of the info block arrays get offset 0 and
// size 1. All local and group ids start at 0.
WorkItem::WorkItem(const amd::NDRangeContainer& sizes, void* scratchMemPtr, void* localMemPtr) {
  const amd::NDRange& offsetRange = sizes.offset();
  const amd::NDRange& globalRange = sizes.global();
  const amd::NDRange& localRange = sizes.local();
  const size_t usedDims = sizes.dimensions();
  const size_t totalDims = sizeof(tib_.global_size) / sizeof(size_t);

  tib_.builtins = &Builtins::dispatchTable_;
  tib_.local_mem_base = localMemPtr;
  tib_.local_scratch = scratchMemPtr;
  tib_.table_base = (const void*)cpuTables;
  tib_.work_dim = (cl_uint)sizes.dimensions();

  size_t d = 0;
  // Dimensions present in the launch.
  for (; d < usedDims; ++d) {
    tib_.global_offset[d] = offsetRange[d];
    tib_.global_size[d] = globalRange[d];
    tib_.local_size[d] = localRange[d];
    tib_.enqueued_local_size[d] = localRange[d];
    tib_.local_id[d] = 0;
    tib_.group_id[d] = 0;
  }
  // Fill the remaining dimensions.
  for (; d < totalDims; ++d) {
    tib_.global_offset[d] = 0;
    tib_.global_size[d] = 1;
    tib_.local_size[d] = 1;
    tib_.enqueued_local_size[d] = 1;
    tib_.local_id[d] = 0;
    tib_.group_id[d] = 0;
  }
}
// Positions this work-item at the work-group with linear index `n`.
//
// `rangeLimits` holds the number of work-groups per dimension and `offset` the
// launch's global offset. The linear index is decomposed dimension by
// dimension (dimension 0 fastest); for every dimension the group id, the
// group's starting global offset, the local id (reset to 0) and the effective
// local size are updated. The local size is clamped so that a trailing,
// partially filled work-group gets only the work-items that remain in that
// dimension.
ALWAYSINLINE void WorkItem::setGroupId(const amd::NDRange& rangeLimits, const amd::NDRange& offset,
                                       size_t n) {
  const size_t dims = rangeLimits.dimensions();
  for (size_t i = 0; i < dims; ++i) {
    size_t lim = rangeLimits[i];
    size_t& val = tib_.group_id[i];
    val = n;
    if (n < lim) {
      // The remaining index fits in this dimension: no carry into higher ones.
      tib_.global_offset[i] = offset[i] + val * tib_.enqueued_local_size[i];
      tib_.local_id[i] = 0;
      tib_.local_size[i] = std::min(tib_.enqueued_local_size[i],
                                    tib_.global_size[i] - (val * tib_.enqueued_local_size[i]));
      ++i;
      // All higher dimensions are at group 0. Their local size must be reset as
      // well: a previous call may have left the reduced size of a partial
      // trailing group behind, which is wrong for group 0.
      for (; i < dims; ++i) {
        tib_.global_offset[i] = offset[i];
        tib_.local_id[i] = 0;
        tib_.group_id[i] = 0;
        tib_.local_size[i] = std::min(tib_.enqueued_local_size[i], tib_.global_size[i]);
      }
      break;
    } else {
      // Keep the remainder in this dimension and carry the quotient upwards.
      n /= lim;
      val -= n * lim;
      tib_.global_offset[i] = offset[i] + val * tib_.enqueued_local_size[i];
      tib_.local_id[i] = 0;
      tib_.local_size[i] = std::min(tib_.enqueued_local_size[i],
                                    tib_.global_size[i] - (val * tib_.enqueued_local_size[i]));
    }
  }
}
// Advances this work-item's group id by `n` work-groups.
//
// The increment is applied to dimension 0 and any overflow past the number of
// groups in a dimension (`rangeLimits`) is carried into the next one. Every
// dimension that is touched gets its starting global offset, local id (reset
// to 0) and effective local size refreshed; dimensions above the last carry
// are left unchanged.
ALWAYSINLINE void WorkItem::incrementGroupId(const amd::NDRange& rangeLimits,
                                             const amd::NDRange& offset, size_t n) {
  const size_t numDims = rangeLimits.dimensions();
  for (size_t dim = 0; dim < numDims; ++dim) {
    const size_t groupsInDim = rangeLimits[dim];
    size_t& groupId = tib_.group_id[dim];

    groupId += n;
    const bool carry = !(groupId < groupsInDim);
    if (carry) {
      // Keep the remainder here; the quotient moves on to the next dimension.
      n = groupId / groupsInDim;
      groupId -= n * groupsInDim;
    }

    tib_.global_offset[dim] = offset[dim] + groupId * tib_.enqueued_local_size[dim];
    tib_.local_id[dim] = 0;
    tib_.local_size[dim] =
        std::min(tib_.enqueued_local_size[dim],
                 tib_.global_size[dim] - (groupId * tib_.enqueued_local_size[dim]));

    if (!carry) {
      break;
    }
  }
}
// Barrier builtin: swaps execution from the current work-item to the next one
// in its fiber chain. `flags` is not used.
void WorkItem::barrier(cl_mem_fence_flags flags) {
  WorkItem* const self = WorkItem::current();
  self->swap(self->next());
}
void Operation::cleanup() {
cl_int lastException = command().exception();
cl_int status = (lastException != 0) ? lastException : CL_COMPLETE;
Counter* counter = reinterpret_cast<Counter*>(command().data());
if (counter == NULL) {
command().setStatus(status);
} else if (counter->decrement() == 0) {
counter->event().setStatus(status);
}
}
} // namespace cpu