Files
rocm-systems/rocclr/runtime/device/cpu/cpudevice.cpp
T
foreman 465c1c0287 P4 to Git Change 1398097 by lmoriche@lmoriche_opencl_dev2 on 2017/04/13 13:01:56
SWDEV-102733 - [OCL-LC-ROCm] Cmake build Write CMakeLists.txt to enable building with and without the DK environment
	- Change the coding convention of the runtime files. Use Google's Style (https://google.github.io/styleguide/cppguide.html).

Affected files ...

... //depot/stg/opencl/drivers/opencl/.clang-format#1 add
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_agent_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_context.cpp#53 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_counter.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d10.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d11.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_d3d9.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#7 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#61 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_event.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_gl.cpp#53 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_icd_amd.h#18 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel.h#24 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_kernel_info_amd.h#4 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_lqdflash_amd.h#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_memobj.cpp#81 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_object.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_pipe.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_platform_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_profile_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#41 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sampler.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_sdi_amd.h#2 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_semaphore_amd.h#3 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_svm.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_thread_trace_amd.h#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/appprofile.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blit.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blit.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/blitcl.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubinary.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpubuiltins.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.cpp#66 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpucommand.hpp#40 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#280 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.hpp#96 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpufeat.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpukernel.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpumapping.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.cpp#70 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuprogram.hpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.cpp#33 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpusettings.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cputables.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.cpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpuvirtual.hpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#209 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#284 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuappprofile.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.cpp#58 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpubinary.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucompiler.cpp#156 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuconstbuf.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpucounters.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudefs.hpp#147 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#567 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#163 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#318 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#126 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.cpp#131 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpumemory.hpp#50 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#44 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#232 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#69 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#238 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.hpp#87 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#19 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuschedcl.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#37 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#350 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#98 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputhreadtrace.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.cpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputimestamp.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gputrap.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#410 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#140 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuwavelimiter.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbinary.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcompiler.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#39 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsched.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palschedcl.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palthreadtrace.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltrap.hpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palwavelimiter.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/mesa_glinterop.h#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdefs.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#48 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#64 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocregisters.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/alloc.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os.hpp#30 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/os/os_win32.cpp#47 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/agent.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.cpp#78 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/command.hpp#83 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/commandqueue.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/counter.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/interop.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#23 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.cpp#127 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/memory.hpp#100 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/ndrange.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/object.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/object.hpp#17 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/perfctr.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.cpp#86 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/program.hpp#41 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.cpp#35 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/runtime.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/sampler.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/threadtrace.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/atomic.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.cpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/monitor.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/semaphore.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/runtime/thread/thread.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/top.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/concurrent.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.cpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/debug.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#271 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/macros.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/util.hpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/utils/versions.hpp#2150 edit
2017-04-13 13:56:38 -04:00

1047 lines
31 KiB
C++

//
// Copyright 2011 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/cpu/cpudevice.hpp"
#include "device/cpu/cpuprogram.hpp"
#include "utils/versions.hpp"
#include "utils/flags.hpp"
#include "amdocl/cl_common.hpp"
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <algorithm>
#if defined(__linux__)
#if !defined(ATI_ARCH_ARM)
#include <sys/sysinfo.h>
#endif // ATI_ARCH_ARM
#include <unistd.h>
#endif
#if defined(_WIN32)
#include <windows.h>
#include <intrin.h>
extern BOOL(WINAPI* pfnGetNumaNodeProcessorMaskEx)(USHORT, PGROUP_AFFINITY);
#endif // _WIN32
namespace cpu {
// Compiler handle held in a static member of Device; Device::init() assigns it
// from aclCompilerInit() and Device::tearDown() passes it to aclCompilerFini().
aclCompiler* Device::compiler_;

// Starts at the largest size_t value; Device::init() replaces it with a bound
// computed from the amount of virtual memory.
size_t Device::maxWorkerThreads_ = static_cast<size_t>(-1);
// Frees the affinity data held by this device.
//
// With NUMA support compiled in (Linux only) a device that reports a NUMA mask
// frees numaMask_ and nothing else; every other device frees
// workerThreadsAffinity_.
Device::~Device() {
#if defined(__linux__) && defined(NUMA_SUPPORT)
  if (getNumaMask() != NULL) {
    // Deleting a null pointer is a no-op, so no separate null test is needed.
    delete numaMask_;
    return;
  }
#endif
  delete workerThreadsAffinity_;
}
// Undoes the process-wide setup performed by Device::init(): removes the SIGFPE
// handler and finalizes the compiler instance stored in compiler_.
void Device::tearDown() {
amd::Os::uninstallSigfpeHandler();
aclCompilerFini(compiler_);
}
bool Device::init() {
// Allow disabling of the CPU device
if (CPU_MAX_COMPUTE_UNITS == 0) return false;
if (!amd::Os::installSigfpeHandler()) return false;
const char* library = getenv("COMPILER_LIBRARY");
aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8), (library || CPU_OPENCL_VERSION >= 200)
? library
: LINUX_ONLY("lib") "amdocl12cl" LP64_SWITCH(
LINUX_SWITCH("32", ""), "64") LINUX_SWITCH(".so", ".dll"),
NULL, NULL, NULL, NULL, NULL, NULL};
acl_error error;
compiler_ = aclCompilerInit(&opts, &error);
if (error != ACL_SUCCESS) {
LogError("Error initializing the compiler");
return false;
}
device::Info info;
::memset(&info, '\0', sizeof(info));
info.type_ = CL_DEVICE_TYPE_CPU;
info.vendorId_ = 0x1002;
int systemProcessorCount = amd::Os::processorCount();
info.maxComputeUnits_ = systemProcessorCount;
if (!flagIsDefault(CPU_MAX_COMPUTE_UNITS)) {
if ((CPU_MAX_COMPUTE_UNITS <= 0) || (CPU_MAX_COMPUTE_UNITS > systemProcessorCount))
info.maxComputeUnits_ = systemProcessorCount;
else
info.maxComputeUnits_ = CPU_MAX_COMPUTE_UNITS;
}
info.maxWorkItemDimensions_ = 3;
info.maxWorkGroupSize_ = CPU_MAX_WORKGROUP_SIZE;
info.maxWorkItemSizes_[0] = info.maxWorkGroupSize_;
info.maxWorkItemSizes_[1] = info.maxWorkGroupSize_;
info.maxWorkItemSizes_[2] = info.maxWorkGroupSize_;
info.addressBits_ = LP64_SWITCH(32, 64);
if (CPU_IMAGE_SUPPORT) {
info.imageSupport_ = CL_TRUE;
info.maxReadImageArgs_ = MaxReadImage;
info.maxWriteImageArgs_ = MaxWriteImage;
info.image2DMaxWidth_ = 8 * Ki;
info.image2DMaxHeight_ = 8 * Ki;
info.image3DMaxWidth_ = 2 * Ki;
info.image3DMaxHeight_ = 2 * Ki;
info.image3DMaxDepth_ = 2 * Ki;
info.maxSamplers_ = MaxSamplers;
// OpenCL 1.2 device info fields
info.imageMaxBufferSize_ = 64 * Ki;
info.imageMaxArraySize_ = 2 * Ki;
info.imagePitchAlignment_ = 0;
info.imageBaseAddressAlignment_ = 0;
info.bufferFromImageSupport_ = CL_FALSE;
}
info.maxParameterSize_ = 4 * Ki;
info.memBaseAddrAlign_ =
8 * (flagIsDefault(MEMOBJ_BASE_ADDR_ALIGN) ? sizeof(cl_long16) : MEMOBJ_BASE_ADDR_ALIGN);
info.minDataTypeAlignSize_ = sizeof(cl_long16);
info.singleFPConfig_ = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST |
CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA;
info.doubleFPConfig_ = info.singleFPConfig_;
info.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
info.affinityDomain_.value_ = 0;
info.affinityDomain_.next_ = 1;
info.globalMemCacheType_ = CL_READ_WRITE_CACHE;
#if defined(__linux__)
info.globalMemCacheLineSize_ = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
info.globalMemCacheSize_ = sysconf(_SC_LEVEL1_DCACHE_SIZE);
info.affinityDomain_.cacheL1_ = 1;
if (sysconf(_SC_LEVEL2_CACHE_SIZE) > 0) {
info.affinityDomain_.cacheL2_ = 1;
}
if (sysconf(_SC_LEVEL3_CACHE_SIZE) > 0) {
info.affinityDomain_.cacheL3_ = 1;
}
if (sysconf(_SC_LEVEL4_CACHE_SIZE) > 0) {
info.affinityDomain_.cacheL4_ = 1;
}
#if defined(NUMA_SUPPORT)
if (numa_available() != -1 && numa_max_node() = > 0) {
info.affinityDomain_.numa_ = 1;
}
#endif
#else // win32
DWORD length = 0;
::GetLogicalProcessorInformation(NULL, &length);
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer =
(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length);
if (buffer != NULL && ::GetLogicalProcessorInformation(buffer, &length)) {
bool found = false;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr,
limit = &buffer[length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)];
for (ptr = buffer; ptr < limit; ++ptr) {
PCACHE_DESCRIPTOR cache = &ptr->Cache;
if (ptr->Relationship == RelationCache && cache->Type != CacheInstruction) {
info.affinityDomain_.value_ |=
(device::AffinityDomain::AFFINITY_DOMAIN_L1_CACHE << 1) >> cache->Level;
if (!found && cache->Level == 1) {
info.globalMemCacheLineSize_ = cache->LineSize;
info.globalMemCacheSize_ = cache->Size;
found = true;
}
}
}
}
free(buffer);
ULONG highestNuma = 0;
if (::GetNumaHighestNodeNumber(&highestNuma) && highestNuma != 0) {
info.affinityDomain_.numa_ = 1;
}
#endif
uintptr_t virtualMemSize;
#if defined(__linux__)
#if !defined(ATI_ARCH_ARM)
struct sysinfo si;
if (sysinfo(&si) != 0) {
return false;
}
if (si.mem_unit == 0) {
// Linux kernels prior to 2.3.23 return sizes in bytes.
si.mem_unit = 1;
}
info.globalMemSize_ = (cl_ulong)si.totalram * si.mem_unit;
#else
info.globalMemSize_ = 0;
#endif
virtualMemSize = (uintptr_t)info.globalMemSize_;
#else
MEMORYSTATUSEX statex;
statex.dwLength = sizeof(statex);
if (GlobalMemoryStatusEx(&statex) == 0) {
return false;
}
info.globalMemSize_ = (cl_ulong)statex.ullTotalPhys;
virtualMemSize = (uintptr_t)std::min(statex.ullTotalPageFile, statex.ullTotalVirtual);
#endif
// disable CPU device if system memory is equal to or less than 2GB
if (info.globalMemSize_ <= OCL_SYSMEM_REQUIREMENT * Gi) {
return true;
}
maxWorkerThreads_ = (size_t)(
virtualMemSize / (uintptr_t)((CPU_WORKER_THREAD_STACK_SIZE +
CLK_PRIVATE_MEMORY_SIZE * (CPU_MAX_WORKGROUP_SIZE + 1))) *
7 / 10);
#if defined(_LP64)
// Cap at 8TiB for 64-bit
const cl_ulong maxGlobalMemSize = 8ULL * Ki * Gi;
#elif defined(_WIN32)
// Cap at 2GiB (see http://msdn.microsoft.com/en-us/library/aa366778.aspx)
const cl_ulong maxGlobalMemSize = 2ULL * Gi;
#else // linux
// Cap at 3.5GiB
const cl_ulong maxGlobalMemSize = 3584ULL * Mi;
#endif
info.globalMemSize_ = std::min(info.globalMemSize_, maxGlobalMemSize);
info.maxMemAllocSize_ = info.globalMemSize_ * CPU_MAX_ALLOC_PERCENT / 100;
if (flagIsDefault(CPU_MAX_ALLOC_PERCENT)) {
const cl_ulong minAllocSize = LP64_SWITCH(1ULL * Gi, 2ULL * Gi);
info.maxMemAllocSize_ =
std::max(info.maxMemAllocSize_, std::min(info.globalMemSize_, minAllocSize));
}
info.maxConstantBufferSize_ = 64 * Ki;
info.maxConstantArgs_ = 8;
info.localMemType_ = CL_GLOBAL;
info.localMemSize_ = std::max((cl_ulong)32 * Ki, info.globalMemCacheSize_ / 2);
info.errorCorrectionSupport_ = CL_FALSE;
info.hostUnifiedMemory_ = CL_TRUE;
info.profilingTimerResolution_ = (size_t)amd::Os::timerResolutionNanos();
info.profilingTimerOffset_ = amd::Os::offsetToEpochNanos();
info.littleEndian_ = CL_TRUE;
info.available_ = CL_TRUE;
info.compilerAvailable_ = CL_TRUE;
info.linkerAvailable_ = CL_TRUE;
info.executionCapabilities_ = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL;
// Enable SVM only for OpenCL 2.0
if (((OPENCL_MAJOR >= 2) && (CPU_OPENCL_VERSION >= 200)) || OCL_FORCE_CPU_SVM) {
info.svmCapabilities_ = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER |
CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS;
}
info.preferredPlatformAtomicAlignment_ = 0;
info.preferredGlobalAtomicAlignment_ = 0;
info.preferredLocalAtomicAlignment_ = 0;
info.queueProperties_ = CL_QUEUE_PROFILING_ENABLE;
info.platform_ = AMD_PLATFORM;
#if defined(__linux__)
std::ifstream ifs("/proc/cpuinfo", std::ios::in);
if (ifs.is_open()) {
std::string line;
bool vendor = false;
bool name = false;
bool freq = false;
while (std::getline(ifs, line) && !(vendor && name && freq)) {
if (!vendor && (line.find("vendor_id\t: ") != std::string::npos)) {
::strcpy(info.vendor_, line.substr(line.find_first_of(':') + 2).c_str());
vendor = true;
} else if (!name && (line.find("model name\t: ") != std::string::npos ||
line.find("Processor\t: ") != std::string::npos)) {
::strcpy(info.name_, line.substr(line.find_first_of(':') + 2).c_str());
name = true;
} else if (!freq && (line.find("cpu MHz\t\t: ") != std::string::npos)) {
info.maxClockFrequency_ = ::atoi(line.substr(line.find_first_of(':') + 2).c_str());
freq = true;
}
}
ifs.close();
}
#elif defined(_WIN32)
int CPUInfo[4] = {-1};
int nRet = 0;
unsigned nIds, nExIds, i;
// cpuid with an InfoType argument of 0 returns the number of
// valid Ids in CPUInfo[0] and the CPU identification string in
// the other three array elements. The CPU identification string is
// not in linear order. The code below arranges the information
// in a human readable form.
amd::Os::cpuid(CPUInfo, 0);
nIds = CPUInfo[0];
memset(info.vendor_, 0, sizeof(info.vendor_));
*((int*)(info.vendor_ + 0)) = CPUInfo[1];
*((int*)(info.vendor_ + 4)) = CPUInfo[3];
*((int*)(info.vendor_ + 8)) = CPUInfo[2];
// Calling cpuid with 0x80000000 as the InfoType argument
// gets the number of valid extended IDs.
amd::Os::cpuid(CPUInfo, 0x80000000);
nExIds = CPUInfo[0];
memset(info.name_, 0, sizeof(info.name_));
sprintf(info.name_, "Unknown Processor");
// Get the information associated with each extended ID.
for (i = 0x80000000; i <= nExIds; ++i) {
amd::Os::cpuid(CPUInfo, i);
// Interpret CPU brand string and cache information.
if (i == 0x80000002)
memcpy(info.name_, CPUInfo, sizeof(CPUInfo));
else if (i == 0x80000003)
memcpy(info.name_ + 16, CPUInfo, sizeof(CPUInfo));
else if (i == 0x80000004)
memcpy(info.name_ + 32, CPUInfo, sizeof(CPUInfo));
}
info.maxClockFrequency_ = 0;
HKEY hKey;
// Open the key
if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0\\", 0,
KEY_QUERY_VALUE, &hKey) == ERROR_SUCCESS) {
// Read the value
DWORD dwLen = 4;
RegQueryValueEx(hKey, "~MHz", NULL, NULL, (LPBYTE)&info.maxClockFrequency_, &dwLen);
// Cleanup and return
RegCloseKey(hKey);
}
#else
::strcpy(info.name_, "Unknown Processor");
::strcpy(info.vendor_, "Unknown Vendor");
info.maxClockFrequency_ = 0;
#endif
#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
info.profile_ = "FULL_PROFILE";
if (CPU_OPENCL_VERSION < 200) {
info.version_ = "OpenCL 1.2 " AMD_PLATFORM_INFO;
info.oclcVersion_ = "OpenCL C 1.2 ";
} else {
info.version_ = "OpenCL " OPENCL_VERSION_STR " " AMD_PLATFORM_INFO;
info.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
}
info.spirVersions_ = "1.2";
info.partitionCreateInfo_.type_.value_ = 0;
info.partitionProperties_.value_ = 0;
if (info.maxComputeUnits_ > 1) {
info.partitionProperties_.equally_ = 1;
info.partitionProperties_.byCounts_ = 1;
if (info.affinityDomain_.value_ != 0) {
info.partitionProperties_.byAffinityDomain_ = 1;
}
} else {
info.affinityDomain_.value_ = 0;
}
// Copy the name into the boardName data member for CPU implementation.
// ::strncpy(info.boardName_, info.name_, sizeof(info.boardName_));
memset(info.boardName_, 0, sizeof(info.boardName_));
Device* device = new Device();
if (device == NULL || !device->create()) {
delete device;
return false;
}
::snprintf(info.driverVersion_, sizeof(info.driverVersion_) - 1, "%s (%s%s%s)", AMD_BUILD_STRING,
#if defined(ATI_ARCH_X86)
"sse2",
#else // !ATI_ARCH_X86
"",
#endif // !ATI_ARCH_X86
device->hasAVXInstructions() ? ",avx" : "",
device->hasFMA4Instructions() ? ",fma4" : "");
// These will need to change for AVX2
info.preferredVectorWidthChar_ = 16;
info.preferredVectorWidthShort_ = 8;
info.preferredVectorWidthInt_ = 4;
info.preferredVectorWidthLong_ = 2;
if (device->hasAVXInstructions()) {
info.preferredVectorWidthFloat_ = 8;
info.preferredVectorWidthDouble_ = 4;
} else {
info.preferredVectorWidthFloat_ = 4;
info.preferredVectorWidthDouble_ = 2;
}
info.preferredVectorWidthHalf_ = 0; // no half support
// Same here, will need to change for AVX2
info.nativeVectorWidthChar_ = 16;
info.nativeVectorWidthShort_ = 8;
info.nativeVectorWidthInt_ = 4;
info.nativeVectorWidthLong_ = 2;
if (device->hasAVXInstructions()) {
info.nativeVectorWidthFloat_ = 8;
info.nativeVectorWidthDouble_ = 4;
} else {
info.nativeVectorWidthFloat_ = 4;
info.nativeVectorWidthDouble_ = 2;
}
info.nativeVectorWidthHalf_ = 0; // no half support
// Find all supported device extensions
info.extensions_ = device->getExtensionString();
// OpenCL 1.2 device info fields
info.builtInKernels_ = "";
info.preferredInteropUserSync_ = true;
info.printfBufferSize_ = 64 * Ki;
info.maxPipePacketSize_ = info.maxMemAllocSize_;
info.maxPipeActiveReservations_ = 16;
info.maxPipeArgs_ = 16;
info.maxReadWriteImageArgs_ = MaxReadWriteImage;
// Max size should not be bigger than 1.75 GB
const cl_ulong maxSize = std::min(static_cast<cl_ulong>((Gi / 4) * 7), info.maxMemAllocSize_);
info.maxGlobalVariableSize_ = static_cast<size_t>(maxSize);
info.globalVariablePreferredTotalSize_ = static_cast<size_t>(maxSize);
device->info_ = info;
device->registerDevice();
return true;
}
bool Device::create() {
// Create CPU settings
settings_ = new cpu::Settings();
cpu::Settings* cpuSettings = reinterpret_cast<cpu::Settings*>(settings_);
if ((cpuSettings == NULL) || !cpuSettings->create()) {
return false;
}
#if defined(ATI_ARCH_X86)
// Check that we have at least SSE2
if (settings().cpuFeatures_ == 0) {
return false;
}
#endif
return true;
}
// Initializes this object as a sub-device.
//
// Copies the given device info into info_, overrides the compute unit count
// with maxComputeUnits and records how the sub-device was created. For a
// BY_COUNTS partition a private copy of the counts list is allocated and
// stored in the partition info. A sub-device with a single compute unit
// advertises no further partitioning.
//
// Returns false only if an allocation yields NULL; true otherwise.
bool Device::initSubDevice(device::Info& info, cl_uint maxComputeUnits,
                           const device::CreateSubDevicesInfo& create_info) {
  // Create the affinity mask on first use.
  if (workerThreadsAffinity_ == NULL) {
    workerThreadsAffinity_ = new amd::Os::ThreadAffinityMask;
    if (workerThreadsAffinity_ == NULL) return false;
  }

  info_ = info;
  info_.maxComputeUnits_ = maxComputeUnits;
  info_.partitionCreateInfo_ = create_info.p_;

  const bool partitionedByCounts =
      (create_info.p_.type_.value_ == device::PartitionType::BY_COUNTS);
  if (partitionedByCounts) {
    cl_uint* const ownedCounts = new cl_uint[create_info.p_.byCounts_.listSize_];
    if (ownedCounts == NULL) return false;

    size_t slot = 0;
    while (slot < create_info.p_.byCounts_.listSize_) {
      ownedCounts[slot] = create_info.countsListAt(slot);
      ++slot;
    }
    info_.partitionCreateInfo_.byCounts_.countsList_ = ownedCounts;
  }

  // The device cannot be partitioned further
  if (maxComputeUnits == 1) {
    info_.partitionProperties_.value_ = 0;
    info_.affinityDomain_.value_ = 0;
  }

  return true;
}
// Marks numWorkerThreads cores in this device's worker-thread affinity mask.
//   threadsAffinityMask - when NULL, consecutive core ids following
//                         baseCoreId are used; otherwise each next core is
//                         taken from the mask via getNextSet()
//   baseCoreId          - in: last core id already handed out;
//                         out: last core id assigned by this call
void Device::setWorkerThreadsAffinity(cl_uint numWorkerThreads,
                                      const amd::Os::ThreadAffinityMask* threadsAffinityMask,
                                      uint& baseCoreId) {
  uint core = baseCoreId;
  const bool unrestricted = (threadsAffinityMask == NULL);

  for (cl_uint remaining = numWorkerThreads; remaining != 0; --remaining) {
    if (unrestricted) {
      core = core + 1;
    } else {
      // Already has affinity, so filter accordingly
      core = threadsAffinityMask->getNextSet(core);
    }
    workerThreadsAffinity_->set(core);
  }

  baseCoreId = core;
}
// Dispatches a sub-device creation request to the partition routine that
// matches create_info.p_.type_. For affinity-domain requests the requested
// domain is resolved first: a "next" request is replaced by the lowest bit
// set in this device's affinityDomain_, and an explicit request must
// overlap the supported domains.
// Returns CL_INVALID_VALUE for an unknown partition type or an unsupported
// domain, CL_DEVICE_PARTITION_FAILED when no affinity domain is available,
// otherwise whatever the selected partition routine returns.
cl_int Device::createSubDevices(device::CreateSubDevicesInfo& create_info, cl_uint num_entries,
                                cl_device_id* devices, cl_uint* num_devices) {
  switch (create_info.p_.type_.value_) {
    case device::PartitionType::EQUALLY:
      return partitionEqually(create_info, num_entries, devices, num_devices);
    case device::PartitionType::BY_COUNTS:
      return partitionByCounts(create_info, num_entries, devices, num_devices);
    case device::PartitionType::BY_AFFINITY_DOMAIN:
      break;
    default:
      return CL_INVALID_VALUE;
  }

  // Only BY_AFFINITY_DOMAIN requests reach this point.
  if (info_.affinityDomain_.value_ == 0) {
    return CL_DEVICE_PARTITION_FAILED;
  }

  if (!create_info.p_.byAffinityDomain_.next_) {
    const bool supported =
        (create_info.p_.byAffinityDomain_.value_ & info_.affinityDomain_.value_) != 0;
    if (!supported) {
      return CL_INVALID_VALUE;
    }
  } else {
    create_info.p_.byAffinityDomain_.next_ = 0;
    create_info.p_.byAffinityDomain_.value_ =
        (1 << amd::leastBitSet(info_.affinityDomain_.value_));
  }

  return create_info.p_.byAffinityDomain_.numa_
      ? partitionByAffinityDomainNUMA(create_info, num_entries, devices, num_devices)
      : partitionByAffinityDomainCacheLevel(create_info, num_entries, devices, num_devices);
}
// Splits this device into as many sub-devices as fit when each one gets
// create_info.p_.equally_.numComputeUnits_ compute units.
//   num_entries - capacity of the devices array
//   devices     - optional output array of sub-device handles
//   num_devices - optional output for the number of sub-devices
// Returns CL_INVALID_VALUE for a zero unit count or a too-small devices
// array, CL_DEVICE_PARTITION_FAILED when not even one sub-device fits, and
// CL_OUT_OF_HOST_MEMORY when a sub-device cannot be created.
cl_int Device::partitionEqually(const device::CreateSubDevicesInfo& create_info,
                                cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) {
  const cl_uint unitsPerDevice = (cl_uint)create_info.p_.equally_.numComputeUnits_;
  if (unitsPerDevice == 0) {
    return CL_INVALID_VALUE;
  }

  const cl_uint total = info_.maxComputeUnits_ / unitsPerDevice;
  if (total == 0) {
    return CL_DEVICE_PARTITION_FAILED;
  }

  if (num_devices != NULL) {
    *num_devices = total;
  }
  if (devices == NULL) {
    return CL_SUCCESS;
  }
  if (num_entries < total) {
    return CL_INVALID_VALUE;
  }

  // (uint)-1 so that the first core handed out is core 0.
  uint lastCore = (uint)-1;
  for (cl_uint created = 0; created < total; ++created) {
    Device* sub = new Device(this);
    if (sub == NULL) {
      return CL_OUT_OF_HOST_MEMORY;
    }

    const bool ready = sub->create() && sub->initSubDevice(info_, unitsPerDevice, create_info);
    if (!ready) {
      sub->release();
      return CL_OUT_OF_HOST_MEMORY;
    }

    sub->setWorkerThreadsAffinity(unitsPerDevice, workerThreadsAffinity_, lastCore);
    devices[created] = as_cl(static_cast<amd::Device*>(sub));
  }
  return CL_SUCCESS;
}
// Splits this device into one sub-device per entry of the counts list in
// create_info, each with the requested number of compute units.
//   num_entries - capacity of the devices array
//   devices     - optional output array of sub-device handles
//   num_devices - optional output for the number of sub-devices
// Returns CL_INVALID_DEVICE_PARTITION_COUNT when the list is empty or its
// counts add up to more compute units than this device has,
// CL_INVALID_VALUE when the devices array is too small, and
// CL_OUT_OF_HOST_MEMORY when a sub-device cannot be created.
cl_int Device::partitionByCounts(const device::CreateSubDevicesInfo& create_info,
                                 cl_uint num_entries, cl_device_id* devices, cl_uint* num_devices) {
  cl_uint maxComputeUnits = 0;
  cl_uint numSubDevices = (cl_uint)create_info.p_.byCounts_.listSize_;

  // Sum every requested count. Valid indices are [0, numSubDevices); the
  // previous loop read index numSubDevices (one past the end) and never
  // read index 0.
  for (size_t i = 0; i < (size_t)numSubDevices; ++i) {
    maxComputeUnits += create_info.countsListAt(i);
  }

  if (numSubDevices == 0 || maxComputeUnits > info_.maxComputeUnits_) {
    return CL_INVALID_DEVICE_PARTITION_COUNT;
  }

  if (num_devices != NULL) {
    *num_devices = numSubDevices;
  }

  if (devices != NULL) {
    if (num_entries < numSubDevices) {
      return CL_INVALID_VALUE;
    }

    // (uint)-1 so that the first core handed out is core 0.
    uint coreId = (uint)-1;
    // The post-decrement makes numSubDevices a valid index inside the body,
    // so sub-devices are emitted from the last list entry down to the first.
    while (numSubDevices-- > 0) {
      Device* device = new Device(this);
      if (device == NULL) {
        return CL_OUT_OF_HOST_MEMORY;
      }

      cl_uint subComputeUnits = create_info.countsListAt((size_t)numSubDevices);
      if (!device->create() || !device->initSubDevice(info_, subComputeUnits, create_info)) {
        device->release();
        return CL_OUT_OF_HOST_MEMORY;
      }

      device->setWorkerThreadsAffinity(subComputeUnits, workerThreadsAffinity_, coreId);
      *devices++ = as_cl(static_cast<amd::Device*>(device));
    }
  }
  return CL_SUCCESS;
}
// Partitions this device into one sub-device per NUMA node.
//   create_info - partition request forwarded to each sub-device
//   num_entries - not referenced by this function
//   devices     - optional output array of sub-device handles
//   num_devices - optional output for the number of sub-devices
// Returns CL_INVALID_VALUE when NUMA information is unavailable (including
// Linux builds without NUMA_SUPPORT) or no sub-device could be derived,
// CL_OUT_OF_HOST_MEMORY when a sub-device cannot be created, and CL_SUCCESS
// otherwise.
cl_int Device::partitionByAffinityDomainNUMA(const device::CreateSubDevicesInfo& create_info,
                                             cl_uint num_entries, cl_device_id* devices,
                                             cl_uint* num_devices) {
  cl_uint numSubDevices = 0;

#if defined(__linux__)
#if !defined(NUMA_SUPPORT)
  return CL_INVALID_VALUE;
#else
  int highestNuma = numa_max_node();
  if (highestNuma < 0) {
    return CL_INVALID_VALUE;
  }

  // Node ids run from 0 to highestNuma inclusive, so the node count is one
  // more than the highest id. Without the +1 a single-node system reported
  // zero sub-devices and failed with CL_INVALID_VALUE.
  numSubDevices = (cl_uint)highestNuma + 1;

  if (devices != NULL) {
    for (int node = 0; node <= highestNuma; ++node) {
      cl_uint subComputeUnits = 0;

      // Grow the CPU bitmap until numa_node_to_cpus() stops reporting ERANGE.
      int len = 1;
      while (true) {
        // alloca() returns void*; C++ needs an explicit cast.
        ulong* cpus = static_cast<ulong*>(alloca(sizeof(ulong) * len));
        if (numa_node_to_cpus(node, cpus, len * sizeof(ulong)) < 0) {
          if (errno != ERANGE) {
            return CL_INVALID_VALUE;
          }
          len *= 2;
        } else {
          // Convert the word count into a bit count and count the set bits.
          len *= sizeof(ulong) * 8;
          for (int i = 0; i < len; i++) {
            if (test_bit(i, cpus)) {
              ++subComputeUnits;
            }
          }
          break;
        }
      }

      if (subComputeUnits == 0) {
        return CL_INVALID_VALUE;
      }

      Device* device = new Device(this);
      if (device == NULL) {
        return CL_OUT_OF_HOST_MEMORY;
      }

      if (!device->create() || NULL == (device->numaMask_ = new nodemask_t)) {
        device->release();
        return CL_OUT_OF_HOST_MEMORY;
      }

      if (!device->initSubDevice(info_, subComputeUnits, create_info)) {
        delete device->numaMask_;
        device->numaMask_ = NULL;
        device->release();
        return CL_OUT_OF_HOST_MEMORY;
      }

      // Restrict the sub-device's node mask to this node only.
      nodemask_zero(device->numaMask_);
      nodemask_set(device->numaMask_, node);

      // Need to remove this domain type
      device->info_.affinityDomain_.numa_ = 0;

      *devices++ = as_cl(static_cast<amd::Device*>(device));
    }
  }
#endif  // NUMA_SUPPORT
#else   // win32
  GROUP_AFFINITY numaNodeMask;
  ULONG highestNuma = 0;
  if (!::GetNumaHighestNodeNumber(&highestNuma)) {
    return CL_INVALID_VALUE;
  }

  for (ULONG node = 0; node <= highestNuma; ++node) {
    if (pfnGetNumaNodeProcessorMaskEx != NULL) {
      if (!pfnGetNumaNodeProcessorMaskEx((USHORT)node, &numaNodeMask)) {
        // Highest NUMA node number is not guaranteed to be the
        // number of nodes.
        continue;
      }
    } else {
      ULONGLONG tmpMask;
      if (!::GetNumaNodeProcessorMask((UCHAR)node, &tmpMask)) {
        // Highest NUMA node number is not guaranteed to be the
        // number of nodes.
        continue;
      }
      numaNodeMask.Group = 0;
      numaNodeMask.Mask = (KAFFINITY)tmpMask;
    }

    // NOTE(review): adjust() presumably narrows the node mask to the cores
    // this device is allowed to use -- confirm against ThreadAffinityMask.
    if (workerThreadsAffinity_ != NULL) {
      workerThreadsAffinity_->adjust(0, numaNodeMask.Mask);
    }
    if (numaNodeMask.Mask == 0) {
      continue;
    }

    if (devices != NULL) {
      Device* device = new Device(this);
      if (device == NULL) {
        return CL_OUT_OF_HOST_MEMORY;
      }

      if (!device->create() ||
          !device->initSubDevice(info_, (cl_uint)amd::countBitsSet(numaNodeMask.Mask),
                                 create_info)) {
        device->release();
        return CL_OUT_OF_HOST_MEMORY;
      }

      device->workerThreadsAffinity_->set(numaNodeMask.Group, numaNodeMask.Mask);

      // Need to remove this domain type
      device->info_.affinityDomain_.numa_ = 0;

      *devices++ = as_cl(static_cast<amd::Device*>(device));
    }
    numSubDevices++;
  }
#endif  // win32

  if (num_devices != NULL) {
    *num_devices = numSubDevices;
  }

  // Could not get a processor mask for any of the nodes
  if (numSubDevices == 0) {
    return CL_INVALID_VALUE;
  }
  return CL_SUCCESS;
}
#if defined(__linux__)
// Reads the beginning of a file into buf as a NUL-terminated string.
//   file    - path of the file to open read-only
//   buf     - destination buffer
//   bufSize - capacity of buf in bytes
// At most min(bufSize, reported file size) bytes are requested. When the
// read fills that whole amount, the last byte is replaced by the terminator.
// Returns false when the file cannot be opened or stat'ed, or when nothing
// could be read.
static bool readFileString(const char* file, char* buf, size_t bufSize) {
  const int handle = open(file, O_RDONLY);
  if (handle < 0) {
    return false;
  }

  struct stat info;
  const bool statOk = (fstat(handle, &info) >= 0);
  if (!statOk) {
    close(handle);
    return false;
  }

  // Never request more bytes than the file reports holding.
  const size_t fileSize = (size_t)info.st_size;
  const size_t limit = (fileSize < bufSize) ? fileSize : bufSize;

  const ssize_t got = read(handle, buf, limit);
  close(handle);
  if (got <= 0) {
    return false;
  }

  // Keep room for the terminator inside the requested range.
  const ssize_t end = (got >= (ssize_t)limit) ? (ssize_t)limit - 1 : got;
  buf[end] = '\0';
  return true;
}
// Converts a comma-separated list of hexadecimal words (the format of the
// sysfs "shared_cpu_map" files read by the caller) into a cpu_set_t.
//   cpuMap - NUL-terminated text; the right-most word describes the
//            lowest-numbered CPUs
//   mask   - output set; cleared first, then filled one 32-bit word at a time
// Words that would not fit into the set's storage are ignored instead of
// being written past its end.
// NOTE(review): storing 32-bit words into the set's native storage assumes
// they land in ascending CPU order (little-endian layout) -- confirm for
// other targets.
static void parseSharedCpuMap(const char* cpuMap, cpu_set_t& mask) {
  CPU_ZERO(&mask);

  uint32_t* bits = (uint32_t*)mask.__bits;
  uint32_t* const bitsEnd = bits + sizeof(mask.__bits) / sizeof(uint32_t);

  // Walk the text from its end towards its start, one word per iteration.
  const char* s = cpuMap + strlen(cpuMap);
  while (bits < bitsEnd) {
    // Find the separator that precedes the current word, if any.
    s = (const char*)memrchr(cpuMap, ',', s - cpuMap);
    if (!s) {
      s = cpuMap;
    } else {
      s++;
    }

    // strtoul stops at the next ',' (or other non-hex character).
    *bits++ = strtoul(s, NULL, 16);
    if (s == cpuMap) {
      return;
    }
    // Step back onto the separator so the next search starts before it.
    --s;
  }
}
#endif // linux
// Partitions this device into sub-devices, one per group of CPUs reported as
// sharing a cache of the level requested in create_info.
//   create_info - partition request; p_.byAffinityDomain_.value_ selects the
//                 cache level (L1..L4)
//   num_entries - not referenced by this function
//   devices     - optional output array of sub-device handles; when NULL the
//                 sub-devices are only counted
//   num_devices - optional output for the number of sub-devices
// Returns CL_INVALID_VALUE for an unsupported domain value, for a failed
// read of the cache topology file (Linux), or when no sub-device could be
// derived; CL_OUT_OF_HOST_MEMORY when a sub-device cannot be created;
// CL_SUCCESS otherwise.
cl_int Device::partitionByAffinityDomainCacheLevel(const device::CreateSubDevicesInfo& create_info,
cl_uint num_entries, cl_device_id* devices,
cl_uint* num_devices) {
// Translate the requested affinity domain into a numeric cache level.
cl_uint cacheLevel = 0;
switch (create_info.p_.byAffinityDomain_.value_) {
case device::AffinityDomain::AFFINITY_DOMAIN_L4_CACHE:
cacheLevel = 4;
break;
case device::AffinityDomain::AFFINITY_DOMAIN_L3_CACHE:
cacheLevel = 3;
break;
case device::AffinityDomain::AFFINITY_DOMAIN_L2_CACHE:
cacheLevel = 2;
break;
case device::AffinityDomain::AFFINITY_DOMAIN_L1_CACHE:
cacheLevel = 1;
break;
default:
return CL_INVALID_VALUE;
}
// Mask used below to clear the chosen domain from each sub-device's
// supported affinity domains.
const uint negAffinityDomain = ~create_info.p_.byAffinityDomain_.value_;
cl_uint numSubDevices = 0;
#if defined(__linux__)
// Set of CPUs still to be assigned: this device's worker affinity if it has
// one, otherwise CPU ids 0 .. maxComputeUnits_ - 1.
amd::Os::ThreadAffinityMask affinityMask;
if (workerThreadsAffinity_ != NULL) {
affinityMask = *workerThreadsAffinity_;
} else {
for (uint cpuId = 0; cpuId < (uint)info_.maxComputeUnits_; ++cpuId) {
affinityMask.set(cpuId);
}
}
amd::Os::ThreadAffinityMask currentMask;
// buf first holds the sysfs path and is then overwritten with the file's
// contents by readFileString().
char buf[1024];
for (uint cpuId = affinityMask.getFirstSet(); cpuId != (uint)-1;
cpuId = affinityMask.getNextSet(cpuId)) {
// NOTE(review): the numeric cache level is used directly as the sysfs
// "index<N>" directory number -- confirm that this mapping is intended.
sprintf(buf, "/sys/devices/system/cpu/cpu%u/cache/index%u/shared_cpu_map", cpuId, cacheLevel);
if (!readFileString(buf, buf, sizeof(buf))) {
return CL_INVALID_VALUE;
}
parseSharedCpuMap(buf, currentMask.getNative());
// NOTE(review): adjust() presumably narrows its argument (currentMask) to
// the CPUs also present in affinityMask -- confirm against
// ThreadAffinityMask.
affinityMask.adjust(currentMask.getNative());
if (currentMask.isEmpty()) {
continue;
}
cl_uint maxComputeUnits;
if (cacheLevel > 1) {
// Derive the compute unit count from the next lower cache level: one
// unit per CPU of affinityMask whose lower-level map is non-empty after
// the adjust() call against currentMask.
maxComputeUnits = 0;
amd::Os::ThreadAffinityMask currentMaskSub;
cl_uint cacheLevelSub = cacheLevel - 1;
for (uint cpuIdSub = affinityMask.getFirstSet(); cpuIdSub != (uint)-1;
cpuIdSub = affinityMask.getNextSet(cpuIdSub)) {
sprintf(buf, "/sys/devices/system/cpu/cpu%u/cache/index%u/shared_cpu_map", cpuIdSub,
cacheLevelSub);
if (!readFileString(buf, buf, sizeof(buf))) {
return CL_INVALID_VALUE;
}
parseSharedCpuMap(buf, currentMaskSub.getNative());
currentMask.adjust(currentMaskSub.getNative());
if (!currentMaskSub.isEmpty()) {
++maxComputeUnits;
}
}
if (maxComputeUnits == 0) {
continue;
}
} else {
maxComputeUnits = 1;
}
if (devices != NULL) {
Device* device = new Device(this);
if (device == NULL) {
return CL_OUT_OF_HOST_MEMORY;
}
if (!device->create() || !device->initSubDevice(info_, maxComputeUnits, create_info)) {
device->release();
return CL_OUT_OF_HOST_MEMORY;
}
device->workerThreadsAffinity_->set(currentMask.getNative());
// Need to remove this domain type
device->info_.affinityDomain_.value_ &= negAffinityDomain;
*devices++ = as_cl(static_cast<amd::Device*>(device));
}
numSubDevices++;
// NOTE(review): clear() presumably removes the CPUs of this group from the
// remaining set so they are not assigned twice -- confirm.
affinityMask.clear(currentMask.getNative());
}
#else // win32
// First call with a NULL buffer only obtains the required buffer length.
DWORD length = 0;
::GetLogicalProcessorInformation(NULL, &length);
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer =
(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length);
if (buffer != NULL && ::GetLogicalProcessorInformation(buffer, &length)) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr,
limit = &buffer[length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)];
for (ptr = buffer; ptr < limit; ++ptr) {
PCACHE_DESCRIPTOR cache = &ptr->Cache;
// Only non-instruction cache entries of the requested level are considered.
if (ptr->Relationship == RelationCache && cache->Type != CacheInstruction) {
if (cache->Level == cacheLevel) {
KAFFINITY affinityMask = (KAFFINITY)ptr->ProcessorMask;
// NOTE(review): adjust() presumably narrows affinityMask to the cores this
// device may use -- confirm against ThreadAffinityMask.
if (workerThreadsAffinity_ != NULL) {
workerThreadsAffinity_->adjust(0, affinityMask);
}
if (affinityMask == 0) {
continue;
}
cl_uint maxComputeUnits;
if (cacheLevel > 1) {
// Count the non-instruction caches one level below that overlap this
// cache's processor mask.
maxComputeUnits = 0;
cl_uint cacheLevelSub = cacheLevel - 1;
for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptrSub = buffer; ptrSub < limit; ++ptrSub) {
PCACHE_DESCRIPTOR cacheSub = &ptrSub->Cache;
if (ptrSub->Relationship == RelationCache && cacheSub->Type != CacheInstruction) {
if (cacheSub->Level == cacheLevelSub &&
((affinityMask & (KAFFINITY)ptrSub->ProcessorMask) != 0)) {
++maxComputeUnits;
}
}
}
if (maxComputeUnits == 0) {
continue;
}
} else {
maxComputeUnits = 1;
}
if (devices != NULL) {
Device* device = new Device(this);
if (device == NULL) {
free(buffer);
return CL_OUT_OF_HOST_MEMORY;
}
if (!device->create() || !device->initSubDevice(info_, maxComputeUnits, create_info)) {
free(buffer);
device->release();
return CL_OUT_OF_HOST_MEMORY;
}
device->workerThreadsAffinity_->set(0, affinityMask);
// Need to remove this domain type
device->info_.affinityDomain_.value_ &= negAffinityDomain;
*devices++ = as_cl(static_cast<amd::Device*>(device));
}
numSubDevices++;
// Stop once there are as many sub-devices as this device has compute units.
if (numSubDevices >= info_.maxComputeUnits_) {
break;
}
}
}
}
}
// Reached on success and when the query failed; free(NULL) is a no-op.
free(buffer);
#endif
if (num_devices != NULL) {
*num_devices = numSubDevices;
}
// No cache group produced a sub-device.
if (numSubDevices == 0) {
return CL_INVALID_VALUE;
}
return CL_SUCCESS;
}
// Creates a CPU program object bound to this device. The options argument
// is not used here. An error is logged when the allocation yields NULL, and
// NULL is returned in that case.
device::Program* Device::createProgram(amd::option::Options* options) {
  Program* const program = new Program(*this);
  if (program != NULL) {
    return program;
  }

  LogError("We failed memory allocation for program!");
  return program;
}
// Returns the host address that a map of the given memory object starts at.
//   mem        - memory object being mapped
//   origin     - start of the mapped area; for images origin[0] is in
//                elements, origin[1] in rows and origin[2] in slices, for
//                buffers origin[0] is a byte offset
//   region     - not used here
//   mapFlags   - not used here
//   rowPitch   - receives the image row pitch (written for images only)
//   slicePitch - optional; receives the image slice pitch
// Returns NULL when mem is neither an image nor a buffer.
void* Device::allocMapTarget(amd::Memory& mem, const amd::Coord3D& origin,
                             const amd::Coord3D& region, uint mapFlags, size_t* rowPitch,
                             size_t* slicePitch) {
  amd::Image* const img = mem.asImage();
  if (img == NULL) {
    // Not an image: plain buffers map straight onto their host memory.
    if (mem.asBuffer() == NULL) {
      return NULL;
    }
    return (address)mem.getHostMem() + origin[0];
  }

  const size_t texelBytes = img->getImageFormat().getElementSize();
  const size_t rowBytes = img->getRowPitch();
  const size_t sliceBytes = img->getSlicePitch();

  *rowPitch = rowBytes;
  if (slicePitch != NULL) {
    *slicePitch = sliceBytes;
  }

  const size_t offset = origin[0] * texelBytes + origin[1] * rowBytes + origin[2] * sliceBytes;
  return (address)img->getHostMem() + offset;
}
// Map-target release hook. The body is intentionally empty for the CPU
// device; neither argument is used.
void Device::freeMapTarget(amd::Memory& mem, void* target) {
// nop for CPU
}
} // namespace cpu