Files
rocm-systems/rocclr/runtime/device/cpu/cpudevice.cpp
T
foreman 4a7a1e7598 P4 to Git Change 1127589 by lmoriche@lmoriche_opencl_dev on 2015/03/04 18:08:48
ECR #304775 - Split the legacy compiler library (OpenCL C/C++ 1.x->EDG->LLVM32->AMDIL/x86) and trunk compiler library (OpenCL C 2.x->CLANG->LLVM TOT->HSAIL/x86). The legacy compiler library (amdocl12cl.dll) will be dynamically loaded when building OpenCL 1.x programs, the trunk compiler library will be embedded in the OpenCL runtime.

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/build/Makefile.api#100 edit
... //depot/stg/opencl/drivers/opencl/compiler/Makefile#52 edit
... //depot/stg/opencl/drivers/opencl/compiler/clc/build/Makefile#5 edit
... //depot/stg/opencl/drivers/opencl/compiler/clc/build/Makefile.clc#18 edit
... //depot/stg/opencl/drivers/opencl/compiler/clc/src/build/Makefile#5 edit
... //depot/stg/opencl/drivers/opencl/compiler/clc/src/build/Makefile.clc#15 edit
... //depot/stg/opencl/drivers/opencl/compiler/edg/build/Makefile#4 edit
... //depot/stg/opencl/drivers/opencl/compiler/edg/build/Makefile.edg#9 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/Makefile#31 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/api/v0_8/acl.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/api/v0_8/aclLoaders.cpp#10 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/build/Makefile.common#27 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/v0_8/if_acl.cpp#61 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/Makefile#45 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/build/Makefile.gpu#27 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/metadata.cpp#6 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/build/Makefile.complib#77 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/complibdefs#39 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/jit/src/Disassembler.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/jit/src/build/Makefile.src#3 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/v0_8/libUtils.h#15 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDIL.h#110 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDIL.td#20 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILAlgorithms.tpp#19 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILAsmBackend.cpp#31 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILAsmBackend.h#24 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILAsmPrinter.cpp#60 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILAsmPrinter.h#7 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILBarrierDetect.cpp#75 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILBase.td#69 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCFGStructurizer.cpp#193 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCIIOExpansion.cpp#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCIIOExpansion.h#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCIPointerManager.cpp#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCIPointerManager.h#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCallingConv.td#31 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCompilerErrors.cpp#7 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCompilerErrors.h#27 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCompilerWarnings.cpp#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCompilerWarnings.h#19 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILConversions.td#30 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILCreateKernelStub.cpp#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILDeviceInfo.h#10 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILEBBPass.cpp#7 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILEGIOExpansion.cpp#101 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILEnumeratedTypes.td#55 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILFixupKinds.h#11 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILFormats.td#77 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILFrameLowering.cpp#20 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILFrameLowering.h#21 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILIOExpansion.cpp#100 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILIOExpansion.h#51 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILISelDAGToDAG.cpp#96 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILISelLowering.cpp#407 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILISelLowering.h#78 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILInliner.cpp#59 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILInstPrinter.cpp#25 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILInstPrinter.h#16 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILInstrInfo.cpp#118 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILInstrInfo.h#37 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILInstrInfo.td#73 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILInstrPatterns.td#60 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILInstructions.td#190 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILIntrinsicInfo.cpp#37 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILIntrinsicInfo.h#16 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILIntrinsics.td#103 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernel.h#22 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernelManager.cpp#454 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernelManager.h#52 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILLLVMApple.h#14 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILLLVMPC.h#18 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILLiteralManager.cpp#34 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMCAsmInfo.cpp#62 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMCAsmInfo.h#20 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMCCodeEmitter.cpp#22 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachineDCE.cpp#7 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachineDCE.h#7 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachineEBB.cpp#9 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachineEBB.h#7 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachineFunctionInfo.cpp#76 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachineFunctionInfo.h#50 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachinePeephole.cpp#57 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachineValue.cpp#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMachineValue.h#7 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMem32.td#32 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMem64.td#32 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILModuleInfo.cpp#59 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILModuleInfo.h#35 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILMultiClass.td#90 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILNodes.td#47 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILOperands.td#30 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILPatterns.td#41 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILPeepholeOptimizer.cpp#166 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILPointerManager.cpp#230 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILPointerManager.h#32 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILPointerManagerImpl.h#31 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILPrintfConvert.cpp#93 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILProfiles.td#36 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterInfo.cpp#88 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterInfo.h#35 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterInfo.td#72 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterUseValidate.cpp#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterUsesINScalar.td#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterUsesINV2.td#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterUsesINV4.td#3 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterUsesScalar.td#13 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterUsesV2.td#13 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRegisterUsesV4.td#13 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILRenumberRegister.cpp#8 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILSIIOExpansion.cpp#53 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILSIIOExpansion.h#19 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILSIPointerManager.cpp#37 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILSIPointerManager.h#16 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILSubtarget.cpp#208 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILSubtarget.h#39 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILSwizzleEncoder.cpp#68 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILSwizzleEncoder.h#27 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILTargetMachine.cpp#165 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILTargetMachine.h#26 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILTokenDesc.td#14 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILUtilityFunctions.cpp#128 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILUtilityFunctions.h#71 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILVersion.td#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AddrSpaces.txt#8 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Android.mk#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Apple/AMDIL.td#11 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Apple/AMDILVersion.td#12 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/CMakeLists.txt#15 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Doxyfile#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/GPUMetadataAbi.pdf#8 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/AMDILInternal.h#8 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/AMDILInternalDeviceFlags.h#13 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/AMDILKVDevice.h#7 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/AMDILVIInstrInfo.td#2 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/AMDILVIInstructions.td#12 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/AMDILVIIntrinsics.td#5 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/CMakeLists.txt#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/InternalProcessors.td#22 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Internal/LLVMBuild.txt#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/LICENSE.TXT#5 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/LLVMBuild.txt#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/MCTargetDesc/AMDILELFObjectWriter.cpp#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/MCTargetDesc/AMDILELFObjectWriter.h#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/MCTargetDesc/CMakeLists.txt#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/MCTargetDesc/LLVMBuild.txt#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/Processors.td#9 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/TD-Class-Flow-new.xmi#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/TD-Class-Flow.xmi#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/TargetInfo/AMDILTargetInfo.cpp#25 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/TargetInfo/CMakeLists.txt#10 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/TargetInfo/LLVMBuild.txt#6 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/build/Makefile.amdil#26 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/generateRegisters.pl#8 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/notes#5 delete
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/Makefile#23 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/llvmdefs#37 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/llvmlibs#36 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm32/lib/Analysis/AMDExportKernelNature.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm32/lib/Linker/AMDFixupKernelModule.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm32/lib/Linker/clpVectorExpansion.cpp#2 edit
... //depot/stg/opencl/drivers/opencl/library/build/Makefile#5 edit
... //depot/stg/opencl/drivers/opencl/library/build/Makefile.library#42 edit
... //depot/stg/opencl/drivers/opencl/library/gpu/7xx/build/Makefile.7xx#15 edit
... //depot/stg/opencl/drivers/opencl/library/gpu/CI/build/Makefile.CI#4 edit
... //depot/stg/opencl/drivers/opencl/library/gpu/SI/build/Makefile.SI#8 edit
... //depot/stg/opencl/drivers/opencl/library/gpu/common/build/Makefile.common#27 edit
... //depot/stg/opencl/drivers/opencl/library/gpu/common/src/subgroup/subany.cl#3 delete
... //depot/stg/opencl/drivers/opencl/library/gpu/common/src/subgroup/subbar.cl#3 delete
... //depot/stg/opencl/drivers/opencl/library/gpu/common/src/subgroup/subbcast.cl#4 delete
... //depot/stg/opencl/drivers/opencl/library/gpu/common/src/subgroup/subget.cl#4 delete
... //depot/stg/opencl/drivers/opencl/library/gpu/common/src/subgroup/subreduce.cl#3 delete
... //depot/stg/opencl/drivers/opencl/library/gpu/common/src/subgroup/subscan.cl#3 delete
... //depot/stg/opencl/drivers/opencl/library/gpu/gen/build/Makefile.gen#8 edit
... //depot/stg/opencl/drivers/opencl/library/x86/Makefile#7 edit
... //depot/stg/opencl/drivers/opencl/library/x86/avx/build/Makefile.avx#20 edit
... //depot/stg/opencl/drivers/opencl/library/x86/common/build/Makefile.common#36 edit
... //depot/stg/opencl/drivers/opencl/library/x86/common/src/amdrt/build/Makefile.amdrt#22 edit
... //depot/stg/opencl/drivers/opencl/library/x86/fma4/build/Makefile.fma4#13 edit
... //depot/stg/opencl/drivers/opencl/library/x86/gen/build/Makefile.gen#21 edit
... //depot/stg/opencl/drivers/opencl/opencldefs#153 edit
... //depot/stg/opencl/drivers/opencl/openclrules#84 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/cpu/cpudevice.cpp#270 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/build/Makefile.gpu#59 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#498 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/binary/build/Makefile.binary#16 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/compiler/OCLStructOperations.cpp#14 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/complib/build/Makefile.complib#33 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/module/spir/build/Makefile.spir#8 edit
... //depot/stg/opencl/drivers/opencl/tests/ocltst/testdefs#26 edit
2015-03-05 03:27:00 -05:00

1162 строки
34 KiB
C++

//
// Copyright 2011 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/cpu/cpudevice.hpp"
#include "device/cpu/cpuprogram.hpp"
#include "utils/versions.hpp"
#include "amdocl/cl_common.hpp"
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#if defined(__linux__)
#if !defined(ATI_ARCH_ARM)
#include <sys/sysinfo.h>
#endif // ATI_ARCH_ARM
#include <unistd.h>
#endif
#if defined(_WIN32)
# include <windows.h>
# include <intrin.h>
extern BOOL (WINAPI *pfnGetNumaNodeProcessorMaskEx)(USHORT,PGROUP_AFFINITY);
#endif // _WIN32
namespace cpu {
// Static compiler handle: assigned from aclCompilerInit() in Device::init()
// and handed to aclCompilerFini() in Device::tearDown().
aclCompiler* Device::compiler_;
// Static worker-thread limit. Starts out as (size_t)-1 and is recomputed in
// Device::init() from the virtual memory size and the per-thread stack and
// private-memory footprint.
size_t Device::maxWorkerThreads_ = (size_t)-1;
// Releases the affinity data owned by this device.
// On Linux builds with NUMA_SUPPORT, a device that reports a NUMA mask frees
// only that mask; every other device frees its worker-thread affinity mask.
Device::~Device()
{
#if defined(__linux__) && defined(NUMA_SUPPORT)
    const bool reportsNumaMask = (getNumaMask() != NULL);
    if (reportsNumaMask) {
        if (numaMask_ != NULL) {
            delete numaMask_;
        }
        // A NUMA device does not release workerThreadsAffinity_.
        return;
    }
#endif

    if (workerThreadsAffinity_ != NULL) {
        delete workerThreadsAffinity_;
    }
}
// Shuts down the compiler instance that Device::init() stored in compiler_.
void Device::tearDown()
{
    aclCompilerFini(Device::compiler_);
}
// Initializes the CPU backend and registers the root CPU device.
//
// Steps, in order:
//   1. Bail out if the CPU device is disabled (CPU_MAX_COMPUTE_UNITS == 0).
//   2. Initialize the compiler library (COMPILER_LIBRARY overrides the
//      default "amdocl12cl" library name).
//   3. Fill a device::Info structure: compute units, image limits, FP
//      configuration, cache/NUMA affinity domains, memory sizes, vendor/name
//      strings, OpenCL version strings, partitioning and vector widths.
//   4. Create the Device object, attach the info and register it.
//
// Returns true when the device was registered; false when the device is
// disabled, the compiler could not be initialized, the system memory query
// failed, or Device::create() failed.
bool
Device::init()
{
    // Allow disabling of the CPU device
    if (CPU_MAX_COMPUTE_UNITS == 0)
        return false;

    const char *library = getenv("COMPILER_LIBRARY");
    aclCompilerOptions opts = {
        sizeof(aclCompilerOptions_0_8),
        library ? library : LINUX_ONLY("lib") "amdocl12cl" \
            LP64_SWITCH(LINUX_SWITCH("32",""),"64") LINUX_SWITCH(".so",".dll"),
        NULL,
        NULL,
        NULL,
        NULL,
        NULL,
        NULL,
        &::malloc,
        &::free
    };

    acl_error error;
    compiler_ = aclCompilerInit(&opts, &error);
    if (error != ACL_SUCCESS) {
        LogError("Error initializing the compiler");
        return false;
    }

    // Start from an all-zero info structure; several string fields below
    // rely on this for their NUL terminator.
    device::Info info;
    ::memset(&info, '\0', sizeof(info));

    info.type_ = CL_DEVICE_TYPE_CPU;
    info.vendorId_ = 0x1002;

    // Use every processor unless the flag requests a valid smaller count.
    int systemProcessorCount = amd::Os::processorCount();
    info.maxComputeUnits_ = systemProcessorCount;
    if (!flagIsDefault(CPU_MAX_COMPUTE_UNITS)) {
        if ((CPU_MAX_COMPUTE_UNITS <= 0) || (CPU_MAX_COMPUTE_UNITS > systemProcessorCount))
            info.maxComputeUnits_ = systemProcessorCount;
        else
            info.maxComputeUnits_ = CPU_MAX_COMPUTE_UNITS;
    }

    info.maxWorkItemDimensions_ = 3;
    info.maxWorkGroupSize_ = CPU_MAX_WORKGROUP_SIZE;
    info.maxWorkItemSizes_[0] = info.maxWorkGroupSize_;
    info.maxWorkItemSizes_[1] = info.maxWorkGroupSize_;
    info.maxWorkItemSizes_[2] = info.maxWorkGroupSize_;
    info.addressBits_ = LP64_SWITCH(32,64);

    if (CPU_IMAGE_SUPPORT) {
        info.imageSupport_ = CL_TRUE;
        info.maxReadImageArgs_ = MaxReadImage;
        info.maxWriteImageArgs_ = MaxWriteImage;
        info.image2DMaxWidth_ = 8 * Ki;
        info.image2DMaxHeight_ = 8 * Ki;
        info.image3DMaxWidth_ = 2 * Ki;
        info.image3DMaxHeight_ = 2 * Ki;
        info.image3DMaxDepth_ = 2 * Ki;
        info.maxSamplers_ = MaxSamplers;
        // OpenCL 1.2 device info fields
        info.imageMaxBufferSize_ = 64 * Ki;
        info.imageMaxArraySize_ = 2 * Ki;
        info.imagePitchAlignment_ = 0;
        info.imageBaseAddressAlignment_ = 0;
        info.bufferFromImageSupport_ = CL_FALSE;
    }

    info.maxParameterSize_ = 4*Ki;
    // The byte alignment is multiplied by 8 here.
    info.memBaseAddrAlign_ = 8 * (flagIsDefault(MEMOBJ_BASE_ADDR_ALIGN) ?
        sizeof(cl_long16) : MEMOBJ_BASE_ADDR_ALIGN);
    info.minDataTypeAlignSize_ = sizeof(cl_long16);

    info.singleFPConfig_ =
        CL_FP_DENORM | CL_FP_INF_NAN |
        CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO |
        CL_FP_ROUND_TO_INF | CL_FP_FMA;
    // Double precision gets the same flags, minus the correctly-rounded
    // divide/sqrt bit that is added to single precision afterwards.
    info.doubleFPConfig_ = info.singleFPConfig_;
    info.singleFPConfig_ |= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;

    info.affinityDomain_.value_ = 0;
    info.affinityDomain_.next_ = 1;
    info.globalMemCacheType_ = CL_READ_WRITE_CACHE;

#if defined(__linux__)
    info.globalMemCacheLineSize_ = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    info.globalMemCacheSize_ = sysconf(_SC_LEVEL1_DCACHE_SIZE);
    info.affinityDomain_.cacheL1_ = 1;
    if (sysconf(_SC_LEVEL2_CACHE_SIZE) > 0) {
        info.affinityDomain_.cacheL2_ = 1;
    }
    if (sysconf(_SC_LEVEL3_CACHE_SIZE) > 0) {
        info.affinityDomain_.cacheL3_ = 1;
    }
    if (sysconf(_SC_LEVEL4_CACHE_SIZE) > 0) {
        info.affinityDomain_.cacheL4_ = 1;
    }
#if defined(NUMA_SUPPORT)
    // Advertise the NUMA affinity domain only when there is more than one
    // node (highest node number above 0), matching the Windows branch below.
    if (numa_available() != -1 && numa_max_node() > 0) {
        info.affinityDomain_.numa_ = 1;
    }
#endif
#else // win32
    // First call only reports the required buffer size in 'length'.
    DWORD length = 0;
    ::GetLogicalProcessorInformation(NULL, &length);
    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer =
        (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc(length);
    if (buffer != NULL && ::GetLogicalProcessorInformation(buffer, &length)) {
        bool found = false;
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr, limit =
            &buffer[length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)];
        for (ptr = buffer; ptr < limit; ++ptr) {
            PCACHE_DESCRIPTOR cache = &ptr->Cache;
            if (ptr->Relationship == RelationCache && cache->Type != CacheInstruction) {
                // Map the cache level onto the matching affinity-domain bit.
                info.affinityDomain_.value_ |=
                    (device::AffinityDomain::AFFINITY_DOMAIN_L1_CACHE << 1) >>
                    cache->Level;
                // The first level-1 non-instruction cache describes the
                // global memory cache.
                if (!found && cache->Level == 1) {
                    info.globalMemCacheLineSize_ = cache->LineSize;
                    info.globalMemCacheSize_ = cache->Size;
                    found = true;
                }
            }
        }
    }
    free(buffer);

    ULONG highestNuma = 0;
    if (::GetNumaHighestNodeNumber(&highestNuma) && highestNuma != 0) {
        info.affinityDomain_.numa_ = 1;
    }
#endif

    uintptr_t virtualMemSize;
#if defined(__linux__)
#if !defined(ATI_ARCH_ARM)
    struct sysinfo si;
    if (sysinfo(&si) != 0) {
        return false;
    }
    if (si.mem_unit == 0) {
        // Linux kernels prior to 2.3.23 return sizes in bytes.
        si.mem_unit = 1;
    }
    info.globalMemSize_ = (cl_ulong) si.totalram * si.mem_unit;
#else
    info.globalMemSize_ = 0;
#endif
    virtualMemSize = (uintptr_t) info.globalMemSize_;
#else
    MEMORYSTATUSEX statex;
    statex.dwLength = sizeof (statex);
    if (GlobalMemoryStatusEx (&statex) == 0) {
        return false;
    }
    info.globalMemSize_ = (cl_ulong) statex.ullTotalPhys;
    virtualMemSize =
        (uintptr_t) std::min(statex.ullTotalPageFile, statex.ullTotalVirtual);
#endif

    // Worker-thread limit: 70% of the number of per-thread footprints
    // (stack plus private memory for a full work-group + 1) that fit in
    // the virtual memory size.
    maxWorkerThreads_ = (size_t) (virtualMemSize /
        (uintptr_t) ((CPU_WORKER_THREAD_STACK_SIZE +
            CLK_PRIVATE_MEMORY_SIZE * (CPU_MAX_WORKGROUP_SIZE + 1))) *
        7 / 10);

#if defined(_LP64)
    // Cap at 8TiB for 64-bit
    const cl_ulong maxGlobalMemSize = 8ULL*Ki*Gi;
#elif defined(_WIN32)
    // Cap at 2GiB (see http://msdn.microsoft.com/en-us/library/aa366778.aspx)
    const cl_ulong maxGlobalMemSize = 2ULL*Gi;
#else // linux
    // Cap at 3.5GiB
    const cl_ulong maxGlobalMemSize = 3584ULL*Mi;
#endif
    info.globalMemSize_ = std::min(info.globalMemSize_, maxGlobalMemSize);

    info.maxMemAllocSize_ = info.globalMemSize_ * CPU_MAX_ALLOC_PERCENT / 100;
    if (flagIsDefault(CPU_MAX_ALLOC_PERCENT)) {
        // With the default percentage, raise the limit to at least
        // minAllocSize, but never beyond the global memory size.
        const cl_ulong minAllocSize = LP64_SWITCH(1ULL*Gi, 2ULL*Gi);
        info.maxMemAllocSize_ = std::max(info.maxMemAllocSize_,
            std::min(info.globalMemSize_, minAllocSize));
    }

    info.maxConstantBufferSize_ = 64*Ki;
    info.maxConstantArgs_ = 8;
    info.localMemType_ = CL_GLOBAL;
    info.localMemSize_ = std::max((cl_ulong)32*Ki, info.globalMemCacheSize_/2);
    info.errorCorrectionSupport_ = CL_FALSE;
    info.hostUnifiedMemory_ = CL_TRUE;
    info.profilingTimerResolution_ = (size_t)amd::Os::timerResolutionNanos();
    info.profilingTimerOffset_ = amd::Os::offsetToEpochNanos();
    info.littleEndian_ = CL_TRUE;
    info.available_ = CL_TRUE;
    info.compilerAvailable_ = CL_TRUE;
    info.linkerAvailable_ = CL_TRUE;
    info.executionCapabilities_ = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL;

    // Enable SVM only for OpenCL 2.0
    if (((OPENCL_MAJOR >= 2) && (CPU_OPENCL_VERSION >= 200)) || OCL_FORCE_CPU_SVM) {
        info.svmCapabilities_ = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
            CL_DEVICE_SVM_FINE_GRAIN_BUFFER |
            CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
            CL_DEVICE_SVM_ATOMICS;
    }
    info.preferredPlatformAtomicAlignment_ = 0;
    info.preferredGlobalAtomicAlignment_ = 0;
    info.preferredLocalAtomicAlignment_ = 0;
    info.queueProperties_ = CL_QUEUE_PROFILING_ENABLE;
    info.platform_ = AMD_PLATFORM;

#if defined(__linux__)
    // Take the vendor, model name and clock frequency from the first
    // matching lines of /proc/cpuinfo.
    std::ifstream ifs("/proc/cpuinfo", std::ios::in);
    if (ifs.is_open()) {
        std::string line;
        bool vendor = false;
        bool name = false;
        bool freq = false;
        while (std::getline(ifs, line) && !(vendor && name && freq)) {
            if (!vendor && (line.find("vendor_id\t: ")
                    != std::string::npos)) {
                // Bounded copy: info was zero-filled, so leaving the last
                // byte untouched keeps the string NUL-terminated even if the
                // value is longer than the destination.
                ::strncpy(
                    info.vendor_,
                    line.substr(line.find_first_of(':') + 2).c_str(),
                    sizeof(info.vendor_) - 1);
                vendor = true;
            }
            else if (!name && (line.find("model name\t: ") != std::string::npos
                    || line.find("Processor\t: ") != std::string::npos)) {
                // Same bounded copy as for the vendor string.
                ::strncpy(
                    info.name_,
                    line.substr(line.find_first_of(':') + 2).c_str(),
                    sizeof(info.name_) - 1);
                name = true;
            }
            else if (!freq && (line.find("cpu MHz\t\t: ")
                    != std::string::npos)) {
                info.maxClockFrequency_ =
                    ::atoi(line.substr(line.find_first_of(':') + 2).c_str());
                freq = true;
            }
        }
        ifs.close();
    }
#elif defined(_WIN32)
    int CPUInfo[4] = {-1};
    int nRet = 0;
    unsigned nIds, nExIds, i;

    // cpuid with an InfoType argument of 0 returns the number of
    // valid Ids in CPUInfo[0] and the CPU identification string in
    // the other three array elements. The CPU identification string is
    // not in linear order. The code below arranges the information
    // in a human readable form.
    amd::Os::cpuid(CPUInfo, 0);
    nIds = CPUInfo[0];
    memset(info.vendor_, 0, sizeof(info.vendor_));
    *((int*)(info.vendor_+0)) = CPUInfo[1];
    *((int*)(info.vendor_+4)) = CPUInfo[3];
    *((int*)(info.vendor_+8)) = CPUInfo[2];

    // Calling cpuid with 0x80000000 as the InfoType argument
    // gets the number of valid extended IDs.
    amd::Os::cpuid(CPUInfo, 0x80000000);
    nExIds = CPUInfo[0];
    memset(info.name_, 0, sizeof(info.name_));
    sprintf(info.name_, "Unknown Processor");

    // Get the information associated with each extended ID.
    for (i=0x80000000; i<=nExIds; ++i)
    {
        amd::Os::cpuid(CPUInfo, i);
        // Interpret CPU brand string and cache information.
        if (i == 0x80000002)
            memcpy(info.name_, CPUInfo, sizeof(CPUInfo));
        else if (i == 0x80000003)
            memcpy(info.name_ + 16, CPUInfo, sizeof(CPUInfo));
        else if (i == 0x80000004)
            memcpy(info.name_ + 32, CPUInfo, sizeof(CPUInfo));
    }

    info.maxClockFrequency_ = 0;
    HKEY hKey;
    // Open the key
    if (RegOpenKeyEx(
            HKEY_LOCAL_MACHINE,
            "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0\\",
            0, KEY_QUERY_VALUE, &hKey) == ERROR_SUCCESS) {
        // Read the value
        DWORD dwLen = 4;
        RegQueryValueEx(
            hKey, "~MHz", NULL, NULL,
            (LPBYTE)&info.maxClockFrequency_, &dwLen);
        // Cleanup
        RegCloseKey(hKey);
    }
#else
    ::strcpy(info.name_, "Unknown Processor");
    ::strcpy(info.vendor_, "Unknown Vendor");
    info.maxClockFrequency_ = 0;
#endif

#define OPENCL_VERSION_STR XSTR(OPENCL_MAJOR) "." XSTR(OPENCL_MINOR)
    info.profile_ = "FULL_PROFILE";
    if (CPU_OPENCL_VERSION < 200) {
        info.version_ = "OpenCL 1.2 " AMD_PLATFORM_INFO;
        info.oclcVersion_ = "OpenCL C 1.2 ";
    }
    else {
        info.version_ = "OpenCL " OPENCL_VERSION_STR " " AMD_PLATFORM_INFO;
        info.oclcVersion_ = "OpenCL C " OPENCL_VERSION_STR " ";
    }
    info.spirVersions_ = "1.2";

#if cl_amd_open_video
    info.openVideo_ = CL_FALSE;
#endif // cl_amd_open_video

    // A device with a single compute unit cannot be partitioned, so it
    // exposes neither partition properties nor affinity domains.
    info.partitionCreateInfo_.type_.value_ = 0;
    info.partitionProperties_.value_ = 0;
    if (info.maxComputeUnits_ > 1) {
        info.partitionProperties_.equally_ = 1;
        info.partitionProperties_.byCounts_ = 1;
        if (info.affinityDomain_.value_ != 0) {
            info.partitionProperties_.byAffinityDomain_ = 1;
        }
    }
    else {
        info.affinityDomain_.value_ = 0;
    }

    // Copy the name into the boardName data member for CPU implementation.
    // ::strncpy(info.boardName_, info.name_, sizeof(info.boardName_));
    memset(info.boardName_, 0, sizeof(info.boardName_));

    Device* device = new Device();
    if (device == NULL || !device->create()) {
        delete device;
        return false;
    }

    ::snprintf(info.driverVersion_, sizeof(info.driverVersion_) - 1,
        "%s (%s%s%s)", AMD_BUILD_STRING,
#if defined(ATI_ARCH_X86)
        "sse2",
#else // !ATI_ARCH_X86
        "",
#endif // !ATI_ARCH_X86
        device->hasAVXInstructions() ? ",avx" : "",
        device->hasFMA4Instructions() ? ",fma4" : "");

    // These will need to change for AVX2
    info.preferredVectorWidthChar_ = 16;
    info.preferredVectorWidthShort_ = 8;
    info.preferredVectorWidthInt_ = 4;
    info.preferredVectorWidthLong_ = 2;
    if (device->hasAVXInstructions()) {
        info.preferredVectorWidthFloat_ = 8;
        info.preferredVectorWidthDouble_ = 4;
    } else {
        info.preferredVectorWidthFloat_ = 4;
        info.preferredVectorWidthDouble_ = 2;
    }
    info.preferredVectorWidthHalf_ = 0; // no half support

    // Same here, will need to change for AVX2
    info.nativeVectorWidthChar_ = 16;
    info.nativeVectorWidthShort_ = 8;
    info.nativeVectorWidthInt_ = 4;
    info.nativeVectorWidthLong_ = 2;
    if (device->hasAVXInstructions()) {
        info.nativeVectorWidthFloat_ = 8;
        info.nativeVectorWidthDouble_ = 4;
    } else {
        info.nativeVectorWidthFloat_ = 4;
        info.nativeVectorWidthDouble_ = 2;
    }
    info.nativeVectorWidthHalf_ = 0; // no half support

    // Find all supported device extensions
    info.extensions_ = device->getExtensionString();

    // OpenCL 1.2 device info fields
    info.builtInKernels_ = "";
    info.preferredInteropUserSync_ = true;
    info.printfBufferSize_ = 64*Ki;

    info.maxPipePacketSize_ = info.maxMemAllocSize_;
    info.maxPipeActiveReservations_ = 16;
    info.maxPipeArgs_ = 16;
    info.maxReadWriteImageArgs_ = MaxReadWriteImage;

    // Max size should not be bigger than 1.75 GB
    const cl_ulong maxSize = std::min(static_cast<cl_ulong>((Gi/4)*7),
        info.maxMemAllocSize_);
    info.maxGlobalVariableSize_ = static_cast<size_t>(maxSize);
    info.globalVariablePreferredTotalSize_ = static_cast<size_t>(maxSize);

    device->info_ = info;
    device->registerDevice();
    return true;
}
bool
Device::create()
{
// Create CPU settings
settings_ = new cpu::Settings();
cpu::Settings* cpuSettings = reinterpret_cast<cpu::Settings*>(settings_);
if ((cpuSettings == NULL) || !cpuSettings->create()) {
return false;
}
#if defined(ATI_ARCH_X86)
// Check that we have at least SSE2
if (settings().cpuFeatures_ == 0) {
return false;
}
#endif
return true;
}
// Configures this object as a sub-device.
//
// info            - device info to copy into this sub-device.
// maxComputeUnits - number of compute units assigned to the sub-device.
// create_info     - partition request; its descriptor is stored in the
//                   sub-device's partitionCreateInfo_.
//
// Returns false only when one of the allocations yields NULL.
bool
Device::initSubDevice(
    device::Info& info,
    cl_uint maxComputeUnits,
    const device::CreateSubDevicesInfo& create_info)
{
    // Lazily allocate the affinity mask for the sub-device's worker threads.
    if (workerThreadsAffinity_ == NULL) {
        workerThreadsAffinity_ = new amd::Os::ThreadAffinityMask;
        if (workerThreadsAffinity_ == NULL) {
            return false;
        }
    }

    info_ = info;
    info_.maxComputeUnits_ = maxComputeUnits;
    info_.partitionCreateInfo_ = create_info.p_;

    const bool partitionedByCounts =
        (create_info.p_.type_.value_ == device::PartitionType::BY_COUNTS);
    if (partitionedByCounts) {
        // Store a private copy of the counts list in the sub-device info.
        cl_uint* ownedCounts =
            new cl_uint[create_info.p_.byCounts_.listSize_];
        if (ownedCounts == NULL) {
            return false;
        }
        size_t idx = 0;
        while (idx < create_info.p_.byCounts_.listSize_) {
            ownedCounts[idx] = create_info.countsListAt(idx);
            ++idx;
        }
        info_.partitionCreateInfo_.byCounts_.countsList_ = ownedCounts;
    }

    // The device cannot be partitioned further
    if (maxComputeUnits == 1) {
        info_.partitionProperties_.value_ = 0;
        info_.affinityDomain_.value_ = 0;
    }

    return true;
}
// Marks numWorkerThreads cores in workerThreadsAffinity_.
//
// numWorkerThreads    - how many cores to add to the mask.
// threadsAffinityMask - optional mask to pick cores from; when NULL,
//                       consecutive core ids are used instead.
// baseCoreId          - in: id of the last core already handed out;
//                       out: id of the last core set by this call.
//
// workerThreadsAffinity_ is dereferenced without a NULL check, so it must
// already be allocated when this is called.
void
Device::setWorkerThreadsAffinity(
    cl_uint numWorkerThreads,
    const amd::Os::ThreadAffinityMask* threadsAffinityMask,
    uint& baseCoreId)
{
    uint lastCore = baseCoreId;
    for (cl_uint remaining = numWorkerThreads; remaining > 0; --remaining) {
        if (threadsAffinityMask != NULL) {
            // Already has affinity, so filter accordingly
            lastCore = threadsAffinityMask->getNextSet(lastCore);
        }
        else {
            // No restriction: simply take the next core id
            ++lastCore;
        }
        workerThreadsAffinity_->set(lastCore);
    }
    baseCoreId = lastCore;
}
// Dispatches a sub-device creation request to the partitioning routine
// that matches create_info.p_.type_.
//
// For BY_AFFINITY_DOMAIN requests the requested domain is validated (or,
// when the "next" flag is set, replaced by the lowest set bit of this
// device's supported domains) before the NUMA or cache-level routine is
// chosen. Returns CL_DEVICE_PARTITION_FAILED when the device reports no
// affinity domains, CL_INVALID_VALUE for an unsupported domain or an
// unknown partition type, otherwise whatever the selected routine returns.
cl_int
Device::createSubDevices(
    device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    switch (create_info.p_.type_.value_) {
    case device::PartitionType::EQUALLY:
        return partitionEqually(
            create_info, num_entries, devices, num_devices);

    case device::PartitionType::BY_COUNTS:
        return partitionByCounts(
            create_info, num_entries, devices, num_devices);

    case device::PartitionType::BY_AFFINITY_DOMAIN:
        if (info_.affinityDomain_.value_ == 0) {
            return CL_DEVICE_PARTITION_FAILED;
        }

        if (!create_info.p_.byAffinityDomain_.next_) {
            // An explicit domain was requested: it must be one we support
            if ((create_info.p_.byAffinityDomain_.value_ &
                 info_.affinityDomain_.value_) == 0) {
                return CL_INVALID_VALUE;
            }
        }
        else {
            // "Next partitionable" request: substitute the lowest
            // supported domain bit for the request
            create_info.p_.byAffinityDomain_.next_ = 0;
            create_info.p_.byAffinityDomain_.value_ =
                (1 << amd::leastBitSet(info_.affinityDomain_.value_));
        }

        return create_info.p_.byAffinityDomain_.numa_
            ? partitionByAffinityDomainNUMA(
                  create_info, num_entries, devices, num_devices)
            : partitionByAffinityDomainCacheLevel(
                  create_info, num_entries, devices, num_devices);

    default:
        break;
    }
    // Unknown partition type
    return CL_INVALID_VALUE;
}
// Splits this device into as many sub-devices as fit, each with
// create_info.p_.equally_.numComputeUnits_ compute units.
//
// num_entries - capacity of the devices array.
// devices     - optional output array; when NULL only the count is reported.
// num_devices - optional output for the number of sub-devices.
//
// Returns CL_INVALID_VALUE for a zero unit count or a too-small devices
// array, CL_DEVICE_PARTITION_FAILED when not even one sub-device fits,
// CL_OUT_OF_HOST_MEMORY when a sub-device cannot be created/initialized,
// and CL_SUCCESS otherwise. Note that *num_devices is written before the
// num_entries check, and sub-devices already stored in devices are not
// released when a later one fails.
cl_int
Device::partitionEqually(
    const device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    const cl_uint unitsPerDevice =
        (cl_uint)create_info.p_.equally_.numComputeUnits_;
    if (unitsPerDevice == 0) {
        return CL_INVALID_VALUE;
    }

    const cl_uint deviceCount = info_.maxComputeUnits_ / unitsPerDevice;
    if (deviceCount == 0) {
        return CL_DEVICE_PARTITION_FAILED;
    }

    if (num_devices != NULL) {
        *num_devices = deviceCount;
    }

    // Count-only query
    if (devices == NULL) {
        return CL_SUCCESS;
    }
    if (num_entries < deviceCount) {
        return CL_INVALID_VALUE;
    }

    // (uint)-1 so that the first core handed out is the first available one
    uint lastCoreId = (uint)-1;
    for (cl_uint created = 0; created < deviceCount; ++created) {
        Device* subDevice = new Device(this);
        if (subDevice == NULL) {
            return CL_OUT_OF_HOST_MEMORY;
        }

        const bool ready = subDevice->create() &&
            subDevice->initSubDevice(info_, unitsPerDevice, create_info);
        if (!ready) {
            subDevice->release();
            return CL_OUT_OF_HOST_MEMORY;
        }

        subDevice->setWorkerThreadsAffinity(
            unitsPerDevice, workerThreadsAffinity_, lastCoreId);
        devices[created] = as_cl(static_cast<amd::Device*>(subDevice));
    }
    return CL_SUCCESS;
}
// Splits this device into one sub-device per entry of the counts list in
// create_info, each with the number of compute units given by its entry.
//
// num_entries - capacity of the devices array.
// devices     - optional output array; when NULL only the count is reported.
// num_devices - optional output for the number of sub-devices.
//
// Returns CL_INVALID_DEVICE_PARTITION_COUNT when the list is empty or the
// requested units add up to more than info_.maxComputeUnits_,
// CL_INVALID_VALUE when devices is too small, CL_OUT_OF_HOST_MEMORY when a
// sub-device cannot be created/initialized, and CL_SUCCESS otherwise.
cl_int
Device::partitionByCounts(
    const device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    cl_uint maxComputeUnits = 0;
    cl_uint numSubDevices = (cl_uint)create_info.p_.byCounts_.listSize_;

    // Sum every requested count. countsListAt() is 0-based, so the valid
    // indices are [0, listSize_); the previous loop walked n..1, reading
    // one element past the end and never looking at element 0.
    for (size_t i = 0; i < (size_t)numSubDevices; ++i) {
        maxComputeUnits += create_info.countsListAt(i);
    }

    if (numSubDevices == 0 || maxComputeUnits > info_.maxComputeUnits_) {
        return CL_INVALID_DEVICE_PARTITION_COUNT;
    }

    if (num_devices != NULL) {
        *num_devices = numSubDevices;
    }

    if (devices != NULL) {
        if (num_entries < numSubDevices) {
            return CL_INVALID_VALUE;
        }

        // (uint)-1 so that the first core handed out is the first
        // available one
        uint coreId = (uint)-1;

        // The post-decrement leaves numSubDevices at n-1, n-2, ..., 0
        // inside the body, so sub-devices are emitted from the last list
        // entry to the first.
        while (numSubDevices-- > 0) {
            Device* device = new Device(this);
            if (device == NULL) {
                return CL_OUT_OF_HOST_MEMORY;
            }

            cl_uint subComputeUnits =
                create_info.countsListAt((size_t)numSubDevices);

            if (!device->create() ||
                !device->initSubDevice(info_, subComputeUnits, create_info)) {
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            device->setWorkerThreadsAffinity(
                subComputeUnits, workerThreadsAffinity_, coreId);

            *devices++ = as_cl(static_cast<amd::Device*>(device));
        }
    }
    return CL_SUCCESS;
}
// Splits this device into one sub-device per NUMA node.
//
// num_entries - capacity of the devices array.
// devices     - optional output array; when NULL only the count is reported.
// num_devices - optional output for the number of sub-devices.
//
// Linux builds without NUMA_SUPPORT always return CL_INVALID_VALUE.
// Otherwise returns CL_INVALID_VALUE when the node topology cannot be
// queried, when a node has no CPUs (Linux), when no usable node is found,
// or when devices cannot hold all sub-devices; CL_OUT_OF_HOST_MEMORY when
// a sub-device cannot be created/initialized; CL_SUCCESS otherwise.
cl_int
Device::partitionByAffinityDomainNUMA(
    const device::CreateSubDevicesInfo& create_info,
    cl_uint num_entries,
    cl_device_id* devices,
    cl_uint* num_devices)
{
    cl_uint numSubDevices = 0;

#if defined(__linux__)
#if !defined(NUMA_SUPPORT)
    return CL_INVALID_VALUE;
#else
    int highestNuma = numa_max_node();
    if (highestNuma < 0) {
        return CL_INVALID_VALUE;
    }
    // Node ids run from 0 to highestNuma inclusive (see the loop below),
    // so the number of sub-devices is highestNuma + 1. Using highestNuma
    // itself under-reported the count and made single-node systems fail.
    numSubDevices = (cl_uint)highestNuma + 1;

    if (devices != NULL) {
        // Never write past the caller's array
        if (num_entries < numSubDevices) {
            return CL_INVALID_VALUE;
        }

        for (int node = 0; node <= highestNuma; ++node) {
            cl_uint subComputeUnits = 0;

            // Grow the CPU bitmap until numa_node_to_cpus() accepts it,
            // then count the bits that are set.
            int len = 1;
            while (true) {
                ulong* cpus = (ulong*)alloca(sizeof(ulong)*len);
                if (numa_node_to_cpus(node, cpus, len * sizeof(ulong)) < 0) {
                    if (errno != ERANGE) {
                        return CL_INVALID_VALUE;
                    }
                    len *= 2;
                }
                else {
                    // Convert the length from words to bits
                    len *= sizeof(ulong) * 8;
                    for (int i = 0; i < len; i++) {
                        if (test_bit(i, cpus)) {
                            ++subComputeUnits;
                        }
                    }
                    break;
                }
            }
            if (subComputeUnits == 0) {
                return CL_INVALID_VALUE;
            }

            Device* device = new Device(this);
            if (device == NULL) {
                return CL_OUT_OF_HOST_MEMORY;
            }

            if (!device->create() || NULL == (device->numaMask_ = new nodemask_t)) {
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            if (!device->initSubDevice(
                    info_, subComputeUnits, create_info)) {
                delete device->numaMask_;
                device->numaMask_ = NULL;
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            // The sub-device is bound to exactly this node
            nodemask_zero(device->numaMask_);
            nodemask_set(device->numaMask_, node);

            // Need to remove this domain type
            device->info_.affinityDomain_.numa_ = 0;

            *devices++ = as_cl(static_cast<amd::Device*>(device));
        }
    }
#endif // NUMA_SUPPORT
#else // win32
    GROUP_AFFINITY numaNodeMask;
    ULONG highestNuma = 0;
    if (!::GetNumaHighestNodeNumber(&highestNuma)) {
        return CL_INVALID_VALUE;
    }

    for (ULONG node = 0; node <= highestNuma; ++node) {
        if (pfnGetNumaNodeProcessorMaskEx != NULL) {
            if (!pfnGetNumaNodeProcessorMaskEx((USHORT)node, &numaNodeMask)) {
                // Highest NUMA node number is not guaranteed to be the
                // number of nodes.
                continue;
            }
        }
        else {
            ULONGLONG tmpMask;
            if (!::GetNumaNodeProcessorMask((UCHAR)node, &tmpMask)) {
                // Highest NUMA node number is not guaranteed to be the
                // number of nodes.
                continue;
            }
            numaNodeMask.Group = 0;
            numaNodeMask.Mask = (KAFFINITY)tmpMask;
        }

        // NOTE(review): presumably restricts the node mask to the cores
        // this device already owns - confirm against
        // ThreadAffinityMask::adjust.
        if (workerThreadsAffinity_ != NULL) {
            workerThreadsAffinity_->adjust(0, numaNodeMask.Mask);
        }
        if (numaNodeMask.Mask == 0) {
            continue;
        }

        if (devices != NULL) {
            // Never write past the caller's array
            if (numSubDevices >= num_entries) {
                return CL_INVALID_VALUE;
            }

            Device* device = new Device(this);
            if (device == NULL) {
                return CL_OUT_OF_HOST_MEMORY;
            }

            if (!device->create() || !device->initSubDevice(info_,
                    (cl_uint)amd::countBitsSet(numaNodeMask.Mask), create_info)) {
                device->release();
                return CL_OUT_OF_HOST_MEMORY;
            }

            device->workerThreadsAffinity_->set(
                numaNodeMask.Group, numaNodeMask.Mask);

            // Need to remove this domain type
            device->info_.affinityDomain_.numa_ = 0;

            *devices++ = as_cl(static_cast<amd::Device*>(device));
        }
        numSubDevices++;
    }
#endif // win32

    if (num_devices != NULL) {
        *num_devices = numSubDevices;
    }

    // Could not get a processor mask for any of the nodes
    if (numSubDevices == 0) {
        return CL_INVALID_VALUE;
    }
    return CL_SUCCESS;
}
#if defined(__linux__)
// Reads the beginning of a file into buf as a NUL-terminated string.
//
// file    - path of the file to read.
// buf     - destination buffer. It may be the same memory as `file`: the
//           path is only consumed by open(), before buf is written.
// bufSize - size of buf in bytes, including room for the terminator.
//
// At most bufSize - 1 bytes (and never more than the size reported by
// fstat) are read in a single read() call. Returns false when the file
// cannot be opened or stat'ed, when nothing could be read, or when buf
// cannot hold at least one character plus the terminator.
static bool
readFileString(const char* file, char* buf, size_t bufSize)
{
    if (buf == NULL || bufSize == 0) {
        return false;
    }

    int fd = open(file, O_RDONLY);
    if (fd < 0) {
        return false;
    }

    struct stat st;
    if (fstat(fd, &st) < 0) {
        close(fd);
        return false;
    }

    // Keep one byte for the terminator. Clamping the read to the full
    // buffer and cutting afterwards dropped the last byte of any file
    // smaller than the buffer.
    size_t toRead = bufSize - 1;
    if ((size_t)st.st_size < toRead) {
        toRead = (size_t)st.st_size;
    }

    ssize_t n = read(fd, buf, toRead);
    close(fd);
    if (n <= 0) {
        return false;
    }

    buf[n] = '\0';
    return true;
}
// Converts a CPU map string into a cpu_set_t.
//
// cpuMap - comma-separated groups of hexadecimal digits, each group
//          holding 32 CPUs, with the rightmost group describing
//          CPUs 0..31.
// mask   - receives the decoded set; it is cleared first.
//
// Groups are consumed from right to left. Groups beyond the capacity of
// cpu_set_t (CPU_SETSIZE bits) are ignored instead of being written past
// the end of the mask, and bits are stored with CPU_SET so the result
// does not depend on the internal layout of cpu_set_t.
static void
parseSharedCpuMap(const char* cpuMap, cpu_set_t& mask)
{
    CPU_ZERO(&mask);

    const size_t bitsPerGroup = 32;
    const size_t maxGroups = CPU_SETSIZE / bitsPerGroup;

    // `end` is one past the last character still to be scanned
    const char* end = cpuMap + strlen(cpuMap);

    for (size_t group = 0; group < maxGroups; ++group) {
        const char* comma =
            (const char*)memrchr(cpuMap, ',', end - cpuMap);
        const char* digits = (comma != NULL) ? comma + 1 : cpuMap;

        // strtoul stops at the first non-hex character (',', '\n', NUL)
        const uint32_t value = (uint32_t)strtoul(digits, NULL, 16);
        for (size_t bit = 0; bit < bitsPerGroup; ++bit) {
            if ((value >> bit) & 1u) {
                CPU_SET(group * bitsPerGroup + bit, &mask);
            }
        }

        if (comma == NULL) {
            // That was the leftmost group
            return;
        }
        // Continue with the text to the left of this comma
        end = comma;
    }
}
#endif // linux
// Splits this device into sub-devices whose cores share a cache of the
// level selected by create_info.p_.byAffinityDomain_.value_ (L1..L4).
//
// devices     - optional output array; when NULL only the count is reported.
// num_devices - optional output for the number of sub-devices found.
//
// Returns CL_INVALID_VALUE for an unrecognized domain value, when a sysfs
// cache file cannot be read (Linux), or when no sub-device was found;
// CL_OUT_OF_HOST_MEMORY when a sub-device cannot be created/initialized;
// CL_SUCCESS otherwise.
//
// NOTE(review): num_entries is never read in this function, so writes
// through `devices` are not bounded by the caller's array size.
cl_int
Device::partitionByAffinityDomainCacheLevel(
const device::CreateSubDevicesInfo& create_info,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices)
{
// Translate the requested affinity domain into a numeric cache level
cl_uint cacheLevel = 0;
switch (create_info.p_.byAffinityDomain_.value_) {
case device::AffinityDomain::AFFINITY_DOMAIN_L4_CACHE:
cacheLevel = 4;
break;
case device::AffinityDomain::AFFINITY_DOMAIN_L3_CACHE:
cacheLevel = 3;
break;
case device::AffinityDomain::AFFINITY_DOMAIN_L2_CACHE:
cacheLevel = 2;
break;
case device::AffinityDomain::AFFINITY_DOMAIN_L1_CACHE:
cacheLevel = 1;
break;
default:
return CL_INVALID_VALUE;
}
// Mask used to strip the requested domain from each sub-device's
// supported affinity domains
const uint negAffinityDomain =
~create_info.p_.byAffinityDomain_.value_;
cl_uint numSubDevices = 0;
#if defined(__linux__)
// Start from this device's own cores, or from cores
// 0..maxComputeUnits_-1 when no affinity mask has been set
amd::Os::ThreadAffinityMask affinityMask;
if (workerThreadsAffinity_ != NULL) {
affinityMask = *workerThreadsAffinity_;
}
else {
for (uint cpuId = 0; cpuId < (uint)info_.maxComputeUnits_; ++cpuId) {
affinityMask.set(cpuId);
}
}
amd::Os::ThreadAffinityMask currentMask;
// buf holds the sysfs path first and is then overwritten with the
// file's contents by readFileString
char buf[1024];
for (uint cpuId = affinityMask.getFirstSet();
cpuId != (uint)-1;
cpuId = affinityMask.getNextSet(cpuId)) {
// NOTE(review): the cache level is used directly as the sysfs
// "index" number - confirm that index N is the level-N cache on the
// targeted kernels.
sprintf(buf,
"/sys/devices/system/cpu/cpu%u/cache/index%u/shared_cpu_map",
cpuId, cacheLevel);
if (!readFileString(buf, buf, sizeof(buf))) {
return CL_INVALID_VALUE;
}
parseSharedCpuMap(buf, currentMask.getNative());
// NOTE(review): presumably restricts currentMask to the cores still
// present in affinityMask - confirm against ThreadAffinityMask::adjust
affinityMask.adjust(currentMask.getNative());
if (currentMask.isEmpty()) {
continue;
}
cl_uint maxComputeUnits;
if (cacheLevel > 1) {
// Count the compute units of the new sub-device using the maps of
// the next lower cache level
maxComputeUnits = 0;
amd::Os::ThreadAffinityMask currentMaskSub;
cl_uint cacheLevelSub = cacheLevel - 1;
for (uint cpuIdSub = affinityMask.getFirstSet();
cpuIdSub != (uint)-1;
cpuIdSub = affinityMask.getNextSet(cpuIdSub)) {
sprintf(buf,
"/sys/devices/system/cpu/cpu%u/cache/index%u/shared_cpu_map",
cpuIdSub, cacheLevelSub);
if (!readFileString(buf, buf, sizeof(buf))) {
return CL_INVALID_VALUE;
}
parseSharedCpuMap(buf, currentMaskSub.getNative());
currentMask.adjust(currentMaskSub.getNative());
if (!currentMaskSub.isEmpty()) {
++maxComputeUnits;
}
}
if (maxComputeUnits == 0) {
continue;
}
}
else {
maxComputeUnits = 1;
}
if (devices != NULL) {
Device* device = new Device(this);
if (device == NULL) {
return CL_OUT_OF_HOST_MEMORY;
}
if (!device->create() ||
!device->initSubDevice(info_, maxComputeUnits, create_info)) {
device->release();
return CL_OUT_OF_HOST_MEMORY;
}
device->workerThreadsAffinity_->set(currentMask.getNative());
// Need to remove this domain type
device->info_.affinityDomain_.value_ &= negAffinityDomain;
*devices++ = as_cl(static_cast<amd::Device*>(device));
}
numSubDevices++;
// Drop the cores just handed out so the outer loop does not visit
// them again
affinityMask.clear(currentMask.getNative());
}
#else // win32
// The first call only queries the required buffer size in bytes
DWORD length = 0;
::GetLogicalProcessorInformation(NULL, &length);
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer =
(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc(length);
if (buffer != NULL && ::GetLogicalProcessorInformation(buffer, &length)) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr, limit =
&buffer[length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION)];
// Every non-instruction cache record of the requested level is a
// candidate sub-device
for (ptr = buffer; ptr < limit; ++ptr) {
PCACHE_DESCRIPTOR cache = &ptr->Cache;
if (ptr->Relationship == RelationCache && cache->Type != CacheInstruction) {
if (cache->Level == cacheLevel) {
KAFFINITY affinityMask = (KAFFINITY)ptr->ProcessorMask;
// NOTE(review): presumably restricts the mask to the cores this
// device already owns - confirm against ThreadAffinityMask::adjust
if (workerThreadsAffinity_ != NULL) {
workerThreadsAffinity_->adjust(0, affinityMask);
}
if (affinityMask == 0) {
continue;
}
cl_uint maxComputeUnits;
if (cacheLevel > 1) {
// One compute unit per cache record of the next lower level that
// overlaps this cache's processor mask
maxComputeUnits = 0;
cl_uint cacheLevelSub = cacheLevel - 1;
for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION
ptrSub = buffer; ptrSub < limit; ++ptrSub) {
PCACHE_DESCRIPTOR cacheSub = &ptrSub->Cache;
if (ptrSub->Relationship == RelationCache &&
cacheSub->Type != CacheInstruction) {
if (cacheSub->Level == cacheLevelSub &&
((affinityMask & (KAFFINITY)ptrSub->ProcessorMask) != 0)) {
++maxComputeUnits;
}
}
}
if (maxComputeUnits == 0) {
continue;
}
}
else {
maxComputeUnits = 1;
}
if (devices != NULL) {
Device* device = new Device(this);
if (device == NULL) {
free(buffer);
return CL_OUT_OF_HOST_MEMORY;
}
if (!device->create() || !device->initSubDevice(info_,
maxComputeUnits, create_info)) {
free(buffer);
device->release();
return CL_OUT_OF_HOST_MEMORY;
}
device->workerThreadsAffinity_->set(0, affinityMask);
// Need to remove this domain type
device->info_.affinityDomain_.value_ &= negAffinityDomain;
*devices++ = as_cl(static_cast<amd::Device*>(device));
}
numSubDevices++;
// Stop scanning once there are as many sub-devices as compute units
if (numSubDevices >= info_.maxComputeUnits_) {
break;
}
}
}
}
}
// Reached on every non-error path; buffer may be NULL here
free(buffer);
#endif
if (num_devices != NULL) {
*num_devices = numSubDevices;
}
// No cache of the requested level yielded a sub-device
if (numSubDevices == 0) {
return CL_INVALID_VALUE;
}
return CL_SUCCESS;
}
// Allocates a CPU program object bound to this device.
// oclVer is not used by this implementation. When the allocation yields
// NULL an error is logged and NULL is returned.
device::Program*
Device::createProgram(int oclVer)
{
    Program* program = new Program(*this);
    if (program != NULL) {
        return program;
    }

    LogError("We failed memory allocation for program!");
    return NULL;
}
// Returns the host address at which a mapping of `mem` starting at
// `origin` begins.
//
// mem        - memory object being mapped.
// origin     - start of the mapped area: element coordinates for images,
//              a byte offset in origin[0] for buffers.
// region     - not used by this implementation.
// mapFlags   - not used by this implementation.
// rowPitch   - receives the image row pitch; written unconditionally for
//              images, so it must not be NULL in that case. Untouched for
//              buffers.
// slicePitch - optional; receives the image slice pitch when not NULL.
//
// Returns NULL when `mem` is neither an image nor a buffer.
void*
Device::allocMapTarget(
    amd::Memory& mem,
    const amd::Coord3D& origin,
    const amd::Coord3D& region,
    uint mapFlags,
    size_t* rowPitch,
    size_t* slicePitch)
{
    if (mem.asImage() == NULL) {
        // Not an image: buffers map at a plain byte offset
        if (mem.asBuffer() != NULL) {
            return (address) mem.getHostMem() + origin[0];
        }
        return NULL;
    }

    amd::Image* image = mem.asImage();
    const size_t texelSize = image->getImageFormat().getElementSize();
    const size_t pitchOfRow = image->getRowPitch();
    const size_t pitchOfSlice = image->getSlicePitch();

    *rowPitch = pitchOfRow;
    if (slicePitch) {
        *slicePitch = pitchOfSlice;
    }

    // x is scaled by the element size, y by the row pitch and z by the
    // slice pitch
    return (address) image->getHostMem()
        + (origin[0] * texelSize + origin[1] * pitchOfRow + origin[2] * pitchOfSlice);
}
// Counterpart of allocMapTarget. Neither `mem` nor `target` is used:
// the body is intentionally empty.
void
Device::freeMapTarget(amd::Memory& mem, void* target)
{
// nop for CPU
}
} // namespace cpu