Files
rocm-systems/rocclr/runtime/device/cpu/cpusettings.cpp
T
2014-07-04 16:17:05 -04:00

105 خطوط
3.1 KiB
C++

//
// Copyright 2011 Advanced Micro Devices, Inc. All rights reserved.
//
#include "device/cpu/cpusettings.hpp"
#include "os/os.hpp"
namespace cpu {
bool
Settings::create()
{
largeHostMemAlloc_ = true;
// This code is temporary until cl_khr_fp64 is unconditional
if (flagIsDefault(CL_KHR_FP64) || CL_KHR_FP64) {
enableExtension(ClKhrFp64);
}
enableExtension(ClAmdFp64);
enableExtension(ClKhrGlobalInt32BaseAtomics);
enableExtension(ClKhrGlobalInt32ExtendedAtomics);
enableExtension(ClKhrLocalInt32BaseAtomics);
enableExtension(ClKhrLocalInt32ExtendedAtomics);
#ifdef _LP64
enableExtension(ClKhrInt64BaseAtomics);
enableExtension(ClKhrInt64ExtendedAtomics);
#endif // _LP64
enableExtension(ClKhrByteAddressableStore);
enableExtension(ClKhrGlSharing);
enableExtension(ClKhrGlEvent);
enableExtension(ClExtDeviceFission);
enableExtension(ClAmdDeviceAttributeQuery);
enableExtension(ClAmdVec3);
enableExtension(ClAmdMediaOps);
enableExtension(ClAmdMediaOps2);
enableExtension(ClAmdPopcnt);
enableExtension(ClAmdPrintf);
// enableExtension(ClKhrSelectFpRoundingMode);
enableExtension(ClKhr3DImageWrites);
// enableExtension(ClKhrFp16);
#if defined(_WIN32)
enableExtension(ClKhrD3d10Sharing);
#endif // _WIN32
enableExtension(ClKhrSpir);
// Enable some OpenCL 2.0 extensions
if (OPENCL_MAJOR >= 2) {
partialDispatch_ = true;
enableExtension(ClKhrSubGroups);
supportDepthsRGB_ = true;
}
// Map CPUID feature bits to our own feature bits
const int sse2_features = CPUFEAT_DX_SSE | CPUFEAT_DX_SSE2;
const int avx_features = CPUFEAT_CX_SSE3 | CPUFEAT_CX_SSSE3 |
CPUFEAT_CX_SSE4_1 | CPUFEAT_CX_SSE4_2 |
CPUFEAT_CX_POPCNT | CPUFEAT_CX_AVX |
CPUFEAT_CX_OSXSAVE;
const int fma3_features = INTEL_CPUFEAT_CX_FMA3;
const int fma4_features = AMD_CPUFEAT_CX_FMA4 | AMD_CPUFEAT_CX_XOP;
int regs[4];
#if defined(ATI_ARCH_X86)
amd::Os::cpuid(regs, 0x0);
bool isAmd = regs[1] == ('A' | ('u' << 8) | ('t' << 16) | ('h' << 24));
bool isIntel = regs[1] == ('G' | ('e' << 8) | ('n' << 16) | ('u' << 24));
amd::Os::cpuid(regs, 0x1);
cpuFeatures_ = (regs[3] & sse2_features) == sse2_features ?
SSE2Instructions : 0;
if ((regs[2] & avx_features) == avx_features) {
// Check for state support
uint64_t xcr0 = amd::Os::xgetbv(0);
// Check for SSE and YMM bits (1 and 2)
if (((uint32_t)xcr0 & 0x6U) == 0x6U) {
cpuFeatures_ |= AVXInstructions;
// Now check for FMA and XOP
if (isIntel) {
cpuFeatures_ |= (regs[2] & fma3_features) == fma3_features ?
FMA3Instructions : 0;
}
if (isAmd) {
amd::Os::cpuid(regs, 0x80000001);
cpuFeatures_ |= (regs[2] & fma4_features) == fma4_features ?
FMA4Instructions : 0;
}
}
}
#endif // ATI_ARCH_X86
return true;
}
} // namespace cpu