Merge pull request #1469 from emankov/master

[HIPIFY] Add supported fp16 functions
This commit is contained in:
Evgeny Mankov
2019-09-26 18:16:57 +03:00
committed by GitHub
2 changed files with 333 additions and 5 deletions
+165
View File
@@ -1874,6 +1874,13 @@ sub countSupportedDeviceFunctions
"__fdividef",
"__ffs",
"__ffsll",
"__float22half2_rn",
"__float2half",
"__float2half2_rn",
"__float2half_rd",
"__float2half_rn",
"__float2half_ru",
"__float2half_rz",
"__float2int_rd",
"__float2int_rn",
"__float2int_ru",
@@ -1892,6 +1899,7 @@ sub countSupportedDeviceFunctions
"__float2ull_rz",
"__float_as_int",
"__float_as_uint",
"__floats2half2_rn",
"__fmaf_rd",
"__fmaf_rn",
"__fmaf_ru",
@@ -1913,19 +1921,127 @@ sub countSupportedDeviceFunctions
"__fsub_rn",
"__fsub_ru",
"__fsub_rz",
"__h2div",
"__hadd",
"__hadd2",
"__hadd2_sat",
"__hadd_sat",
"__half22float2",
"__half2float",
"__half2half2",
"__half2int_rd",
"__half2int_rn",
"__half2int_ru",
"__half2int_rz",
"__half2ll_rd",
"__half2ll_rn",
"__half2ll_ru",
"__half2ll_rz",
"__half2short_rd",
"__half2short_rn",
"__half2short_ru",
"__half2short_rz",
"__half2uint_rd",
"__half2uint_rn",
"__half2uint_ru",
"__half2uint_rz",
"__half2ull_rd",
"__half2ull_rn",
"__half2ull_ru",
"__half2ull_rz",
"__half2ushort_rd",
"__half2ushort_rn",
"__half2ushort_ru",
"__half2ushort_rz",
"__half_as_short",
"__half_as_ushort",
"__halves2half2",
"__hbeq2",
"__hbequ2",
"__hbge2",
"__hbgeu2",
"__hbgt2",
"__hbgtu2",
"__hble2",
"__hbleu2",
"__hblt2",
"__hbltu2",
"__hbne2",
"__hbneu2",
"__hdiv",
"__heq",
"__heq2",
"__hequ",
"__hequ2",
"__hfma",
"__hfma2",
"__hfma2_sat",
"__hfma_sat",
"__hge",
"__hge2",
"__hgeu",
"__hgeu2",
"__hgt",
"__hgt2",
"__hgtu",
"__hgtu2",
"__high2float",
"__high2half",
"__high2half2",
"__highs2half2",
"__hisinf",
"__hisnan",
"__hisnan2",
"__hle",
"__hle2",
"__hleu",
"__hleu2",
"__hlt",
"__hlt2",
"__hltu",
"__hltu2",
"__hmul",
"__hmul2",
"__hmul2_sat",
"__hmul_sat",
"__hne",
"__hne2",
"__hneg",
"__hneg2",
"__hneu",
"__hneu2",
"__hsub",
"__hsub2",
"__hsub2_sat",
"__hsub_sat",
"__int2float_rd",
"__int2float_rn",
"__int2float_ru",
"__int2float_rz",
"__int2half_rn",
"__int2half_ru",
"__int2half_rz",
"__int_as_float",
"__ldca",
"__ldcg",
"__ldcs",
"__ldg",
"__ll2float_rd",
"__ll2float_rn",
"__ll2float_ru",
"__ll2float_rz",
"__ll2half_rd",
"__ll2half_rn",
"__ll2half_ru",
"__ll2half_rz",
"__log10f",
"__log2f",
"__logf",
"__low2float",
"__low2half",
"__low2half2",
"__lowhigh2highlow",
"__lows2half2",
"__mul24",
"__mul64hi",
"__mulhi",
@@ -1935,6 +2051,11 @@ sub countSupportedDeviceFunctions
"__rhadd",
"__sad",
"__saturatef",
"__short2half_rd",
"__short2half_rn",
"__short2half_ru",
"__short2half_rz",
"__short_as_half",
"__sincosf",
"__sinf",
"__syncthreads",
@@ -1946,16 +2067,29 @@ sub countSupportedDeviceFunctions
"__uint2float_rn",
"__uint2float_ru",
"__uint2float_rz",
"__uint2half_rd",
"__uint2half_rn",
"__uint2half_ru",
"__uint2half_rz",
"__uint_as_float",
"__ull2float_rd",
"__ull2float_rn",
"__ull2float_ru",
"__ull2float_rz",
"__ull2half_rd",
"__ull2half_rn",
"__ull2half_ru",
"__ull2half_rz",
"__umul24",
"__umul64hi",
"__umulhi",
"__urhadd",
"__usad",
"__ushort2half_rd",
"__ushort2half_rn",
"__ushort2half_ru",
"__ushort2half_rz",
"__ushort_as_half",
"abs",
"acos",
"acosf",
@@ -2023,6 +2157,36 @@ sub countSupportedDeviceFunctions
"fmodf",
"frexp",
"frexpf",
"h2ceil",
"h2cos",
"h2exp",
"h2exp10",
"h2exp2",
"h2floor",
"h2log",
"h2log10",
"h2log2",
"h2rcp",
"h2rint",
"h2rsqrt",
"h2sin",
"h2sqrt",
"h2trunc",
"hceil",
"hcos",
"hexp",
"hexp10",
"hexp2",
"hfloor",
"hlog",
"hlog10",
"hlog2",
"hrcp",
"hrint",
"hrsqrt",
"hsin",
"hsqrt",
"htrunc",
"hypot",
"hypotf",
"ilogb",
@@ -2257,6 +2421,7 @@ sub warnUnsupportedDeviceFunctions
"__vsubus4",
"_fdsign",
"_ldsign",
"atomicAdd",
"float2int",
"float_as_int",
"float_as_uint",
@@ -432,13 +432,176 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNC_MAP{
{"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
// fp16 functions
{"__float2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__float2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__float2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__float2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__float2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__float2half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__floats2half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__low2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__high2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__float22half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half22float2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2int_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2int_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2int_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2int_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__int2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__int2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__int2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2short_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2short_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2short_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2short_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__short2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__short2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__short2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__short2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2uint_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2uint_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2uint_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2uint_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__uint2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__uint2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__uint2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__uint2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ushort_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ushort_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ushort_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ushort_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ushort2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ushort2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ushort2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ushort2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ull_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ull_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ull_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ull_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ull2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ull2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ull2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ull2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ll_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ll_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ll_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2ll_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ll2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ll2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ll2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ll2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"htrunc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hceil", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hfloor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hrint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2trunc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2ceil", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2floor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2rint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__lowhigh2highlow", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__lows2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__highs2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__high2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__low2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hisinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__halves2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__low2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__high2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half_as_short", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__half_as_ushort", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__short_as_half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ushort_as_half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ldg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ldg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ldcg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ldca", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__ldcs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__heq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hle2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hge2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hge2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hlt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hgt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hequ2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hneu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hisnan2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hmul2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__h2div", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hadd2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hsub2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hmul2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hfma2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hfma2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hsub", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hmul", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hdiv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hadd_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hsub_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hmul_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hfma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hfma_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hneg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hble2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbge2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hblt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbgt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbequ2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbneu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hbgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__heq", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hne", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hle", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hge", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hlt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hgt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hequ", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hneu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hleu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hgeu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hltu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hgtu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__hisnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hrsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hrcp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hlog", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hlog2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hlog10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hexp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hexp2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hexp10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hcos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"hsin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2sqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2rsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2rcp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2log", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2log2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2log10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2exp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2exp2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2exp10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2cos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"h2sin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}},
{"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__shfl_down_sync",{"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__shfl_down_sync",{"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
{"atomicAdd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
};