diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index a249f0efbc..10a5393755 100644 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -240,7 +240,8 @@ if(HIP_PLATFORM STREQUAL "hcc") src/hip_surface.cpp src/hip_intercept.cpp src/env.cpp - src/program_state.cpp) + src/program_state.cpp + src/h2f.cpp) execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic") diff --git a/hipamd/src/h2f.cpp b/hipamd/src/h2f.cpp new file mode 100644 index 0000000000..a8c60e7c48 --- /dev/null +++ b/hipamd/src/h2f.cpp @@ -0,0 +1,68 @@ +/* +Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +// conversion routines between float and half precision +static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; } +static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; } +static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); } + +// half to float, the f16 is in the low 16 bits of the input argument a +static inline float __convert_half_to_float(std::uint32_t a) noexcept { + std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U; + std::uint32_t v = f32_as_u32(u32_as_f32(u) * 0x1.0p+112f) + 0x38000000U; + u = (a & 0x7fff) != 0 ? v : u; + return u32_as_f32(u) * 0x1.0p-112f; +} + +// float to half with nearest even rounding +// The lower 16 bits of the result is the bit pattern for the f16 +static inline std::uint32_t __convert_float_to_half(float a) noexcept { + std::uint32_t u = f32_as_u32(a); + int e = static_cast((u >> 23) & 0xff) - 127 + 15; + std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0); + std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0); + std::uint32_t n = ((std::uint32_t)e << 12) | m; + std::uint32_t s = (u >> 16) & 0x8000; + int b = clamp_int(1-e, 0, 13); + std::uint32_t d = (0x1000 | m) >> b; + d |= (d << b) != (0x1000 | m); + std::uint32_t v = e < 1 ? d : n; + v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5)); + v = e > 30 ? 0x7c00 : v; + v = e == 143 ? i : v; + return s | v; +} + +// On machines without fp16 instructions, clang lowers llvm.convert.from.fp16 +// to call of this function. +extern "C" float __gnu_h2f_ieee(unsigned short h){ + return __convert_half_to_float((std::uint32_t) h); +} + +// On machines without fp16 instructions, clang lowers llvm.convert.to.fp16 +// to call of this function. +extern "C" unsigned short __gnu_f2h_ieee(float f){ + return (unsigned short)__convert_float_to_half(f); +}