Reinsert __gnu_h2f_ieee and __gnu_f2h_ieee.

Fixes SWDEV-251676. Reverts Ide24d245d851e20961020323e52f33322a33fff9 and adds __attribute__((weak)) to __gnu_h2f_ieee and __gnu_f2h_ieee. Change-Id: If4f9e2f867b56a3f0121a907a49ae4032220d9af
2020-09-10 22:26:49 +00:00
@@ -266,3 +266,5 @@ hipMemcpy2DFromArrayAsync
 hipMemcpyAtoH
 hipMemcpyHtoA
 hipMemcpyParam2DAsync
+__gnu_h2f_ieee
+__gnu_f2h_ieee
@@ -178,6 +178,8 @@ global:
    __hipRegisterSurface;
    __hipRegisterTexture;
    __hipUnregisterFatBinary;
+    __gnu_h2f_ieee;
+    __gnu_f2h_ieee;
    hipConfigureCall;
    hipSetupArgument;
    hipLaunchByPtr;
@@ -644,23 +644,69 @@ hipError_t ihipLaunchKernel(const void* hostFunction,
                                    flags));
 }

+// conversion routines between float and half precision
+// Returns the raw 32-bit pattern of f: stores it in the union's float member and reads the integer member.
+static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
+// Returns the float whose raw 32-bit pattern is u: stores u in the union's integer member and reads the float member.
+static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
+// Limits i to the range l..h: the lower bound l is applied first, then the upper bound h.
+static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
+
+
+// half -> float; the f16 is in the low 16 bits of the input argument
+// NOTE(review): the sign-carry step below appears to need bits 16 and up of `a` to be zero - confirm against callers
+static inline float __convert_half_to_float(std::uint32_t a) noexcept {
+  // Shift the f16 fields up to bits 13..27; the add carries the sign (bit 28 after the shift) into bit 31; the mask keeps bit 31 and bits 13..27
+  std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
+  // Scale by 2^112 as a float multiply, then add 112 to the exponent field of the result (0x38000000 == 112 << 23)
+  std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
+  // When every non-sign bit of the input is zero (+/-0), keep u (sign bit only) instead of v
+  u = (a & 0x7fff) != 0 ? v : u;
+  // Scale back by 2^-112; for non-zero inputs the net factor applied to u is 2^112, which rebiases the exponent from 15 to 127
+  return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
+
+}
+
+// float -> half with nearest even rounding
+// The lower 16 bits of the result is the bit pattern for the f16
+static inline std::uint32_t __convert_float_to_half(float a) noexcept {
+  std::uint32_t u = f32_as_u32(a);  // raw bits of the input
+  int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;  // exponent field rebiased from 127 to 15
+  std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);  // mantissa bits 22..12 placed in bits 11..1; bit 0 is set if any of mantissa bits 11..0 is set
+  std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);  // result when the input exponent field is 255: 0x7c00 (f16 infinity pattern), plus 0x0200 when the mantissa is non-zero
+  std::uint32_t n = ((std::uint32_t)e << 12) | m;  // exponent placed above the 12-bit m; only selected below when e >= 1
+  std::uint32_t s = (u >> 16) & 0x8000;  // sign bit moved to bit 15
+  int b = clamp_int(1-e, 0, 13);  // right-shift count for the e < 1 path, limited to 0..13
+  std::uint32_t d = (0x1000 | m) >> b;  // m with bit 12 set above it, shifted right by b
+  d |= (d << b) != (0x1000 | m);  // set bit 0 if the shift discarded any set bits
+  std::uint32_t v = e < 1 ? d : n;  // pick the shifted form for small exponents, otherwise the exponent|mantissa form
+  v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));  // drop the two extra low bits, adding 1 when the low three bits are 011, 110 or 111
+  v = e > 30 ? 0x7c00 : v;  // rebiased exponent above 30: result is 0x7c00
+  v = e == 143 ? i : v;  // 143 == 255 - 127 + 15, i.e. the input exponent field was all ones
+  return s | v;  // combine sign with the magnitude bits
+}
+// Weak, C-linkage entry point: converts the f16 bit pattern in h to float via __convert_half_to_float.
+extern "C" __attribute__((weak)) float  __gnu_h2f_ieee(unsigned short h){
+  return __convert_half_to_float((std::uint32_t) h);
+}
+// Weak, C-linkage entry point: converts f via __convert_float_to_half and returns the low 16 bits of the result.
+extern "C" __attribute__((weak)) unsigned short  __gnu_f2h_ieee(float f){
+  return (unsigned short)__convert_float_to_half(f);
+}
+
 void PlatformState::init()
 {
  amd::ScopedLock lock(lock_);
-
  if(initialized_ || g_devices.empty()) {
    return;
  }
  initialized_ = true;
-
  for (auto& it : statCO_.modules_) {
    digestFatBinary(it.first, it.second);
  }
-
  for (auto &it : statCO_.vars_) {
    it.second->resize_dVar(g_devices.size());
  }
-
  for (auto &it : statCO_.functions_) {
    it.second->resize_dFunc(g_devices.size());
  }