From 5241c76a9dbb7bc9efb91d31442fed10bc4b704f Mon Sep 17 00:00:00 2001
From: Rahul Garg
Date: Thu, 10 Sep 2020 22:26:49 +0000
Subject: [PATCH] Reinsert __gnu_h2f_ieee and __gnu_f2h_ieee. Fixes SWDEV-251676

Reverts Ide24d245d851e20961020323e52f33322a33fff9 and adds
__attribute__((weak)) to __gnu_h2f_ieee and __gnu_f2h_ieee.

Change-Id: If4f9e2f867b56a3f0121a907a49ae4032220d9af
---
 rocclr/hip_hcc.def.in   |  2 ++
 rocclr/hip_hcc.map.in   |  2 ++
 rocclr/hip_platform.cpp | 54 ++++++++++++++++++++++++++++++++++++++---
 3 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/rocclr/hip_hcc.def.in b/rocclr/hip_hcc.def.in
index 7e767a1881..a38cbb0fff 100755
--- a/rocclr/hip_hcc.def.in
+++ b/rocclr/hip_hcc.def.in
@@ -266,3 +266,5 @@ hipMemcpy2DFromArrayAsync
 hipMemcpyAtoH
 hipMemcpyHtoA
 hipMemcpyParam2DAsync
+__gnu_h2f_ieee
+__gnu_f2h_ieee
diff --git a/rocclr/hip_hcc.map.in b/rocclr/hip_hcc.map.in
index 370eda4fb6..c561df11bd 100755
--- a/rocclr/hip_hcc.map.in
+++ b/rocclr/hip_hcc.map.in
@@ -178,6 +178,8 @@ global:
     __hipRegisterSurface;
     __hipRegisterTexture;
     __hipUnregisterFatBinary;
+    __gnu_h2f_ieee;
+    __gnu_f2h_ieee;
     hipConfigureCall;
     hipSetupArgument;
     hipLaunchByPtr;
diff --git a/rocclr/hip_platform.cpp b/rocclr/hip_platform.cpp
index dc610affdc..2ff505e189 100755
--- a/rocclr/hip_platform.cpp
+++ b/rocclr/hip_platform.cpp
@@ -644,23 +644,69 @@ hipError_t ihipLaunchKernel(const void* hostFunction,
                                 flags));
 }
 
+// conversion routines between float and half precision
+
+static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
+
+static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
+
+static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
+
+
+// half float, the f16 is in the low 16 bits of the input argument
+
+static inline float __convert_half_to_float(std::uint32_t a) noexcept {
+
+  std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
+
+  std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
+
+  u = (a & 0x7fff) != 0 ? v : u;
+
+  return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
+
+}
+
+// float half with nearest even rounding
+// The lower 16 bits of the result is the bit pattern for the f16
+static inline std::uint32_t __convert_float_to_half(float a) noexcept {
+  std::uint32_t u = f32_as_u32(a);
+  int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;
+  std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);
+  std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
+  std::uint32_t n = ((std::uint32_t)e << 12) | m;
+  std::uint32_t s = (u >> 16) & 0x8000;
+  int b = clamp_int(1-e, 0, 13);
+  std::uint32_t d = (0x1000 | m) >> b;
+  d |= (d << b) != (0x1000 | m);
+  std::uint32_t v = e < 1 ? d : n;
+  v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));
+  v = e > 30 ? 0x7c00 : v;
+  v = e == 143 ? i : v;
+  return s | v;
+}
+
+extern "C" __attribute__((weak)) float __gnu_h2f_ieee(unsigned short h){
+  return __convert_half_to_float((std::uint32_t) h);
+}
+
+extern "C" __attribute__((weak)) unsigned short __gnu_f2h_ieee(float f){
+  return (unsigned short)__convert_float_to_half(f);
+}
+
 void PlatformState::init()
 {
   amd::ScopedLock lock(lock_);
-
   if(initialized_ || g_devices.empty()) {
     return;
   }
   initialized_ = true;
-
   for (auto& it : statCO_.modules_) {
     digestFatBinary(it.first, it.second);
   }
-
   for (auto &it : statCO_.vars_) {
     it.second->resize_dVar(g_devices.size());
   }
-
   for (auto &it : statCO_.functions_) {
     it.second->resize_dFunc(g_devices.size());
   }