Reinsert __gnu_h2f_ieee and __gnu_f2h_ieee.
Fixes SWDEV-251676. Reverts Ide24d245d851e20961020323e52f33322a33fff9 and adds __attribute__((weak)) to __gnu_h2f_ieee and __gnu_f2h_ieee. Change-Id: If4f9e2f867b56a3f0121a907a49ae4032220d9af
Этот коммит содержится в:
коммит произвёл
Maneesh Gupta
родитель
e71067a7ec
Коммит
5241c76a9d
@@ -266,3 +266,5 @@ hipMemcpy2DFromArrayAsync
|
||||
hipMemcpyAtoH
|
||||
hipMemcpyHtoA
|
||||
hipMemcpyParam2DAsync
|
||||
__gnu_h2f_ieee
|
||||
__gnu_f2h_ieee
|
||||
|
||||
@@ -178,6 +178,8 @@ global:
|
||||
__hipRegisterSurface;
|
||||
__hipRegisterTexture;
|
||||
__hipUnregisterFatBinary;
|
||||
__gnu_h2f_ieee;
|
||||
__gnu_f2h_ieee;
|
||||
hipConfigureCall;
|
||||
hipSetupArgument;
|
||||
hipLaunchByPtr;
|
||||
|
||||
@@ -644,23 +644,69 @@ hipError_t ihipLaunchKernel(const void* hostFunction,
|
||||
flags));
|
||||
}
|
||||
|
||||
// conversion routines between float and half precision
|
||||
|
||||
// Returns the raw 32-bit pattern of `f`, read back through a float/uint32 union.
// NOTE(review): reads the union member other than the one last written, so this
// relies on the compiler supporting union-based type punning.
static inline std::uint32_t f32_as_u32(float f) {
    union {
        std::uint32_t bits;
        float value;
    } pun;
    pun.value = f;
    return pun.bits;
}
|
||||
|
||||
// Reinterprets the 32-bit pattern `u` as a float, read back through a float/uint32 union.
// NOTE(review): reads the union member other than the one last written, so this
// relies on the compiler supporting union-based type punning.
static inline float u32_as_f32(std::uint32_t u) {
    union {
        std::uint32_t bits;
        float value;
    } pun;
    pun.bits = u;
    return pun.value;
}
|
||||
|
||||
// Limits `i` to the range [l, h]: first raises it to at least `l`, then caps it at `h`.
// When l > h the upper bound wins, exactly as in a max-then-min composition.
static inline int clamp_int(int i, int l, int h) {
    const int raised = (i < l) ? l : i;
    return (h < raised) ? h : raised;
}
|
||||
|
||||
|
||||
// half -> float; the f16 is in the low 16 bits of the input argument
|
||||
|
||||
// Converts a half-precision bit pattern, held in the low 16 bits of `a`, to a float.
// The work is done with integer bit manipulation plus two float multiplies by
// powers of two (2^112 and 2^-112).
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
|
||||
|
||||
// Shift the half's exponent/mantissa into the float field positions. The shift puts
// the half's sign (bit 15) at bit 28; adding 0x70000000 carries a set sign bit up into
// bit 31, and the mask then clears bits 28..30 and the low 13 bits.
// NOTE(review): assumes the upper 16 bits of `a` are zero - confirm against callers.
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
|
||||
|
||||
// Scale by 2^112 in float arithmetic, then add 0x38000000 to the raw bits, which
// adds 112 to the float exponent field.
// NOTE(review): this two-step scaling looks intended to cover subnormal and Inf/NaN
// half inputs - confirm.
std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
|
||||
|
||||
// If no exponent or mantissa bit is set (+/-0), keep the sign-only pattern in u.
u = (a & 0x7fff) != 0 ? v : u;
|
||||
|
||||
// Final scale by 2^-112.
return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
|
||||
|
||||
}
|
||||
|
||||
// float -> half with nearest even rounding
|
||||
// The lower 16 bits of the result is the bit pattern for the f16
|
||||
// Converts float `a` to a half-precision bit pattern using integer arithmetic only.
// Returns sign | value, where the sign is at bit 15 and the value occupies the bits below it.
static inline std::uint32_t __convert_float_to_half(float a) noexcept {
|
||||
// Raw bit pattern of the input.
std::uint32_t u = f32_as_u32(a);
|
||||
// Float exponent field re-biased from 127 to 15.
int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;
|
||||
// Float mantissa bits 22..12 placed in bits 11..1; bit 0 is a sticky bit that is set
// when any of the 12 discarded low mantissa bits is non-zero.
std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);
|
||||
// Result used when the float exponent field is all ones: 0x7c00 if the mantissa is
// zero, otherwise 0x7c00 with mantissa bit 0x0200 set.
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
|
||||
// Candidate for e >= 1: exponent placed above the 12-bit mantissa/extra-bits field.
std::uint32_t n = ((std::uint32_t)e << 12) | m;
|
||||
// Sign bit moved from bit 31 down to bit 15.
std::uint32_t s = (u >> 16) & 0x8000;
|
||||
// Right-shift amount for the e < 1 path, limited to the range 0..13.
int b = clamp_int(1-e, 0, 13);
|
||||
// Candidate for e < 1: mantissa with bit 0x1000 set above it, shifted right by b.
std::uint32_t d = (0x1000 | m) >> b;
|
||||
// Set bit 0 as a sticky bit when the shift discarded any set bits.
d |= (d << b) != (0x1000 | m);
|
||||
// Pick the shifted candidate when e < 1, otherwise the exponent|mantissa candidate.
std::uint32_t v = e < 1 ? d : n;
|
||||
// Drop the two extra low bits and round: add one when the low three bits are
// 3, 6 or 7 (above halfway, or exactly halfway with an odd kept bit).
v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));
|
||||
// Re-biased exponent above 30 does not fit in a half: result is 0x7c00.
v = e > 30 ? 0x7c00 : v;
|
||||
// e == 143 means the float exponent field was 0xff (255 - 127 + 15): use i.
v = e == 143 ? i : v;
|
||||
// Combine sign and magnitude.
return s | v;
|
||||
}
|
||||
|
||||
// C-linkage definition of __gnu_h2f_ieee, marked weak.
// Takes a half-precision bit pattern `h` and returns the float produced by
// __convert_half_to_float on that pattern widened to 32 bits.
extern "C" __attribute__((weak)) float __gnu_h2f_ieee(unsigned short h){
|
||||
return __convert_half_to_float((std::uint32_t) h);
|
||||
}
|
||||
|
||||
// C-linkage definition of __gnu_f2h_ieee, marked weak.
// Converts float `f` with __convert_float_to_half and returns the result
// truncated to unsigned short (the low 16 bits carry the half bit pattern).
extern "C" __attribute__((weak)) unsigned short __gnu_f2h_ieee(float f){
|
||||
return (unsigned short)__convert_float_to_half(f);
|
||||
}
|
||||
|
||||
void PlatformState::init()
|
||||
{
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
if(initialized_ || g_devices.empty()) {
|
||||
return;
|
||||
}
|
||||
initialized_ = true;
|
||||
|
||||
for (auto& it : statCO_.modules_) {
|
||||
digestFatBinary(it.first, it.second);
|
||||
}
|
||||
|
||||
for (auto &it : statCO_.vars_) {
|
||||
it.second->resize_dVar(g_devices.size());
|
||||
}
|
||||
|
||||
for (auto &it : statCO_.functions_) {
|
||||
it.second->resize_dFunc(g_devices.size());
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user