Reinsert __gnu_h2f_ieee and __gnu_f2h_ieee.

gargrahul · mangupta · commit d1886b0b342f · 2020-09-14T06:16:18.000-04:00
Fixes SWDEV-251676

Reverts Ide24d245d851e20961020323e52f33322a33fff9 and adds
__attribute__((weak)) to __gnu_h2f_ieee and __gnu_f2h_ieee.

Change-Id: If4f9e2f867b56a3f0121a907a49ae4032220d9af
diff --git a/rocclr/hip_hcc.def.in b/rocclr/hip_hcc.def.in
@@ -264,3 +264,5 @@ hipMemcpy2DFromArrayAsync
 hipMemcpyAtoH
 hipMemcpyHtoA
 hipMemcpyParam2DAsync
+__gnu_h2f_ieee
+__gnu_f2h_ieee
diff --git a/rocclr/hip_hcc.map.in b/rocclr/hip_hcc.map.in
@@ -176,6 +176,8 @@ global:
     __hipRegisterSurface;
     __hipRegisterTexture;
     __hipUnregisterFatBinary;
+    __gnu_h2f_ieee;
+    __gnu_f2h_ieee;
     hipConfigureCall;
     hipSetupArgument;
     hipLaunchByPtr;
diff --git a/rocclr/hip_platform.cpp b/rocclr/hip_platform.cpp
@@ -645,23 +645,69 @@ hipError_t ihipLaunchKernel(const void* hostFunction,
                                     flags));
 }
 
+// conversion routines between float and half precision
+// Returns the bit pattern of f as a 32-bit unsigned integer (type-puns through a union).
+static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
+// Reinterprets the 32-bit pattern u as a float (type-puns through a union).
+static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
+// Limits i to the range [l, h]; if l > h the result is always h.
+static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
+
+
+// half -> float; the f16 is in the low 16 bits of the input argument.
+// NOTE(review): the shift/add below appears to assume bits 16..31 of a are zero - confirm for any new caller.
+static inline float __convert_half_to_float(std::uint32_t a) noexcept {
+  // The add carries the f16 sign (bit 28 after the shift) into bit 31; the mask keeps bit 31 and bits 27..13.
+  std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
+  // Scale by 2^112, then add 112 to the exponent field (0x38000000 == 112 << 23); f16 exponent 31 ends up as 255.
+  std::uint32_t v = f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U)/*0x1.0p+112f*/) + 0x38000000U;
+  // +/-0 (no exponent or mantissa bits set) keeps u, so only the sign bit survives.
+  u = (a & 0x7fff) != 0 ? v : u;
+  // Scale back by 2^-112; Inf/NaN are unaffected by the multiply.
+  return u32_as_f32(u) * u32_as_f32(0x07800000U)/*0x1.0p-112f*/;
+
+}
+
+// float -> half with round-to-nearest-even.
+// The low 16 bits of the result hold the bit pattern of the f16.
+static inline std::uint32_t __convert_float_to_half(float a) noexcept {
+  std::uint32_t u = f32_as_u32(a);  // raw f32 bits
+  int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;  // exponent rebiased from f32 (127) to f16 (15)
+  std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);  // mantissa bits 22..12 in bits 11..1; bit 0 = sticky OR of bits 11..0
+  std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);  // Inf pattern, or NaN (mantissa bit 9 set) when any mantissa bit is set
+  std::uint32_t n = ((std::uint32_t)e << 12) | m;  // normal-case value, still carrying 2 extra low bits for rounding
+  std::uint32_t s = (u >> 16) & 0x8000;  // sign moved from bit 31 to bit 15
+  int b = clamp_int(1-e, 0, 13);  // right shift used for the subnormal case, limited to 0..13
+  std::uint32_t d = (0x1000 | m) >> b;  // mantissa with the implicit leading 1, shifted down by b
+  d |= (d << b) != (0x1000 | m);  // sticky: set bit 0 if the shift dropped any 1 bits
+  std::uint32_t v = e < 1 ? d : n;  // e < 1: result is subnormal (or zero) in f16
+  v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));  // drop the 2 extra bits, rounding to nearest with ties to even
+  v = e > 30 ? 0x7c00 : v;  // exponent too large for f16: overflow to infinity
+  v = e == 143 ? i : v;  // e == 143 means the f32 exponent field was 255 (Inf/NaN)
+  return s | v;
+}
+// Converts the f16 bit pattern h to float; defined with C linkage as a weak symbol.
+extern "C" __attribute__((weak)) float  __gnu_h2f_ieee(unsigned short h){
+  return __convert_half_to_float((std::uint32_t) h);
+}
+// Converts f to an f16 bit pattern (round to nearest even); defined with C linkage as a weak symbol.
+extern "C" __attribute__((weak)) unsigned short  __gnu_f2h_ieee(float f){
+  return (unsigned short)__convert_float_to_half(f);
+}
+
 void PlatformState::init()
 {
   amd::ScopedLock lock(lock_);
-
   if(initialized_ || g_devices.empty()) {
     return;
   }
   initialized_ = true;
-
   for (auto& it : statCO_.modules_) {
     digestFatBinary(it.first, it.second);
   }
-
   for (auto &it : statCO_.vars_) {
     it.second->resize_dVar(g_devices.size());
   }
-
   for (auto &it : statCO_.functions_) {
     it.second->resize_dFunc(g_devices.size());
   }