@@ -645,23 +645,69 @@ hipError_t ihipLaunchKernel(const void* hostFunction,
645645 flags));
646646}
647647
648+ // conversion routines between float and half precision
649+
// Returns the raw 32-bit pattern that represents the float `f`.
// The bytes are copied instead of being read back through a union: reading a
// union member other than the one last written is undefined behaviour in C++.
// __builtin_memcpy is a GCC/Clang builtin (this file already depends on
// GCC-style __attribute__), so no additional header is required.
static inline std::uint32_t f32_as_u32(float f) {
  static_assert(sizeof(std::uint32_t) == sizeof(float), "float must be 32 bits wide");
  std::uint32_t u;
  __builtin_memcpy(&u, &f, sizeof(u));
  return u;
}
651+
// Returns the float whose object representation is the 32-bit pattern `u`.
// The bytes are copied instead of being read back through a union: reading a
// union member other than the one last written is undefined behaviour in C++.
// __builtin_memcpy is a GCC/Clang builtin (this file already depends on
// GCC-style __attribute__), so no additional header is required.
static inline float u32_as_f32(std::uint32_t u) {
  static_assert(sizeof(float) == sizeof(std::uint32_t), "float must be 32 bits wide");
  float f;
  __builtin_memcpy(&f, &u, sizeof(f));
  return f;
}
653+
// Restricts `i` to the range [l, h]. The lower bound is applied first and the
// upper bound second, so `h` wins if the bounds are given in the wrong order.
static inline int clamp_int(int i, int l, int h) {
  const int raised = (i < l) ? l : i;
  return (h < raised) ? h : raised;
}
655+
656+
657+ // half float, the f16 is in the low 16 bits of the input argument
658+
659+ static inline float __convert_half_to_float (std::uint32_t a) noexcept {
660+
661+ std::uint32_t u = ((a << 13 ) + 0x70000000U ) & 0x8fffe000U ;
662+
663+ std::uint32_t v = f32_as_u32 (u32_as_f32 (u) * u32_as_f32 (0x77800000U )/* 0x1.0p+112f*/ ) + 0x38000000U ;
664+
665+ u = (a & 0x7fff ) != 0 ? v : u;
666+
667+ return u32_as_f32 (u) * u32_as_f32 (0x07800000U )/* 0x1.0p-112f*/ ;
668+
669+ }
670+
671+ // float half with nearest even rounding
672+ // The lower 16 bits of the result is the bit pattern for the f16
673+ static inline std::uint32_t __convert_float_to_half (float a) noexcept {
674+ std::uint32_t u = f32_as_u32 (a);
675+ int e = static_cast <int >((u >> 23 ) & 0xff ) - 127 + 15 ;
676+ std::uint32_t m = ((u >> 11 ) & 0xffe ) | ((u & 0xfff ) != 0 );
677+ std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0 );
678+ std::uint32_t n = ((std::uint32_t )e << 12 ) | m;
679+ std::uint32_t s = (u >> 16 ) & 0x8000 ;
680+ int b = clamp_int (1 -e, 0 , 13 );
681+ std::uint32_t d = (0x1000 | m) >> b;
682+ d |= (d << b) != (0x1000 | m);
683+ std::uint32_t v = e < 1 ? d : n;
684+ v = (v >> 2 ) + (((v & 0x7 ) == 3 ) | ((v & 0x7 ) > 5 ));
685+ v = e > 30 ? 0x7c00 : v;
686+ v = e == 143 ? i : v;
687+ return s | v;
688+ }
689+
690+ extern " C" __attribute__((weak)) float __gnu_h2f_ieee (unsigned short h){
691+ return __convert_half_to_float ((std::uint32_t ) h);
692+ }
693+
694+ extern " C" __attribute__((weak)) unsigned short __gnu_f2h_ieee (float f){
695+ return (unsigned short )__convert_float_to_half (f);
696+ }
697+
648698void PlatformState::init ()
649699{
650700 amd::ScopedLock lock (lock_);
651-
652701 if (initialized_ || g_devices.empty ()) {
653702 return ;
654703 }
655704 initialized_ = true ;
656-
657705 for (auto & it : statCO_.modules_ ) {
658706 digestFatBinary (it.first , it.second );
659707 }
660-
661708 for (auto &it : statCO_.vars_ ) {
662709 it.second ->resize_dVar (g_devices.size ());
663710 }
664-
665711 for (auto &it : statCO_.functions_ ) {
666712 it.second ->resize_dFunc (g_devices.size ());
667713 }
0 commit comments