@@ -25,7 +25,7 @@ namespace LIBC_NAMESPACE_DECL {
 namespace math {

 static constexpr float16 rsqrtf16(float16 x) {
-  using FPBits = fputil::FPBits<float16>;
+  using FPBits = fputil::FPBits<float16>;
   FPBits xbits(x);

   uint16_t x_u = xbits.uintval();
@@ -76,8 +76,8 @@ using FPBits = fputil::FPBits<float16>;
   int exp_floored = -(exponent >> 1);

   if (mantissa == 0.5f) {
-    // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes this way).
-    // 1/sqrt(0.5f) = sqrt(2.0f).
+    // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes
+    // this way). 1/sqrt(0.5f) = sqrt(2.0f).
     // If exponent is odd (exponent = 2k + 1):
     // rsqrt(x) = (1/sqrt(0.5)) * 2^(-(2k+1)/2) = sqrt(2) * 2^(-k-0.5)
     //          = sqrt(2) * 2^(-k) * (1/sqrt(2)) = 2^(-k)
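For reference, a minimal standalone sketch of the power-of-two identity spelled out in the comment above, written against plain `<cmath>` rather than the fputil helpers; the loop and names below are illustrative only, not part of the patch:

```cpp
// Sketch: for x = 0.5 * 2^exponent (i.e. x is a power of two), 1/sqrt(x) is
// exactly 2^-k when exponent = 2k + 1, and sqrt(2) * 2^-k when exponent = 2k.
#include <cmath>
#include <cstdio>

int main() {
  for (int exponent = -6; exponent <= 6; ++exponent) {
    double x = std::ldexp(0.5, exponent); // mantissa 0.5, so x = 2^(exponent - 1)
    int k = exponent >> 1;                // floor(exponent / 2), as in exp_floored
    double rsqrt = (exponent & 1)
                       ? std::ldexp(1.0, -k)                   // odd exponent
                       : std::sqrt(2.0) * std::ldexp(1.0, -k); // even exponent
    std::printf("exponent=%3d  identity=%-10g  1/sqrt(x)=%-10g\n", exponent,
                rsqrt, 1.0 / std::sqrt(x));
  }
  return 0;
}
```

For example, exponent = 5 gives x = 16 and both columns print 0.25, matching 1/sqrt(16).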
@@ -96,32 +96,35 @@ using FPBits = fputil::FPBits<float16>;
   } else {
     // Degree-5 polynomial (float coefficients) generated with Sollya:
     // P = fpminimax(1/sqrt(x) + 2^-28, 5, [|single...|], [0.5,1])
-    float y = fputil::polyeval(
-        mantissa, 0x1.9c81fap1f, -0x1.e2c63ap2f, 0x1.91e9b8p3f,
-        -0x1.899abep3f, 0x1.9eddeap2f, -0x1.6bdb48p0f);
+    float y =
+        fputil::polyeval(mantissa, 0x1.9c81fap1f, -0x1.e2c63ap2f, 0x1.91e9b8p3f,
+                         -0x1.899abep3f, 0x1.9eddeap2f, -0x1.6bdb48p0f);

-    // Newton-Raphson iteration in float (use multiply_add to leverage FMA when available):
+    // Newton-Raphson iteration in float (use multiply_add to leverage FMA when
+    // available):
     float y2 = y * y;
     float factor = fputil::multiply_add(-0.5f * mantissa, y2, 1.5f);
     y = y * factor;

-
     result = fputil::ldexp(y, exp_floored);
     if (exponent & 1) {
       constexpr float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f; // 1/sqrt(2)
       result *= ONE_OVER_SQRT2;
     }

-    // Targeted post-correction: for the specific half-precision mantissa pattern
-    // M == 0x011F we observe a consistent -1 ULP bias across exponents.
+    // Targeted post-correction: for the specific half-precision mantissa
+    // pattern M == 0x011F we observe a consistent -1 ULP bias across exponents.
     // Apply a tiny upward nudge to cross the rounding boundary in all modes.
     const uint16_t half_mantissa = static_cast<uint16_t>(x_abs & 0x3ff);
     if (half_mantissa == 0x011F) {
       // Nudge up to fix consistent -1 ULP at that mantissa boundary
-      result = fputil::multiply_add(result, 0x1.0p-21f, result); // result *= (1 + 2^-21)
+      result = fputil::multiply_add(result, 0x1.0p-21f,
+                                    result); // result *= (1 + 2^-21)
     } else if (half_mantissa == 0x0313) {
-      // Nudge down to fix +1 ULP under upward rounding at this mantissa boundary
-      result = fputil::multiply_add(result, -0x1.0p-21f, result); // result *= (1 - 2^-21)
+      // Nudge down to fix +1 ULP under upward rounding at this mantissa
+      // boundary
+      result = fputil::multiply_add(result, -0x1.0p-21f,
+                                    result); // result *= (1 - 2^-21)
     }
   }
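The polynomial-plus-one-Newton-step structure in this hunk can be reproduced outside the libc tree. The sketch below assumes the coefficient list is in increasing-degree order (constant term first) and uses std::fma in place of fputil::multiply_add and fputil::polyeval; it illustrates the technique and is not the patch's code:

```cpp
// Degree-5 initial estimate of 1/sqrt(m) on [0.5, 1], then one Newton-Raphson
// refinement y *= (1.5 - 0.5 * m * y * y), all in single precision.
#include <cmath>
#include <cstdio>

// Coefficients from the Sollya comment, read as degree 0 through 5.
static float rsqrt_poly(float m) {
  float y = -0x1.6bdb48p0f; // degree-5 coefficient
  y = std::fma(y, m, 0x1.9eddeap2f);
  y = std::fma(y, m, -0x1.899abep3f);
  y = std::fma(y, m, 0x1.91e9b8p3f);
  y = std::fma(y, m, -0x1.e2c63ap2f);
  y = std::fma(y, m, 0x1.9c81fap1f); // P(m) ~= 1/sqrt(m)
  return y;
}

int main() {
  for (float m = 0.5f; m < 1.0f; m += 0.0625f) {
    float y = rsqrt_poly(m);
    // One Newton-Raphson step for 1/sqrt, matching the 'factor' computation.
    float factor = std::fma(-0.5f * m, y * y, 1.5f);
    y *= factor;
    std::printf("m=%.4f  poly+NR=%.7f  reference=%.7f\n", m, y,
                1.0f / std::sqrt(m));
  }
  return 0;
}
```

The refinement y *= (1.5 - 0.5 * m * y * y) is the standard Newton-Raphson step for f(y) = 1/y^2 - m, which roughly doubles the number of correct bits of the polynomial estimate.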
@@ -134,4 +137,3 @@ using FPBits = fputil::FPBits<float16>;
 #endif // LIBC_TYPES_HAS_FLOAT16

 #endif // LLVM_LIBC_SRC___SUPPORT_MATH_RSQRTF16_H
-
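A side note on the ±2^-21 nudges: relative to a half-precision ULP (about 2^-10 near 1.0), a factor of (1 + 2^-21) is roughly a thousandth of an ULP, so it can only change the final rounding when the intermediate float sits essentially on a float16 rounding boundary. A small demonstration, assuming a compiler with the _Float16 extension (recent Clang or GCC); this is illustrative, not the patch's test:

```cpp
// Show that multiplying by (1 + 2^-21) flips the float16 rounding of a value
// sitting just below a rounding boundary, and nothing else.
#include <cmath>
#include <cstdio>

int main() {
  // Midpoint between the adjacent float16 values 1.0 and 1.0 + 2^-10.
  float midpoint = 1.0f + 0x1.0p-11f;
  // A float just below that midpoint: rounds down to 1.0 in float16.
  float v = std::nextafterf(midpoint, 0.0f);
  float nudged = std::fmaf(v, 0x1.0p-21f, v); // v * (1 + 2^-21)

  std::printf("as float16: before nudge = %g, after nudge = %g\n",
              (double)(_Float16)v, (double)(_Float16)nudged);
  return 0;
}
```

Expected output: 1 before the nudge and 1.00098 (i.e. 1 + 2^-10) after; away from rounding boundaries the nudge leaves the rounded float16 result unchanged.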