-
Notifications
You must be signed in to change notification settings - Fork 17k
[libc][math] reduce duplicated inv trig helpers #191364
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,81 +30,96 @@ LIBC_INLINE_VAR constexpr DoubleDouble PI = {0x1.1a62633145c07p-53, | |
| LIBC_INLINE_VAR constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54, | ||
| 0x1.921fb54442d18p0}; | ||
|
|
||
| #ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS | ||
| // Scalar double constants for pi and pi/2, used by float-precision | ||
| // implementations (acosf etc.). | ||
| LIBC_INLINE_VAR constexpr double M_MATH_PI = 0x1.921fb54442d18p+1; | ||
| LIBC_INLINE_VAR constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0; | ||
|
|
||
| // When correct rounding is not needed, we use a degree-22 minimax polynomial to | ||
| // approximate asin(x)/x on [0, 0.5] using Sollya with: | ||
| // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22|], | ||
| // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24|], | ||
| // [|1, D...|], [0, 0.5]); | ||
| // > dirtyinfnorm(asin(x)/x - P, [0, 0.5]); | ||
| // 0x1.1a71ef0a0f26a9fb7ed7e41dee788b13d1770db3dp-52 | ||
|
|
||
| LIBC_INLINE_VAR constexpr double ASIN_COEFFS[12] = { | ||
| 0x1.0000000000000p0, 0x1.5555555556dcfp-3, 0x1.3333333082e11p-4, | ||
| 0x1.6db6dd14099edp-5, 0x1.f1c69b35bf81fp-6, 0x1.6e97194225a67p-6, | ||
| 0x1.1babddb82ce12p-6, 0x1.d55bd078600d6p-7, 0x1.33328959e63d6p-7, | ||
| 0x1.2b5993bda1d9bp-6, -0x1.806aff270bf25p-7, 0x1.02614e5ed3936p-5, | ||
| // > dirtyinfnorm((asin(x) - x*Q)/x, [0, 0.5]); | ||
| // 0x1.feb2fcdba66447ccbe28a1a0f935b51678a718fb1p-59 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now that the degree of approximation and the evaluation scheme for double precision
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will do! |
||
| // Coefficients for float-precision asin (ASINF_COEFFS excludes the leading 1 | ||
| // term; used as: asin(x) ~ x + x^3 * asinf_eval(x^2)). | ||
| LIBC_INLINE_VAR constexpr double ASINF_COEFFS[12] = { | ||
| 0x1.555555555538p-3, 0x1.333333336fd5bp-4, 0x1.6db6db41ce4bcp-5, | ||
| 0x1.f1c72c66896dep-6, 0x1.6e89f0a0ac64bp-6, 0x1.1c6c111de4074p-6, | ||
| 0x1.c6fa84b5699acp-7, 0x1.8ed60a3e6dd19p-7, 0x1.ab3a090750049p-8, | ||
| 0x1.405213cd1ef46p-6, -0x1.0a5a381f73f65p-6, 0x1.05985a32a9045p-5, | ||
| }; | ||
|
|
||
| LIBC_INLINE double asin_eval(double u) { | ||
| double u2 = u * u; | ||
| double c0 = fputil::multiply_add(u, ASIN_COEFFS[1], ASIN_COEFFS[0]); | ||
| double c1 = fputil::multiply_add(u, ASIN_COEFFS[3], ASIN_COEFFS[2]); | ||
| double c2 = fputil::multiply_add(u, ASIN_COEFFS[5], ASIN_COEFFS[4]); | ||
| double c3 = fputil::multiply_add(u, ASIN_COEFFS[7], ASIN_COEFFS[6]); | ||
| double c4 = fputil::multiply_add(u, ASIN_COEFFS[9], ASIN_COEFFS[8]); | ||
| double c5 = fputil::multiply_add(u, ASIN_COEFFS[11], ASIN_COEFFS[10]); | ||
|
|
||
| double u4 = u2 * u2; | ||
| double d0 = fputil::multiply_add(u2, c1, c0); | ||
| double d1 = fputil::multiply_add(u2, c3, c2); | ||
| double d2 = fputil::multiply_add(u2, c5, c4); | ||
|
|
||
| return fputil::polyeval(u4, d0, d1, d2); | ||
| // Evaluate (P(x^2) - 1) / x^2, where P(x^2) ~ asin(x)/x, for float-precision | ||
| // asin. The leading 1 of P is excluded (see ASINF_COEFFS), so the result is | ||
| // used as: asin(x) ~ x + x^3 * asinf_eval(x^2). | ||
| LIBC_INLINE double asinf_eval(double xsq) { | ||
| double x4 = xsq * xsq; | ||
| double c0 = fputil::multiply_add(xsq, ASINF_COEFFS[1], ASINF_COEFFS[0]); | ||
| double c1 = fputil::multiply_add(xsq, ASINF_COEFFS[3], ASINF_COEFFS[2]); | ||
| double c2 = fputil::multiply_add(xsq, ASINF_COEFFS[5], ASINF_COEFFS[4]); | ||
| double c3 = fputil::multiply_add(xsq, ASINF_COEFFS[7], ASINF_COEFFS[6]); | ||
| double c4 = fputil::multiply_add(xsq, ASINF_COEFFS[9], ASINF_COEFFS[8]); | ||
| double c5 = fputil::multiply_add(xsq, ASINF_COEFFS[11], ASINF_COEFFS[10]); | ||
| double x8 = x4 * x4; | ||
| double d0 = fputil::multiply_add(x4, c1, c0); | ||
| double d1 = fputil::multiply_add(x4, c3, c2); | ||
| double d2 = fputil::multiply_add(x4, c5, c4); | ||
| return fputil::polyeval(x8, d0, d1, d2); | ||
| } | ||
|
|
||
| // Coefficients for the polynomial approximation of asin(x)/(pi*x) on [0, 0.5]. | ||
| // Generated by Sollya: | ||
| // The coefficients for the polynomial approximation of asin(x)/(pi*x) in the | ||
| // range [0, 0.5], extracted using Sollya, for float-precision asinpi. | ||
| // | ||
| // Sollya code: | ||
| // > prec = 200; | ||
| // > display = hexadecimal; | ||
| // > g = asin(x) / (pi * x); | ||
| // > P = fpminimax(g, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22|], | ||
| // > [|D...|], [0, 0.5]); | ||
| // > for i from 0 to degree(P) do coeff(P, i); | ||
| // > print("Error:", dirtyinfnorm(P - g, [1e-30; 0.25])); | ||
| // Error : 0x1.a53f84eafa3ea69bb81b6c52b3278872083fca2c757bd778acp-54 ~= 2^-54 | ||
| LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[12] = { | ||
| 0x1.45f306dc9c881p-2, // x^0 | ||
| 0x1.b2995e7b7e756p-5, // x^2 | ||
| 0x1.8723a1d12f828p-6, // x^4 | ||
| 0x1.d1a45564b9545p-7, // x^6 | ||
| 0x1.3ce4ceaa0e1e9p-7, // x^8 | ||
| 0x1.d2c305898ea13p-8, // x^10 | ||
| 0x1.692212e27a5f9p-8, // x^12 | ||
| 0x1.2b22cc744d25bp-8, // x^14 | ||
| 0x1.8427b864479ffp-9, // x^16 | ||
| 0x1.815522d7a2bf1p-8, // x^18 | ||
| -0x1.f6df98438aef4p-9, // x^20 | ||
| 0x1.4b50c2eb13708p-7, // x^22 | ||
| // Error : 0x1.6b01ec54170565911f924eb53361de37df00d74e2a10a21d5p-56 ~ 2^-55.496 | ||
| // | ||
| // Non-zero coefficients (even powers only): | ||
| LIBC_INLINE_VAR constexpr double ASINPIF_COEFFS[13] = { | ||
| 0x1.45f306dc9c883p-2, // x^0 | ||
| 0x1.b2995e7b7af0fp-5, // x^2 | ||
| 0x1.8723a1d61d2e9p-6, // x^4 | ||
| 0x1.d1a4529a30a69p-7, // x^6 | ||
| 0x1.3ce53861f8f1fp-7, // x^8 | ||
| 0x1.d2b076c914efep-8, // x^10 | ||
| 0x1.6a2b36f9aed68p-8, // x^12 | ||
| 0x1.21604ae2879a2p-8, // x^14 | ||
| 0x1.ff0549b4fd0d6p-9, // x^16 | ||
| 0x1.035d343508f72p-9, // x^18 | ||
| 0x1.a7b91f72b1592p-8, // x^20 | ||
| -0x1.6a3fb073e97aep-8, // x^22 | ||
| 0x1.547a51d51664ap-7 // x^24 | ||
| }; | ||
|
|
||
| // Evaluate P(u) where P(u) ~ asin(sqrt(u))/(pi*sqrt(u)), using Estrin's scheme. | ||
| LIBC_INLINE double asinpi_eval(double u) { | ||
| double u2 = u * u; | ||
| double c0 = fputil::multiply_add(u, ASINPI_COEFFS[1], ASINPI_COEFFS[0]); | ||
| double c1 = fputil::multiply_add(u, ASINPI_COEFFS[3], ASINPI_COEFFS[2]); | ||
| double c2 = fputil::multiply_add(u, ASINPI_COEFFS[5], ASINPI_COEFFS[4]); | ||
| double c3 = fputil::multiply_add(u, ASINPI_COEFFS[7], ASINPI_COEFFS[6]); | ||
| double c4 = fputil::multiply_add(u, ASINPI_COEFFS[9], ASINPI_COEFFS[8]); | ||
| double c5 = fputil::multiply_add(u, ASINPI_COEFFS[11], ASINPI_COEFFS[10]); | ||
|
|
||
| double u4 = u2 * u2; | ||
| double d0 = fputil::multiply_add(u2, c1, c0); | ||
| double d1 = fputil::multiply_add(u2, c3, c2); | ||
| double d2 = fputil::multiply_add(u2, c5, c4); | ||
|
|
||
| return fputil::polyeval(u4, d0, d1, d2); | ||
| // Evaluates P1(v2) = c1 + c2*v2 + ... + c12*v2^11 (tail of the asinpif | ||
| // polynomial without c0) using Estrin's scheme. | ||
| // Used as: asinpif(x) ~ x * (ASINPIF_COEFFS[0] + v2 * asinpif_eval(v2)) | ||
| // where v2 = x^2. | ||
| LIBC_INLINE double asinpif_eval(double v2) { | ||
| double v4 = v2 * v2; | ||
| double v8 = v4 * v4; | ||
| double v16 = v8 * v8; | ||
|
|
||
| double p0 = fputil::multiply_add(v2, ASINPIF_COEFFS[2], ASINPIF_COEFFS[1]); | ||
| double p1 = fputil::multiply_add(v2, ASINPIF_COEFFS[4], ASINPIF_COEFFS[3]); | ||
| double p2 = fputil::multiply_add(v2, ASINPIF_COEFFS[6], ASINPIF_COEFFS[5]); | ||
| double p3 = fputil::multiply_add(v2, ASINPIF_COEFFS[8], ASINPIF_COEFFS[7]); | ||
| double p4 = fputil::multiply_add(v2, ASINPIF_COEFFS[10], ASINPIF_COEFFS[9]); | ||
| double p5 = fputil::multiply_add(v2, ASINPIF_COEFFS[12], ASINPIF_COEFFS[11]); | ||
|
|
||
| double q0 = fputil::multiply_add(v4, p1, p0); | ||
| double q1 = fputil::multiply_add(v4, p3, p2); | ||
| double q2 = fputil::multiply_add(v4, p5, p4); | ||
|
|
||
| double r0 = fputil::multiply_add(v8, q1, q0); | ||
|
|
||
| return fputil::multiply_add(v16, q2, r0); | ||
| } | ||
|
|
||
| #else | ||
|
|
||
| // The Taylor expansion of asin(x) around 0 is: | ||
| // asin(x) = x + x^3/6 + 3x^5/40 + ... | ||
|
|
@@ -635,8 +650,6 @@ LIBC_INLINE constexpr Float128 asinpi_eval(const Float128 &u, unsigned idx) { | |
| return fputil::quick_mul(p, ONE_OVER_PI_F128); | ||
| } | ||
|
|
||
| #endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS | ||
|
|
||
| } // namespace asin_internal | ||
|
|
||
| } // namespace LIBC_NAMESPACE_DECL | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.