Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[libc][math] reduce duplicated inv trig helpers
  • Loading branch information
hulxv committed Apr 22, 2026
commit 82a7cc483abbd1c627b0adc00a39fd40bc6eb6ad
23 changes: 7 additions & 16 deletions libc/src/__support/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ add_header_library(
HDRS
acosf.h
DEPENDS
.inv_trigf_utils
.asin_utils
libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
Expand Down Expand Up @@ -101,7 +101,7 @@ add_header_library(
HDRS
acospif.h
DEPENDS
.inv_trigf_utils
.asin_utils
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.polyeval
Expand Down Expand Up @@ -211,7 +211,7 @@ add_header_library(
HDRS
asinpif.h
DEPENDS
.inv_trigf_utils
.asin_utils
libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
Expand Down Expand Up @@ -267,7 +267,7 @@ add_header_library(
atan2f_float.h
atan2f.h
DEPENDS
.inv_trigf_utils
.atan_utils
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
Expand All @@ -283,7 +283,7 @@ add_header_library(
HDRS
atan2f16.h
DEPENDS
.inv_trigf_utils
.atan_utils
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.cast
Expand Down Expand Up @@ -329,7 +329,7 @@ add_header_library(
atanf_float.h
atanf.h
DEPENDS
.inv_trigf_utils
.atan_utils
libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
Expand Down Expand Up @@ -417,7 +417,7 @@ add_header_library(
HDRS
asinf.h
DEPENDS
.inv_trigf_utils
.asin_utils
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.except_value_utils
Expand Down Expand Up @@ -1750,15 +1750,6 @@ add_header_library(
libc.src.__support.macros.config
)

add_header_library(
inv_trigf_utils
HDRS
inv_trigf_utils.h
DEPENDS
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.common
)

add_header_library(
frexpf16
Expand Down
10 changes: 5 additions & 5 deletions libc/src/__support/math/acos.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ LIBC_INLINE constexpr double acos(double x) {

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// acos(x) = pi/2 - asin(x)
// = pi/2 - x * P(x^2)
double p = asin_eval(x * x);
return PI_OVER_TWO.hi + fputil::multiply_add(-x, p, PI_OVER_TWO.lo);
// = pi/2 - x * (1 + x^2 * P1(x^2))
// = pi/2 - x - x^3 * P1(x^2)
double xsq = x * x;
return PI_OVER_TWO.hi + fputil::multiply_add(-x * xsq, asinf_eval(xsq), PI_OVER_TWO.lo - x);
#else
unsigned idx = 0;
DoubleDouble x_sq = fputil::exact_mult(x, x);
Expand Down Expand Up @@ -177,9 +178,8 @@ LIBC_INLINE constexpr double acos(double x) {
constexpr DoubleDouble CONST_TERM[2] = {{0.0, 0.0}, PI};
DoubleDouble const_term = CONST_TERM[xbits.is_neg()];

double p = asin_eval(u);
double scale = x_sign * 2.0 * v_hi;
double r = const_term.hi + fputil::multiply_add(scale, p, const_term.lo);
double r = const_term.hi + fputil::multiply_add(scale * u, asinf_eval(u), const_term.lo + scale);
return r;
#else

Expand Down
8 changes: 4 additions & 4 deletions libc/src/__support/math/acosf.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSF_H

#include "inv_trigf_utils.h"
#include "asin_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/except_value_utils.h"
Expand Down Expand Up @@ -48,7 +48,7 @@ LIBC_INLINE_VAR constexpr fputil::ExceptValues<float, N_EXCEPTS> ACOSF_EXCEPTS =

LIBC_INLINE constexpr float acosf(float x) {
using namespace acosf_internal;
using namespace inv_trigf_utils_internal;
using namespace asin_internal;
using FPBits = typename fputil::FPBits<float>;

FPBits xbits(x);
Expand Down Expand Up @@ -84,7 +84,7 @@ LIBC_INLINE constexpr float acosf(float x) {
double xd = static_cast<double>(x);
double xsq = xd * xd;
double x3 = xd * xsq;
double r = asin_eval(xsq);
double r = asinf_eval(xsq);
return static_cast<float>(fputil::multiply_add(-x3, r, M_MATH_PI_2 - xd));
}

Expand Down Expand Up @@ -136,7 +136,7 @@ LIBC_INLINE constexpr float acosf(float x) {
double u = fputil::multiply_add(-0.5, xd, 0.5);
double cv = 2 * fputil::sqrt<double>(u);

double r3 = asin_eval(u);
double r3 = asinf_eval(u);
double r = fputil::multiply_add(cv * u, r3, cv);
return static_cast<float>(x_sign ? M_MATH_PI - r : r);
}
Expand Down
10 changes: 5 additions & 5 deletions libc/src/__support/math/acospif.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF_H
#define LLVM_LIBC_SRC___SUPPORT_MATH_ACOSPIF_H

#include "inv_trigf_utils.h"
#include "asin_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
Expand Down Expand Up @@ -54,8 +54,8 @@ LIBC_INLINE float acospif(float x) {
double x_d = fputil::cast<double>(x);
double v2 = x_d * x_d;
double result = x_d * fputil::multiply_add(
v2, inv_trigf_utils_internal::asinpi_eval(v2),
inv_trigf_utils_internal::ASINPI_COEFFS[0]);
v2, asin_internal::asinpif_eval(v2),
asin_internal::ASINPIF_COEFFS[0]);
return fputil::cast<float>(0.5 - result);
}

Expand All @@ -76,7 +76,7 @@ LIBC_INLINE float acospif(float x) {
constexpr double ONE_OVER_PI_LO = -0x1.6b01ec5417056p-56;
// C0_MINUS_1OVERPI = c0 - 1/pi = DELTA_C0 + ONE_OVER_PI_LO
constexpr double C0_MINUS_1OVERPI =
(inv_trigf_utils_internal::ASINPI_COEFFS[0] - ONE_OVER_PI_HI) +
(asin_internal::ASINPIF_COEFFS[0] - ONE_OVER_PI_HI) +
ONE_OVER_PI_LO;

double u = fputil::multiply_add(-0.5, x_abs, 0.5);
Expand All @@ -85,7 +85,7 @@ LIBC_INLINE float acospif(float x) {

// tail = (c0 - 1/pi) + u * P1(u)
double tail = fputil::multiply_add(
u, inv_trigf_utils_internal::asinpi_eval(u), C0_MINUS_1OVERPI);
u, asin_internal::asinpif_eval(u), C0_MINUS_1OVERPI);

double result_hi = fputil::multiply_add(neg2_sqrt_u, ONE_OVER_PI_HI, 0.5);
double result = fputil::multiply_add(tail, neg2_sqrt_u, result_hi);
Expand Down
7 changes: 4 additions & 3 deletions libc/src/__support/math/asin.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ LIBC_INLINE double asin(double x) {
}

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
return x * asin_eval(x * x);
double xsq = x * x;
return fputil::multiply_add(x * xsq, asinf_eval(xsq), x);
#else
using Float128 = fputil::DyadicFloat<128>;
using DoubleDouble = fputil::DoubleDouble;
Expand Down Expand Up @@ -189,8 +190,8 @@ LIBC_INLINE double asin(double x) {
double v_hi = fputil::sqrt<double>(u);

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
double p = asin_eval(u);
double r = x_sign * fputil::multiply_add(-2.0 * v_hi, p, PI_OVER_TWO.hi);
double neg2_v = -2.0 * v_hi;
double r = x_sign * fputil::multiply_add(neg2_v * u, asinf_eval(u), PI_OVER_TWO.hi + neg2_v);
return r;
#else

Expand Down
137 changes: 75 additions & 62 deletions libc/src/__support/math/asin_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,81 +30,96 @@ LIBC_INLINE_VAR constexpr DoubleDouble PI = {0x1.1a62633145c07p-53,
LIBC_INLINE_VAR constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54,
0x1.921fb54442d18p0};

#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Scalar double constants for pi and pi/2, used by float-precision
// implementations (acosf etc.).
LIBC_INLINE_VAR constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
LIBC_INLINE_VAR constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
Comment thread
hulxv marked this conversation as resolved.
Outdated

// When correct rounding is not needed, we use a degree-22 minimax polynomial to
// approximate asin(x)/x on [0, 0.5] using Sollya with:
// > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22|],
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24|],
// [|1, D...|], [0, 0.5]);
// > dirtyinfnorm(asin(x)/x - P, [0, 0.5]);
// 0x1.1a71ef0a0f26a9fb7ed7e41dee788b13d1770db3dp-52

LIBC_INLINE_VAR constexpr double ASIN_COEFFS[12] = {
0x1.0000000000000p0, 0x1.5555555556dcfp-3, 0x1.3333333082e11p-4,
0x1.6db6dd14099edp-5, 0x1.f1c69b35bf81fp-6, 0x1.6e97194225a67p-6,
0x1.1babddb82ce12p-6, 0x1.d55bd078600d6p-7, 0x1.33328959e63d6p-7,
0x1.2b5993bda1d9bp-6, -0x1.806aff270bf25p-7, 0x1.02614e5ed3936p-5,
// > dirtyinfnorm((asin(x) - x*Q)/x, [0, 0.5]);
// 0x1.feb2fcdba66447ccbe28a1a0f935b51678a718fb1p-59
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that the degree of approximation and the evaluation scheme for double-precision asin, acos, and atan are different, can you retest whether the error bounds have changed for these functions when the accurate paths are skipped?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do!

// Coefficients for float-precision asin (ASINF_COEFFS excludes the leading 1
// term; used as: asin(x) ~ x + x^3 * asinf_eval(x^2)).
LIBC_INLINE_VAR constexpr double ASINF_COEFFS[12] = {
0x1.555555555538p-3, 0x1.333333336fd5bp-4, 0x1.6db6db41ce4bcp-5,
0x1.f1c72c66896dep-6, 0x1.6e89f0a0ac64bp-6, 0x1.1c6c111de4074p-6,
0x1.c6fa84b5699acp-7, 0x1.8ed60a3e6dd19p-7, 0x1.ab3a090750049p-8,
0x1.405213cd1ef46p-6, -0x1.0a5a381f73f65p-6, 0x1.05985a32a9045p-5,
};

LIBC_INLINE double asin_eval(double u) {
double u2 = u * u;
double c0 = fputil::multiply_add(u, ASIN_COEFFS[1], ASIN_COEFFS[0]);
double c1 = fputil::multiply_add(u, ASIN_COEFFS[3], ASIN_COEFFS[2]);
double c2 = fputil::multiply_add(u, ASIN_COEFFS[5], ASIN_COEFFS[4]);
double c3 = fputil::multiply_add(u, ASIN_COEFFS[7], ASIN_COEFFS[6]);
double c4 = fputil::multiply_add(u, ASIN_COEFFS[9], ASIN_COEFFS[8]);
double c5 = fputil::multiply_add(u, ASIN_COEFFS[11], ASIN_COEFFS[10]);

double u4 = u2 * u2;
double d0 = fputil::multiply_add(u2, c1, c0);
double d1 = fputil::multiply_add(u2, c3, c2);
double d2 = fputil::multiply_add(u2, c5, c4);

return fputil::polyeval(u4, d0, d1, d2);
// Evaluate (P(x^2) - 1) / x^2, where P(x^2) ~ asin(x)/x, for float-precision
// asin.  The leading 1 of P is not part of ASINF_COEFFS, so the caller adds
// the bare `x` term back itself.
// Used as: asin(x) ~ x + x^3 * asinf_eval(x^2).
//
// Parameter xsq: the squared argument x^2.
// Returns: the 12-term polynomial sum_{i=0..11} ASINF_COEFFS[i] * xsq^i,
// evaluated by pairwise grouping (Estrin-style) rather than Horner so the
// multiply-adds within each level are independent of one another.
LIBC_INLINE double asinf_eval(double xsq) {
double x4 = xsq * xsq;
// Level 1: fold adjacent coefficients, c_k = C[2k] + xsq * C[2k+1].
double c0 = fputil::multiply_add(xsq, ASINF_COEFFS[1], ASINF_COEFFS[0]);
double c1 = fputil::multiply_add(xsq, ASINF_COEFFS[3], ASINF_COEFFS[2]);
double c2 = fputil::multiply_add(xsq, ASINF_COEFFS[5], ASINF_COEFFS[4]);
double c3 = fputil::multiply_add(xsq, ASINF_COEFFS[7], ASINF_COEFFS[6]);
double c4 = fputil::multiply_add(xsq, ASINF_COEFFS[9], ASINF_COEFFS[8]);
double c5 = fputil::multiply_add(xsq, ASINF_COEFFS[11], ASINF_COEFFS[10]);
double x8 = x4 * x4;
// Level 2: fold pairs of level-1 terms, d_k = c_{2k} + x4 * c_{2k+1}; each
// d_k therefore covers four consecutive powers of xsq.
double d0 = fputil::multiply_add(x4, c1, c0);
double d1 = fputil::multiply_add(x4, c3, c2);
double d2 = fputil::multiply_add(x4, c5, c4);
// Level 3: combine the three groups in powers of x8 (= xsq^4).
// NOTE(review): presumably polyeval(x8, d0, d1, d2) computes
// d0 + x8 * d1 + x8^2 * d2 -- the helper is not visible here; confirm.
return fputil::polyeval(x8, d0, d1, d2);
}

// Coefficients for the polynomial approximation of asin(x)/(pi*x) on [0, 0.5].
// Generated by Sollya:
// The coefficients for the polynomial approximation of asin(x)/(pi*x) in the
// range [0, 0.5], extracted using Sollya, for float-precision asinpi.
//
// Sollya code:
// > prec = 200;
// > display = hexadecimal;
// > g = asin(x) / (pi * x);
// > P = fpminimax(g, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22|],
// > [|D...|], [0, 0.5]);
// > for i from 0 to degree(P) do coeff(P, i);
// > print("Error:", dirtyinfnorm(P - g, [1e-30; 0.25]));
// Error : 0x1.a53f84eafa3ea69bb81b6c52b3278872083fca2c757bd778acp-54 ~= 2^-54
LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[12] = {
0x1.45f306dc9c881p-2, // x^0
0x1.b2995e7b7e756p-5, // x^2
0x1.8723a1d12f828p-6, // x^4
0x1.d1a45564b9545p-7, // x^6
0x1.3ce4ceaa0e1e9p-7, // x^8
0x1.d2c305898ea13p-8, // x^10
0x1.692212e27a5f9p-8, // x^12
0x1.2b22cc744d25bp-8, // x^14
0x1.8427b864479ffp-9, // x^16
0x1.815522d7a2bf1p-8, // x^18
-0x1.f6df98438aef4p-9, // x^20
0x1.4b50c2eb13708p-7, // x^22
// Error : 0x1.6b01ec54170565911f924eb53361de37df00d74e2a10a21d5p-56 ~ 2^−55.496
//
// Non-zero coefficients (even powers only):
LIBC_INLINE_VAR constexpr double ASINPIF_COEFFS[13] = {
0x1.45f306dc9c883p-2, // x^0
0x1.b2995e7b7af0fp-5, // x^2
0x1.8723a1d61d2e9p-6, // x^4
0x1.d1a4529a30a69p-7, // x^6
0x1.3ce53861f8f1fp-7, // x^8
0x1.d2b076c914efep-8, // x^10
0x1.6a2b36f9aed68p-8, // x^12
0x1.21604ae2879a2p-8, // x^14
0x1.ff0549b4fd0d6p-9, // x^16
0x1.035d343508f72p-9, // x^18
0x1.a7b91f72b1592p-8, // x^20
-0x1.6a3fb073e97aep-8, // x^22
0x1.547a51d51664ap-7 // x^24
};

// Evaluate P(u) where P(u) ~ asin(sqrt(u))/(pi*sqrt(u)), using Estrin's scheme.
LIBC_INLINE double asinpi_eval(double u) {
double u2 = u * u;
double c0 = fputil::multiply_add(u, ASINPI_COEFFS[1], ASINPI_COEFFS[0]);
double c1 = fputil::multiply_add(u, ASINPI_COEFFS[3], ASINPI_COEFFS[2]);
double c2 = fputil::multiply_add(u, ASINPI_COEFFS[5], ASINPI_COEFFS[4]);
double c3 = fputil::multiply_add(u, ASINPI_COEFFS[7], ASINPI_COEFFS[6]);
double c4 = fputil::multiply_add(u, ASINPI_COEFFS[9], ASINPI_COEFFS[8]);
double c5 = fputil::multiply_add(u, ASINPI_COEFFS[11], ASINPI_COEFFS[10]);

double u4 = u2 * u2;
double d0 = fputil::multiply_add(u2, c1, c0);
double d1 = fputil::multiply_add(u2, c3, c2);
double d2 = fputil::multiply_add(u2, c5, c4);

return fputil::polyeval(u4, d0, d1, d2);
// Evaluates P1(v2) = c1 + c2*v2 + ... + c12*v2^11 (tail of the asinpif
// polynomial without c0) using Estrin's scheme.
// Used as: asinpif(x) ~ x * (ASINPIF_COEFFS[0] + v2 * asinpif_eval(v2))
// where v2 = x^2.
//
// Parameter v2: the squared argument.
// Returns: sum_{i=1..12} ASINPIF_COEFFS[i] * v2^(i-1).  ASINPIF_COEFFS[0] is
// deliberately left out so the caller can add the constant term separately.
LIBC_INLINE double asinpif_eval(double v2) {
// Powers of v2 needed to stitch the partial sums back together.
double v4 = v2 * v2;
double v8 = v4 * v4;
double v16 = v8 * v8;

// Level 1: fold adjacent coefficients; p_k covers v2^(2k) and v2^(2k+1)
// relative to the start of the tail.
double p0 = fputil::multiply_add(v2, ASINPIF_COEFFS[2], ASINPIF_COEFFS[1]);
double p1 = fputil::multiply_add(v2, ASINPIF_COEFFS[4], ASINPIF_COEFFS[3]);
double p2 = fputil::multiply_add(v2, ASINPIF_COEFFS[6], ASINPIF_COEFFS[5]);
double p3 = fputil::multiply_add(v2, ASINPIF_COEFFS[8], ASINPIF_COEFFS[7]);
double p4 = fputil::multiply_add(v2, ASINPIF_COEFFS[10], ASINPIF_COEFFS[9]);
double p5 = fputil::multiply_add(v2, ASINPIF_COEFFS[12], ASINPIF_COEFFS[11]);

// Level 2: q_k = p_{2k} + v4 * p_{2k+1}; each q_k spans four powers of v2.
double q0 = fputil::multiply_add(v4, p1, p0);
double q1 = fputil::multiply_add(v4, p3, p2);
double q2 = fputil::multiply_add(v4, p5, p4);

// Level 3: q1 holds the terms for v2^4..v2^7, hence the v8 factor.
double r0 = fputil::multiply_add(v8, q1, q0);

// q2 holds the terms for v2^8..v2^11, hence the v16 factor.
return fputil::multiply_add(v16, q2, r0);
}

#else

// The Taylor expansion of asin(x) around 0 is:
// asin(x) = x + x^3/6 + 3x^5/40 + ...
Expand Down Expand Up @@ -635,8 +650,6 @@ LIBC_INLINE constexpr Float128 asinpi_eval(const Float128 &u, unsigned idx) {
return fputil::quick_mul(p, ONE_OVER_PI_F128);
}

#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS

} // namespace asin_internal

} // namespace LIBC_NAMESPACE_DECL
Expand Down
Loading