Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a837853

Browse files
committed
ENH: Import the catanh/catan implementation from FreeBSD
The code from FreeBSD was lightly adapted to fit with the numpy style. An incorrect test for the branch cuts of both arctanh and arctan was corrected in both test_umath.py and test_c99complex.c. With this commit, npy_catanh(f) and npy_catan(f) pass all of the tests in test_c99complex.c.
1 parent 3710481 commit a837853

3 files changed

Lines changed: 242 additions & 95 deletions

File tree

numpy/core/src/npymath/npy_math_complex.c.src

Lines changed: 199 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
* 2009), under the following license:
88
*
99
* Copyright (c) 2007, 2011 David Schultz <[email protected]>
10+
* Copyright (c) 2012 Stephen Montgomery-Smith <[email protected]>
1011
* All rights reserved.
1112
*
1213
* Redistribution and use in source and binary forms, with or without
@@ -41,6 +42,7 @@
4142
* #TMAX = FLT_MAX, DBL_MAX, LDBL_MAX#
4243
* #TMIN = FLT_MIN, DBL_MIN, LDBL_MIN#
4344
* #TMANT_DIG = FLT_MANT_DIG, DBL_MANT_DIG, LDBL_MANT_DIG#
45+
* #TEPS = FLT_EPSILON, DBL_EPSILON, LDBL_EPSILON#
4446
* #precision = 1, 2, 3#
4547
*/
4648

@@ -194,7 +196,6 @@ static @ctype@ _npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt)
194196
npy_ldexp@c@(mant * mantsin, expt + exsin));
195197
}
196198

197-
198199
#ifndef HAVE_CEXP@C@
199200
@ctype@ npy_cexp@c@(@ctype@ z)
200201
{
@@ -922,38 +923,9 @@ static @ctype@ _npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt)
922923
#ifndef HAVE_CATAN@C@
923924
@ctype@ npy_catan@c@(@ctype@ z)
924925
{
925-
@type@ x, y;
926-
x = npy_creal@c@(z);
927-
y = npy_cimag@c@(z);
928-
929-
if (npy_fabs(x) > 1e-3 || npy_fabs(y) > 1e-3) {
930-
/* catan(z) = 0.5*i * log((i+z)/(i-z)) */
931-
@ctype@ ip, im;
932-
ip = cadd@c@(c_i@c@, z);
933-
im = csub@c@(c_i@c@, z);
934-
return cmul@c@(c_ihalf@c@, npy_clog@c@(cdiv@c@(ip, im)));
935-
}
936-
else {
937-
/*
938-
* Small arguments: series expansion, to avoid loss of precision
939-
* atan(x) = x [1 - (1/3) x^2 [1 - (3/5) x^2 [1 - ...]]]
940-
*
941-
* |x| < 1e-3 => |rel. error| < 1e-18 (f), 1e-24, 1e-36 (l)
942-
*/
943-
@ctype@ z2, r;
944-
z2 = cmul@c@(z, z);
945-
r = c_1@c@;
946-
#if @precision@ >= 3
947-
SERIES_HORNER_TERM@C@(r, z2, -9.0@C@/11);
948-
SERIES_HORNER_TERM@C@(r, z2, -7.0@C@/9);
949-
#endif
950-
#if @precision@ >= 2
951-
SERIES_HORNER_TERM@C@(r, z2, -5.0@C@/7);
952-
#endif
953-
SERIES_HORNER_TERM@C@(r, z2, -3.0@C@/5);
954-
SERIES_HORNER_TERM@C@(r, z2, -1.0@C@/3);
955-
return cmul@c@(r, z);
956-
}
926+
/* catan(z) = I * conj( catanh(I * conj(z)) ) */
927+
z = npy_catanh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)));
928+
return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z));
957929
}
958930
#endif
959931

@@ -1007,41 +979,208 @@ static @ctype@ _npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt)
1007979
#endif
1008980

1009981
#ifndef HAVE_CATANH@C@
982+
/*
983+
* sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow).
984+
* Assumes x*x and y*y will not overflow.
985+
* Assumes x and y are finite.
986+
* Assumes y is non-negative.
987+
* Assumes fabs(x) >= DBL_EPSILON.
988+
*/
989+
static inline @type@ _sum_squares@c@(@type@ x, @type@ y)
990+
{
991+
#if @precision@ == 1
992+
const npy_float SQRT_MIN = 1.0842022e-19f;
993+
#endif
994+
#if @precision@ == 2
995+
const npy_double SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */
996+
#endif
997+
#if @precision@ == 3
998+
/* this is correct for 80 bit long doubles */
999+
const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l;
1000+
#endif
1001+
/* Avoid underflow when y is small. */
1002+
if (y < SQRT_MIN)
1003+
return (x * x);
1004+
1005+
return (x * x + y * y);
1006+
}
1007+
1008+
/*
1009+
* real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y).
1010+
* Assumes x and y are not NaN, and one of x and y is larger than
1011+
* RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use
1012+
* the code creal(1/z), because the imaginary part may produce an unwanted
1013+
* underflow.
1014+
* This is only called in a context where inexact is always raised before
1015+
* the call, so no effort is made to avoid or force inexact.
1016+
*/
1017+
#if @precision@ == 1
1018+
#define BIAS (FLT_MAX_EXP - 1)
1019+
#define CUTOFF (FLT_MANT_DIG / 2 + 1)
1020+
static inline npy_float _real_part_reciprocalf(npy_float x, npy_float y)
1021+
{
1022+
npy_float scale;
1023+
npy_uint32 hx, hy;
1024+
npy_int32 ix, iy;
1025+
1026+
GET_FLOAT_WORD(hx, x);
1027+
ix = hx & 0x7f800000;
1028+
GET_FLOAT_WORD(hy, y);
1029+
iy = hy & 0x7f800000;
1030+
if (ix - iy >= CUTOFF << 23 || npy_isinf(x))
1031+
return (1 / x);
1032+
if (iy - ix >= CUTOFF << 23)
1033+
return (x / y / y);
1034+
if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23)
1035+
return (x / (x * x + y * y));
1036+
SET_FLOAT_WORD(scale, 0x7f800000 - ix);
1037+
x *= scale;
1038+
y *= scale;
1039+
return (x / (x * x + y * y) * scale);
1040+
}
1041+
#undef BIAS
1042+
#undef CUTOFF
1043+
#endif
1044+
#if @precision@ == 2
1045+
#define BIAS (DBL_MAX_EXP - 1)
1046+
/* XXX more guard digits are useful iff there is extra precision. */
1047+
#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */
1048+
static inline npy_double _real_part_reciprocal(npy_double x, npy_double y)
1049+
{
1050+
npy_double scale;
1051+
npy_uint32 hx, hy;
1052+
npy_int32 ix, iy;
1053+
1054+
/*
1055+
* This code is inspired by the C99 document n1124.pdf, Section G.5.1,
1056+
* example 2.
1057+
*/
1058+
GET_HIGH_WORD(hx, x);
1059+
ix = hx & 0x7ff00000;
1060+
GET_HIGH_WORD(hy, y);
1061+
iy = hy & 0x7ff00000;
1062+
if (ix - iy >= CUTOFF << 20 || npy_isinf(x))
1063+
return (1 / x); /* +-Inf -> +-0 is special */
1064+
if (iy - ix >= CUTOFF << 20)
1065+
return (x / y / y); /* should avoid double div, but hard */
1066+
if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20)
1067+
return (x / (x * x + y * y));
1068+
scale = 1;
1069+
SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */
1070+
x *= scale;
1071+
y *= scale;
1072+
return (x / (x * x + y * y) * scale);
1073+
}
1074+
#undef BIAS
1075+
#undef CUTOFF
1076+
#endif
1077+
#if @precision@ == 3
1078+
#define BIAS (LDBL_MAX_EXP - 1)
1079+
#define CUTOFF (LDBL_MANT_DIG / 2 + 1)
1080+
static inline npy_longdouble _real_part_reciprocall(npy_longdouble x, npy_longdouble y)
1081+
{
1082+
npy_longdouble scale;
1083+
union IEEEl2bitsrep ux, uy, us;
1084+
npy_int32 ix, iy;
1085+
1086+
ux.e = x;
1087+
ix = GET_LDOUBLE_EXP(ux);
1088+
uy.e = y;
1089+
iy = GET_LDOUBLE_EXP(uy);
1090+
if (ix - iy >= CUTOFF || npy_isinf(x))
1091+
return (1/x);
1092+
if (iy - ix >= CUTOFF)
1093+
return (x/y/y);
1094+
if (ix <= BIAS + LDBL_MAX_EXP / 2 - CUTOFF)
1095+
return (x/(x*x + y*y));
1096+
us.e = 1;
1097+
SET_LDOUBLE_EXP(us, 0x7fff - ix);
1098+
scale = us.e;
1099+
x *= scale;
1100+
y *= scale;
1101+
return (x/(x*x + y*y) * scale);
1102+
}
1103+
#undef BIAS
1104+
#undef CUTOFF
1105+
#endif
1106+
10101107
@ctype@ npy_catanh@c@(@ctype@ z)
10111108
{
1012-
@type@ x, y;
1109+
#if @precision@ == 1
1110+
/* this is sqrt(3*EPS) */
1111+
const npy_float SQRT_3_EPSILON = 5.9801995673e-4f;
1112+
/* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. */
1113+
const volatile float pio2_lo = 7.5497899549e-9f;
1114+
#endif
1115+
#if @precision@ == 2
1116+
const npy_double SQRT_3_EPSILON = 2.5809568279517849e-8;
1117+
const volatile npy_double pio2_lo = 6.1232339957367659e-17;
1118+
#endif
1119+
#if @precision@ == 3
1120+
const npy_longdouble SQRT_3_EPSILON = 5.70316273435758915310e-10;
1121+
const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l;
1122+
#endif
1123+
const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@;
1124+
const @type@ pio2_hi = NPY_PI_2@c@;
1125+
const volatile float tiny = 3.9443045e-31f;
1126+
@type@ x, y, ax, ay, rx, ry;
10131127

10141128
x = npy_creal@c@(z);
10151129
y = npy_cimag@c@(z);
1016-
1017-
if (npy_fabs(x) > 1e-3 || npy_fabs(y) > 1e-3) {
1018-
/* catanh(z) = 0.5 * log((1+z)/(1-z)) */
1019-
@ctype@ p1, m1;
1020-
p1 = cadd@c@(c_1@c@, z);
1021-
m1 = csub@c@(c_1@c@, z);
1022-
return cmul@c@(c_half@c@, npy_clog@c@(cdiv@c@(p1, m1)));
1130+
ax = npy_fabs@c@(x);
1131+
ay = npy_fabs@c@(y);
1132+
1133+
/* This helps handle many cases. */
1134+
if (y == 0 && ax <= 1)
1135+
return npy_cpack@c@(npy_atanh@c@(x), y);
1136+
1137+
/* To ensure the same accuracy as atan(), and to filter out z = 0. */
1138+
if (x == 0)
1139+
return npy_cpack@c@(x, npy_atan@c@(y));
1140+
1141+
if (npy_isnan(x) || npy_isnan(y)) {
1142+
/* catanh(+-Inf + I*NaN) = +-0 + I*NaN */
1143+
if (npy_isinf(x))
1144+
return npy_cpack@c@(npy_copysign@c@(0, x), y + y);
1145+
/* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
1146+
if (npy_isinf(y))
1147+
return npy_cpack@c@(npy_copysign@c@(0, x),
1148+
npy_copysign@c@(pio2_hi + pio2_lo, y));
1149+
/*
1150+
* All other cases involving NaN return NaN + I*NaN.
1151+
* C99 leaves it optional whether to raise invalid if one of
1152+
* the arguments is not NaN, so we opt not to raise it.
1153+
*/
1154+
return npy_cpack@c@(x + 0.0L + (y + 0), x + 0.0L + (y + 0));
10231155
}
1024-
else {
1156+
1157+
if (ax > RECIP_EPSILON || ay > RECIP_EPSILON)
1158+
return npy_cpack@c@(_real_part_reciprocal@c@(x, y),
1159+
npy_copysign@c@(pio2_hi + pio2_lo, y));
1160+
1161+
if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
10251162
/*
1026-
* Small arguments: series expansion, to avoid loss of precision
1027-
* atan(x) = x [1 + (1/3) x^2 [1 + (3/5) x^2 [1 + ...]]]
1028-
*
1029-
* |x| < 1e-3 => |rel. error| < 1e-18 (f), 1e-24, 1e-36 (l)
1163+
* z = 0 was filtered out above. All other cases must raise
1164+
* inexact, but this is the only one that needs to do it
1165+
* explicitly.
10301166
*/
1031-
@ctype@ z2, r;
1032-
z2 = cmul@c@(z, z);
1033-
r = c_1@c@;
1034-
#if @precision@ >= 3
1035-
SERIES_HORNER_TERM@C@(r, z2, 9.0@C@/11);
1036-
SERIES_HORNER_TERM@C@(r, z2, 7.0@C@/9);
1037-
#endif
1038-
#if @precision@ >= 2
1039-
SERIES_HORNER_TERM@C@(r, z2, 5.0@C@/7);
1040-
#endif
1041-
SERIES_HORNER_TERM@C@(r, z2, 3.0@C@/5);
1042-
SERIES_HORNER_TERM@C@(r, z2, 1.0@C@/3);
1043-
return cmul@c@(z, r);
1044-
}
1167+
volatile npy_float junk = 1 + tiny;
1168+
return (z);
1169+
}
1170+
1171+
if (ax == 1 && ay < @TEPS@)
1172+
rx = (NPY_LOGE2@c@ - npy_log@c@(ay)) / 2;
1173+
else
1174+
rx = npy_log1p@c@(4 * ax / _sum_squares@c@(ax - 1, ay)) / 4;
1175+
1176+
if (ax == 1)
1177+
ry = npy_atan2@c@(2, -ay) / 2;
1178+
else if (ay < @TEPS@)
1179+
ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax)) / 2;
1180+
else
1181+
ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2;
1182+
1183+
return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y));
10451184
}
10461185
#endif
10471186
/**end repeat**/

0 commit comments

Comments
 (0)