|
7 | 7 | * 2009), under the following license: |
8 | 8 | * |
9 | 9 | * Copyright (c) 2007, 2011 David Schultz <[email protected]> |
| 10 | + * Copyright (c) 2012 Stephen Montgomery-Smith <[email protected]> |
10 | 11 | * All rights reserved. |
11 | 12 | * |
12 | 13 | * Redistribution and use in source and binary forms, with or without |
|
41 | 42 | * #TMAX = FLT_MAX, DBL_MAX, LDBL_MAX# |
42 | 43 | * #TMIN = FLT_MIN, DBL_MIN, LDBL_MIN# |
43 | 44 | * #TMANT_DIG = FLT_MANT_DIG, DBL_MANT_DIG, LDBL_MANT_DIG# |
| 45 | + * #TEPS = FLT_EPSILON, DBL_EPSILON, LDBL_EPSILON# |
44 | 46 | * #precision = 1, 2, 3# |
45 | 47 | */ |
46 | 48 |
|
@@ -194,7 +196,6 @@ static @ctype@ _npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt) |
194 | 196 | npy_ldexp@c@(mant * mantsin, expt + exsin)); |
195 | 197 | } |
196 | 198 |
|
197 | | - |
198 | 199 | #ifndef HAVE_CEXP@C@ |
199 | 200 | @ctype@ npy_cexp@c@(@ctype@ z) |
200 | 201 | { |
@@ -922,38 +923,9 @@ static @ctype@ _npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt) |
922 | 923 | #ifndef HAVE_CATAN@C@ |
923 | 924 | @ctype@ npy_catan@c@(@ctype@ z) |
924 | 925 | { |
925 | | - @type@ x, y; |
926 | | - x = npy_creal@c@(z); |
927 | | - y = npy_cimag@c@(z); |
928 | | - |
929 | | - if (npy_fabs(x) > 1e-3 || npy_fabs(y) > 1e-3) { |
930 | | - /* catan(z) = 0.5*i * log((i+z)/(i-z)) */ |
931 | | - @ctype@ ip, im; |
932 | | - ip = cadd@c@(c_i@c@, z); |
933 | | - im = csub@c@(c_i@c@, z); |
934 | | - return cmul@c@(c_ihalf@c@, npy_clog@c@(cdiv@c@(ip, im))); |
935 | | - } |
936 | | - else { |
937 | | - /* |
938 | | - * Small arguments: series expansion, to avoid loss of precision |
939 | | - * atan(x) = x [1 - (1/3) x^2 [1 - (3/5) x^2 [1 - ...]]] |
940 | | - * |
941 | | - * |x| < 1e-3 => |rel. error| < 1e-18 (f), 1e-24, 1e-36 (l) |
942 | | - */ |
943 | | - @ctype@ z2, r; |
944 | | - z2 = cmul@c@(z, z); |
945 | | - r = c_1@c@; |
946 | | -#if @precision@ >= 3 |
947 | | - SERIES_HORNER_TERM@C@(r, z2, -9.0@C@/11); |
948 | | - SERIES_HORNER_TERM@C@(r, z2, -7.0@C@/9); |
949 | | -#endif |
950 | | -#if @precision@ >= 2 |
951 | | - SERIES_HORNER_TERM@C@(r, z2, -5.0@C@/7); |
952 | | -#endif |
953 | | - SERIES_HORNER_TERM@C@(r, z2, -3.0@C@/5); |
954 | | - SERIES_HORNER_TERM@C@(r, z2, -1.0@C@/3); |
955 | | - return cmul@c@(r, z); |
956 | | - } |
| 926 | + /* catan(z) = I * conj( catanh(I * conj(z)) ) */ |
| 927 | + z = npy_catanh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); |
| 928 | + return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); |
957 | 929 | } |
958 | 930 | #endif |
959 | 931 |
|
@@ -1007,41 +979,208 @@ static @ctype@ _npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt) |
1007 | 979 | #endif |
1008 | 980 |
|
1009 | 981 | #ifndef HAVE_CATANH@C@ |
| 982 | +/* |
| 983 | + * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). |
| 984 | + * Assumes x*x and y*y will not overflow. |
| 985 | + * Assumes x and y are finite. |
| 986 | + * Assumes y is non-negative. |
| 987 | + * Assumes fabs(x) >= DBL_EPSILON. |
| 988 | + */ |
| 989 | +static inline @type@ _sum_squares@c@(@type@ x, @type@ y) |
| 990 | +{ |
| 991 | +#if @precision@ == 1 |
| 992 | +const npy_float SQRT_MIN = 1.0842022e-19f; |
| 993 | +#endif |
| 994 | +#if @precision@ == 2 |
| 995 | +const npy_double SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */ |
| 996 | +#endif |
| 997 | +#if @precision@ == 3 |
| 998 | +/* sqrt(LDBL_MIN); this value is correct for 80-bit long doubles */ |
| 999 | +const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l; |
| 1000 | +#endif |
| 1001 | + /* Avoid underflow when y is small. */ |
| 1002 | + if (y < SQRT_MIN) |
| 1003 | + return (x * x); |
| 1004 | + |
| 1005 | + return (x * x + y * y); |
| 1006 | +} |
| 1007 | + |
| 1008 | +/* |
| 1009 | + * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). |
| 1010 | + * Assumes x and y are not NaN, and at least one of fabs(x) and fabs(y) is |
| 1011 | + * larger than RECIP_EPSILON. We avoid unwarranted underflow. It is important |
| 1012 | + * not to compute this as creal(1/z), because the imaginary part may produce |
| 1013 | + * an unwanted underflow. |
| 1014 | + * This is only called in a context where inexact is always raised before |
| 1015 | + * the call, so no effort is made to avoid or force inexact. |
| 1016 | + */ |
| 1017 | +#if @precision@ == 1 |
| 1018 | +#define BIAS (FLT_MAX_EXP - 1) |
| 1019 | +#define CUTOFF (FLT_MANT_DIG / 2 + 1) |
| 1020 | +static inline npy_float _real_part_reciprocalf(npy_float x, npy_float y) |
| 1021 | +{ |
| 1022 | + npy_float scale; |
| 1023 | + npy_uint32 hx, hy; |
| 1024 | + npy_int32 ix, iy; |
| 1025 | + |
| 1026 | + GET_FLOAT_WORD(hx, x); |
| 1027 | + ix = hx & 0x7f800000; |
| 1028 | + GET_FLOAT_WORD(hy, y); |
| 1029 | + iy = hy & 0x7f800000; |
| 1030 | + if (ix - iy >= CUTOFF << 23 || npy_isinf(x)) |
| 1031 | + return (1 / x); |
| 1032 | + if (iy - ix >= CUTOFF << 23) |
| 1033 | + return (x / y / y); |
| 1034 | + if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23) |
| 1035 | + return (x / (x * x + y * y)); |
| 1036 | + SET_FLOAT_WORD(scale, 0x7f800000 - ix); |
| 1037 | + x *= scale; |
| 1038 | + y *= scale; |
| 1039 | + return (x / (x * x + y * y) * scale); |
| 1040 | +} |
| 1041 | +#undef BIAS |
| 1042 | +#undef CUTOFF |
| 1043 | +#endif |
| 1044 | +#if @precision@ == 2 |
| 1045 | +#define BIAS (DBL_MAX_EXP - 1) |
| 1046 | +/* XXX more guard digits are useful iff there is extra precision. */ |
| 1047 | +#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ |
| 1048 | +static inline npy_double _real_part_reciprocal(npy_double x, npy_double y) |
| 1049 | +{ |
| 1050 | + npy_double scale; |
| 1051 | + npy_uint32 hx, hy; |
| 1052 | + npy_int32 ix, iy; |
| 1053 | + |
| 1054 | + /* |
| 1055 | + * This code is inspired by the C99 document n1124.pdf, Section G.5.1, |
| 1056 | + * example 2. |
| 1057 | + */ |
| 1058 | + GET_HIGH_WORD(hx, x); |
| 1059 | + ix = hx & 0x7ff00000; |
| 1060 | + GET_HIGH_WORD(hy, y); |
| 1061 | + iy = hy & 0x7ff00000; |
| 1062 | + if (ix - iy >= CUTOFF << 20 || npy_isinf(x)) |
| 1063 | + return (1 / x); /* +-Inf -> +-0 is special */ |
| 1064 | + if (iy - ix >= CUTOFF << 20) |
| 1065 | + return (x / y / y); /* should avoid double div, but hard */ |
| 1066 | + if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) |
| 1067 | + return (x / (x * x + y * y)); |
| 1068 | + scale = 1; |
| 1069 | + SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ |
| 1070 | + x *= scale; |
| 1071 | + y *= scale; |
| 1072 | + return (x / (x * x + y * y) * scale); |
| 1073 | +} |
| 1074 | +#undef BIAS |
| 1075 | +#undef CUTOFF |
| 1076 | +#endif |
| 1077 | +#if @precision@ == 3 |
| 1078 | +#define BIAS (LDBL_MAX_EXP - 1) |
| 1079 | +#define CUTOFF (LDBL_MANT_DIG / 2 + 1) |
| 1080 | +static inline npy_longdouble _real_part_reciprocall(npy_longdouble x, npy_longdouble y) |
| 1081 | +{ |
| 1082 | + npy_longdouble scale; |
| 1083 | + union IEEEl2bitsrep ux, uy, us; |
| 1084 | + npy_int32 ix, iy; |
| 1085 | + |
| 1086 | + ux.e = x; |
| 1087 | + ix = GET_LDOUBLE_EXP(ux); |
| 1088 | + uy.e = y; |
| 1089 | + iy = GET_LDOUBLE_EXP(uy); |
| 1090 | + if (ix - iy >= CUTOFF || npy_isinf(x)) |
| 1091 | + return (1/x); |
| 1092 | + if (iy - ix >= CUTOFF) |
| 1093 | + return (x/y/y); |
| 1094 | + if (ix <= BIAS + LDBL_MAX_EXP / 2 - CUTOFF) |
| 1095 | + return (x/(x*x + y*y)); |
| 1096 | + us.e = 1; |
| 1097 | + SET_LDOUBLE_EXP(us, 0x7fff - ix); |
| 1098 | + scale = us.e; |
| 1099 | + x *= scale; |
| 1100 | + y *= scale; |
| 1101 | + return (x/(x*x + y*y) * scale); |
| 1102 | +} |
| 1103 | +#undef BIAS |
| 1104 | +#undef CUTOFF |
| 1105 | +#endif |
| 1106 | + |
1010 | 1107 | @ctype@ npy_catanh@c@(@ctype@ z) |
1011 | 1108 | { |
1012 | | - @type@ x, y; |
| 1109 | +#if @precision@ == 1 |
| 1110 | + /* sqrt(3 * FLT_EPSILON) */ |
| 1111 | + const npy_float SQRT_3_EPSILON = 5.9801995673e-4f; |
| 1112 | + /* Chosen so that pio2_hi + pio2_lo == pio2_hi, yet the addition still raises FE_INEXACT. */ |
| 1113 | + const volatile float pio2_lo = 7.5497899549e-9f; |
| 1114 | +#endif |
| 1115 | +#if @precision@ == 2 |
| 1116 | + const npy_double SQRT_3_EPSILON = 2.5809568279517849e-8; |
| 1117 | + const volatile npy_double pio2_lo = 6.1232339957367659e-17; |
| 1118 | +#endif |
| 1119 | +#if @precision@ == 3 |
| 1120 | + const npy_longdouble SQRT_3_EPSILON = 5.70316273435758915310e-10; |
| 1121 | + const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; |
| 1122 | +#endif |
| 1123 | + const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; |
| 1124 | + const @type@ pio2_hi = NPY_PI_2@c@; |
| 1125 | + const volatile float tiny = 3.9443045e-31f; |
| 1126 | + @type@ x, y, ax, ay, rx, ry; |
1013 | 1127 |
|
1014 | 1128 | x = npy_creal@c@(z); |
1015 | 1129 | y = npy_cimag@c@(z); |
1016 | | - |
1017 | | - if (npy_fabs(x) > 1e-3 || npy_fabs(y) > 1e-3) { |
1018 | | - /* catanh(z) = 0.5 * log((1+z)/(1-z)) */ |
1019 | | - @ctype@ p1, m1; |
1020 | | - p1 = cadd@c@(c_1@c@, z); |
1021 | | - m1 = csub@c@(c_1@c@, z); |
1022 | | - return cmul@c@(c_half@c@, npy_clog@c@(cdiv@c@(p1, m1))); |
| 1130 | + ax = npy_fabs@c@(x); |
| 1131 | + ay = npy_fabs@c@(y); |
| 1132 | + |
| 1133 | + /* This helps handle many cases. */ |
| 1134 | + if (y == 0 && ax <= 1) |
| 1135 | + return npy_cpack@c@(npy_atanh@c@(x), y); |
| 1136 | + |
| 1137 | + /* To ensure the same accuracy as atan(), and to filter out z = 0. */ |
| 1138 | + if (x == 0) |
| 1139 | + return npy_cpack@c@(x, npy_atan@c@(y)); |
| 1140 | + |
| 1141 | + if (npy_isnan(x) || npy_isnan(y)) { |
| 1142 | + /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ |
| 1143 | + if (npy_isinf(x)) |
| 1144 | + return npy_cpack@c@(npy_copysign@c@(0, x), y + y); |
| 1145 | + /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ |
| 1146 | + if (npy_isinf(y)) |
| 1147 | + return npy_cpack@c@(npy_copysign@c@(0, x), |
| 1148 | + npy_copysign@c@(pio2_hi + pio2_lo, y)); |
| 1149 | + /* |
| 1150 | + * All other cases involving NaN return NaN + I*NaN. |
| 1151 | + * C99 leaves it optional whether to raise invalid if one of |
| 1152 | + * the arguments is not NaN, so we opt not to raise it. |
| 1153 | + */ |
| 1154 | + return npy_cpack@c@(x + 0.0L + (y + 0), x + 0.0L + (y + 0)); |
1023 | 1155 | } |
1024 | | - else { |
| 1156 | + |
| 1157 | + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) |
| 1158 | + return npy_cpack@c@(_real_part_reciprocal@c@(x, y), |
| 1159 | + npy_copysign@c@(pio2_hi + pio2_lo, y)); |
| 1160 | + |
| 1161 | + if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { |
1025 | 1162 | /* |
1026 | | - * Small arguments: series expansion, to avoid loss of precision |
1027 | | - * atan(x) = x [1 + (1/3) x^2 [1 + (3/5) x^2 [1 + ...]]] |
1028 | | - * |
1029 | | - * |x| < 1e-3 => |rel. error| < 1e-18 (f), 1e-24, 1e-36 (l) |
| 1163 | + * z = 0 was filtered out above. All other cases must raise |
| 1164 | + * inexact, but this is the only one that needs to do it |
| 1165 | + * explicitly. |
1030 | 1166 | */ |
1031 | | - @ctype@ z2, r; |
1032 | | - z2 = cmul@c@(z, z); |
1033 | | - r = c_1@c@; |
1034 | | -#if @precision@ >= 3 |
1035 | | - SERIES_HORNER_TERM@C@(r, z2, 9.0@C@/11); |
1036 | | - SERIES_HORNER_TERM@C@(r, z2, 7.0@C@/9); |
1037 | | -#endif |
1038 | | -#if @precision@ >= 2 |
1039 | | - SERIES_HORNER_TERM@C@(r, z2, 5.0@C@/7); |
1040 | | -#endif |
1041 | | - SERIES_HORNER_TERM@C@(r, z2, 3.0@C@/5); |
1042 | | - SERIES_HORNER_TERM@C@(r, z2, 1.0@C@/3); |
1043 | | - return cmul@c@(z, r); |
1044 | | - } |
| 1167 | + volatile npy_float junk = 1 + tiny; |
| 1168 | + return (z); |
| 1169 | + } |
| 1170 | + |
| 1171 | + if (ax == 1 && ay < @TEPS@) |
| 1172 | + rx = (NPY_LOGE2@c@ - npy_log@c@(ay)) / 2; |
| 1173 | + else |
| 1174 | + rx = npy_log1p@c@(4 * ax / _sum_squares@c@(ax - 1, ay)) / 4; |
| 1175 | + |
| 1176 | + if (ax == 1) |
| 1177 | + ry = npy_atan2@c@(2, -ay) / 2; |
| 1178 | + else if (ay < @TEPS@) |
| 1179 | + ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax)) / 2; |
| 1180 | + else |
| 1181 | + ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; |
| 1182 | + |
| 1183 | + return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); |
1045 | 1184 | } |
1046 | 1185 | #endif |
1047 | 1186 | /**end repeat**/ |
|
0 commit comments