Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c630e10

Browse files
authored
Factor-out common code. Also, optimize common cases by preallocating space on the stack. GH-8738
Improves speed by 9 to 10ns per call.
1 parent 1399074 commit c630e10

File tree

1 file changed

+56
-41
lines changed

1 file changed

+56
-41
lines changed

Modules/mathmodule.c

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,10 +2032,10 @@ math_fmod_impl(PyObject *module, double x, double y)
20322032
}
20332033

20342034
/*
2035-
Given an *n* length *vec* of non-negative, non-nan, non-inf values
2035+
Given an *n* length *vec* of non-negative values
20362036
where *max* is the largest value in the vector, compute:
20372037
2038-
sum((x / max) ** 2 for x in vec)
2038+
max * sqrt(sum((x / max) ** 2 for x in vec))
20392039
20402040
When a maximum value is found, it is swapped to the end. This
20412041
lets us skip one loop iteration and just add 1.0 at the end.
@@ -2045,19 +2045,31 @@ Kahan summation is used to improve accuracy. The *csum*
20452045
variable tracks the cumulative sum and *frac* tracks
20462046
fractional round-off error for the most recent addition.
20472047
2048+
The value of the *max* variable must be present in *vec*
2049+
or should equal to 0.0 when n==0. Likewise, *max* will
2050+
be INF if an infinity is present in the vec.
2051+
2052+
The *found_nan* variable indicates whether some member of
2053+
the *vec* is a NaN.
20482054
*/
20492055

20502056
static inline double
2051-
scaled_vector_squared(Py_ssize_t n, double *vec, double max)
2057+
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
20522058
{
20532059
double x, csum = 0.0, oldcsum, frac = 0.0;
20542060
Py_ssize_t i;
20552061

2062+
if (Py_IS_INFINITY(max)) {
2063+
return max;
2064+
}
2065+
if (found_nan) {
2066+
return Py_NAN;
2067+
}
20562068
if (max == 0.0) {
20572069
return 0.0;
20582070
}
20592071
assert(n > 0);
2060-
for (i=0 ; i<n-1 ; i++) {
2072+
for (i=0 ; i < n-1 ; i++) {
20612073
x = vec[i];
20622074
if (x == max) {
20632075
x = vec[n-1];
@@ -2071,9 +2083,11 @@ scaled_vector_squared(Py_ssize_t n, double *vec, double max)
20712083
}
20722084
assert(vec[n-1] == max);
20732085
csum += 1.0 - frac;
2074-
return csum;
2086+
return max * sqrt(csum);
20752087
}
20762088

2089+
#define NUM_STACK_ELEMS 16
2090+
20772091
/*[clinic input]
20782092
math.dist
20792093
@@ -2095,11 +2109,12 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
20952109
/*[clinic end generated code: output=56bd9538d06bbcfe input=937122eaa5f19272]*/
20962110
{
20972111
PyObject *item;
2098-
double *diffs;
20992112
double max = 0.0;
21002113
double x, px, qx, result;
21012114
Py_ssize_t i, m, n;
21022115
int found_nan = 0;
2116+
double diffs_on_stack[NUM_STACK_ELEMS];
2117+
double *diffs = diffs_on_stack;
21032118

21042119
m = PyTuple_GET_SIZE(p);
21052120
n = PyTuple_GET_SIZE(q);
@@ -2109,22 +2124,22 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
21092124
return NULL;
21102125

21112126
}
2112-
diffs = (double *) PyObject_Malloc(n * sizeof(double));
2113-
if (diffs == NULL) {
2114-
return NULL;
2127+
if (n > NUM_STACK_ELEMS) {
2128+
diffs = (double *) PyObject_Malloc(n * sizeof(double));
2129+
if (diffs == NULL) {
2130+
return NULL;
2131+
}
21152132
}
21162133
for (i=0 ; i<n ; i++) {
21172134
item = PyTuple_GET_ITEM(p, i);
21182135
px = PyFloat_AsDouble(item);
21192136
if (px == -1.0 && PyErr_Occurred()) {
2120-
PyObject_Free(diffs);
2121-
return NULL;
2137+
goto error_exit;
21222138
}
21232139
item = PyTuple_GET_ITEM(q, i);
21242140
qx = PyFloat_AsDouble(item);
21252141
if (qx == -1.0 && PyErr_Occurred()) {
2126-
PyObject_Free(diffs);
2127-
return NULL;
2142+
goto error_exit;
21282143
}
21292144
x = fabs(px - qx);
21302145
diffs[i] = x;
@@ -2133,19 +2148,17 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
21332148
max = x;
21342149
}
21352150
}
2136-
if (Py_IS_INFINITY(max)) {
2137-
result = max;
2138-
goto done;
2139-
}
2140-
if (found_nan) {
2141-
result = Py_NAN;
2142-
goto done;
2151+
result = vector_norm(n, diffs, max, found_nan);
2152+
if (diffs != diffs_on_stack) {
2153+
PyObject_Free(diffs);
21432154
}
2144-
result = max * sqrt(scaled_vector_squared(n, diffs, max));
2145-
2146-
done:
2147-
PyObject_Free(diffs);
21482155
return PyFloat_FromDouble(result);
2156+
2157+
error_exit:
2158+
if (diffs != diffs_on_stack) {
2159+
PyObject_Free(diffs);
2160+
}
2161+
return NULL;
21492162
}
21502163

21512164
/* AC: cannot convert yet, waiting for *args support */
@@ -2154,21 +2167,23 @@ math_hypot(PyObject *self, PyObject *args)
21542167
{
21552168
Py_ssize_t i, n;
21562169
PyObject *item;
2157-
double *coordinates;
21582170
double max = 0.0;
21592171
double x, result;
21602172
int found_nan = 0;
2173+
double coord_on_stack[NUM_STACK_ELEMS];
2174+
double *coordinates = coord_on_stack;
21612175

21622176
n = PyTuple_GET_SIZE(args);
2163-
coordinates = (double *) PyObject_Malloc(n * sizeof(double));
2164-
if (coordinates == NULL)
2165-
return NULL;
2177+
if (n > NUM_STACK_ELEMS) {
2178+
coordinates = (double *) PyObject_Malloc(n * sizeof(double));
2179+
if (coordinates == NULL)
2180+
return NULL;
2181+
}
21662182
for (i=0 ; i<n ; i++) {
21672183
item = PyTuple_GET_ITEM(args, i);
21682184
x = PyFloat_AsDouble(item);
21692185
if (x == -1.0 && PyErr_Occurred()) {
2170-
PyObject_Free(coordinates);
2171-
return NULL;
2186+
goto error_exit;
21722187
}
21732188
x = fabs(x);
21742189
coordinates[i] = x;
@@ -2177,21 +2192,21 @@ math_hypot(PyObject *self, PyObject *args)
21772192
max = x;
21782193
}
21792194
}
2180-
if (Py_IS_INFINITY(max)) {
2181-
result = max;
2182-
goto done;
2195+
result = vector_norm(n, coordinates, max, found_nan);
2196+
if (coordinates != coord_on_stack) {
2197+
PyObject_Free(coordinates);
21832198
}
2184-
if (found_nan) {
2185-
result = Py_NAN;
2186-
goto done;
2187-
}
2188-
result = max * sqrt(scaled_vector_squared(n, coordinates, max));
2189-
2190-
done:
2191-
PyObject_Free(coordinates);
21922199
return PyFloat_FromDouble(result);
2200+
2201+
error_exit:
2202+
if (coordinates != coord_on_stack) {
2203+
PyObject_Free(coordinates);
2204+
}
2205+
return NULL;
21932206
}
21942207

2208+
#undef NUM_STACK_ELEMS
2209+
21952210
PyDoc_STRVAR(math_hypot_doc,
21962211
"hypot(*coordinates) -> value\n\n\
21972212
Multidimensional Euclidean distance from the origin to a point.\n\

0 commit comments

Comments
 (0)