Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 9a2be91

Browse files
author
Steven D'Aprano
committed
Issue27181 add geometric mean.
1 parent e7fef52 commit 9a2be91

2 files changed

Lines changed: 552 additions & 0 deletions

File tree

Lib/statistics.py

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,230 @@ def _fail_neg(values, errmsg='negative value'):
303303
yield x
304304

305305

306+
class _nroot_NS:
    """Hands off! Don't touch!

    Everything inside this namespace (class) is an even-more-private
    implementation detail of the private _nth_root function.
    """
    # This class exists only to be used as a namespace, for convenience
    # of being able to keep the related functions together, and to
    # collapse the group in an editor. If this were C# or C++, I would
    # use a Namespace, but the closest Python has is a class.
    #
    # The functions below deliberately take no ``self``: they are always
    # looked up on the class itself and called as plain functions.
    #
    # FIXME possibly move this out into a separate module?
    # That feels like overkill, and may encourage people to treat it as
    # a public feature.
    def __init__(self):
        raise TypeError('namespace only, do not instantiate')

    def nth_root(x, n):
        """Return the real nth root of numeric x.

        For x >= 0 this is the positive root.  For negative x and odd n
        the (negative) real root is returned; for negative x and even n
        ValueError is raised.

        This may be more accurate than ** or pow():

        >>> math.pow(1000, 1.0/3)  #doctest:+SKIP
        9.999999999999998

        >>> _nth_root(1000, 3)
        10.0
        >>> _nth_root(11**5, 5)
        11.0
        >>> _nth_root(2, 12)
        1.0594630943592953

        Raises TypeError if n is not an int or x is not a Decimal or a
        numbers.Real, and ValueError if n is less than 2.
        """
        if not isinstance(n, int):
            raise TypeError('degree n must be an int')
        if n < 2:
            raise ValueError('degree n must be 2 or more')
        if isinstance(x, decimal.Decimal):
            return _nroot_NS.decimal_nroot(x, n)
        elif isinstance(x, numbers.Real):
            return _nroot_NS.float_nroot(x, n)
        else:
            # The % must be applied to the format string, not to the
            # exception instance.
            raise TypeError('expected a number, got %s' % type(x).__name__)

    def float_nroot(x, n):
        """Handle nth root of Reals, treated as a float.

        Signed zeroes, infinities and NANs are passed through; values too
        large to convert to a float are delegated to bignum_nroot.
        """
        assert isinstance(n, int) and n > 1
        if x < 0:
            if n%2 == 0:
                raise ValueError('domain error: even root of negative number')
            else:
                # Recurse through float_nroot rather than calling nroot
                # directly, so that -x still gets the infinity and
                # too-big-for-a-float handling of the x > 0 branch.
                return -_nroot_NS.float_nroot(-x, n)
        elif x == 0:
            # Preserve the sign of a float zero.
            return math.copysign(0.0, x)
        elif x > 0:
            try:
                isinfinity = math.isinf(x)
            except OverflowError:
                # x cannot be converted to a float.
                return _nroot_NS.bignum_nroot(x, n)
            else:
                if isinfinity:
                    return float('inf')
                else:
                    return _nroot_NS.nroot(x, n)
        else:
            # All three comparisons failed, so x must be a NAN.
            assert math.isnan(x)
            return float('nan')

    def nroot(x, n):
        """Calculate x**(1/n), then improve the answer."""
        # This uses math.pow() to calculate an initial guess for the root,
        # then uses the iterated nroot algorithm to improve it.
        #
        # By my testing, about 8% of the time the iterated algorithm ends
        # up converging to a result which is less accurate than the initial
        # guess. [FIXME: is this still true?] In that case, we use the
        # guess instead of the "improved" value. This way, we're never
        # less accurate than math.pow().
        r1 = math.pow(x, 1.0/n)
        eps1 = abs(r1**n - x)
        if eps1 == 0.0:
            # r1 is the exact root, so we're done. By my testing, this
            # occurs about 80% of the time for x < 1 and 30% of the
            # time for x > 1.
            return r1
        else:
            try:
                r2 = _nroot_NS.iterated_nroot(x, n, r1)
            except RuntimeError:
                # The iteration failed to converge; fall back on the guess.
                return r1
            else:
                eps2 = abs(r2**n - x)
                if eps1 < eps2:
                    return r1
                return r2

    def iterated_nroot(a, n, g):
        """Return the nth root of a, starting with guess g.

        This is a special case of Newton's Method.
        https://en.wikipedia.org/wiki/Nth_root_algorithm

        Raises RuntimeError if the iteration has not converged within
        the iteration limit.
        """
        np = n - 1
        def iterate(r):
            try:
                return (np*r + a/math.pow(r, np))/n
            except OverflowError:
                # If r is large enough, r**np may overflow. If that
                # happens, r**-np will be small, but not necessarily zero.
                return (np*r + a*math.pow(r, -np))/n
        # With a good guess, such as g = a**(1/n), this will converge in
        # only a few iterations. However a poor guess can take thousands
        # of iterations to converge, if at all. We guard against poor
        # guesses by setting an upper limit to the number of iterations.
        r1 = g
        r2 = iterate(g)
        for i in range(1000):
            if r1 == r2:
                break
            # Use Floyd's cycle-finding algorithm to avoid being trapped
            # in a cycle.
            # https://en.wikipedia.org/wiki/Cycle_detection#Tortoise_and_hare
            r1 = iterate(r1)
            r2 = iterate(iterate(r2))
        else:
            # If the guess is particularly bad, the above may fail to
            # converge in any reasonable time.
            raise RuntimeError('nth-root failed to converge')
        return r2

    def decimal_nroot(x, n):
        """Handle nth root of Decimals.

        Signalling NANs always raise decimal.InvalidOperation; quiet NANs
        raise only if the current context traps InvalidOperation, and are
        otherwise returned unchanged.  Negative values follow the same
        rule as floats: ValueError for even n, the negative real root for
        odd n.  Zeroes and positive infinity are returned unchanged.
        """
        assert isinstance(x, decimal.Decimal)
        assert isinstance(n, int)
        if x.is_snan():
            # Signalling NANs always raise.
            raise decimal.InvalidOperation('nth-root of snan')
        if x.is_qnan():
            # Quiet NANs only raise if the context is set to raise,
            # otherwise return a NAN.
            ctx = decimal.getcontext()
            if ctx.traps[decimal.InvalidOperation]:
                raise decimal.InvalidOperation('nth-root of nan')
            else:
                # Preserve the input NAN.
                return x
        if x < 0:
            # A negative Decimal cannot be raised to a fractional power,
            # so handle the sign here, the same way float_nroot does.
            if n%2 == 0:
                raise ValueError('domain error: even root of negative number')
            return -_nroot_NS.decimal_nroot(-x, n)
        if x == 0:
            # The iteration below would compute 0/0 for a zero input.
            return x
        if x.is_infinite():
            return x
        # FIXME this hasn't had the extensive testing of the float
        # version iterated_nroot so there's possibly some buggy
        # corner cases buried in here. Can it overflow? Converge to a
        # less accurate root?
        np = n - 1
        def iterate(r):
            return (np*r + x/r**np)/n
        r0 = x**(decimal.Decimal(1)/n)
        assert isinstance(r0, decimal.Decimal)
        # Remember every estimate produced so far.  Rounding can make the
        # iteration oscillate between neighbouring values instead of
        # settling on a fixed point; stopping at the first repeated
        # estimate guarantees termination in that case too.
        seen = {r0}
        while True:
            r1 = iterate(r0)
            if r1 == r0:
                # Converged to a fixed point.
                return r1
            if r1 in seen:
                # Trapped in a cycle; this is as good as it gets.
                return r1
            seen.add(r1)
            r0 = r1

    def bignum_nroot(x, n):
        """Return the nth root of a positive huge number."""
        assert x > 0
        # I state without proof that ⁿ√x ≈ ⁿ√2·ⁿ√(x//2)
        # and that for sufficiently big x the error is acceptable.
        # We now halve x until it is small enough to get the root.
        m = 0
        while True:
            x //= 2
            m += 1
            try:
                y = float(x)
            except OverflowError:
                continue
            break
        a = _nroot_NS.nroot(y, n)
        # At this point, we want the nth-root of 2**m, or 2**(m/n).
        # We can write that as 2**(q + r/n) = 2**q * ⁿ√2**r where q = m//n.
        q, r = divmod(m, n)
        b = 2**q * _nroot_NS.nroot(2**r, n)
        return a * b
491+
492+
493+
# Module-private entry point for nth roots: the rest of the module calls
# the implementation only through this alias.
_nth_root = _nroot_NS.nth_root
# Looked up on the class, the attribute must be a plain function (never a
# bound method) so that it can be called directly as _nth_root(x, n).
assert isinstance(_nth_root, type(lambda: None))
496+
497+
498+
def _safe_frexp(x):
    """Like math.frexp(x), but also accept ints too big for a float."""
    try:
        return math.frexp(x)
    except OverflowError:
        if not isinstance(x, int):
            raise
    # 2**(exponent-1) <= abs(x) < 2**exponent, so the quotient has a
    # magnitude between 0.5 and 1.  True division of ints is carried out
    # without first converting either operand to a float.
    exponent = x.bit_length()
    return (x / (1 << exponent), exponent)


def _product(values):
    """Return product of values as (exponent, mantissa).

    The true product is ``mantissa * 2**exponent``.

    If ``values`` contains no floats, the product is calculated with
    ordinary multiplication and returned as ``(0, product)``.  Otherwise
    the exponent is accumulated separately from the mantissa, so that a
    long run of floats cannot overflow or underflow part way through.

    ``values`` may be any iterable; it is consumed exactly once.

    Raises TypeError if Decimals and floats are mixed.
    """
    errmsg = 'mixed Decimal and float is not supported'
    # Take a single iterator up front: the second loop below has to carry
    # on from where the first loop stopped, even when the caller passes a
    # list rather than an iterator.
    values = iter(values)
    prod = 1
    for x in values:
        if isinstance(x, float):
            break
        prod *= x
    else:
        return (0, prod)
    if isinstance(prod, Decimal):
        raise TypeError(errmsg)
    # Since floats can overflow easily, we calculate the product as a
    # sort of poor-man's BigFloat. Given that:
    #
    #   x = 2**p * m  # p == power or exponent (scale), m = mantissa
    #
    # we can calculate the product of two (or more) x values as:
    #
    #   x1*x2 = 2**p1*m1 * 2**p2*m2 = 2**(p1+p2)*(m1*m2)
    #
    # Seed the running product with whatever was multiplied together
    # before the first float turned up, so those values are not lost.
    mant, scale = _safe_frexp(prod)
    for y in chain([x], values):
        if isinstance(y, Decimal):
            raise TypeError(errmsg)
        m1, e1 = _safe_frexp(y)
        # Renormalise the running mantissa on every step so that it
        # cannot drift towards underflow.
        m2, e2 = math.frexp(mant)
        scale += (e1 + e2)
        mant = m1*m2
    return (scale, mant)
528+
529+
306530
# === Measures of central tendency (averages) ===
307531

308532
def mean(data):
@@ -331,6 +555,49 @@ def mean(data):
331555
return _convert(total/n, T)
332556

333557

558+
def geometric_mean(data):
    """Return the geometric mean of data.

    The geometric mean is appropriate when averaging quantities which
    are multiplied together rather than added, for example growth rates.
    Suppose an investment grows by 10% in the first year, falls by 5% in
    the second, then grows by 12% in the third, what is the average rate
    of growth over the three years?

    >>> geometric_mean([1.10, 0.95, 1.12])
    1.0538483123382172

    giving an average growth of 5.385%. Using the arithmetic mean will
    give approximately 5.667%, which is too high.

    ``StatisticsError`` will be raised if ``data`` is empty, or any
    element is less than zero.  ``TypeError`` will be raised if ``data``
    holds a single element which is neither a real number nor a Decimal.
    """
    if iter(data) is data:
        # An iterator can only be consumed once, and we need its length.
        data = list(data)
    errmsg = 'geometric mean does not support negative values'
    n = len(data)
    if n < 1:
        raise StatisticsError('geometric_mean requires at least one data point')
    elif n == 1:
        # The mean of a single value is the value itself; no root needed.
        x = data[0]
        if isinstance(x, (numbers.Real, Decimal)):
            if x < 0:
                raise StatisticsError(errmsg)
            return x
        else:
            raise TypeError('unsupported type')
    else:
        # _product returns the product split as prod * 2**scale.
        scale, prod = _product(_fail_neg(data, errmsg))
        r = _nth_root(prod, n)
        if scale:
            # The nth root of 2**scale is 2**(scale/n).  Writing
            # scale = p*n + q turns that into 2**p times the nth root
            # of 2**q.
            p, q = divmod(scale, n)
            s = 2**p * _nth_root(2**q, n)
        else:
            s = 1
        return s*r
599+
600+
334601
def harmonic_mean(data):
335602
"""Return the harmonic mean of data.
336603

0 commit comments

Comments
 (0)