|
17 | 17 | median_high High median of data. |
18 | 18 | median_grouped Median, or 50th percentile, of grouped data. |
19 | 19 | mode Mode (most common value) of data. |
| 20 | +multimode List of modes (most common values) of data. |
20 | 21 | ================== ============================================= |
21 | 22 |
|
22 | 23 | Calculate the arithmetic mean ("the average") of data: |
|
79 | 80 | __all__ = [ 'StatisticsError', 'NormalDist', |
80 | 81 | 'pstdev', 'pvariance', 'stdev', 'variance', |
81 | 82 | 'median', 'median_low', 'median_high', 'median_grouped', |
82 | | - 'mean', 'mode', 'harmonic_mean', 'fmean', |
| 83 | + 'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean', |
83 | 84 | ] |
84 | 85 |
|
85 | | -import collections |
86 | 86 | import math |
87 | 87 | import numbers |
88 | 88 | import random |
|
92 | 92 | from itertools import groupby |
93 | 93 | from bisect import bisect_left, bisect_right |
94 | 94 | from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum |
95 | | - |
96 | | - |
| 95 | +from operator import itemgetter |
| 96 | +from collections import Counter |
97 | 97 |
|
98 | 98 | # === Exceptions === |
99 | 99 |
|
@@ -249,20 +249,6 @@ def _convert(value, T): |
249 | 249 | raise |
250 | 250 |
|
251 | 251 |
|
252 | | -def _counts(data): |
253 | | - # Generate a table of sorted (value, frequency) pairs. |
254 | | - table = collections.Counter(iter(data)).most_common() |
255 | | - if not table: |
256 | | - return table |
257 | | - # Extract the values with the highest frequency. |
258 | | - maxfreq = table[0][1] |
259 | | - for i in range(1, len(table)): |
260 | | - if table[i][1] != maxfreq: |
261 | | - table = table[:i] |
262 | | - break |
263 | | - return table |
264 | | - |
265 | | - |
266 | 252 | def _find_lteq(a, x): |
267 | 253 | 'Locate the leftmost value exactly equal to x' |
268 | 254 | i = bisect_left(a, x) |
@@ -334,9 +320,9 @@ def count(x): |
334 | 320 | nonlocal n |
335 | 321 | n += 1 |
336 | 322 | return x |
337 | | - total = math.fsum(map(count, data)) |
| 323 | + total = fsum(map(count, data)) |
338 | 324 | else: |
339 | | - total = math.fsum(data) |
| 325 | + total = fsum(data) |
340 | 326 | try: |
341 | 327 | return total / n |
342 | 328 | except ZeroDivisionError: |
@@ -523,19 +509,38 @@ def mode(data): |
523 | 509 | >>> mode(["red", "blue", "blue", "red", "green", "red", "red"]) |
524 | 510 | 'red' |
525 | 511 |
|
526 | | - If there is not exactly one most common value, ``mode`` will raise |
527 | | - StatisticsError. |
| 512 | + If there are multiple modes, return the first one encountered. |
| 513 | +
|
| 514 | + >>> mode(['red', 'red', 'green', 'blue', 'blue']) |
| 515 | + 'red' |
| 516 | +
|
| 517 | + If *data* is empty, ``mode`` raises StatisticsError. |
| 518 | +
|
528 | 519 | """ |
529 | | - # Generate a table of sorted (value, frequency) pairs. |
530 | | - table = _counts(data) |
531 | | - if len(table) == 1: |
532 | | - return table[0][0] |
533 | | - elif table: |
534 | | - raise StatisticsError( |
535 | | - 'no unique mode; found %d equally common values' % len(table) |
536 | | - ) |
537 | | - else: |
538 | | - raise StatisticsError('no mode for empty data') |
| 520 | + data = iter(data) |
| 521 | + try: |
| 522 | + return Counter(data).most_common(1)[0][0] |
| 523 | + except IndexError: |
| 524 | + raise StatisticsError('no mode for empty data') from None |
| 525 | + |
| 526 | + |
| 527 | +def multimode(data): |
| 528 | + """Return a list of the most frequently occurring values. |
| 529 | +
|
| 530 | + Will return more than one result if there are multiple modes |
| 531 | + or an empty list if *data* is empty. |
| 532 | +
|
| 533 | + >>> multimode('aabbbbbbbbcc') |
| 534 | + ['b'] |
| 535 | + >>> multimode('aabbbbccddddeeffffgg') |
| 536 | + ['b', 'd', 'f'] |
| 537 | + >>> multimode('') |
| 538 | + [] |
| 539 | +
|
| 540 | + """ |
| 541 | + counts = Counter(iter(data)).most_common() |
| 542 | + maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, [])) |
| 543 | + return list(map(itemgetter(0), mode_items)) |
539 | 544 |
|
540 | 545 |
|
541 | 546 | # === Measures of spread === |
@@ -836,6 +841,7 @@ def __repr__(self): |
836 | 841 | from math import isclose |
837 | 842 | from operator import add, sub, mul, truediv |
838 | 843 | from itertools import repeat |
| 844 | + import doctest |
839 | 845 |
|
840 | 846 | g1 = NormalDist(10, 20) |
841 | 847 | g2 = NormalDist(-5, 25) |
@@ -893,3 +899,5 @@ def assert_close(G1, G2): |
893 | 899 | S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n), |
894 | 900 | Y.samples(n))]) |
895 | 901 | assert_close(X - Y, S) |
| 902 | + |
| 903 | + print(doctest.testmod()) |
0 commit comments