Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
49f3471
Add kde_rand()
rhettinger Apr 5, 2024
0fd40f3
Update __all__
rhettinger Apr 5, 2024
75680e5
Auto detect size changes
rhettinger Apr 6, 2024
63ba396
.
rhettinger Apr 6, 2024
c69c504
Factor-out s-scurve function
rhettinger Apr 6, 2024
dcd83f2
.
rhettinger Apr 6, 2024
57492bf
.
rhettinger Apr 6, 2024
5b41598
.
rhettinger Apr 7, 2024
89a6033
Add docs for kde_random().
rhettinger Apr 7, 2024
4d4f792
.
rhettinger Apr 7, 2024
7848cca
.
rhettinger Apr 7, 2024
37a8955
Improve docstrings
rhettinger Apr 7, 2024
a30fb9d
The "with locks" comment was inaccurate.
rhettinger Apr 7, 2024
a2a077f
Add summary table entry. Fix doctests.
rhettinger Apr 7, 2024
a82b100
Fix typo
rhettinger Apr 8, 2024
1397e91
.
rhettinger Apr 11, 2024
630d942
Change variable name to match the supporting reference.
rhettinger Apr 13, 2024
e9e5d54
Closed-form for _parabolic_invcdf().
rhettinger Apr 15, 2024
8a0fd69
Flip sign in Newton-Raphson to match the common presentation.
rhettinger Apr 15, 2024
879a1c8
Merge branch 'main' into kde_rand
rhettinger Apr 15, 2024
3459bf2
Add kernel_invcdf tests
rhettinger Apr 17, 2024
1ca870a
Better _quartic_invcdf_estimate
rhettinger Apr 19, 2024
96a5f14
Make standalone _triweight_invcdf_estimate()
rhettinger Apr 19, 2024
f05eddb
Floats everywhere
rhettinger Apr 19, 2024
065be11
.
rhettinger Apr 23, 2024
3ac42df
Merge branch 'main' into kde_rand
rhettinger Apr 24, 2024
bba0bdf
Merge branch 'main' into kde_rand
rhettinger Apr 30, 2024
d71ca9d
Never used the global, shared Random instance.
rhettinger Apr 30, 2024
ee43ed7
.
rhettinger Apr 30, 2024
7ed7d41
Merge branch 'main' into kde_rand
rhettinger Apr 30, 2024
f85c303
Update whatsnew
rhettinger Apr 30, 2024
544ca8f
Expand "it" variable name to "iterator"
rhettinger Apr 30, 2024
1249d14
Test the kde_random() outer function
rhettinger May 4, 2024
258e0f4
Merge branch 'main' into kde_rand
rhettinger May 4, 2024
a09d30b
Add fully qualified reference
rhettinger May 4, 2024
405d443
Add an approximate distribution test
rhettinger May 4, 2024
a4692bf
Use F_hat() for a better estimate
rhettinger May 4, 2024
ec2d9f2
Test the curve in more places
rhettinger May 4, 2024
c612c1f
Refine the reproducibility note.
rhettinger May 4, 2024
1324952
Missing CDF qualifier
rhettinger May 4, 2024
13e702f
Word smithing
rhettinger May 4, 2024
15a4764
Word smithing
rhettinger May 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add kde_rand()
  • Loading branch information
rhettinger committed Apr 5, 2024
commit 49f34716234bd02b09ca086419e6843532c240a1
92 changes: 91 additions & 1 deletion Lib/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,13 @@
from itertools import count, groupby, repeat
from bisect import bisect_left, bisect_right
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum, sumprod
from math import isfinite, isinf, pi, cos, sin, cosh, atan
from math import isfinite, isinf, pi, cos, sin, tan, cosh, asin, atan
from functools import reduce
from operator import itemgetter
from collections import Counter, namedtuple, defaultdict

# Square root of two, computed once when the module is loaded.
_SQRT2 = sqrt(2.0)
# Private alias for whatever `random` is bound to at this point (presumably the
# stdlib random module -- its import is outside this excerpt).  kde_random()
# rebinds the bare name `random` locally and reaches the original via _random.
_random = random

# === Exceptions ===

Expand Down Expand Up @@ -1697,3 +1698,92 @@ def __getstate__(self):

def __setstate__(self, state):
    # Unpack first so a wrongly sized state fails before either attribute
    # is touched, then restore the two private fields in (mu, sigma) order.
    mu, sigma = state
    self._mu = mu
    self._sigma = sigma


## kde_random() ##############################################################

def _newton_raphson(f_inv_est, f, f_prime, tolerance=1e-12):
    """Build an inverse of *f* using Newton-Raphson refinement.

    *f_inv_est* maps a target value y to a starting guess for x, *f* is the
    function being inverted, and *f_prime* is called to obtain the slope used
    for each correction step.  The returned callable keeps refining x until
    abs(y - f(x)) is no longer greater than *tolerance*.

    There is no cap on the number of iterations, so the starting guess must
    be good enough for the iteration to converge.
    """
    def f_inv(y):
        "Return x such that f(x) ≈ y within the specified tolerance."
        x = f_inv_est(y)
        while True:
            residual = y - f(x)
            # Written as "not >" (rather than "<=") so that a NaN residual
            # also ends the loop.
            if not abs(residual) > tolerance:
                return x
            x += residual / f_prime(x)
    return f_inv

def _parabolic_invcdf_estimate(p):
    "Starting guess for the parabolic inverse CDF, mirrored about p = 1/2."
    # NOTE(review): the exponent looks like an empirically fitted constant;
    # its derivation is not shown here.
    exponent = 0.583367470424302
    if p <= 1/2:
        return (2.0 * p) ** exponent - 1.0
    return 1.0 - (2.0 - 2.0*p) ** exponent


def _parabolic_cdf(t):
    "Cubic polynomial that _parabolic_invcdf inverts."
    return -1/4 * t**3 + 3/4 * t + 1/2


def _parabolic_pdf(t):
    "Derivative of _parabolic_cdf, used as the Newton-Raphson slope."
    return 3/4 * (1.0 - t * t)


# Inverse CDF of the parabolic kernel, solved numerically from the guess.
_parabolic_invcdf = _newton_raphson(
    f_inv_est=_parabolic_invcdf_estimate,
    f=_parabolic_cdf,
    f_prime=_parabolic_pdf,
)

def _quartic_invcdf_estimate(p):
    "Starting guess for the quartic inverse CDF, mirrored about p = 1/2."
    # NOTE(review): the exponent looks like an empirically fitted constant;
    # its derivation is not shown here.
    exponent = 0.4258865685331
    if p <= 1/2:
        return (2.0 * p) ** exponent - 1.0
    return 1.0 - (2.0 - 2.0*p) ** exponent


def _quartic_cdf(t):
    "Quintic polynomial that _quartic_invcdf inverts."
    return 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2


def _quartic_pdf(t):
    "Derivative of _quartic_cdf, used as the Newton-Raphson slope."
    return 15/16 * (1.0 - t * t) ** 2


# Inverse CDF of the quartic kernel, solved numerically from the guess.
_quartic_invcdf = _newton_raphson(
    f_inv_est=_quartic_invcdf_estimate,
    f=_quartic_cdf,
    f_prime=_quartic_pdf,
)

def _triweight_invcdf_estimate(p):
    "Starting guess for the triweight inverse CDF, mirrored about p = 1/2."
    # NOTE(review): the exponent looks like an empirically fitted constant;
    # its derivation is not shown here.
    exponent = 0.3400218741872791
    if p <= 1/2:
        return (2.0 * p) ** exponent - 1.0
    return 1.0 - (2.0 - 2.0*p) ** exponent


def _triweight_cdf(t):
    "Degree-7 polynomial that _triweight_invcdf inverts."
    return 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2


def _triweight_pdf(t):
    "Derivative of _triweight_cdf, used as the Newton-Raphson slope."
    return 35/32 * (1.0 - t * t) ** 3


# Inverse CDF of the triweight kernel, solved numerically from the guess.
_triweight_invcdf = _newton_raphson(
    f_inv_est=_triweight_invcdf_estimate,
    f=_triweight_cdf,
    f_prime=_triweight_pdf,
)

# Inverse CDFs of the supported kernels, keyed by kernel name.  Each callable
# takes a probability p and returns the corresponding kernel offset, which
# kde_random() multiplies by the bandwidth h.
_kernel_invcdfs = {
    'normal': NormalDist().inv_cdf,
    # Fixed spelling: this key used to be 'logisitic', so the kernel name
    # 'logistic' was rejected as unknown.
    'logistic': lambda p: log(p / (1 - p)),
    'sigmoid': lambda p: log(tan(p * pi/2)),
    'rectangular': lambda p: 2*p - 1,
    'parabolic': _parabolic_invcdf,
    'quartic': _quartic_invcdf,
    'triweight': _triweight_invcdf,
    'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
    'cosine': lambda p: 2*asin(2*p - 1)/pi,
}
# Alternate names accepted for the same kernels.
_kernel_invcdfs['gauss'] = _kernel_invcdfs['normal']
_kernel_invcdfs['uniform'] = _kernel_invcdfs['rectangular']
_kernel_invcdfs['epanechnikov'] = _kernel_invcdfs['parabolic']
_kernel_invcdfs['biweight'] = _kernel_invcdfs['quartic']
# Misspelled key from the original table, retained only so that callers
# already passing it keep working.
_kernel_invcdfs['logisitic'] = _kernel_invcdfs['logistic']

def kde_random(data, h, kernel='normal', *, seed=None):
    """Return a function that makes a random selection from the estimated
    probability density function created by: kde(data, h, kernel)

    *data* is a non-empty sequence of ints or floats (only the first element
    is type-checked), *h* is the bandwidth and must be positive, and *kernel*
    is one of the names in the inverse-CDF table.

    For reproducible results, set *seed* to an integer, float, str, or bytes.
    With a seed, a private generator is created for the returned function;
    without one, the module-level random() and choice() functions are used.
    Not thread-safe without a lock around calls.

    The returned function takes no arguments.  Each call picks one data
    point and adds h times a random draw from the kernel's inverse CDF.

    A StatisticsError will be raised if the data sequence is empty, if *h*
    is not positive, or if the kernel name is unknown.  A TypeError will be
    raised if the first data element is not an int or float.

    """
    n = len(data)
    if not n:
        raise StatisticsError('Empty data sequence')

    if not isinstance(data[0], (int, float)):
        raise TypeError('Data sequence must contain ints or floats')

    if h <= 0.0:
        raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}')

    if seed is None:
        random = _random.random
        choice = _random.choice
    else:
        prng = _random.Random(seed)
        random = prng.random
        choice = prng.choice

    try:
        kernel_invcdf = _kernel_invcdfs[kernel]
    except KeyError:
        # The KeyError is an implementation detail of the table lookup;
        # suppress it so the traceback shows only the meaningful error.
        raise StatisticsError(f'Unknown kernel name: {kernel!r}') from None

    def rand():
        return choice(data) + h * kernel_invcdf(random())

    rand.__doc__ = f'Random KDE selection with {h=!r} and {kernel=!r}'

    return rand