-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathalgo.py
More file actions
141 lines (109 loc) · 3.38 KB
/
algo.py
File metadata and controls
141 lines (109 loc) · 3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import numpy
def _make_boot_index(elements, niter):
    """Build the random row indices for a set of bootstrap resamples.

    Parameters
    ----------
    elements : int
        Number of rows in the dataset being resampled.
    niter : int
        Number of bootstrap resamples to draw.

    Returns
    -------
    index : numpy array
        Integer array of shape ``(niter, elements)``. Each row holds
        ``elements`` indices drawn uniformly from ``[0, elements)`` and
        can be used to pull one resample out of the dataset.

    """
    # one row per bootstrap iteration, one column per resampled observation
    shape = (niter, elements)
    return numpy.random.randint(0, elements, size=shape)
def _fit_simple(x, y, xhat, fitlogs=None):
    """
    First-order least-squares fit of x and y data via ``numpy.polyfit``.

    Parameters
    ----------
    x, y : array-like
        Data handed directly to ``numpy.polyfit``; no transformation is
        applied to them here.
    xhat : array-like
        Values of the independent variable at which the fitted line is
        evaluated.
    fitlogs : str, optional.
        Defines which data should be treated as log-transformed when
        the fitted line is evaluated at ``xhat``. Valid values are
        'x', 'y', or 'both'.

    Returns
    -------
    yhat : numpy array
        Estimates of the dependent variable at ``xhat``.
    results : dict
        Fit coefficients stored under the keys "slope" and "intercept".

    See also
    --------
    numpy.polyfit

    """
    # degree-1 polyfit returns the coefficients highest power first
    slope, intercept = numpy.polyfit(x, y, 1)

    # work out which axes are in log-space before evaluating the line
    use_xlog = fitlogs in ("x", "both")
    use_ylog = fitlogs in ("y", "both")
    yhat = _estimate_from_fit(xhat, slope, intercept, xlog=use_xlog, ylog=use_ylog)

    results = {"slope": slope, "intercept": intercept}
    return yhat, results
def _bs_fit(x, y, xhat, fitlogs=None, niter=10000, alpha=0.05):
    """
    Percentile method bootstrapping of linear fit of x and y data using
    ``numpy.polyfit``.

    Parameters
    ----------
    x, y : array-like
        Data to resample and fit. Both are converted to numpy arrays so
        that lists, tuples and pandas objects are resampled by position.
    xhat : array-like
        Values of the independent variable at which every bootstrapped
        fit is evaluated.
    fitlogs : str, optional.
        Defines which data should be log-transformed. Valid values are
        'x', 'y', or 'both'. Forwarded unchanged to ``_fit_simple``.
    niter : int, optional (default is 10000)
        Number of bootstrap iterations to use
    alpha : float, optional (default is 0.05)
        Significance level of the interval. The returned bounds are the
        ``100 * alpha / 2`` and ``100 * (1 - alpha / 2)`` percentiles of
        the bootstrapped estimates.

    Returns
    -------
    yhat_lo, yhat_hi : numpy array
        Lower and upper percentile bounds of the estimates at ``xhat``.

    See also
    --------
    numpy.polyfit
    numpy.percentile

    """
    # Resampling uses integer-array indexing, which plain sequences do not
    # support and which pandas objects may resolve by label, not position.
    x = numpy.asarray(x)
    y = numpy.asarray(y)

    index = _make_boot_index(len(x), niter)

    # one row of estimates per bootstrap resample
    yhat_array = numpy.array(
        [_fit_simple(x[ii], y[ii], xhat, fitlogs=fitlogs)[0] for ii in index]
    )

    # numpy.percentile expects percentages in the range [0, 100]
    percentiles = 100 * numpy.array([alpha * 0.5, 1 - alpha * 0.5])
    yhat_lo, yhat_hi = numpy.percentile(yhat_array, percentiles, axis=0)
    return yhat_lo, yhat_hi
def _estimate_from_fit(xhat, slope, intercept, xlog=False, ylog=False):
    """Estimate the dependent variables of a linear fit given x-data
    and linear parameters.

    Parameters
    ----------
    xhat : numpy array or pandas Series/DataFrame
        The input independent variable of the fit
    slope : float
        Slope of the best-fit line
    intercept : float
        y-intercept of the best-fit line
    xlog, ylog : bool (default = False)
        Toggles whether or not the logs of the x- or y- data should be
        used to perform the regression.

    Returns
    -------
    yhat : numpy array
        Estimate of the dependent variable, in untransformed units:

        - neither flag: ``slope * xhat + intercept``
        - ``xlog`` only: ``slope * log(xhat) + intercept``
        - ``ylog`` only: ``exp(intercept + slope * xhat)``
        - both: ``exp(intercept) * xhat ** slope``

    """
    xhat = numpy.asarray(xhat)

    if ylog:
        if xlog:
            # log(y) = slope * log(x) + intercept
            yhat = numpy.exp(intercept) * xhat**slope
        else:
            # log(y) = slope * x + intercept. Exponentiate the whole linear
            # predictor at once: exp(slope) on its own overflows/underflows
            # for large |slope| even when slope * xhat is moderate.
            yhat = numpy.exp(intercept + slope * xhat)
    else:
        if xlog:
            yhat = slope * numpy.log(xhat) + intercept
        else:
            yhat = slope * xhat + intercept

    return yhat