|
82 | 82 | # properties of the original sample, and a boxplot is one visual tool |
83 | 83 | # to make this assessment |
84 | 84 |
|
85 | | -numDists = 5 |
86 | | -randomDists = ['Normal(1,1)', ' Lognormal(1,1)', 'Exp(1)', 'Gumbel(6,4)', |
87 | | - 'Triangular(2,9,11)'] |
| 85 | +random_dists = ['Normal(1,1)', ' Lognormal(1,1)', 'Exp(1)', 'Gumbel(6,4)', |
| 86 | + 'Triangular(2,9,11)'] |
88 | 87 | N = 500 |
89 | 88 |
|
90 | 89 | norm = np.random.normal(1, 1, N) |
|
95 | 94 |
|
96 | 95 | # Generate some random indices that we'll use to resample the original data |
97 | 96 | # arrays. For code brevity, just use the same random indices for each array |
98 | | -bootstrapIndices = np.random.random_integers(0, N - 1, N) |
99 | | -normBoot = norm[bootstrapIndices] |
100 | | -expoBoot = expo[bootstrapIndices] |
101 | | -gumbBoot = gumb[bootstrapIndices] |
102 | | -lognBoot = logn[bootstrapIndices] |
103 | | -triaBoot = tria[bootstrapIndices] |
104 | | - |
105 | | -data = [norm, normBoot, logn, lognBoot, expo, expoBoot, gumb, gumbBoot, |
106 | | - tria, triaBoot] |
| 97 | +bootstrap_indices = np.random.randint(0, N, N) |
| 98 | +data = [ |
| 99 | + norm, norm[bootstrap_indices], |
| 100 | + logn, logn[bootstrap_indices], |
| 101 | + expo, expo[bootstrap_indices], |
| 102 | + gumb, gumb[bootstrap_indices], |
| 103 | + tria, tria[bootstrap_indices], |
| 104 | +] |
107 | 105 |
|
108 | 106 | fig, ax1 = plt.subplots(figsize=(10, 6)) |
109 | 107 | fig.canvas.set_window_title('A Boxplot Example') |
|
126 | 124 | ax1.set_ylabel('Value') |
127 | 125 |
|
128 | 126 | # Now fill the boxes with desired colors |
129 | | -boxColors = ['darkkhaki', 'royalblue'] |
130 | | -numBoxes = numDists*2 |
131 | | -medians = list(range(numBoxes)) |
132 | | -for i in range(numBoxes): |
| 127 | +box_colors = ['darkkhaki', 'royalblue'] |
| 128 | +num_boxes = len(data) |
| 129 | +medians = np.empty(num_boxes) |
| 130 | +for i in range(num_boxes): |
133 | 131 | box = bp['boxes'][i] |
134 | 132 | boxX = [] |
135 | 133 | boxY = [] |
136 | 134 | for j in range(5): |
137 | 135 | boxX.append(box.get_xdata()[j]) |
138 | 136 | boxY.append(box.get_ydata()[j]) |
139 | | - boxCoords = np.column_stack([boxX, boxY]) |
| 137 | + box_coords = np.column_stack([boxX, boxY]) |
140 | 138 | # Alternate between Dark Khaki and Royal Blue |
141 | | - k = i % 2 |
142 | | - boxPolygon = Polygon(boxCoords, facecolor=boxColors[k]) |
143 | | - ax1.add_patch(boxPolygon) |
| 139 | + ax1.add_patch(Polygon(box_coords, facecolor=box_colors[i % 2])) |
144 | 140 | # Now draw the median lines back over what we just filled in |
145 | 141 | med = bp['medians'][i] |
146 | 142 | medianX = [] |
|
149 | 145 | medianX.append(med.get_xdata()[j]) |
150 | 146 | medianY.append(med.get_ydata()[j]) |
151 | 147 | ax1.plot(medianX, medianY, 'k') |
152 | | - medians[i] = medianY[0] |
| 148 | + medians[i] = medianY[0] |
153 | 149 | # Finally, overplot the sample averages, with horizontal alignment |
154 | 150 | # in the center of each box |
155 | | - ax1.plot([np.average(med.get_xdata())], [np.average(data[i])], |
| 151 | + ax1.plot(np.average(med.get_xdata()), np.average(data[i]), |
156 | 152 | color='w', marker='*', markeredgecolor='k') |
157 | 153 |
|
158 | 154 | # Set the axes ranges and axes labels |
159 | | -ax1.set_xlim(0.5, numBoxes + 0.5) |
| 155 | +ax1.set_xlim(0.5, num_boxes + 0.5) |
160 | 156 | top = 40 |
161 | 157 | bottom = -5 |
162 | 158 | ax1.set_ylim(bottom, top) |
163 | | -ax1.set_xticklabels(np.repeat(randomDists, 2), |
| 159 | +ax1.set_xticklabels(np.repeat(random_dists, 2), |
164 | 160 | rotation=45, fontsize=8) |
165 | 161 |
|
166 | 162 | # Due to the Y-axis scale being different across samples, it can be |
167 | 163 | # hard to compare differences in medians across the samples. Add upper |
168 | 164 | # X-axis tick labels with the sample medians to aid in comparison |
169 | 165 | # (just use two decimal places of precision) |
170 | | -pos = np.arange(numBoxes) + 1 |
171 | | -upperLabels = [str(np.round(s, 2)) for s in medians] |
| 166 | +pos = np.arange(num_boxes) + 1 |
| 167 | +upper_labels = [str(np.round(s, 2)) for s in medians] |
172 | 168 | weights = ['bold', 'semibold'] |
173 | | -for tick, label in zip(range(numBoxes), ax1.get_xticklabels()): |
| 169 | +for tick, label in zip(range(num_boxes), ax1.get_xticklabels()): |
174 | 170 | k = tick % 2 |
175 | | - ax1.text(pos[tick], top - (top*0.05), upperLabels[tick], |
176 | | - horizontalalignment='center', size='x-small', weight=weights[k], |
177 | | - color=boxColors[k]) |
| 171 | + ax1.text(pos[tick], .95, upper_labels[tick], |
| 172 | + transform=ax1.get_xaxis_transform(), |
| 173 | + horizontalalignment='center', size='x-small', |
| 174 | + weight=weights[k], color=box_colors[k]) |
178 | 175 |
|
179 | 176 | # Finally, add a basic legend |
180 | | -fig.text(0.80, 0.08, str(N) + ' Random Numbers', |
181 | | - backgroundcolor=boxColors[0], color='black', weight='roman', |
| 177 | +fig.text(0.80, 0.08, f'{N} Random Numbers', |
| 178 | + backgroundcolor=box_colors[0], color='black', weight='roman', |
182 | 179 | size='x-small') |
183 | 180 | fig.text(0.80, 0.045, 'IID Bootstrap Resample', |
184 | | - backgroundcolor=boxColors[1], |
| 181 | + backgroundcolor=box_colors[1], |
185 | 182 | color='white', weight='roman', size='x-small') |
186 | 183 | fig.text(0.80, 0.015, '*', color='white', backgroundcolor='silver', |
187 | 184 | weight='roman', size='medium') |
@@ -213,10 +210,10 @@ def fakeBootStrapper(n): |
213 | 210 | return med, CI |
214 | 211 |
|
215 | 212 | inc = 0.1 |
216 | | -e1 = np.random.normal(0, 1, size=(500,)) |
217 | | -e2 = np.random.normal(0, 1, size=(500,)) |
218 | | -e3 = np.random.normal(0, 1 + inc, size=(500,)) |
219 | | -e4 = np.random.normal(0, 1 + 2*inc, size=(500,)) |
| 213 | +e1 = np.random.normal(0, 1, size=500) |
| 214 | +e2 = np.random.normal(0, 1, size=500) |
| 215 | +e3 = np.random.normal(0, 1 + inc, size=500) |
| 216 | +e4 = np.random.normal(0, 1 + 2*inc, size=500) |
220 | 217 |
|
221 | 218 | treatments = [e1, e2, e3, e4] |
222 | 219 | med1, CI1 = fakeBootStrapper(1) |
|
0 commit comments