Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 452dcc2

Browse files
committed
added two examples from Josh Hemann
svn path=/branches/v0_99_maint/; revision=7335
1 parent a447613 commit 452dcc2

File tree

2 files changed

+228
-0
lines changed

2 files changed

+228
-0
lines changed
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
"""
2+
Thanks Josh Hemann for the example
3+
4+
This example comes from an application in which grade school gym
5+
teachers wanted to be able to show parents how their child did across
6+
a handful of fitness tests, and importantly, relative to how other
7+
children did. To extract the plotting code for demo purposes, we'll
8+
just make up some data for little Johnny Doe...
9+
10+
"""
11+
import numpy as np
12+
import matplotlib.pyplot as plt
13+
import pylab
14+
from matplotlib.patches import Polygon
15+
from matplotlib.ticker import MaxNLocator
16+
17+
18+
19+
# Made-up record for one student; every value below is demo data.
student = 'Johnny Doe'
grade = 2
gender = 'boy'
cohortSize = 62  # The number of other 2nd grade boys

# One entry per fitness test. The three lists are parallel: display name,
# unit printed under the raw score ('' when there is none), and raw score.
testNames = ['Pacer Test', 'Flexed Arm\n Hang', 'Mile Run', 'Agility',
             'Push Ups']
testMeta = ['laps', 'sec', 'min:sec', 'sec', '']
scores = ['7', '48', '12:52', '17', '14']
# Derived from the list so the count cannot drift from the data.
numTests = len(testNames)

# Random percentile rankings: whole numbers in [0, 100], stored as floats.
rankings = np.round(np.random.uniform(0, 1, numTests) * 100, 0)
30+
31+
# Create the figure with a single axes and explicit left/right margins.
fig = plt.figure(figsize=(9, 7))
ax1 = fig.add_subplot(111)
plt.subplots_adjust(left=0.115, right=0.88)
# Set the window title through the figure manager: the canvas-level
# set_window_title() was deprecated in Matplotlib 3.4 and later removed.
fig.canvas.manager.set_window_title('Eldorado K-8 Fitness Chart')

# One horizontal bar per test, centered on the Y-axis ticks
pos = np.arange(numTests) + 0.5
rects = ax1.barh(pos, rankings, align='center', height=0.5, color='m')

# X covers the percentile range 0-100, Y covers 0-5
ax1.axis([0, 100, 0, 5])
pylab.yticks(pos, testNames)
# Use the student variable so the title follows the data record
ax1.set_title(student)
plt.text(50, -0.5, 'Cohort Size: ' + str(cohortSize),
         horizontalalignment='center', size='small')
43+
44+
# Set the right-hand Y-axis ticks and labels and set X-axis tick marks at the
# deciles
ax2 = ax1.twinx()
# NOTE(review): this nearly transparent line presumably exists only to give
# the twin axes the same 0-5 Y extent as the bar axes -- confirm.
ax2.plot([100, 100], [0, 5], 'white', alpha=0.1)
ax2.xaxis.set_major_locator(MaxNLocator(11))
# Labels '0', '10', ..., '100' -- one per decile
xticks = pylab.setp(ax2,
                    xticklabels=[str(decile) for decile in range(0, 101, 10)])
ax2.xaxis.grid(True, linestyle='--', which='major', color='grey',
               alpha=0.25)
# Plot a solid vertical gridline to highlight the median position
plt.plot([50, 50], [0, 5], 'grey', alpha=0.25)
56+
57+
# Build up the score labels for the right Y-axis by first appending a carriage
58+
# return to each string and then tacking on the appropriate meta information
59+
# (i.e., 'laps' vs 'seconds'). We want the labels centered on the ticks, so if
60+
# there is no meta info (like for pushups) then don't add the carriage return to
61+
# the string
62+
63+
def withnew(i, scr, meta=None):
    """Return the score text for test *i*, ready to have its unit appended.

    When the test has a unit, a newline is appended so the unit ends up on
    its own line below the score. When the unit is the empty string, the
    score is returned untouched so the label stays a single line.

    Parameters
    ----------
    i : int
        Index of the test in the unit list.
    scr : str
        The raw score text.
    meta : sequence of str, optional
        Unit text per test. Defaults to the module-level ``testMeta``.

    Returns
    -------
    ``'<scr>\\n'`` if ``meta[i]`` is non-empty, otherwise *scr* unchanged.
    """
    if meta is None:
        meta = testMeta
    if meta[i] != '':
        return '%s\n' % scr
    return scr
66+
# Right-hand tick labels: the raw score followed by its unit; withnew() adds
# the line break between the two when a unit exists
scoreLabels = [withnew(i, scr) + unit
               for i, (scr, unit) in enumerate(zip(scores, testMeta))]
pylab.yticks(pos, scoreLabels)
ax2.set_ylabel('Test Scores')

# Make list of numerical suffixes corresponding to position in a list
#            0     1     2     3     4     5     6     7     8     9
suffixes = ['th', 'st', 'nd', 'rd', 'th', 'th', 'th', 'th', 'th', 'th']
# Index by the last digit so grades of 10 and above do not run off the end
# of the list; 11, 12 and 13 always take 'th'
if 11 <= grade % 100 <= 13:
    gradeSuffix = 'th'
else:
    gradeSuffix = suffixes[grade % 10]
ax2.set_xlabel('Percentile Ranking Across ' + str(grade) + gradeSuffix
               + ' Grade ' + gender.title() + 's')
75+
76+
# Lastly, write in the ranking inside each bar to aid in interpretation
for rect in rects:
    # Rectangle widths are already integer-valued but are floating
    # type, so it helps to remove the trailing decimal point and 0 by
    # converting width to int type
    width = int(rect.get_width())

    # Figure out the last digit (width modulo 10) so we can add the
    # appropriate numerical suffix (e.g. 1st, 2nd, 3rd, etc)
    lastDigit = width % 10
    # Note that 11, 12, and 13 are special cases
    if width in (11, 12, 13):
        suffix = 'th'
    else:
        suffix = suffixes[lastDigit]

    rankStr = str(width) + suffix
    if width < 5:
        # The bar isn't wide enough to print the ranking inside it, so put
        # the text just right of the bar's right edge, black on white
        xloc = width + 1
        clr = 'black'
        align = 'left'
    else:
        # Put the text just left of the bar's right edge, white on magenta
        xloc = 0.98 * width
        clr = 'white'
        align = 'right'

    # Center the text vertically in the bar
    yloc = rect.get_y() + rect.get_height() / 2.0
    ax1.text(xloc, yloc, rankStr, horizontalalignment=align,
             verticalalignment='center', color=clr, weight='bold')

plt.show()
107+
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
"""
2+
Thanks Josh Hemann for the example
3+
"""
4+
5+
import numpy as np
6+
import matplotlib.pyplot as plt
7+
from matplotlib.patches import Polygon
8+
9+
10+
# Generate some data from five different probability distributions,
# each with different characteristics. We want to play with how an IID
# bootstrap resample of the data preserves the distributional
# properties of the original sample, and a boxplot is one visual tool
# to make this assessment
randomDists = ['Normal(1,1)', ' Lognormal(1,1)', 'Exp(1)', 'Gumbel(6,4)',
               'Triangular(2,9,11)']
# Derived from the list so the count cannot drift from the labels.
numDists = len(randomDists)
N = 500
norm = np.random.normal(1, 1, N)
logn = np.random.lognormal(1, 1, N)
expo = np.random.exponential(1, N)
gumb = np.random.gumbel(6, 4, N)
tria = np.random.triangular(2, 9, 11, N)

# Generate some random indices that we'll use to resample the original data
# arrays. For code brevity, just use the same random indices for each array.
# randint's upper bound is exclusive, so this draws from 0..N-1 exactly as
# the deprecated random_integers(0, N-1, N) did.
bootstrapIndices = np.random.randint(0, N, N)
normBoot = norm[bootstrapIndices]
expoBoot = expo[bootstrapIndices]
gumbBoot = gumb[bootstrapIndices]
lognBoot = logn[bootstrapIndices]
triaBoot = tria[bootstrapIndices]

# Each original sample is immediately followed by its bootstrap resample
data = [norm, normBoot, logn, lognBoot, expo, expoBoot, gumb, gumbBoot,
        tria, triaBoot]
36+
37+
fig = plt.figure(figsize=(10, 6))
# Set the window title through the figure manager: the canvas-level
# set_window_title() was deprecated in Matplotlib 3.4 and later removed.
fig.canvas.manager.set_window_title('A Boxplot Example')
ax1 = fig.add_subplot(111)
plt.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)

# Vertical, un-notched boxes; outliers drawn as red '+' markers
bp = plt.boxplot(data, notch=0, sym='+', vert=1, whis=1.5)
plt.setp(bp['boxes'], color='black')
plt.setp(bp['whiskers'], color='black')
plt.setp(bp['fliers'], color='red', marker='+')

# Add a horizontal grid to the plot, but make it very light in color
# so we can use it for reading data values but not be distracting
ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
               alpha=0.5)

# Hide the grid lines behind the plot objects
ax1.set_axisbelow(True)
ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions')
ax1.set_xlabel('Distribution')
ax1.set_ylabel('Value')
57+
58+
# Now fill the boxes with desired colors
boxColors = ['darkkhaki', 'royalblue']
numBoxes = numDists * 2
# Median value of each box, in box order; read again further down when the
# labels above the boxes are written. Built as a real list because a range
# object does not support item assignment on Python 3.
medians = []
for i in range(numBoxes):
    box = bp['boxes'][i]
    # Take the first five vertices of the box outline
    boxX = [box.get_xdata()[j] for j in range(5)]
    boxY = [box.get_ydata()[j] for j in range(5)]
    # Materialize the pairs: on Python 3 zip() alone is a one-shot iterator,
    # which Polygon cannot turn into an (N, 2) vertex array
    boxCoords = list(zip(boxX, boxY))
    # Alternate between Dark Khaki and Royal Blue
    k = i % 2
    boxPolygon = Polygon(boxCoords, facecolor=boxColors[k])
    ax1.add_patch(boxPolygon)
    # Now draw the median lines back over what we just filled in
    med = bp['medians'][i]
    medianX = [med.get_xdata()[j] for j in range(2)]
    medianY = [med.get_ydata()[j] for j in range(2)]
    plt.plot(medianX, medianY, 'k')
    medians.append(medianY[0])
    # Finally, overplot the sample averages, with horizontal alignment
    # in the center of each box
    plt.plot([np.average(med.get_xdata())], [np.average(data[i])],
             color='w', marker='*', markeredgecolor='k')
87+
88+
# Set the axes ranges and axes labels
ax1.set_xlim(0.5, numBoxes + 0.5)
top = 40
bottom = -5
ax1.set_ylim(bottom, top)
# Each distribution name appears twice: once for the original sample and
# once for its bootstrap resample
xtickNames = plt.setp(ax1, xticklabels=np.repeat(randomDists, 2))
plt.setp(xtickNames, rotation=45, fontsize=8)

# Due to the Y-axis scale being different across samples, it can be
# hard to compare differences in medians across the samples. Add upper
# X-axis tick labels with the sample medians to aid in comparison
# (just use two decimal places of precision)
pos = np.arange(numBoxes) + 1
upperLabels = [str(np.round(s, 2)) for s in medians]
weights = ['bold', 'semibold']
for tick in range(numBoxes):
    # Same alternation as the box fill colors
    k = tick % 2
    # Write the median just below the top edge of the axes
    ax1.text(pos[tick], top - (top * 0.05), upperLabels[tick],
             horizontalalignment='center', size='x-small', weight=weights[k],
             color=boxColors[k])
108+
109+
# Finally, add a basic legend, built from figure-level text whose background
# colors match the two box fill colors
plt.figtext(0.80, 0.08, str(N) + ' Random Numbers',
            backgroundcolor=boxColors[0], color='black', weight='roman',
            size='x-small')
plt.figtext(0.80, 0.045, 'IID Bootstrap Resample',
            backgroundcolor=boxColors[1],
            color='white', weight='roman', size='x-small')
# Legend entry for the star marker used for the sample averages
plt.figtext(0.80, 0.015, '*', color='white', backgroundcolor='silver',
            weight='roman', size='medium')
plt.figtext(0.815, 0.013, ' Average Value', color='black', weight='roman',
            size='x-small')

plt.show()

0 commit comments

Comments
 (0)