added two examples from Josh Hemann

jdh2358 · jdh2358 · commit 452dcc2c1b03 · 2009-08-03T17:19:42.000Z
svn path=/branches/v0_99_maint/; revision=7335
diff --git a/examples/pylab_examples/barchart_demo2.py b/examples/pylab_examples/barchart_demo2.py
@@ -0,0 +1,107 @@
+"""
+Thanks Josh Hemann for the example
+
+This example comes from an application in which grade school gym
+teachers wanted to be able to show parents how their child did across
+a handful of fitness tests, and importantly, relative to how other
+children did. To extract the plotting code for demo purposes, we'll
+just make up some data for little Johnny Doe...
+
+"""
+import numpy as np
+import matplotlib.pyplot as plt
+import pylab
+from matplotlib.patches import Polygon
+from matplotlib.ticker import MaxNLocator
+
+
+
+student = 'Johnny Doe'  # Made-up student whose results are charted
+grade = 2  # Also used to index `suffixes` for the X-axis label
+gender = 'boy'  # Title-cased and pluralised in the X-axis label
+cohortSize = 62  # The number of other 2nd grade boys
+
+numTests = 5  # Number of fitness tests, i.e. number of bars
+testNames = ['Pacer Test', 'Flexed Arm\n Hang', 'Mile Run', 'Agility',
+            'Push Ups']
+testMeta = ['laps', 'sec', 'min:sec', 'sec', '']  # Unit per test; '' = none
+scores = ['7', '48', '12:52', '17', '14']  # Raw results, kept as strings
+rankings = np.round(np.random.uniform(0, 1, numTests)*100, 0)  # Random whole percentiles, 0-100
+
+fig = plt.figure(figsize=(9,7))
+ax1 = fig.add_subplot(111)
+plt.subplots_adjust(left=0.115, right=0.88)
+fig.canvas.set_window_title('Eldorado K-8 Fitness Chart')
+pos = np.arange(numTests)+0.5    # Center bars on the Y-axis ticks
+rects = ax1.barh(pos, rankings, align='center', height=0.5, color='m')
+
+ax1.axis([0, 100, 0, numTests])  # X: percentiles; Y: one unit per test
+pylab.yticks(pos, testNames)
+ax1.set_title(student)  # Use the variable so the title follows the data
+plt.text(50, -0.5, 'Cohort Size: ' + str(cohortSize),
+        horizontalalignment='center', size='small')
+
+# Set the right-hand Y-axis ticks and labels and set X-axis tick marks at the
+# deciles
+ax2 = ax1.twinx()
+# Nearly invisible line spanning the same Y range as ax1; presumably here so
+# that the twin axis ends up with the same Y limits as the bars
+ax2.plot([100, 100], [0, numTests], 'white', alpha=0.1)
+ax2.xaxis.set_major_locator(MaxNLocator(11))
+xticks = pylab.setp(ax2, xticklabels=['0','10','20','30','40','50','60',
+                                      '70','80','90','100'])
+ax2.xaxis.grid(True, linestyle='--', which='major', color='grey', alpha=0.25)
+# Plot a solid vertical gridline to highlight the median position
+plt.plot([50, 50], [0, numTests], 'grey', alpha=0.25)
+
+# Build up the score labels for the right Y-axis by first appending a newline
+# to each score and then tacking on the appropriate meta information
+# (e.g., 'laps' vs 'sec'). We want the labels centered on the ticks, so if
+# there is no meta info (like for push ups) then don't add the newline to
+# the string
+
+def withnew(i, scr):
+    """Return scr plus a newline if test i has meta info, else scr unchanged."""
+    return scr if testMeta[i] == '' else '%s\n' % scr
+scoreLabels = [withnew(i, scr) for i,scr in enumerate(scores)]
+scoreLabels = [i+j for i,j in zip(scoreLabels, testMeta)]  # score text + meta text
+pylab.yticks(pos, scoreLabels)  # yticks acts on the current axes (presumably ax2 here)
+ax2.set_ylabel('Test Scores')
+# Ordinal suffix for each final digit; the list index is the digit itself
+#           0     1     2     3     4     5     6     7     8     9
+suffixes =['th', 'st', 'nd', 'rd', 'th', 'th', 'th', 'th', 'th', 'th']
+ax2.set_xlabel('Percentile Ranking Across ' + str(grade) + suffixes[grade] \
+              + ' Grade ' + gender.title() + 's')
+
+# Lastly, write in the ranking inside each bar to aid in interpretation
+for rect in rects:
+   # The bar widths are whole numbers stored as floats, so cast to int
+   # to keep a trailing '.0' out of the label text
+   width = int(rect.get_width())
+
+   # The ordinal suffix (1st, 2nd, 3rd, ...) is looked up by the last
+   # digit of the ranking, i.e. width modulo 10 ...
+   lastDigit = width % 10
+   # ... except for 11, 12 and 13, which always take 'th'
+   if width in (11, 12, 13):
+       suffix = 'th'
+   else:
+       suffix = suffixes[lastDigit]
+   rankStr = str(width) + suffix
+
+   if width >= 5:
+       # Wide enough to hold the text: right-align it just inside the
+       # right edge of the bar, white on magenta
+       xloc, clr, align = 0.98*width, 'white', 'right'
+   else:
+       # Too narrow to hold the text: left-align it just past the right
+       # edge of the bar, black against the white background
+       xloc, clr, align = width + 1, 'black', 'left'
+
+   # Center the text vertically in the bar
+   yloc = rect.get_y() + rect.get_height()/2.0
+   ax1.text(xloc, yloc, rankStr, horizontalalignment=align,
+            verticalalignment='center', color=clr, weight='bold')
+
+plt.show()
+
diff --git a/examples/pylab_examples/boxplot_demo2.py b/examples/pylab_examples/boxplot_demo2.py
@@ -0,0 +1,121 @@
+"""
+Thanks Josh Hemann for the example
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+from matplotlib.patches import Polygon
+
+
+# Generate some data from five different probability distributions,
+# each with different characteristics. We want to play with how an IID
+# bootstrap resample of the data preserves the distributional
+# properties of the original sample, and a boxplot is one visual tool
+# to make this assessment
+numDists = 5
+randomDists = ['Normal(1,1)',' Lognormal(1,1)', 'Exp(1)', 'Gumbel(6,4)',
+              'Triangular(2,9,11)']
+N = 500  # Sample size drawn from each distribution
+norm = np.random.normal(1,1, N)
+logn = np.random.lognormal(1,1, N)
+expo = np.random.exponential(1, N)
+gumb = np.random.gumbel(6, 4, N)
+tria = np.random.triangular(2, 9, 11, N)
+
+# Generate some random indices that we'll use to resample the original data
+# arrays. For code brevity, just use the same random indices for each array
+bootstrapIndices = np.random.randint(0, N, N)  # N draws from 0..N-1 (upper bound exclusive)
+normBoot = norm[bootstrapIndices]
+expoBoot = expo[bootstrapIndices]
+gumbBoot = gumb[bootstrapIndices]
+lognBoot = logn[bootstrapIndices]
+triaBoot = tria[bootstrapIndices]
+
+# Each original sample is immediately followed by its bootstrap resample
+data = [norm, normBoot,  logn, lognBoot, expo, expoBoot, gumb, gumbBoot,
+       tria, triaBoot]
+
+fig = plt.figure(figsize=(10,6))
+fig.canvas.set_window_title('A Boxplot Example')
+ax1 = fig.add_subplot(111)
+plt.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)
+
+bp = plt.boxplot(data, notch=0, sym='+', vert=1, whis=1.5)  # bp maps artist kind -> artists
+plt.setp(bp['boxes'], color='black')
+plt.setp(bp['whiskers'], color='black')
+plt.setp(bp['fliers'], color='red', marker='+')  # Fliers drawn as red plus signs
+
+# Add a horizontal grid to the plot, but make it very light in color
+# so we can use it for reading data values without it being distracting
+ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
+              alpha=0.5)
+
+# Hide the grid behind the plot objects
+ax1.set_axisbelow(True)
+ax1.set_title('Comparison of IID Bootstrap Resampling Across Five Distributions')
+ax1.set_xlabel('Distribution')
+ax1.set_ylabel('Value')
+
+# Now fill the boxes with desired colors
+boxColors = ['darkkhaki','royalblue']
+numBoxes = numDists*2  # One original and one bootstrap box per distribution
+medians = list(range(numBoxes))  # Real list: a Python 3 range rejects item assignment
+for i in range(numBoxes):
+  box = bp['boxes'][i]
+  boxX = []
+  boxY = []
+  for j in range(5):  # First five points of the box outline
+      boxX.append(box.get_xdata()[j])
+      boxY.append(box.get_ydata()[j])
+  boxCoords = list(zip(boxX,boxY))  # Materialise: Python 3 zip is a one-shot iterator
+  # Alternate between Dark Khaki and Royal Blue
+  k = i % 2
+  boxPolygon = Polygon(boxCoords, facecolor=boxColors[k])
+  ax1.add_patch(boxPolygon)
+  # Now draw the median lines back over what we just filled in
+  med = bp['medians'][i]
+  medianX = []
+  medianY = []
+  for j in range(2):  # The two end points of the median line
+      medianX.append(med.get_xdata()[j])
+      medianY.append(med.get_ydata()[j])
+  plt.plot(medianX, medianY, 'k')  # Draw once, after both end points are collected
+  medians[i] = medianY[0]  # Keep the median value for the upper labels
+  # Finally, overplot the sample averages, with horizontal alignment
+  # in the center of each box
+  plt.plot([np.average(med.get_xdata())], [np.average(data[i])],
+           color='w', marker='*', markeredgecolor='k')
+
+# Set the axes ranges and axes labels
+ax1.set_xlim(0.5, numBoxes+0.5)  # Half a unit of padding beyond 1 and numBoxes
+top = 40
+bottom = -5
+ax1.set_ylim(bottom, top)
+xtickNames = plt.setp(ax1, xticklabels=np.repeat(randomDists, 2))  # Each name labels two ticks
+plt.setp(xtickNames, rotation=45, fontsize=8)
+
+# Because the samples differ so much in scale on the shared Y-axis, it
+# can be hard to compare their medians. Add upper X-axis tick labels
+# with the sample medians to aid in comparison
+# (just use two decimal places of precision)
+pos = np.arange(numBoxes)+1  # X positions 1..numBoxes for the labels
+upperLabels = [str(np.round(s, 2)) for s in medians]
+weights = ['bold', 'semibold']
+for tick,label in zip(range(numBoxes),ax1.get_xticklabels()):  # label itself is unused
+   k = tick % 2  # Same alternation as the box fill colors
+   ax1.text(pos[tick], top-(top*0.05), upperLabels[tick],
+        horizontalalignment='center', size='x-small', weight=weights[k],
+        color=boxColors[k])
+
+# Finally, add a basic legend made of figtext entries on colored backgrounds
+plt.figtext(0.80, 0.08,  str(N) + ' Random Numbers' ,
+           backgroundcolor=boxColors[0], color='black', weight='roman',
+           size='x-small')
+plt.figtext(0.80, 0.045, 'IID Bootstrap Resample',
+backgroundcolor=boxColors[1],
+           color='white', weight='roman', size='x-small')
+plt.figtext(0.80, 0.015, '*', color='white', backgroundcolor='silver',
+           weight='roman', size='medium')  # Stands in for the white star average marker
+plt.figtext(0.815, 0.013, ' Average Value', color='black', weight='roman',
+           size='x-small')
+
+plt.show()