Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 687286a

Browse files
committed
Merge pull request #2996 from solvents/master
Violin Plots
2 parents 4b1bd63 + 01c3176 commit 687286a

25 files changed

+974
-27
lines changed

CHANGELOG

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@
3030
interpolation = 'none' and interpolation = 'nearest' in
3131
`imshow()` when saving vector graphics files.
3232

33+
2014-04-22 Added violin plotting functions. See `Axes.violinplot`,
34+
`Axes.violin`, `cbook.violin_stats` and `mlab.GaussianKDE` for
35+
details.
36+
3337
2014-04-10 Fixed the triangular marker rendering error. The "Up" triangle was
3438
rendered instead of "Right" triangle and vice-versa.
3539

boilerplate.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ def boilerplate_gen():
146146
'tricontourf',
147147
'tripcolor',
148148
'triplot',
149+
'violinplot',
149150
'vlines',
150151
'xcorr',
151152
'barbs',

doc/users/whats_new.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,27 @@ Added the Axes method :meth:`~matplotlib.axes.Axes.add_image` to put image
172172
handling on a par with artists, collections, containers, lines, patches,
173173
and tables.
174174

175+
Violin Plots
176+
````````````
177+
Per Parker, Gregory Kelsie, Adam Ortiz, Kevin Chan, Geoffrey Lee, Deokjae
178+
Donald Seo, and Taesu Terry Lim added a basic implementation for violin
179+
plots. Violin plots can be used to represent the distribution of sample data.
180+
They are similar to box plots, but use a kernel density estimation function to
181+
present a smooth approximation of the data sample used. The added features are:
182+
183+
:func:`~matplotlib.axes.Axes.violin` - Renders a violin plot from a collection of
184+
statistics.
185+
:func:`~matplotlib.cbook.violin_stats` - Produces a collection of statistics
186+
suitable for rendering a violin plot.
187+
:func:`~matplotlib.pyplot.violinplot` - Creates a violin plot from a set of
188+
sample data. This method makes use of :func:`~matplotlib.cbook.violin_stats`
189+
to process the input data, and :func:`~matplotlib.axes.Axes.violin` to
190+
do the actual rendering. Users are also free to modify or replace the output of
191+
:func:`~matplotlib.cbook.violin_stats` in order to customize the violin plots
192+
to their liking.
193+
194+
This feature was implemented for a software engineering course at the
195+
University of Toronto, Scarborough, run in Winter 2014 by Anya Tafliovich.
175196

176197
More `markevery` options to show only a subset of markers
177198
`````````````````````````````````````````````````````````
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""
2+
Demo of the new violinplot functionality
3+
"""
4+
5+
import random
6+
import numpy as np
7+
import matplotlib.pyplot as plt
8+
9+
# fake data
10+
fs = 10 # fontsize
11+
pos = [1,2,4,5,7,8]
12+
data = [np.random.normal(size=100) for i in pos]
13+
14+
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6,6))
15+
16+
axes[0, 0].violinplot(data, pos, points=20, widths=0.1,
17+
showmeans=True, showextrema=True, showmedians=True)
18+
axes[0, 0].set_title('Custom violinplot 1', fontsize=fs)
19+
20+
axes[0, 1].violinplot(data, pos, points=40, widths=0.3,
21+
showmeans=True, showextrema=True, showmedians=True,
22+
bw_method='silverman')
23+
axes[0, 1].set_title('Custom violinplot 2', fontsize=fs)
24+
25+
axes[0, 2].violinplot(data, pos, points=60, widths=0.5, showmeans=True,
26+
showextrema=True, showmedians=True, bw_method=0.5)
27+
axes[0, 2].set_title('Custom violinplot 3', fontsize=fs)
28+
29+
axes[1, 0].violinplot(data, pos, points=80, vert=False, widths=0.7,
30+
showmeans=True, showextrema=True, showmedians=True)
31+
axes[1, 0].set_title('Custom violinplot 4', fontsize=fs)
32+
33+
axes[1, 1].violinplot(data, pos, points=100, vert=False, widths=0.9,
34+
showmeans=True, showextrema=True, showmedians=True,
35+
bw_method='silverman')
36+
axes[1, 1].set_title('Custom violinplot 5', fontsize=fs)
37+
38+
axes[1, 2].violinplot(data, pos, points=200, vert=False, widths=1.1,
39+
showmeans=True, showextrema=True, showmedians=True,
40+
bw_method=0.5)
41+
axes[1, 2].set_title('Custom violinplot 6', fontsize=fs)
42+
43+
for ax in axes.flatten():
44+
ax.set_yticklabels([])
45+
46+
fig.suptitle("Violin Plotting Examples")
47+
fig.subplots_adjust(hspace=0.4)
48+
plt.show()

lib/matplotlib/axes/_axes.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6725,6 +6725,248 @@ def matshow(self, Z, **kwargs):
67256725
integer=True))
67266726
return im
67276727

6728+
def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
6729+
showmeans=False, showextrema=True, showmedians=False,
6730+
points=100, bw_method=None):
6731+
"""
6732+
Make a violin plot.
6733+
6734+
Call signature::
6735+
6736+
violinplot(dataset, positions=None, vert=True, widths=0.5,
6737+
showmeans=False, showextrema=True, showmedians=False,
6738+
points=100, bw_method=None):
6739+
6740+
Make a violin plot for each column of *dataset* or each vector in
6741+
sequence *dataset*. Each filled area extends to represent the
6742+
entire data range, with optional lines at the mean, the median,
6743+
the minimum, and the maximum.
6744+
6745+
Parameters
6746+
----------
6747+
6748+
dataset : Array or a sequence of vectors.
6749+
The input data.
6750+
6751+
positions : array-like, default = [1, 2, ..., n]
6752+
Sets the positions of the violins. The ticks and limits are
6753+
automatically set to match the positions.
6754+
6755+
vert : bool, default = True.
6756+
If true, creates a vertical violin plot.
6757+
Otherwise, creates a horizontal violin plot.
6758+
6759+
widths : array-like, default = 0.5
6760+
Either a scalar or a vector that sets the maximal width of
6761+
each violin. The default is 0.5, which uses about half of the
6762+
available horizontal space.
6763+
6764+
showmeans : bool, default = False
6765+
If true, the means are rendered.
6766+
6767+
showextrema : bool, default = True
6768+
If true, the extrema are rendered.
6769+
6770+
showmedians : bool, default = False
6771+
If true, the medians are rendered.
6772+
6773+
points : scalar, default = 100
6774+
Defines the number of points to evaluate each of the gaussian
6775+
kernel density estimations at.
6776+
6777+
bw_method : str, scalar or callable, optional
6778+
The method used to calculate the estimator bandwidth. This can be
6779+
'scott', 'silverman', a scalar constant or a callable. If a
6780+
scalar, this will be used directly as `kde.factor`. If a
6781+
callable, it should take a `GaussianKDE` instance as its only
6782+
parameter and return a scalar. If None (default), 'scott' is used.
6783+
6784+
Returns
6785+
-------
6786+
6787+
A dictionary mapping each component of the violinplot to the
6788+
corresponding collection instance(s) created. The dictionary has
6789+
the following keys:
6790+
6791+
- bodies: A list of the
6792+
:class:`matplotlib.collections.PolyCollection` instances
6793+
containing the filled area of each violin.
6794+
- cmeans: A :class:`matplotlib.collections.LineCollection` instance
6795+
created to identify the mean value of each violin's
6796+
distribution.
6797+
- cmins: A :class:`matplotlib.collections.LineCollection` instance
6798+
created to identify the bottom of each violin's distribution.
6799+
- cmaxes: A :class:`matplotlib.collections.LineCollection` instance
6800+
created to identify the top of each violin's distribution.
6801+
- cbars: A :class:`matplotlib.collections.LineCollection` instance
6802+
created to identify the centers of each violin's distribution.
6803+
- cmedians: A :class:`matplotlib.collections.LineCollection`
6804+
instance created to identify the median value of each
6805+
violin's distribution.
6806+
6807+
"""
6808+
6809+
def _kde_method(X, coords):
6810+
kde = mlab.GaussianKDE(X, bw_method)
6811+
return kde.evaluate(coords)
6812+
6813+
vpstats = cbook.violin_stats(dataset, _kde_method, points=points)
6814+
return self.violin(vpstats, positions=positions, vert=vert,
6815+
widths=widths, showmeans=showmeans,
6816+
showextrema=showextrema, showmedians=showmedians)
6817+
6818+
def violin(self, vpstats, positions=None, vert=True, widths=0.5,
6819+
showmeans=False, showextrema=True, showmedians=False):
6820+
"""
6821+
Drawing function for violin plots.
6822+
6823+
Call signature::
6824+
6825+
violin(vpstats, positions=None, vert=True, widths=0.5,
6826+
showmeans=False, showextrema=True, showmedians=False):
6827+
6828+
Draw a violin plot for each entry of `vpstats`. Each filled area
6829+
extends to represent the entire data range, with optional lines at the
6830+
mean, the median, the minimum, and the maximum.
6831+
6832+
Parameters
6833+
----------
6834+
6835+
vpstats : list of dicts
6836+
A list of dictionaries containing stats for each violin plot.
6837+
Required keys are:
6838+
- coords: A list of scalars containing the coordinates that
6839+
the violin's kernel density estimate was evaluated at.
6840+
- vals: A list of scalars containing the values of the kernel
6841+
density estimate at each of the coordinates given in `coords`.
6842+
- mean: The mean value for this violin's dataset.
6843+
- median: The median value for this violin's dataset.
6844+
- min: The minimum value for this violin's dataset.
6845+
- max: The maximum value for this violin's dataset.
6846+
6847+
positions : array-like, default = [1, 2, ..., n]
6848+
Sets the positions of the violins. The ticks and limits are
6849+
automatically set to match the positions.
6850+
6851+
vert : bool, default = True.
6852+
If true, plots the violins vertically.
6853+
Otherwise, plots the violins horizontally.
6854+
6855+
widths : array-like, default = 0.5
6856+
Either a scalar or a vector that sets the maximal width of
6857+
each violin. The default is 0.5, which uses about half of the
6858+
available horizontal space.
6859+
6860+
showmeans : bool, default = False
6861+
If true, the means are rendered.
6862+
6863+
showextrema : bool, default = True
6864+
If true, the extrema are rendered.
6865+
6866+
showmedians : bool, default = False
6867+
If true, the medians are rendered.
6868+
6869+
Returns
6870+
-------
6871+
6872+
A dictionary mapping each component of the violinplot to the
6873+
corresponding collection instance(s) created. The dictionary has
6874+
the following keys:
6875+
6876+
- bodies: A list of the
6877+
:class:`matplotlib.collections.PolyCollection` instances
6878+
containing the filled area of each violin.
6879+
- cmeans: A :class:`matplotlib.collections.LineCollection` instance
6880+
created to identify the mean value of each violin's
6881+
distribution.
6882+
- cmins: A :class:`matplotlib.collections.LineCollection` instance
6883+
created to identify the bottom of each violin's distribution.
6884+
- cmaxes: A :class:`matplotlib.collections.LineCollection` instance
6885+
created to identify the top of each violin's distribution.
6886+
- cbars: A :class:`matplotlib.collections.LineCollection` instance
6887+
created to identify the centers of each violin's distribution.
6888+
- cmedians: A :class:`matplotlib.collections.LineCollection`
6889+
instance created to identify the median value of each
6890+
violin's distribution.
6891+
6892+
"""
6893+
6894+
# Statistical quantities to be plotted on the violins
6895+
means = []
6896+
mins = []
6897+
maxes = []
6898+
medians = []
6899+
6900+
# Collections to be returned
6901+
artists = {}
6902+
6903+
N = len(vpstats)
6904+
datashape_message = ("List of violinplot statistics and `{0}` "
6905+
"values must have the same length")
6906+
6907+
# Validate positions
6908+
if positions is None:
6909+
positions = range(1, N + 1)
6910+
elif len(positions) != N:
6911+
raise ValueError(datashape_message.format("positions"))
6912+
6913+
# Validate widths
6914+
if np.isscalar(widths):
6915+
widths = [widths] * N
6916+
elif len(widths) != N:
6917+
raise ValueError(datashape_message.format("widths"))
6918+
6919+
# Calculate ranges for statistics lines
6920+
pmins = -0.25 * np.array(widths) + positions
6921+
pmaxes = 0.25 * np.array(widths) + positions
6922+
6923+
# Check whether we are rendering vertically or horizontally
6924+
if vert:
6925+
fill = self.fill_betweenx
6926+
perp_lines = self.hlines
6927+
par_lines = self.vlines
6928+
else:
6929+
fill = self.fill_between
6930+
perp_lines = self.vlines
6931+
par_lines = self.hlines
6932+
6933+
# Render violins
6934+
bodies = []
6935+
for stats, pos, width in zip(vpstats, positions, widths):
6936+
# The 0.5 factor reflects the fact that we plot from v-p to
6937+
# v+p
6938+
vals = np.array(stats['vals'])
6939+
vals = 0.5 * width * vals / vals.max()
6940+
bodies += [fill(stats['coords'],
6941+
-vals + pos,
6942+
vals + pos,
6943+
facecolor='y',
6944+
alpha=0.3)]
6945+
means.append(stats['mean'])
6946+
mins.append(stats['min'])
6947+
maxes.append(stats['max'])
6948+
medians.append(stats['median'])
6949+
artists['bodies'] = bodies
6950+
6951+
# Render means
6952+
if showmeans:
6953+
artists['cmeans'] = perp_lines(means, pmins, pmaxes, colors='r')
6954+
6955+
# Render extrema
6956+
if showextrema:
6957+
artists['cmaxes'] = perp_lines(maxes, pmins, pmaxes, colors='r')
6958+
artists['cmins'] = perp_lines(mins, pmins, pmaxes, colors='r')
6959+
artists['cbars'] = par_lines(positions, mins, maxes, colors='r')
6960+
6961+
# Render medians
6962+
if showmedians:
6963+
artists['cmedians'] = perp_lines(medians,
6964+
pmins,
6965+
pmaxes,
6966+
colors='r')
6967+
6968+
return artists
6969+
67286970
def tricontour(self, *args, **kwargs):
67296971
return mtri.tricontour(self, *args, **kwargs)
67306972
tricontour.__doc__ = mtri.TriContourSet.tricontour_doc

0 commit comments

Comments
 (0)