@@ -6739,8 +6739,8 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
6739
6739
6740
6740
Make a violin plot for each column of *dataset* or each vector in
6741
6741
sequence *dataset*. Each filled area extends to represent the
6742
- entire data range, with three lines at the mean, the minimum, and
6743
- the maximum.
6742
+ entire data range, with optional lines at the mean, the median,
6743
+ the minimum, and the maximum.
6744
6744
6745
6745
Parameters
6746
6746
----------
@@ -6778,7 +6778,7 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
6778
6778
The method used to calculate the estimator bandwidth. This can be
6779
6779
'scott', 'silverman', a scalar constant or a callable. If a
6780
6780
scalar, this will be used directly as `kde.factor`. If a
6781
- callable, it should take a `GaussianKDE` instance as only
6781
+ callable, it should take a `GaussianKDE` instance as its only
6782
6782
parameter and return a scalar. If None (default), 'scott' is used.
6783
6783
6784
6784
Returns
@@ -6806,6 +6806,91 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
6806
6806
6807
6807
"""
6808
6808
6809
+ def _kde_method (X , coords ):
6810
+ kde = mlab .GaussianKDE (X , bw_method )
6811
+ return kde .evaluate (coords )
6812
+
6813
+ vpstats = cbook .violin_stats (dataset , _kde_method , points = points )
6814
+ return self .violin (vpstats , positions = positions , vert = vert ,
6815
+ widths = widths , showmeans = showmeans ,
6816
+ showextrema = showextrema , showmedians = showmedians )
6817
+
6818
+ def violin (self , vpstats , positions = None , vert = True , widths = 0.5 ,
6819
+ showmeans = False , showextrema = True , showmedians = False ):
6820
+ """
6821
+ Drawing function for violin plots.
6822
+
6823
+ Call signature::
6824
+
6825
+ violin(vpstats, positions=None, vert=True, widths=0.5,
6826
+ showmeans=False, showextrema=True, showmedians=False)
6827
+
6828
+ Draw a violin plot for each column of `vpstats`. Each filled area
6829
+ extends to represent the entire data range, with optional lines at the
6830
+ mean, the median, the minimum, and the maximum.
6831
+
6832
+ Parameters
6833
+ ----------
6834
+
6835
+ vpstats : list of dicts
6836
+ A list of dictionaries containing stats for each violin plot.
6837
+ Required keys are:
6838
+ - coords: A list of scalars containing the coordinates that
6839
+ the violin's kernel density estimate was evaluated at.
6840
+ - vals: A list of scalars containing the values of the kernel
6841
+ density estimate at each of the coordinates given in `coords`.
6842
+ - mean: The mean value for this violin's dataset.
6843
+ - median: The median value for this violin's dataset.
6844
+ - min: The minimum value for this violin's dataset.
6845
+ - max: The maximum value for this violin's dataset.
6846
+
6847
+ positions : array-like, default = [1, 2, ..., n]
6848
+ Sets the positions of the violins. The ticks and limits are
6849
+ automatically set to match the positions.
6850
+
6851
+ vert : bool, default = True.
6852
+ If true, plots the violins vertically.
6853
+ Otherwise, plots the violins horizontally.
6854
+
6855
+ widths : array-like, default = 0.5
6856
+ Either a scalar or a vector that sets the maximal width of
6857
+ each violin. The default is 0.5, which uses about half of the
6858
+ available horizontal space.
6859
+
6860
+ showmeans : bool, default = False
6861
+ If true, will toggle rendering of the means.
6862
+
6863
+ showextrema : bool, default = True
6864
+ If true, will toggle rendering of the extrema.
6865
+
6866
+ showmedians : bool, default = False
6867
+ If true, will toggle rendering of the medians.
6868
+
6869
+ Returns
6870
+ -------
6871
+
6872
+ A dictionary mapping each component of the violinplot to a list of the
6873
+ corresponding collection instances created. The dictionary has
6874
+ the following keys:
6875
+
6876
+ - bodies: A list of the
6877
+ :class:`matplotlib.collections.PolyCollection` instances
6878
+ containing the filled area of each violin.
6879
+ - means: A :class:`matplotlib.collections.LineCollection` instance
6880
+ created to identify the mean value of each violin's
6881
+ distribution.
6882
+ - mins: A :class:`matplotlib.collections.LineCollection` instance
6883
+ created to identify the bottom of each violin's distribution.
6884
+ - maxes: A :class:`matplotlib.collections.LineCollection` instance
6885
+ created to identify the top of each violin's distribution.
6886
+ - bars: A :class:`matplotlib.collections.LineCollection` instance
6887
+ created to identify the centers of each violin's distribution.
6888
+ - medians: A :class:`matplotlib.collections.LineCollection`
6889
+ instance created to identify the median value of each
6890
+ violin's distribution.
6891
+
6892
+ """
6893
+
6809
6894
# Statistical quantities to be plotted on the violins
6810
6895
means = []
6811
6896
mins = []
@@ -6822,22 +6907,23 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
6822
6907
'cmedians' : None
6823
6908
}
6824
6909
6910
+ N = len (vpstats )
6825
6911
datashape_message = ("List of violinplot statistics and `{0}` "
6826
6912
"values must have the same length" )
6827
6913
6828
6914
# Validate positions
6829
6915
if positions is None :
6830
- positions = range (1 , len ( dataset ) + 1 )
6831
- elif len (positions ) != len ( dataset ) :
6916
+ positions = range (1 , N + 1 )
6917
+ elif len (positions ) != N :
6832
6918
raise ValueError (datashape_message .format ("positions" ))
6833
6919
6834
6920
# Validate widths
6835
6921
if np .isscalar (widths ):
6836
- widths = [widths ] * len ( dataset )
6837
- elif len (widths ) != len ( dataset ) :
6922
+ widths = [widths ] * N
6923
+ elif len (widths ) != N :
6838
6924
raise ValueError (datashape_message .format ("widths" ))
6839
6925
6840
- # Calculate mins and maxes for statistics lines
6926
+ # Calculate ranges for statistics lines
6841
6927
pmins = - 0.25 * np .array (widths ) + positions
6842
6928
pmaxes = 0.25 * np .array (widths ) + positions
6843
6929
@@ -6857,33 +6943,20 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
6857
6943
blines = self .hlines
6858
6944
6859
6945
# Render violins
6860
- for data , pos , width in zip (dataset , positions , widths ):
6861
- # Calculate the kernel density
6862
- kde = mlab .GaussianKDE (data , bw_method )
6863
- min_val = kde .dataset .min ()
6864
- max_val = kde .dataset .max ()
6865
- mean = np .mean (kde .dataset )
6866
- median = np .median (kde .dataset )
6867
- coords = np .linspace (min_val , max_val , points )
6868
-
6869
- vals = kde .evaluate (coords )
6870
-
6871
- # Since each data point p is plotted from v-p to v+p,
6872
- # we need to scale it by an additional 0.5 factor so that we get
6873
- # correct width in the end.
6874
- vals = 0.5 * width * vals / vals .max ()
6875
-
6876
- # create the violin bodies
6877
- artists ['bodies' ] += [fill (coords ,
6946
+ for stats , pos , width in zip (vpstats , positions , widths ):
6947
+ # The 0.5 factor reflects the fact that we plot from v-p to
6948
+ # v+p
6949
+ vals = np .array (stats ['vals' ])
6950
+ vals = 0.5 * width * vals / vals .max ()
6951
+ artists ['bodies' ] += [fill (stats ['coords' ],
6878
6952
- vals + pos ,
6879
6953
vals + pos ,
6880
6954
facecolor = 'y' ,
6881
6955
alpha = 0.3 )]
6882
-
6883
- means .append (mean )
6884
- mins .append (min_val )
6885
- maxes .append (max_val )
6886
- medians .append (median )
6956
+ means .append (stats ['mean' ])
6957
+ mins .append (stats ['min' ])
6958
+ maxes .append (stats ['max' ])
6959
+ medians .append (stats ['median' ])
6887
6960
6888
6961
# Render means
6889
6962
if showmeans :
0 commit comments