From 75632e27c7c38c934e752440321dade5f2d43d81 Mon Sep 17 00:00:00 2001 From: Dylan Dotti Date: Sun, 10 Dec 2017 18:21:55 -0500 Subject: [PATCH] added support for percentiles to violinplot --- lib/matplotlib/axes/_axes.py | 40 ++++++++++++++++++++++++++++---- lib/matplotlib/cbook/__init__.py | 15 ++++++++++-- lib/matplotlib/pyplot.py | 7 +++--- 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index 85dcc041d089..53d046f7fa30 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -7374,7 +7374,7 @@ def matshow(self, Z, **kwargs): @_preprocess_data(replace_names=["dataset"], label_namer=None) def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=False, showextrema=True, showmedians=False, - points=100, bw_method=None): + percentiles=[], points=100, bw_method=None): """ Make a violin plot. @@ -7410,6 +7410,10 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmedians : bool, default = False If `True`, will toggle rendering of the medians. + percentiles : array-like, default = [] + Displays percentiles in the plot for all the given percentiles + in the set + points : scalar, default = 100 Defines the number of points to evaluate each of the gaussian kernel density estimations at. @@ -7457,6 +7461,11 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, :class:`matplotlib.collections.LineCollection` instance created to identify the median values of each of the violin's distribution. 
+ + - ``cpercentiles`` : A list of + :class:`matplotlib.collections.LineCollection` instances, + one per violin, created to identify the percentile values + of that violin's distribution. """ def _kde_method(X, coords): @@ -7466,7 +7475,8 @@ def _kde_method(X, coords): kde = mlab.GaussianKDE(X, bw_method) return kde.evaluate(coords) - vpstats = cbook.violin_stats(dataset, _kde_method, points=points) + vpstats = cbook.violin_stats(dataset, _kde_method, points=points, + percentiles=percentiles) return self.violin(vpstats, positions=positions, vert=vert, widths=widths, showmeans=showmeans, showextrema=showextrema, showmedians=showmedians) @@ -7501,6 +7511,8 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, - ``max``: The maximum value for this violin's dataset. + - ``percentiles``: The percentiles for this violin's dataset. + positions : array-like, default = [1, 2, ..., n] Sets the positions of the violins. The ticks and limits are automatically set to match the positions. @@ -7523,6 +7535,10 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, showmedians : bool, default = False If true, will toggle rendering of the medians. + percentiles : array-like, default = [] + Displays percentiles in the plot for all the given percentiles in the set. + + Returns ------- result : dict @@ -7558,6 +7574,11 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, :class:`matplotlib.collections.LineCollection` instance created to identify the median values of each of the violin's distribution. + + - ``cpercentiles``: A list of + :class:`matplotlib.collections.LineCollection` instances, + one per violin, created to identify the percentile values + of that violin's distribution. 
""" @@ -7566,6 +7587,7 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, mins = [] maxes = [] medians = [] + percentiles = [] # Collections to be returned artists = {} @@ -7622,6 +7644,7 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, mins.append(stats['min']) maxes.append(stats['max']) medians.append(stats['median']) + percentiles.append(stats['percentiles']) artists['bodies'] = bodies # Render means @@ -7640,11 +7663,18 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, # Render medians if showmedians: - artists['cmedians'] = perp_lines(medians, - pmins, - pmaxes, + artists['cmedians'] = perp_lines(medians, pmins, pmaxes, colors=edgecolor) + # Render percentiles + artists['cpercentiles'] = [] + for pcs, pmin, pmax in zip(percentiles, pmins, pmaxes): + linelen = pmax - pmin + artists['cpercentiles'].append(perp_lines(pcs, + pmin + linelen * 0.17, + pmax - linelen * 0.17, + colors=edgecolor)) + return artists def tricontour(self, *args, **kwargs): diff --git a/lib/matplotlib/cbook/__init__.py b/lib/matplotlib/cbook/__init__.py index 0b6a4968b113..794c233c2f2e 100644 --- a/lib/matplotlib/cbook/__init__.py +++ b/lib/matplotlib/cbook/__init__.py @@ -2047,7 +2047,7 @@ def _reshape_2D(X, name): raise ValueError("{} must have 2 or fewer dimensions".format(name)) -def violin_stats(X, method, points=100): +def violin_stats(X, method, points=100, percentiles=[]): """ Returns a list of dictionaries of data which can be used to draw a series of violin plots. See the `Returns` section below to view the required keys @@ -2071,6 +2071,9 @@ def violin_stats(X, method, points=100): Defines the number of points to evaluate each of the gaussian kernel density estimates at. + percentiles : array-like, default = [] + Defines a set of percentiles, if any, that will be displayed per plotted point + Returns ------- @@ -2085,6 +2088,7 @@ def violin_stats(X, method, points=100): - median: The median value for this column of data. 
- min: The minimum value for this column of data. - max: The maximum value for this column of data. + - percentiles: The set of percentiles that will be rendered for this data. """ # List of dictionaries describing each of the violins. @@ -2093,7 +2097,13 @@ def violin_stats(X, method, points=100): # Want X to be a list of data sequences X = _reshape_2D(X, "X") - for x in X: + if percentiles is None: + percentiles = [] + percentiles = _reshape_2D(percentiles, "percentiles") + while len(percentiles) < len(X): + percentiles.append([]) + + for x, pcs in zip(X, percentiles): # Dictionary of results for this distribution stats = {} @@ -2111,6 +2121,7 @@ def violin_stats(X, method, points=100): stats['median'] = np.median(x) stats['min'] = min_val stats['max'] = max_val + stats['percentiles'] = np.percentile(x, pcs) # Append to output vpstats.append(stats) diff --git a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py index 668124817326..6c0c9bf3a892 100644 --- a/lib/matplotlib/pyplot.py +++ b/lib/matplotlib/pyplot.py @@ -3652,8 +3652,8 @@ def triplot(*args, **kwargs): # changes will be lost @_autogen_docstring(Axes.violinplot) def violinplot(dataset, positions=None, vert=True, widths=0.5, showmeans=False, - showextrema=True, showmedians=False, points=100, bw_method=None, - hold=None, data=None): + showextrema=True, showmedians=False, percentiles=[], points=100, + bw_method=None, hold=None, data=None): ax = gca() # Deprecated: allow callers to override the hold state # by passing hold=True|False @@ -3668,7 +3668,8 @@ def violinplot(dataset, positions=None, vert=True, widths=0.5, showmeans=False, ret = ax.violinplot(dataset, positions=positions, vert=vert, widths=widths, showmeans=showmeans, showextrema=showextrema, showmedians=showmedians, - points=points, bw_method=bw_method, data=data) + points=points, bw_method=bw_method, data=data, + percentiles=percentiles) finally: ax._hold = washold