diff --git a/examples/statistics/violinplot.py b/examples/statistics/violinplot.py index 19d80b494fba..5d26cc63966e 100644 --- a/examples/statistics/violinplot.py +++ b/examples/statistics/violinplot.py @@ -28,7 +28,7 @@ pos = [1, 2, 4, 5, 7, 8] data = [np.random.normal(0, std, size=100) for std in pos] -fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6, 6)) +fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(10, 6)) axes[0, 0].violinplot(data, pos, points=20, widths=0.3, showmeans=True, showextrema=True, showmedians=True) @@ -43,19 +43,42 @@ showextrema=True, showmedians=True, bw_method=0.5) axes[0, 2].set_title('Custom violinplot 3', fontsize=fs) +axes[0, 3].violinplot(data, pos, points=60, widths=0.7, showmeans=True, + showextrema=True, showmedians=True, bw_method=0.5, + quantiles=[[0.1], [], [], [0.175, 0.954], [0.75], + [0.25]]) +axes[0, 3].set_title('Custom violinplot 4', fontsize=fs) + +axes[0, 4].violinplot(data[-1:], pos[-1:], points=60, widths=0.7, + showmeans=True, showextrema=True, showmedians=True, + quantiles=[0.05, 0.1, 0.8, 0.9], bw_method=0.5) +axes[0, 4].set_title('Custom violinplot 5', fontsize=fs) + axes[1, 0].violinplot(data, pos, points=80, vert=False, widths=0.7, showmeans=True, showextrema=True, showmedians=True) -axes[1, 0].set_title('Custom violinplot 4', fontsize=fs) +axes[1, 0].set_title('Custom violinplot 6', fontsize=fs) axes[1, 1].violinplot(data, pos, points=100, vert=False, widths=0.9, showmeans=True, showextrema=True, showmedians=True, bw_method='silverman') -axes[1, 1].set_title('Custom violinplot 5', fontsize=fs) +axes[1, 1].set_title('Custom violinplot 7', fontsize=fs) axes[1, 2].violinplot(data, pos, points=200, vert=False, widths=1.1, showmeans=True, showextrema=True, showmedians=True, bw_method=0.5) -axes[1, 2].set_title('Custom violinplot 6', fontsize=fs) +axes[1, 2].set_title('Custom violinplot 8', fontsize=fs) + +axes[1, 3].violinplot(data, pos, points=200, vert=False, widths=1.1, + showmeans=True, showextrema=True, 
showmedians=True, + quantiles=[[0.1], [], [], [0.175, 0.954], [0.75], + [0.25]], + bw_method=0.5) +axes[1, 3].set_title('Custom violinplot 9', fontsize=fs) + +axes[1, 4].violinplot(data[-1:], pos[-1:], points=200, vert=False, widths=1.1, + showmeans=True, showextrema=True, showmedians=True, + quantiles=[0.05, 0.1, 0.8, 0.9], bw_method=0.5) +axes[1, 4].set_title('Custom violinplot 10', fontsize=fs) for ax in axes.flat: ax.set_yticklabels([]) diff --git a/lib/matplotlib/axes/_axes.py b/lib/matplotlib/axes/_axes.py index 06d945b7cc8f..862c7b7b4eec 100644 --- a/lib/matplotlib/axes/_axes.py +++ b/lib/matplotlib/axes/_axes.py @@ -7880,14 +7880,14 @@ def matshow(self, Z, **kwargs): @_preprocess_data(replace_names=["dataset"]) def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=False, showextrema=True, showmedians=False, - points=100, bw_method=None): + quantiles=None, points=100, bw_method=None): """ Make a violin plot. Make a violin plot for each column of *dataset* or each vector in sequence *dataset*. Each filled area extends to represent the entire data range, with optional lines at the mean, the median, - the minimum, and the maximum. + the minimum, the maximum, and user-specified quantiles. Parameters ---------- @@ -7916,6 +7916,11 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmedians : bool, default = False If `True`, will toggle rendering of the medians. + quantiles : array-like, default = None + If not None, set a list of floats in interval [0, 1] for each violin, + which stands for the quantiles that will be rendered for that + violin. + points : scalar, default = 100 Defines the number of points to evaluate each of the gaussian kernel density estimations at. @@ -7953,6 +7958,10 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, - ``cmedians``: A `~.collections.LineCollection` instance that marks the median values of each of the violin's distribution. 
+ - ``cquantiles``: A `~.collections.LineCollection` instance created + to identify the quantile values of each of the violin's + distribution. + """ def _kde_method(X, coords): @@ -7962,7 +7971,8 @@ def _kde_method(X, coords): kde = mlab.GaussianKDE(X, bw_method) return kde.evaluate(coords) - vpstats = cbook.violin_stats(dataset, _kde_method, points=points) + vpstats = cbook.violin_stats(dataset, _kde_method, points=points, + quantiles=quantiles) return self.violin(vpstats, positions=positions, vert=vert, widths=widths, showmeans=showmeans, showextrema=showextrema, showmedians=showmedians) @@ -7973,7 +7983,7 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, Draw a violin plot for each column of `vpstats`. Each filled area extends to represent the entire data range, with optional lines at the - mean, the median, the minimum, and the maximum. + mean, the median, the minimum, the maximum, and the quantiles values. Parameters ---------- @@ -7997,6 +8007,11 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, - ``max``: The maximum value for this violin's dataset. + Optional keys are: + + - ``quantiles``: A list of scalars containing the quantile values + for this violin's dataset. + positions : array-like, default = [1, 2, ..., n] Sets the positions of the violins. The ticks and limits are automatically set to match the positions. @@ -8043,6 +8058,11 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, - ``cmedians``: A `~.collections.LineCollection` instance that marks the median values of each of the violin's distribution. + + - ``cquantiles``: A `~.collections.LineCollection` instance created + to identify the quantiles values of each of the violin's + distribution. 
+ """ # Statistical quantities to be plotted on the violins @@ -8050,6 +8070,7 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, mins = [] maxes = [] medians = [] + quantiles = np.asarray([]) # Collections to be returned artists = {} @@ -8106,6 +8127,10 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, mins.append(stats['min']) maxes.append(stats['max']) medians.append(stats['median']) + q = stats.get('quantiles') + if q is not None: + # If the 'quantiles' key exists, assume it's a list of floats + quantiles = np.concatenate((quantiles, q)) artists['bodies'] = bodies # Render means @@ -8129,6 +8154,22 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5, pmaxes, colors=edgecolor) + # Render quantile values + if quantiles.size > 0: + # Recalculate ranges for statistics lines for quantiles. + # ppmins are the left end of quantiles lines + ppmins = np.asarray([]) + # ppmaxs are the right end of quantiles lines + ppmaxs = np.asarray([]) + for stats, cmin, cmax in zip(vpstats, pmins, pmaxes): + q = stats.get('quantiles') + if q is not None: + ppmins = np.concatenate((ppmins, [cmin] * np.size(q))) + ppmaxs = np.concatenate((ppmaxs, [cmax] * np.size(q))) + # Start rendering + artists['cquantiles'] = perp_lines(quantiles, ppmins, ppmaxs, + colors=edgecolor) + return artists # Methods that are entirely implemented in other modules. diff --git a/lib/matplotlib/cbook/__init__.py b/lib/matplotlib/cbook/__init__.py index bbeb82b18f1a..3f539b569830 100644 --- a/lib/matplotlib/cbook/__init__.py +++ b/lib/matplotlib/cbook/__init__.py @@ -1431,7 +1431,7 @@ def _reshape_2D(X, name): raise ValueError("{} must have 2 or fewer dimensions".format(name)) -def violin_stats(X, method, points=100): +def violin_stats(X, method, points=100, quantiles=None): """ Returns a list of dictionaries of data which can be used to draw a series of violin plots. 
See the `Returns` section below to view the required keys @@ -1455,6 +1455,12 @@ def violin_stats(X, method, points=100): Defines the number of points to evaluate each of the gaussian kernel density estimates at. + quantiles : array-like, default = None + Defines (if not None) a list of floats in interval [0, 1] for each + column of data, which represents the quantiles that will be rendered + for that column of data. Must have 2 or fewer dimensions. 1D array will + be treated as a singleton list containing them. + Returns ------- @@ -1469,6 +1475,7 @@ def violin_stats(X, method, points=100): - median: The median value for this column of data. - min: The minimum value for this column of data. - max: The maximum value for this column of data. + - quantiles: The quantile values for this column of data. """ # List of dictionaries describing each of the violins. @@ -1477,13 +1484,27 @@ def violin_stats(X, method, points=100): # Want X to be a list of data sequences X = _reshape_2D(X, "X") - for x in X: + # Want quantiles to be the same shape as data sequences + if quantiles is not None and len(quantiles) != 0: + quantiles = _reshape_2D(quantiles, "quantiles") + # Else, mock quantiles if it is None or empty + else: + quantiles = [[]] * np.shape(X)[0] + + # quantiles should have the same size as dataset + if np.shape(X)[:1] != np.shape(quantiles)[:1]: + raise ValueError("List of violinplot statistics and quantiles values" + " must have the same length") + + # Zip x and quantiles + for (x, q) in zip(X, quantiles): # Dictionary of results for this distribution stats = {} # Calculate basic stats for the distribution min_val = np.min(x) max_val = np.max(x) + quantile_val = np.percentile(x, 100 * q) # Evaluate the kernel density estimate coords = np.linspace(min_val, max_val, points) @@ -1495,6 +1516,7 @@ def violin_stats(X, method, points=100): stats['median'] = np.median(x) stats['min'] = min_val stats['max'] = max_val + stats['quantiles'] = np.atleast_1d(quantile_val) # 
Append to output vpstats.append(stats) diff --git a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py index 19476ca75f11..da8f59019867 100644 --- a/lib/matplotlib/pyplot.py +++ b/lib/matplotlib/pyplot.py @@ -2998,12 +2998,13 @@ def triplot(*args, **kwargs): def violinplot( dataset, positions=None, vert=True, widths=0.5, showmeans=False, showextrema=True, showmedians=False, - points=100, bw_method=None, *, data=None): + quantiles=None, points=100, bw_method=None, *, data=None): return gca().violinplot( dataset, positions=positions, vert=vert, widths=widths, showmeans=showmeans, showextrema=showextrema, - showmedians=showmedians, points=points, bw_method=bw_method, - **({"data": data} if data is not None else {})) + showmedians=showmedians, quantiles=quantiles, points=points, + bw_method=bw_method, **({"data": data} if data is not None + else {})) # Autogenerated by boilerplate.py. Do not edit as changes will be lost. diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png index 95c657bd81d5..b3e807c153d9 100644 Binary files a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_horiz_showall.png differ diff --git a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png index cdda9b7e1c65..49383f98bc3e 100644 Binary files a/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png and b/lib/matplotlib/tests/baseline_images/test_axes/violinplot_vert_showall.png differ diff --git a/lib/matplotlib/tests/test_axes.py b/lib/matplotlib/tests/test_axes.py index fea961a201ee..210d470636c2 100644 --- a/lib/matplotlib/tests/test_axes.py +++ b/lib/matplotlib/tests/test_axes.py @@ -2661,7 +2661,8 @@ def test_vert_violinplot_showall(): np.random.seed(316624790) data = 
[np.random.normal(size=100) for i in range(4)] ax.violinplot(data, positions=range(4), showmeans=1, showextrema=1, - showmedians=1) + showmedians=1, + quantiles=[[0.1, 0.9], [0.2, 0.8], [0.3, 0.7], [0.4, 0.6]]) @image_comparison(baseline_images=['violinplot_vert_custompoints_10'], @@ -2738,7 +2739,8 @@ def test_horiz_violinplot_showall(): np.random.seed(82762530) data = [np.random.normal(size=100) for i in range(4)] ax.violinplot(data, positions=range(4), vert=False, showmeans=1, - showextrema=1, showmedians=1) + showextrema=1, showmedians=1, + quantiles=[[0.1, 0.9], [0.2, 0.8], [0.3, 0.7], [0.4, 0.6]]) @image_comparison(baseline_images=['violinplot_horiz_custompoints_10'], @@ -2781,6 +2783,48 @@ def test_violinplot_bad_widths(): ax.violinplot(data, positions=range(4), widths=[1, 2, 3]) +def test_violinplot_bad_quantiles(): + ax = plt.axes() + # First 9 digits of frac(sqrt(73)) + np.random.seed(544003745) + data = [np.random.normal(size=100)] + + # Different size quantile list and plots + with pytest.raises(ValueError): + ax.violinplot(data, quantiles=[[0.1, 0.2], [0.5, 0.7]]) + + +def test_violinplot_outofrange_quantiles(): + ax = plt.axes() + # First 9 digits of frac(sqrt(79)) + np.random.seed(888194417) + data = [np.random.normal(size=100)] + + # Quantile value above 1 + with pytest.raises(ValueError): + ax.violinplot(data, quantiles=[[0.1, 0.2, 0.3, 1.05]]) + + # Quantile value below 0 + with pytest.raises(ValueError): + ax.violinplot(data, quantiles=[[-0.05, 0.2, 0.3, 0.75]]) + + +@check_figures_equal(extensions=["png"]) +def test_violinplot_single_list_quantiles(fig_test, fig_ref): + # Ensures quantile list for 1D can be passed in as single list + # First 9 digits of frac(sqrt(83)) + np.random.seed(110433579) + data = [np.random.normal(size=100)] + + # Test image + ax = fig_test.subplots() + ax.violinplot(data, quantiles=[0.1, 0.3, 0.9]) + + # Reference image + ax = fig_ref.subplots() + ax.violinplot(data, quantiles=[[0.1, 0.3, 0.9]]) + + def 
test_manage_xticks(): _, ax = plt.subplots() ax.set_xlim(0, 4)