From 08db92d1ed16f61148323e2656168611880f0de9 Mon Sep 17 00:00:00 2001 From: Arvind Date: Sun, 30 Oct 2016 00:36:24 +0530 Subject: [PATCH 1/9] Updated example as per suggestions in issue #7251 --- examples/statistics/customized_violin_demo.py | 107 ++++++++---------- 1 file changed, 47 insertions(+), 60 deletions(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index 29dfda7b894e..dd0ceff1aeae 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -18,71 +18,44 @@ import numpy as np -# functions to calculate percentiles and adjacent values -def percentile(vals, p): - N = len(vals) - n = p*(N+1) - k = int(n) - d = n-k - if k <= 0: - return vals[0] - if k >= N: - return vals[N-1] - return vals[k-1] + d*(vals[k] - vals[k-1]) - - def adjacent_values(vals): - q1 = percentile(vals, 0.25) - q3 = percentile(vals, 0.75) - iqr = q3 - q1 # inter-quartile range - + q1, q3 = np.percentile(vals, [25, 75]) + # inter-quartile range iqr + iqr = q3 - q1 # upper adjacent values uav = q3 + iqr * 1.5 - if uav > vals[-1]: - uav = vals[-1] - if uav < q3: - uav = q3 - + uav = np.clip(uav, q3, vals[-1]) # lower adjacent values lav = q1 - iqr * 1.5 - if lav < vals[0]: - lav = vals[0] - if lav > q1: - lav = q1 + lav = np.clip(lav, q1, vals[0]) return [lav, uav] +def set_axis_style(ax, labels): + ax.get_xaxis().set_tick_params(direction='out') + ax.xaxis.set_ticks_position('bottom') + ax.set_xticks(np.arange(1, len(labels) + 1)) + ax.set_xticklabels(labels) + ax.set_xlim(0.25, len(labels) + 0.75) + ax.set_xlabel('Sample name') + + # create test data np.random.seed(123) -dat = [np.random.normal(0, std, 100) for std in range(1, 5)] -lab = ['A', 'B', 'C', 'D'] # labels -med = [] # medians -iqr = [] # inter-quantile ranges -avs = [] # upper and lower adjacent values -for arr in dat: - sarr = sorted(arr) - med.append(percentile(sarr, 0.5)) - iqr.append([percentile(sarr, 0.25), percentile(sarr, 0.75)]) - avs.append(adjacent_values(sarr)) - -# plot the violins -fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), - sharey=True) -_ = ax1.violinplot(dat) -parts = ax2.violinplot(dat, showmeans=False, showmedians=False, - showextrema=False) +dat = [sorted(np.random.normal(0, std, 100)) for std in range(1, 5)] + +fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), sharey=True) +# plot the default violin ax1.set_title('Default violin plot') -ax2.set_title('Customized violin plot') +ax1.set_ylabel('Observed values') +ax1.violinplot(dat) -# plot whiskers as thin lines, quartiles as fat lines, -# and medians as points -for i in range(len(med)): - # whiskers - ax2.plot([i + 1, i + 1], avs[i], '-', color='black', linewidth=1) - ax2.plot([i + 1, i + 1], iqr[i], '-', color='black', linewidth=5) - ax2.plot(i + 1, med[i], 'o', color='white', - markersize=6, markeredgecolor='none') +# customized violin +ax2.set_title('Customized violin plot') +parts = ax2.violinplot( + dat, showmeans=False, showmedians=False, + showextrema=False) # customize colors for pc in parts['bodies']: @@ -90,15 +63,29 @@ def adjacent_values(vals): pc.set_edgecolor('black') pc.set_alpha(1) -ax1.set_ylabel('Observed values') +# medians +med = [np.percentile(sarr, 50) for sarr in dat] +# inter-quartile ranges +iqr = [[np.percentile(sarr, 25), np.percentile(sarr, 75)] for sarr in dat] +# upper and lower adjacent values +avs = [adjacent_values(sarr) for sarr in dat] + +# plot whiskers as thin lines, quartiles as fat lines, +# and medians as points +for i, median in enumerate(med): + # whiskers + ax2.plot([i + 1, i + 1], avs[i], '-', color='black', linewidth=1) + # quartiles + ax2.plot([i + 1, i + 1], iqr[i], '-', color='black', linewidth=5) + # medians + ax2.plot( + i + 1, median, 'o', color='white', + markersize=6, markeredgecolor='none') + +# set style for the axes +labels = ['A', 'B', 'C', 'D'] # labels for ax in [ax1, ax2]: - ax.get_xaxis().set_tick_params(direction='out') - ax.xaxis.set_ticks_position('bottom') - ax.set_xticks(np.arange(1, len(lab) + 1)) - ax.set_xticklabels(lab) - ax.set_xlim(0.25, len(lab) + 0.75) - ax.set_xlabel('Sample name') + set_axis_style(ax, labels) plt.subplots_adjust(bottom=0.15, wspace=0.05) - plt.show() From becbfa45bcdd70a22a4116aa3162c2f0a494deee Mon Sep 17 00:00:00 2001 From: Arvind Date: Sun, 30 Oct 2016 01:51:28 +0530 Subject: [PATCH 2/9] Fixed ordering of parameters to np.clip() --- examples/statistics/customized_violin_demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index dd0ceff1aeae..cc4c65c9c25b 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -27,7 +27,7 @@ def adjacent_values(vals): uav = np.clip(uav, q3, vals[-1]) # lower adjacent values lav = q1 - iqr * 1.5 - lav = np.clip(lav, q1, vals[0]) + lav = np.clip(lav, vals[0], q1) return [lav, uav] From 14cd6cfd72a1e2d30e8e87448ee73be29bc19c30 Mon Sep 17 00:00:00 2001 From: Arvind Date: Sun, 30 Oct 2016 12:24:50 +0530 Subject: [PATCH 3/9] Made variable names more meaningful --- examples/statistics/customized_violin_demo.py | 43 ++++++++----------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index cc4c65c9c25b..a3fa7145443c 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -20,15 +20,14 @@ def adjacent_values(vals): q1, q3 = np.percentile(vals, [25, 75]) - # inter-quartile range iqr - iqr = q3 - q1 - # upper adjacent values - uav = q3 + iqr * 1.5 - uav = np.clip(uav, q3, vals[-1]) - # lower adjacent values - lav = q1 - iqr * 1.5 - lav = np.clip(lav, vals[0], q1) - return [lav, uav] + inter_quartile_range = q3 - q1 + + upper_adjacent_value = q3 + inter_quartile_range * 1.5 + upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1]) + + lower_adjacent_value = q1 - inter_quartile_range * 1.5 + lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1) + return [lower_adjacent_value, upper_adjacent_value] def set_axis_style(ax, labels): @@ -42,19 +41,19 @@ def set_axis_style(ax, labels): # create test data np.random.seed(123) -dat = [sorted(np.random.normal(0, std, 100)) for std in range(1, 5)] +data = [sorted(np.random.normal(0, std, 100)) for std in range(1, 5)] fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), sharey=True) # plot the default violin ax1.set_title('Default violin plot') ax1.set_ylabel('Observed values') -ax1.violinplot(dat) +ax1.violinplot(data) # customized violin ax2.set_title('Customized violin plot') parts = ax2.violinplot( - dat, showmeans=False, showmedians=False, + data, showmeans=False, showmedians=False, showextrema=False) # customize colors @@ -63,21 +62,17 @@ def set_axis_style(ax, labels): pc.set_edgecolor('black') pc.set_alpha(1) -# medians -med = [np.percentile(sarr, 50) for sarr in dat] -# inter-quartile ranges -iqr = [[np.percentile(sarr, 25), np.percentile(sarr, 75)] for sarr in dat] -# upper and lower adjacent values -avs = [adjacent_values(sarr) for sarr in dat] +medians = np.percentile(data, 50, axis=1) +inter_quartile_ranges = list(zip(*(np.percentile(data, [25, 75], axis=1)))) +whiskers = [adjacent_values(sorted_array) for sorted_array in data] # plot whiskers as thin lines, quartiles as fat lines, # and medians as points -for i, median in enumerate(med): - # whiskers - ax2.plot([i + 1, i + 1], avs[i], '-', color='black', linewidth=1) - # quartiles - ax2.plot([i + 1, i + 1], iqr[i], '-', color='black', linewidth=5) - # medians +for i, median in enumerate(medians): + ax2.plot([i + 1, i + 1], whiskers[i], '-', color='black', linewidth=1) + ax2.plot( + [i + 1, i + 1], inter_quartile_ranges[i], '-', color='black', + linewidth=5) ax2.plot( i + 1, median, 'o', color='white', markersize=6, markeredgecolor='none') From 03946d410bae14ba2d294f84700d3207d39c3e06 Mon Sep 17 00:00:00 2001 From: Arvind Date: Sun, 30 Oct 2016 12:50:15 +0530 Subject: [PATCH 4/9] Replaced use of zip with call to transpose --- examples/statistics/customized_violin_demo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index a3fa7145443c..6085b220bece 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -62,8 +62,9 @@ def set_axis_style(ax, labels): pc.set_edgecolor('black') pc.set_alpha(1) -medians = np.percentile(data, 50, axis=1) -inter_quartile_ranges = list(zip(*(np.percentile(data, [25, 75], axis=1)))) +tmp = (np.percentile(data, [25, 50, 75], axis=1)).T +medians = tmp[:, 1] +inter_quartile_ranges = tmp[:, [0, 2]] whiskers = [adjacent_values(sorted_array) for sorted_array in data] # plot whiskers as thin lines, quartiles as fat lines, From 8044e9d4e328f4ac5894ef0c48e6d47e937cd60f Mon Sep 17 00:00:00 2001 From: Arvind Date: Sun, 30 Oct 2016 21:57:30 +0530 Subject: [PATCH 5/9] Changed variable 'tmp' to 'quartiles' and slightly altered transpose calculation --- examples/statistics/customized_violin_demo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index 6085b220bece..64e5fb0d7216 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -62,9 +62,9 @@ def set_axis_style(ax, labels): pc.set_edgecolor('black') pc.set_alpha(1) -tmp = (np.percentile(data, [25, 50, 75], axis=1)).T -medians = tmp[:, 1] -inter_quartile_ranges = tmp[:, [0, 2]] +quartiles = (np.percentile(data, [25, 50, 75], axis=1)) +medians = quartiles[1] +inter_quartile_ranges = quartiles[[0, 2]].T whiskers = [adjacent_values(sorted_array) for sorted_array in data] # plot whiskers as thin lines, quartiles as fat lines, From 88a8c2e98b916d4ada3bd392ab5c4d3ece1ec166 Mon Sep 17 00:00:00 2001 From: Arvind Date: Sun, 30 Oct 2016 23:07:32 +0530 Subject: [PATCH 6/9] Fixed recalculation of interquartile ranges --- examples/statistics/customized_violin_demo.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index 64e5fb0d7216..77b6a26cc1dc 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -18,14 +18,11 @@ import numpy as np -def adjacent_values(vals): - q1, q3 = np.percentile(vals, [25, 75]) - inter_quartile_range = q3 - q1 - - upper_adjacent_value = q3 + inter_quartile_range * 1.5 +def adjacent_values(vals, q1, q3): + upper_adjacent_value = q3 + (q3 - q1) * 1.5 upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1]) - lower_adjacent_value = q1 - inter_quartile_range * 1.5 + lower_adjacent_value = q1 - (q3 - q1) * 1.5 lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1) return [lower_adjacent_value, upper_adjacent_value] @@ -62,10 +59,11 @@ def set_axis_style(ax, labels): pc.set_edgecolor('black') pc.set_alpha(1) -quartiles = (np.percentile(data, [25, 50, 75], axis=1)) -medians = quartiles[1] -inter_quartile_ranges = quartiles[[0, 2]].T -whiskers = [adjacent_values(sorted_array) for sorted_array in data] +quartile1, medians, quartile3 = np.percentile(data, [25, 50, 75], axis=1) +inter_quartile_ranges = np.vstack([quartile1, quartile3]).T +whiskers = [ + adjacent_values(sorted_array, q1, q3) + for sorted_array, q1, q3 in zip(data, quartile1, quartile3)] # plot whiskers as thin lines, quartiles as fat lines, # and medians as points From 62c17168deced84ac824cf8854913c08c145fc8d Mon Sep 17 00:00:00 2001 From: Arvind Date: Mon, 31 Oct 2016 00:16:54 +0530 Subject: [PATCH 7/9] Replaced plot() calls with scatter() and vlines() calls --- examples/statistics/customized_violin_demo.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index 77b6a26cc1dc..1b625502dc0d 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -62,19 +62,15 @@ def set_axis_style(ax, labels): quartile1, medians, quartile3 = np.percentile(data, [25, 50, 75], axis=1) inter_quartile_ranges = np.vstack([quartile1, quartile3]).T whiskers = [ - adjacent_values(sorted_array, q1, q3) + adjacent_values(sorted_array, q1, q3) for sorted_array, q1, q3 in zip(data, quartile1, quartile3)] - -# plot whiskers as thin lines, quartiles as fat lines, -# and medians as points -for i, median in enumerate(medians): - ax2.plot([i + 1, i + 1], whiskers[i], '-', color='black', linewidth=1) - ax2.plot( - [i + 1, i + 1], inter_quartile_ranges[i], '-', color='black', - linewidth=5) - ax2.plot( - i + 1, median, 'o', color='white', - markersize=6, markeredgecolor='none') +whiskersMin, whiskersMax = list(zip(*whiskers)) +# plot medians as points, +# whiskers as thin lines, quartiles as fat lines +inds = np.arange(1, len(medians) + 1) +ax2.scatter(inds, medians, marker='o', color='white', s=30, zorder=3) +ax2.vlines(inds, quartile1, quartile3, color='k', linestyle='-', lw=5) +ax2.vlines(inds, whiskersMin, whiskersMax, color='k', linestyle='-', lw=1) # set style for the axes labels = ['A', 'B', 'C', 'D'] # labels From 9f4bbdd1297942e7eea2baea9bc9013ff15cedd7 Mon Sep 17 00:00:00 2001 From: Arvind Date: Mon, 31 Oct 2016 12:45:11 +0530 Subject: [PATCH 8/9] Removed unused variable and comments --- examples/statistics/customized_violin_demo.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index 1b625502dc0d..29229e4380b8 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -24,7 +24,7 @@ def adjacent_values(vals, q1, q3): lower_adjacent_value = q1 - (q3 - q1) * 1.5 lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1) - return [lower_adjacent_value, upper_adjacent_value] + return lower_adjacent_value, upper_adjacent_value def set_axis_style(ax, labels): @@ -60,13 +60,11 @@ def set_axis_style(ax, labels): pc.set_alpha(1) quartile1, medians, quartile3 = np.percentile(data, [25, 50, 75], axis=1) -inter_quartile_ranges = np.vstack([quartile1, quartile3]).T -whiskers = [ +whiskers = np.array([ adjacent_values(sorted_array, q1, q3) - for sorted_array, q1, q3 in zip(data, quartile1, quartile3)] -whiskersMin, whiskersMax = list(zip(*whiskers)) -# plot medians as points, -# whiskers as thin lines, quartiles as fat lines + for sorted_array, q1, q3 in zip(data, quartile1, quartile3)]) +whiskersMin, whiskersMax = whiskers[:, 0], whiskers[:, 1] + inds = np.arange(1, len(medians) + 1) ax2.scatter(inds, medians, marker='o', color='white', s=30, zorder=3) ax2.vlines(inds, quartile1, quartile3, color='k', linestyle='-', lw=5) From b3ce73bd0a65a7bbf705597364a6d9122e8e8cb4 Mon Sep 17 00:00:00 2001 From: Arvind Date: Mon, 31 Oct 2016 23:17:32 +0530 Subject: [PATCH 9/9] Removed unnecessary comments --- examples/statistics/customized_violin_demo.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/examples/statistics/customized_violin_demo.py b/examples/statistics/customized_violin_demo.py index 29229e4380b8..e37a034400d5 100644 --- a/examples/statistics/customized_violin_demo.py +++ b/examples/statistics/customized_violin_demo.py @@ -42,18 +42,15 @@ def set_axis_style(ax, labels): fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(9, 4), sharey=True) -# plot the default violin ax1.set_title('Default violin plot') ax1.set_ylabel('Observed values') ax1.violinplot(data) -# customized violin ax2.set_title('Customized violin plot') parts = ax2.violinplot( data, showmeans=False, showmedians=False, showextrema=False) -# customize colors for pc in parts['bodies']: pc.set_facecolor('#D43F3A') pc.set_edgecolor('black') @@ -71,7 +68,7 @@ def set_axis_style(ax, labels): ax2.vlines(inds, whiskersMin, whiskersMax, color='k', linestyle='-', lw=1) # set style for the axes -labels = ['A', 'B', 'C', 'D'] # labels +labels = ['A', 'B', 'C', 'D'] for ax in [ax1, ax2]: set_axis_style(ax, labels)