Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7cf8b35

Browse files
committed
ENH: cbook.boxplot_stats can now set the whiskers to -range- (min/max) or specified percentiles. If IQR is zero, min/max are used
1 parent 858964a commit 7cf8b35

File tree

2 files changed

+71
-11
lines changed

2 files changed

+71
-11
lines changed

lib/matplotlib/cbook.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,12 +1860,17 @@ def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None):
18601860
Data that will be represented in the boxplots. Should have 2 or fewer
18611861
dimensions.
18621862
1863-
whis : float (default = 1.5)
1864-
Determines the reach of the whiskers past the first and third
1865-
quartiles (e.g., Q3 + whis*IQR). Beyond the whiskers, data are
1866-
considers outliers and are plotted as individual points. Set
1867-
this to an unreasonably high value to force the whiskers to
1868-
show the min and max data. (IQR = interquartile range, Q3-Q1)
1863+
whis : float, string, or sequence (default = 1.5)
1864+
As a float, determines the reach of the whiskers past the first and
1865+
third quartiles (e.g., Q3 + whis*IQR). Beyond the whiskers, data are
1866+
considered outliers and are plotted as individual points. Set this
1867+
to an unreasonably high value to force the whiskers to show the min
1868+
and max data. (IQR = interquartile range, Q3-Q1). Alternatively, set
1869+
this to an ascending sequence of percentiles (e.g., [5, 95]) to set
1870+
the whiskers at specific percentiles of the data. Finally, can be the
1871+
string 'range' to force the whiskers to the min and max of the data.
1872+
In the edge case that the 25th and 75th percentiles are equivalent,
1873+
`whis` will be automatically set to 'range'.
18691874
18701875
bootstrap : int or None (default)
18711876
Number of times the confidence intervals around the median should
@@ -1968,22 +1973,38 @@ def _compute_conf_interval(data, med, iqr, bootstrap):
19681973

19691974
# interquartile range
19701975
stats['iqr'] = stats['q3'] - stats['q1']
1976+
if stats['iqr'] == 0:
1977+
whis = 'range'
19711978

19721979
# conf. interval around median
19731980
stats['cilo'], stats['cihi'] = _compute_conf_interval(
19741981
x, stats['med'], stats['iqr'], bootstrap
19751982
)
19761983

1977-
# highest non-outliers
1978-
hival = stats['q3'] + whis * stats['iqr']
1984+
# lowest/highest non-outliers
1985+
if np.isscalar(whis):
1986+
if np.isreal(whis):
1987+
loval = stats['q1'] - whis * stats['iqr']
1988+
hival = stats['q3'] + whis * stats['iqr']
1989+
elif whis in ['range', 'limit', 'limits', 'min/max']:
1990+
loval = np.min(x)
1991+
hival = np.max(x)
1992+
else:
1993+
whismsg = 'whis must be a float, valid string, or '\
1994+
'list of percentiles'
1995+
raise ValueError(whismsg)
1996+
else:
1997+
loval = np.percentile(x, whis[0])
1998+
hival = np.percentile(x, whis[1])
1999+
2000+
# get high extreme
19792001
wiskhi = np.compress(x <= hival, x)
19802002
if len(wiskhi) == 0 or np.max(wiskhi) < stats['q3']:
19812003
stats['whishi'] = stats['q3']
19822004
else:
19832005
stats['whishi'] = max(wiskhi)
19842006

19852007
# get low extreme
1986-
loval = stats['q1'] - whis * stats['iqr']
19872008
wisklo = np.compress(x >= loval, x)
19882009
if len(wisklo) == 0 or np.min(wisklo) > stats['q1']:
19892010
stats['whislo'] = stats['q1']

lib/matplotlib/tests/test_cbook.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def setup(self):
101101
self.nrows = 37
102102
self.ncols = 4
103103
self.data = np.random.lognormal(size=(self.nrows, self.ncols),
104-
mean=1.5, sigma=1.75)
104+
mean=1.5, sigma=1.75)
105105
self.known_keys = sorted([
106106
'mean', 'med', 'q1', 'q3', 'iqr',
107107
'cilo', 'cihi', 'whislo', 'whishi',
@@ -140,6 +140,17 @@ def setup(self):
140140
'label': 'Test1'
141141
}
142142

143+
self.known_res_percentiles = {
144+
'whislo': 0.1933685896907924,
145+
'whishi': 42.232049135969874
146+
}
147+
148+
self.known_res_range = {
149+
'whislo': 0.042143774965502923,
150+
'whishi': 92.554670752188699
151+
152+
}
153+
143154
def test_form_main_list(self):
144155
assert_true(isinstance(self.std_results, list))
145156

@@ -175,7 +186,7 @@ def test_results_bootstrapped(self):
175186
self.known_bootstrapped_ci[key]
176187
)
177188

178-
def test_results_whiskers(self):
189+
def test_results_whiskers_float(self):
179190
results = cbook.boxplot_stats(self.data, whis=3)
180191
res = results[0]
181192
for key in list(self.known_whis3_res.keys()):
@@ -189,6 +200,34 @@ def test_results_whiskers(self):
189200
self.known_whis3_res[key]
190201
)
191202

203+
def test_results_whiskers_range(self):
204+
results = cbook.boxplot_stats(self.data, whis='range')
205+
res = results[0]
206+
for key in list(self.known_res_range.keys()):
207+
if key != 'fliers':
208+
assert_statement = assert_approx_equal
209+
else:
210+
assert_statement = assert_array_almost_equal
211+
212+
assert_statement(
213+
res[key],
214+
self.known_res_range[key]
215+
)
216+
217+
def test_results_whiskers_percentiles(self):
218+
results = cbook.boxplot_stats(self.data, whis=[5, 95])
219+
res = results[0]
220+
for key in list(self.known_res_percentiles.keys()):
221+
if key != 'fliers':
222+
assert_statement = assert_approx_equal
223+
else:
224+
assert_statement = assert_array_almost_equal
225+
226+
assert_statement(
227+
res[key],
228+
self.known_res_percentiles[key]
229+
)
230+
192231
def test_results_withlabels(self):
193232
labels = ['Test1', 2, 3, 4]
194233
results = cbook.boxplot_stats(self.data, labels=labels)

0 commit comments

Comments
 (0)