@@ -171,13 +171,13 @@ def new_function():
171
171
pending : bool, optional
172
172
If True, uses a PendingDeprecationWarning instead of a
173
173
DeprecationWarning.
174
-
174
+
175
175
Example
176
176
-------
177
177
@deprecated('1.4.0')
178
178
def the_function_to_deprecate():
179
179
pass
180
-
180
+
181
181
"""
182
182
def deprecate (func , message = message , name = name , alternative = alternative ,
183
183
pending = pending ):
@@ -1884,19 +1884,25 @@ def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None):
1884
1884
-------
1885
1885
bxpstats : A list of dictionaries containing the results for each column
1886
1886
of data. Keys are as
1887
+
1888
+ Notes
1889
+ -----
1890
+ Non-bootstrapping approach to confidence interval uses Gaussian-based
1891
+ asymptotic approximation.
1892
+
1893
+ General approach from:
1894
+ McGill, R., Tukey, J.W., and Larsen, W.A. (1978) "Variations of
1895
+ Boxplots", The American Statistician, 32:12-16.
1887
1896
'''
1888
1897
1889
1898
def _bootstrap_median (data , N = 5000 ):
1890
1899
# determine 95% confidence intervals of the median
1891
1900
M = len (data )
1892
1901
percentiles = [2.5 , 97.5 ]
1893
1902
1894
- # initialize the array of estimates
1895
- estimate = np .empty (N )
1896
- for n in range (N ):
1897
- bsIndex = np .random .random_integers (0 , M - 1 , M )
1898
- bsData = data [bsIndex ]
1899
- estimate [n ] = np .percentile (bsData , 50 )
1903
+ ii = np .random .randint (M , size = (N , M ))
1904
+ bsData = x [ii ]
1905
+ estimate = np .median (bsData , axis = 1 , overwrite_input = True )
1900
1906
1901
1907
CI = np .percentile (estimate , percentiles )
1902
1908
return CI
@@ -1909,12 +1915,7 @@ def _compute_conf_interval(data, med, iqr, bootstrap):
1909
1915
notch_min = CI [0 ]
1910
1916
notch_max = CI [1 ]
1911
1917
else :
1912
- # Estimate notch locations using Gaussian-based
1913
- # asymptotic approximation.
1914
- #
1915
- # For discussion: McGill, R., Tukey, J.W.,
1916
- # and Larsen, W.A. (1978) "Variations of
1917
- # Boxplots", The American Statistician, 32:12-16.
1918
+
1918
1919
N = len (data )
1919
1920
notch_min = med - 1.57 * iqr / np .sqrt (N )
1920
1921
notch_max = med + 1.57 * iqr / np .sqrt (N )
@@ -1950,64 +1951,58 @@ def _compute_conf_interval(data, med, iqr, bootstrap):
1950
1951
1951
1952
ncols = len (X )
1952
1953
if labels is None :
1953
- labels = [None ] * ncols
1954
+ labels = [str ( i ) for i in range ( ncols )]
1954
1955
elif len (labels ) != ncols :
1955
1956
raise ValueError ("Dimensions of labels and X must be compatible" )
1956
1957
1957
1958
for ii , (x , label ) in enumerate (zip (X , labels ), start = 0 ):
1958
1959
# empty dict
1959
1960
stats = {}
1960
-
1961
- # set the label
1962
- if label is not None :
1963
- stats ['label' ] = label
1964
- else :
1965
- stats ['label' ] = ii
1961
+ stats ['label' ] = label
1966
1962
1967
1963
# arithmetic mean
1968
1964
stats ['mean' ] = np .mean (x )
1969
1965
1970
1966
# medians and quartiles
1971
- stats ['q1' ], stats ['med' ], stats ['q3' ] = \
1972
- np .percentile (x , [25 , 50 , 75 ])
1967
+ q1 , med , q3 = np .percentile (x , [25 , 50 , 75 ])
1973
1968
1974
1969
# interquartile range
1975
- stats ['iqr' ] = stats [ 'q3' ] - stats [ 'q1' ]
1970
+ stats ['iqr' ] = q3 - q1
1976
1971
if stats ['iqr' ] == 0 :
1977
1972
whis = 'range'
1978
1973
1979
1974
# conf. interval around median
1980
1975
stats ['cilo' ], stats ['cihi' ] = _compute_conf_interval (
1981
- x , stats [ ' med' ] , stats ['iqr' ], bootstrap
1976
+ x , med , stats ['iqr' ], bootstrap
1982
1977
)
1983
1978
1984
1979
# lowest/highest non-outliers
1985
1980
if np .isscalar (whis ):
1986
1981
if np .isreal (whis ):
1987
- loval = stats [ 'q1' ] - whis * stats ['iqr' ]
1988
- hival = stats [ 'q3' ] + whis * stats ['iqr' ]
1982
+ loval = q1 - whis * stats ['iqr' ]
1983
+ hival = q3 + whis * stats ['iqr' ]
1989
1984
elif whis in ['range' , 'limit' , 'limits' , 'min/max' ]:
1990
1985
loval = np .min (x )
1991
1986
hival = np .max (x )
1992
1987
else :
1993
- whismsg = 'whis must be a float, valid string, or ' \
1994
- 'list of percentiles'
1988
+ whismsg = ( 'whis must be a float, valid string, or '
1989
+ 'list of percentiles' )
1995
1990
raise ValueError (whismsg )
1996
1991
else :
1997
1992
loval = np .percentile (x , whis [0 ])
1998
1993
hival = np .percentile (x , whis [1 ])
1999
1994
2000
1995
# get high extreme
2001
1996
wiskhi = np .compress (x <= hival , x )
2002
- if len (wiskhi ) == 0 or np .max (wiskhi ) < stats [ 'q3' ] :
2003
- stats ['whishi' ] = stats [ 'q3' ]
1997
+ if len (wiskhi ) == 0 or np .max (wiskhi ) < q3 :
1998
+ stats ['whishi' ] = q3
2004
1999
else :
2005
2000
stats ['whishi' ] = np .max (wiskhi )
2006
2001
2007
2002
# get low extreme
2008
2003
wisklo = np .compress (x >= loval , x )
2009
- if len (wisklo ) == 0 or np .min (wisklo ) > stats [ 'q1' ] :
2010
- stats ['whislo' ] = stats [ 'q1' ]
2004
+ if len (wisklo ) == 0 or np .min (wisklo ) > q1 :
2005
+ stats ['whislo' ] = q1
2011
2006
else :
2012
2007
stats ['whislo' ] = np .min (wisklo )
2013
2008
@@ -2017,6 +2012,8 @@ def _compute_conf_interval(data, med, iqr, bootstrap):
2017
2012
np .compress (x > stats ['whishi' ], x )
2018
2013
])
2019
2014
2015
+ # add in the remaining stats and append to final output
2016
+ stats ['q1' ], stats ['med' ], stats ['q3' ] = q1 , med , q3
2020
2017
bxpstats .append (stats )
2021
2018
2022
2019
return bxpstats
0 commit comments