@@ -171,13 +171,13 @@ def new_function():
171171 pending : bool, optional
172172 If True, uses a PendingDeprecationWarning instead of a
173173 DeprecationWarning.
174-
174+
175175 Example
176176 -------
177177 @deprecated('1.4.0')
178178 def the_function_to_deprecate():
179179 pass
180-
180+
181181 """
182182 def deprecate (func , message = message , name = name , alternative = alternative ,
183183 pending = pending ):
@@ -1884,19 +1884,25 @@ def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None):
18841884 -------
18851885 bxpstats : A list of dictionaries containing the results for each column
18861886 of data. Keys are as
1887+
1888+ Notes
1889+ -----
1890+ The non-bootstrapping approach to the confidence interval uses a
1891+ Gaussian-based asymptotic approximation.
1892+
1893+ General approach from:
1894+ McGill, R., Tukey, J.W., and Larsen, W.A. (1978) "Variations of
1895+ Boxplots", The American Statistician, 32:12-16.
18871896 '''
18881897
18891898 def _bootstrap_median (data , N = 5000 ):
18901899 # determine 95% confidence intervals of the median
18911900 M = len (data )
18921901 percentiles = [2.5 , 97.5 ]
18931902
1894- # initialize the array of estimates
1895- estimate = np .empty (N )
1896- for n in range (N ):
1897- bsIndex = np .random .random_integers (0 , M - 1 , M )
1898- bsData = data [bsIndex ]
1899- estimate [n ] = np .percentile (bsData , 50 )
1903+ ii = np .random .randint (M , size = (N , M ))
1904+ bsData = x [ii ]
1905+ estimate = np .median (bsData , axis = 1 , overwrite_input = True )
19001906
19011907 CI = np .percentile (estimate , percentiles )
19021908 return CI
@@ -1909,12 +1915,7 @@ def _compute_conf_interval(data, med, iqr, bootstrap):
19091915 notch_min = CI [0 ]
19101916 notch_max = CI [1 ]
19111917 else :
1912- # Estimate notch locations using Gaussian-based
1913- # asymptotic approximation.
1914- #
1915- # For discussion: McGill, R., Tukey, J.W.,
1916- # and Larsen, W.A. (1978) "Variations of
1917- # Boxplots", The American Statistician, 32:12-16.
1918+
19181919 N = len (data )
19191920 notch_min = med - 1.57 * iqr / np .sqrt (N )
19201921 notch_max = med + 1.57 * iqr / np .sqrt (N )
@@ -1950,64 +1951,58 @@ def _compute_conf_interval(data, med, iqr, bootstrap):
19501951
19511952 ncols = len (X )
19521953 if labels is None :
1953- labels = [None ] * ncols
1954+ labels = [str ( i ) for i in range ( ncols )]
19541955 elif len (labels ) != ncols :
19551956 raise ValueError ("Dimensions of labels and X must be compatible" )
19561957
19571958 for ii , (x , label ) in enumerate (zip (X , labels ), start = 0 ):
19581959 # empty dict
19591960 stats = {}
1960-
1961- # set the label
1962- if label is not None :
1963- stats ['label' ] = label
1964- else :
1965- stats ['label' ] = ii
1961+ stats ['label' ] = label
19661962
19671963 # arithmetic mean
19681964 stats ['mean' ] = np .mean (x )
19691965
19701966 # medians and quartiles
1971- stats ['q1' ], stats ['med' ], stats ['q3' ] = \
1972- np .percentile (x , [25 , 50 , 75 ])
1967+ q1 , med , q3 = np .percentile (x , [25 , 50 , 75 ])
19731968
19741969 # interquartile range
1975- stats ['iqr' ] = stats [ 'q3' ] - stats [ 'q1' ]
1970+ stats ['iqr' ] = q3 - q1
19761971 if stats ['iqr' ] == 0 :
19771972 whis = 'range'
19781973
19791974 # conf. interval around median
19801975 stats ['cilo' ], stats ['cihi' ] = _compute_conf_interval (
1981- x , stats [ ' med' ] , stats ['iqr' ], bootstrap
1976+ x , med , stats ['iqr' ], bootstrap
19821977 )
19831978
19841979 # lowest/highest non-outliers
19851980 if np .isscalar (whis ):
19861981 if np .isreal (whis ):
1987- loval = stats [ 'q1' ] - whis * stats ['iqr' ]
1988- hival = stats [ 'q3' ] + whis * stats ['iqr' ]
1982+ loval = q1 - whis * stats ['iqr' ]
1983+ hival = q3 + whis * stats ['iqr' ]
19891984 elif whis in ['range' , 'limit' , 'limits' , 'min/max' ]:
19901985 loval = np .min (x )
19911986 hival = np .max (x )
19921987 else :
1993- whismsg = 'whis must be a float, valid string, or ' \
1994- 'list of percentiles'
1988+ whismsg = ( 'whis must be a float, valid string, or '
1989+ 'list of percentiles' )
19951990 raise ValueError (whismsg )
19961991 else :
19971992 loval = np .percentile (x , whis [0 ])
19981993 hival = np .percentile (x , whis [1 ])
19991994
20001995 # get high extreme
20011996 wiskhi = np .compress (x <= hival , x )
2002- if len (wiskhi ) == 0 or np .max (wiskhi ) < stats [ 'q3' ] :
2003- stats ['whishi' ] = stats [ 'q3' ]
1997+ if len (wiskhi ) == 0 or np .max (wiskhi ) < q3 :
1998+ stats ['whishi' ] = q3
20041999 else :
20052000 stats ['whishi' ] = np .max (wiskhi )
20062001
20072002 # get low extreme
20082003 wisklo = np .compress (x >= loval , x )
2009- if len (wisklo ) == 0 or np .min (wisklo ) > stats [ 'q1' ] :
2010- stats ['whislo' ] = stats [ 'q1' ]
2004+ if len (wisklo ) == 0 or np .min (wisklo ) > q1 :
2005+ stats ['whislo' ] = q1
20112006 else :
20122007 stats ['whislo' ] = np .min (wisklo )
20132008
@@ -2017,6 +2012,8 @@ def _compute_conf_interval(data, med, iqr, bootstrap):
20172012 np .compress (x > stats ['whishi' ], x )
20182013 ])
20192014
2015+ # add in the remaining stats and append to final output
2016+ stats ['q1' ], stats ['med' ], stats ['q3' ] = q1 , med , q3
20202017 bxpstats .append (stats )
20212018
20222019 return bxpstats
0 commit comments