@@ -1832,6 +1832,145 @@ def delete_masked_points(*args):
18321832 return margs
18331833
18341834
def boxplot_stats(X, whis=1.5, bootstrap=None):
    """
    Return a list of dictionaries of statistics used to draw a series of
    box and whisker plots.

    The `Returns` section lists the keys of each dictionary. Users can skip
    this function and pass a user-defined list of such dictionaries to the
    new `axes.bxp` method instead of relying on matplotlib to do the
    calculations.

    Parameters
    ----------
    X : array-like
        Data that will be represented in the boxplots. Should have 2 or
        fewer dimensions. A flat sequence or 1-D array is one dataset; a
        2-D array with a single row or a single column is one dataset;
        any other 2-D array contributes one dataset per column; a sequence
        of sequences contributes one dataset per element.

    whis : float (default = 1.5)
        Determines the reach of the whiskers past the first and third
        quartiles (e.g., Q3 + whis*IQR). Beyond the whiskers, data are
        considered outliers and are plotted as individual points. Set
        this to an unreasonably high value to force the whiskers to
        show the min and max data. (IQR = interquartile range, Q3-Q1)

    bootstrap : int or None (default)
        Number of times the confidence intervals around the median should
        be bootstrapped (percentile method). If None, a Gaussian-based
        asymptotic approximation is used instead.

    Returns
    -------
    bxpstats : list of dict
        One dictionary per dataset, with the following keys:

        - 'mean': arithmetic mean
        - 'med': median (50th percentile)
        - 'q1': first quartile (25th percentile)
        - 'q3': third quartile (75th percentile)
        - 'iqr': interquartile range, q3 - q1
        - 'cilo': lower bound of the confidence interval around the median
        - 'cihi': upper bound of the confidence interval around the median
        - 'whislo': end of the lower whisker
        - 'whishi': end of the upper whisker
        - 'outliers': array of the points lying beyond the whiskers

    Raises
    ------
    ValueError
        If `X` is an array whose number of dimensions is not 1 or 2.
    """

    def _bootstrap_median(data, N=5000):
        # Determine the 95% confidence interval of the median by
        # resampling `data` with replacement N times.
        M = len(data)
        percentiles = [2.5, 97.5]

        estimate = np.empty(N)
        for n in range(N):
            # randint's upper bound is exclusive, so (0, M) draws valid
            # indices 0..M-1 (replaces the deprecated random_integers).
            bsIndex = np.random.randint(0, M, M)
            estimate[n] = np.percentile(data[bsIndex], 50)

        return np.percentile(estimate, percentiles)

    def _compute_conf_interval(data, med, iqr, bootstrap):
        if bootstrap is not None:
            # Bootstrap estimate of the notch locations.
            notch_min, notch_max = _bootstrap_median(data, N=bootstrap)
        else:
            # Estimate notch locations using Gaussian-based
            # asymptotic approximation.
            #
            # For discussion: McGill, R., Tukey, J.W.,
            # and Larsen, W.A. (1978) "Variations of
            # Boxplots", The American Statistician, 32:12-16.
            half_width = 1.57 * iqr / np.sqrt(len(data))
            notch_min = med - half_width
            notch_max = med + half_width

        return notch_min, notch_max

    def _split_datasets(X):
        # Normalize every accepted layout of X to a list of 1-D arrays.
        if hasattr(X, 'shape'):
            ndim = len(X.shape)
            if ndim == 1:
                # Elements that are themselves sequences (e.g. an object
                # array of arrays) are separate datasets; otherwise the
                # whole vector is a single dataset.
                if hasattr(X[0], '__len__'):
                    X = list(X)
                else:
                    X = [X]
            elif ndim == 2:
                nrows, ncols = X.shape
                if nrows == 1 or ncols == 1:
                    # Single row or single column: one dataset. It is
                    # flattened below so that its length is the number
                    # of points rather than the number of rows.
                    X = [X]
                else:
                    X = [X[:, i] for i in range(ncols)]
            else:
                raise ValueError("input `X` must have 2 or fewer dimensions")
        elif not hasattr(X[0], '__len__'):
            # Flat sequence of scalars: a single dataset.
            X = [X]

        # Arrays are required for the boolean masking and fancy indexing
        # done later; plain lists support neither.
        return [np.ravel(x) for x in X]

    # output is a list of dicts
    bxpstats = []

    for x in _split_datasets(X):
        stats = {}

        # arithmetic mean
        stats['mean'] = np.mean(x)

        # medians and quartiles
        stats['q1'], stats['med'], stats['q3'] = \
            np.percentile(x, [25, 50, 75])

        # interquartile range
        stats['iqr'] = stats['q3'] - stats['q1']

        # conf. interval around median
        stats['cilo'], stats['cihi'] = _compute_conf_interval(
            x, stats['med'], stats['iqr'], bootstrap
        )

        # highest non-outlier; never let the whisker end inside the box
        hival = stats['q3'] + whis * stats['iqr']
        wiskhi = np.compress(x <= hival, x)
        if len(wiskhi) == 0 or np.max(wiskhi) < stats['q3']:
            stats['whishi'] = stats['q3']
        else:
            stats['whishi'] = np.max(wiskhi)

        # lowest non-outlier; never let the whisker end inside the box
        loval = stats['q1'] - whis * stats['iqr']
        wisklo = np.compress(x >= loval, x)
        if len(wisklo) == 0 or np.min(wisklo) > stats['q1']:
            stats['whislo'] = stats['q1']
        else:
            stats['whislo'] = np.min(wisklo)

        # compute a single array of outliers (low ones first, then high)
        stats['outliers'] = np.hstack([
            np.compress(x < stats['whislo'], x),
            np.compress(x > stats['whishi'], x)
        ])

        bxpstats.append(stats)

    return bxpstats
1972+
1973+
18351974# FIXME I don't think this is used anywhere
18361975def unmasked_index_ranges (mask , compressed = True ):
18371976 """
0 commit comments