diff --git a/doc/api/next_api_changes/2018-09-18-DS-mlab.rst b/doc/api/next_api_changes/2018-09-18-DS-mlab.rst new file mode 100644 index 000000000000..644d5dd211ef --- /dev/null +++ b/doc/api/next_api_changes/2018-09-18-DS-mlab.rst @@ -0,0 +1,75 @@ +Removal of deprecated :mod:`matplotlib.mlab` code +------------------------------------------------- + +Lots of code inside the :mod:`matplotlib.mlab` module which was deprecated +in Matplotlib 2.2 has been removed. See below for a list: + +- ``mlab.exp_safe`` (use `numpy.exp` instead) +- ``mlab.amap`` +- ``mlab.logspace`` (use `numpy.logspace` instead) +- ``mlab.rms_flat`` +- ``mlab.l1norm`` (use ``numpy.linalg.norm(a, ord=1)`` instead) +- ``mlab.l2norm`` (use ``numpy.linalg.norm(a, ord=2)`` instead) +- ``mlab.norm_flat`` (use ``numpy.linalg.norm(a.flat, ord=2)`` instead) +- ``mlab.frange`` (use `numpy.arange` instead) +- ``mlab.identity`` (use `numpy.identity` instead) +- ``mlab.base_repr`` +- ``mlab.binary_repr`` +- ``mlab.ispower2`` +- ``mlab.log2`` (use `numpy.log2` instead) +- ``mlab.isvector`` +- ``mlab.movavg`` +- ``mlab.safe_isinf`` (use `numpy.isinf` instead) +- ``mlab.safe_isnan`` (use `numpy.isnan` instead) +- ``mlab.cohere_pairs`` (use `scipy.signal.coherence` instead) +- ``mlab.entropy`` (use `scipy.stats.entropy` instead) +- ``mlab.normpdf`` (use `scipy.stats.norm.pdf` instead) +- ``mlab.find`` (use ``np.nonzero(np.ravel(condition))`` instead) +- ``mlab.longest_contiguous_ones`` +- ``mlab.longest_ones`` +- ``mlab.PCA`` +- ``mlab.prctile`` (use `numpy.percentile` instead) +- ``mlab.prctile_rank`` +- ``mlab.center_matrix`` +- ``mlab.rk4`` (use `scipy.integrate.ode` instead) +- ``mlab.bivariate_normal`` +- ``mlab.get_xyz_where`` +- ``mlab.get_sparse_matrix`` +- ``mlab.dist`` (use `numpy.hypot` instead) +- ``mlab.dist_point_to_segment`` +- ``mlab.griddata`` (use `scipy.interpolate.griddata`) +- ``mlab.less_simple_linear_interpolation`` (use `numpy.interp`) +- ``mlab.slopes`` +- ``mlab.stineman_interp`` +- ``mlab.segments_intersect`` +- ``mlab.fftsurr`` +- ``mlab.offset_line`` +- ``mlab.quad2cubic`` +- ``mlab.vector_lengths`` +- ``mlab.distances_along_curve`` +- ``mlab.path_length`` +- ``mlab.cross_from_above`` +- ``mlab.cross_from_below`` +- ``mlab.contiguous_regions`` (use `.cbook.contiguous_regions` instead) +- ``mlab.is_closed_polygon`` +- ``mlab.poly_between`` +- ``mlab.poly_below`` +- ``mlab.inside_poly`` +- ``mlab.csv2rec`` +- ``mlab.rec2csv`` (use `numpy.recarray.tofile` instead) +- ``mlab.rec2text`` (use `numpy.recarray.tofile` instead) +- ``mlab.rec_summarize`` +- ``mlab.rec_join`` +- ``mlab.recs_join`` +- ``mlab.rec_groupby`` +- ``mlab.rec_keep_fields`` +- ``mlab.rec_drop_fields`` +- ``mlab.rec_append_fields`` +- ``mlab.csvformat_factory`` +- ``mlab.get_formatd`` +- ``mlab.FormatDatetime`` (use `datetime.datetime.strftime` instead) +- ``mlab.FormatDate`` (use `datetime.date.strftime` instead) +- ``mlab.FormatMillions``, ``mlab.FormatThousands``, ``mlab.FormatPercent``, + ``mlab.FormatBool``, ``mlab.FormatInt``, ``mlab.FormatFloat``, + ``mlab.FormatFormatStr``, ``mlab.FormatString``, ``mlab.FormatObj`` +- ``mlab.donothing_callback`` diff --git a/doc/api/next_api_changes/2018-09-18-DS-pylab.rst b/doc/api/next_api_changes/2018-09-18-DS-pylab.rst new file mode 100644 index 000000000000..4dae6b64af93 --- /dev/null +++ b/doc/api/next_api_changes/2018-09-18-DS-pylab.rst @@ -0,0 +1,52 @@ +Removal of deprecated :mod:`matplotlib.pylab` code +-------------------------------------------------- + +Lots of code inside the 
:mod:`matplotlib.mlab` module which was deprecated +in Matplotlib 2.2 has been removed. This means the following functions are +no longer available in the `matplotlib.pylab` module: + + - ``amap`` + - ``base_repr`` + - ``binary_repr`` + - ``bivariate_normal`` + - ``center_matrix`` + - ``csv2rec`` (use `numpy.recarray.tofile` instead) + - ``dist`` (use `numpy.hypot` instead) + - ``dist_point_to_segment`` + - ``distances_along_curve`` + - ``entropy`` (use `scipy.stats.entropy` instead) + - ``exp_safe`` (use `numpy.exp` instead) + - ``fftsurr`` + - ``find`` (use ``np.nonzero(np.ravel(condition))`` instead) + - ``frange`` (use `numpy.arange` instead) + - ``get_sparse_matrix`` + - ``get_xyz_where`` + - ``griddata`` (use `scipy.interpolate.griddata` instead) + - ``identity`` (use `numpy.identity` instead) + - ``inside_poly`` + - ``is_closed_polygon`` + - ``ispower2`` + - ``isvector`` + - ``l1norm`` (use ``numpy.linalg.norm(a, ord=1)`` instead) + - ``l2norm`` (use ``numpy.linalg.norm(a, ord=2)`` instead) + - ``log2`` (use `numpy.log2` instead) + - ``longest_contiguous_ones`` + - ``longest_ones`` + - ``movavg`` + - ``norm_flat`` (use ``numpy.linalg.norm(a.flat, ord=2)`` instead) + - ``normpdf`` (use `scipy.stats.norm.pdf` instead) + - ``path_length`` + - ``poly_below`` + - ``poly_between`` + - ``prctile`` (use `numpy.percentile` instead) + - ``prctile_rank`` + - ``rec2csv`` (use `numpy.recarray.tofile` instead) + - ``rec_append_fields`` + - ``rec_drop_fields`` + - ``rec_join`` + - ``rk4`` (use `scipy.integrate.ode` instead) + - ``rms_flat`` + - ``segments_intersect`` + - ``slopes`` + - ``stineman_interp`` + - ``vector_lengths`` diff --git a/examples/event_handling/poly_editor.py b/examples/event_handling/poly_editor.py index c0a27e2071c4..1afa89bffd12 100644 --- a/examples/event_handling/poly_editor.py +++ b/examples/event_handling/poly_editor.py @@ -9,7 +9,34 @@ import numpy as np from matplotlib.lines import Line2D from matplotlib.artist import Artist -from matplotlib.mlab import dist_point_to_segment + + +def dist(x, y): + """ + Return the distance between two points. + """ + d = x - y + return np.sqrt(np.dot(d, d)) + + +def dist_point_to_segment(p, s0, s1): + """ + Get the distance of a point to a segment. + *p*, *s0*, *s1* are *xy* sequences + This algorithm from + http://geomalgorithms.com/a02-_lines.html + """ + v = s1 - s0 + w = p - s0 + c1 = np.dot(w, v) + if c1 <= 0: + return dist(p, s0) + c2 = np.dot(v, v) + if c2 <= c1: + return dist(p, s1) + b = c1 / c2 + pb = s0 + b * v + return dist(p, pb) class PolygonInteractor(object): diff --git a/lib/matplotlib/mlab.py b/lib/matplotlib/mlab.py index dd1e91cb0984..4009763fbcff 100644 --- a/lib/matplotlib/mlab.py +++ b/lib/matplotlib/mlab.py @@ -1,9 +1,11 @@ """ Numerical python functions written for compatibility with MATLAB -commands with the same names. +commands with the same names. Most numerical python functions can be found in +the `numpy` and `scipy` libraries. What remains here is code for performing +spectral computations. 
-MATLAB compatible functions ---------------------------- +Spectral functions +------------------- :func:`cohere` Coherence (normalized cross spectral density) @@ -20,11 +22,6 @@ :func:`specgram` Spectrogram (spectrum over segments of time) -Miscellaneous functions ------------------------ - -Functions that don't exist in MATLAB, but are useful anyway: - :func:`complex_spectrum` Return the complex-valued frequency spectrum of a signal @@ -56,29 +53,16 @@ Apply a window along a given axis """ -import copy import csv -import operator -import os import warnings import numpy as np import matplotlib.cbook as cbook from matplotlib import docstring -from matplotlib.path import Path import math -@cbook.deprecated("2.2", alternative='numpy.logspace or numpy.geomspace') -def logspace(xmin, xmax, N): - ''' - Return N values logarithmically spaced between xmin and xmax. - - ''' - return np.exp(np.linspace(np.log(xmin), np.log(xmax), N)) - - def window_hanning(x): ''' Return x times the hanning window of len(x). @@ -1203,1594 +1187,249 @@ def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning, return Cxy, f -@cbook.deprecated('2.2') -def donothing_callback(*args): - pass - - -@cbook.deprecated('2.2', 'scipy.signal.coherence') -def cohere_pairs(X, ij, NFFT=256, Fs=2, detrend=detrend_none, - window=window_hanning, noverlap=0, - preferSpeedOverMemory=True, - progressCallback=donothing_callback, - returnPxx=False): - - """ - Compute the coherence and phase for all pairs *ij*, in *X*. - - *X* is a *numSamples* * *numCols* array - - *ij* is a list of tuples. Each tuple is a pair of indexes into - the columns of X for which you want to compute coherence. For - example, if *X* has 64 columns, and you want to compute all - nonredundant pairs, define *ij* as:: - - ij = [] - for i in range(64): - for j in range(i+1,64): - ij.append( (i,j) ) - - *preferSpeedOverMemory* is an optional bool. Defaults to true. If - False, limits the caching by only making one, rather than two, - complex cache arrays. This is useful if memory becomes critical. - Even when *preferSpeedOverMemory* is False, :func:`cohere_pairs` - will still give significant performance gains over calling - :func:`cohere` for each pair, and will use subtantially less - memory than if *preferSpeedOverMemory* is True. In my tests with - a 43000,64 array over all nonredundant pairs, - *preferSpeedOverMemory* = True delivered a 33% performance boost - on a 1.7GHZ Athlon with 512MB RAM compared with - *preferSpeedOverMemory* = False. But both solutions were more - than 10x faster than naively crunching all possible pairs through - :func:`cohere`. - - Returns - ------- - Cxy : dictionary of (*i*, *j*) tuples -> coherence vector for - that pair. i.e., ``Cxy[(i,j) = cohere(X[:,i], X[:,j])``. - Number of dictionary keys is ``len(ij)``. - - Phase : dictionary of phases of the cross spectral density at - each frequency for each pair. Keys are (*i*, *j*). - - freqs : vector of frequencies, equal in length to either the - coherence or phase vectors for any (*i*, *j*) key. - - e.g., to make a coherence Bode plot:: - - subplot(211) - plot( freqs, Cxy[(12,19)]) - subplot(212) - plot( freqs, Phase[(12,19)]) - - For a large number of pairs, :func:`cohere_pairs` can be much more - efficient than just calling :func:`cohere` for each pair, because - it caches most of the intensive computations. If :math:`N` is the - number of pairs, this function is :math:`O(N)` for most of the - heavy lifting, whereas calling cohere for each pair is - :math:`O(N^2)`. 
However, because of the caching, it is also more - memory intensive, making 2 additional complex arrays with - approximately the same number of elements as *X*. - - See :file:`test/cohere_pairs_test.py` in the src tree for an - example script that shows that this :func:`cohere_pairs` and - :func:`cohere` give the same results for a given pair. - - See Also - -------- - :func:`psd` - For information about the methods used to compute :math:`P_{xy}`, - :math:`P_{xx}` and :math:`P_{yy}`. +def _csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',', + converterd=None, names=None, missing='', missingd=None, + use_mrecords=False, dayfirst=False, yearfirst=False): """ - numRows, numCols = X.shape - - # zero pad if X is too short - if numRows < NFFT: - tmp = X - X = np.zeros((NFFT, numCols), X.dtype) - X[:numRows, :] = tmp - del tmp - - numRows, numCols = X.shape - # get all the columns of X that we are interested in by checking - # the ij tuples - allColumns = set() - for i, j in ij: - allColumns.add(i) - allColumns.add(j) - Ncols = len(allColumns) - - # for real X, ignore the negative frequencies - if np.iscomplexobj(X): - numFreqs = NFFT - else: - numFreqs = NFFT//2+1 - - # cache the FFT of every windowed, detrended NFFT length segment - # of every channel. If preferSpeedOverMemory, cache the conjugate - # as well - if np.iterable(window): - if len(window) != NFFT: - raise ValueError("The length of the window must be equal to NFFT") - windowVals = window - else: - windowVals = window(np.ones(NFFT, X.dtype)) - ind = list(range(0, numRows-NFFT+1, NFFT-noverlap)) - numSlices = len(ind) - FFTSlices = {} - FFTConjSlices = {} - Pxx = {} - slices = range(numSlices) - normVal = np.linalg.norm(windowVals)**2 - for iCol in allColumns: - progressCallback(i/Ncols, 'Cacheing FFTs') - Slices = np.zeros((numSlices, numFreqs), dtype=np.complex_) - for iSlice in slices: - thisSlice = X[ind[iSlice]:ind[iSlice]+NFFT, iCol] - thisSlice = windowVals*detrend(thisSlice) - Slices[iSlice, :] = np.fft.fft(thisSlice)[:numFreqs] - - FFTSlices[iCol] = Slices - if preferSpeedOverMemory: - FFTConjSlices[iCol] = np.conj(Slices) - Pxx[iCol] = np.divide(np.mean(abs(Slices)**2, axis=0), normVal) - del Slices, ind, windowVals - - # compute the coherences and phases for all pairs using the - # cached FFTs - Cxy = {} - Phase = {} - count = 0 - N = len(ij) - for i, j in ij: - count += 1 - if count % 10 == 0: - progressCallback(count/N, 'Computing coherences') - - if preferSpeedOverMemory: - Pxy = FFTSlices[i] * FFTConjSlices[j] - else: - Pxy = FFTSlices[i] * np.conj(FFTSlices[j]) - if numSlices > 1: - Pxy = np.mean(Pxy, axis=0) -# Pxy = np.divide(Pxy, normVal) - Pxy /= normVal -# Cxy[(i,j)] = np.divide(np.absolute(Pxy)**2, Pxx[i]*Pxx[j]) - Cxy[i, j] = abs(Pxy)**2 / (Pxx[i]*Pxx[j]) - Phase[i, j] = np.arctan2(Pxy.imag, Pxy.real) - - freqs = Fs/NFFT*np.arange(numFreqs) - if returnPxx: - return Cxy, Phase, freqs, Pxx - else: - return Cxy, Phase, freqs - - -@cbook.deprecated('2.2', 'scipy.stats.entropy') -def entropy(y, bins): - r""" - Return the entropy of the data in *y* in units of nat. + Load data from comma/space/tab delimited file in *fname* into a + numpy record array and return the record array. - .. math:: + If *names* is *None*, a header row is required to automatically + assign the recarray names. The headers will be lower cased, + spaces will be converted to underscores, and illegal attribute + name characters removed. If *names* is not *None*, it is a + sequence of names to use for the column names. 
In this case, it + is assumed there is no header row. - -\sum p_i \ln(p_i) - where :math:`p_i` is the probability of observing *y* in the - :math:`i^{th}` bin of *bins*. *bins* can be a number of bins or a - range of bins; see :func:`numpy.histogram`. + - *fname*: can be a filename or a file handle. Support for gzipped + files is automatic, if the filename ends in '.gz' - Compare *S* with analytic calculation for a Gaussian:: + - *comments*: the character used to indicate the start of a comment + in the file, or *None* to switch off the removal of comments - x = mu + sigma * randn(200000) - Sanalytic = 0.5 * ( 1.0 + log(2*pi*sigma**2.0) ) - """ - n, bins = np.histogram(y, bins) - n = n.astype(float) + - *skiprows*: is the number of rows from the top to skip - n = np.take(n, np.nonzero(n)[0]) # get the positive + - *checkrows*: is the number of rows to check to validate the column + data type. When set to zero all rows are validated. - p = np.divide(n, len(y)) + - *converterd*: if not *None*, is a dictionary mapping column number or + munged column name to a converter function. - delta = bins[1] - bins[0] - S = -1.0 * np.sum(p * np.log(p)) + np.log(delta) - return S + - *names*: if not None, is a list of header names. In this case, no + header will be read from the file + - *missingd* is a dictionary mapping munged column names to field values + which signify that the field does not contain actual data and should + be masked, e.g., '0000-00-00' or 'unused' -@cbook.deprecated('2.2', 'scipy.stats.norm.pdf') -def normpdf(x, *args): - "Return the normal pdf evaluated at *x*; args provides *mu*, *sigma*" - mu, sigma = args - return 1./(np.sqrt(2*np.pi)*sigma)*np.exp(-0.5 * (1./sigma*(x - mu))**2) + - *missing*: a string whose value signals a missing field regardless of + the column it appears in + - *use_mrecords*: if True, return an mrecords.fromrecords record array if + any of the data are missing -@cbook.deprecated('2.2') -def find(condition): - "Return the indices where ravel(condition) is true" - res, = np.nonzero(np.ravel(condition)) - return res + - *dayfirst*: default is False so that MM-DD-YY has precedence over + DD-MM-YY. See + http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 + for further information. + - *yearfirst*: default is False so that MM-DD-YY has precedence over + YY-MM-DD. See + http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 + for further information. -@cbook.deprecated('2.2') -def longest_contiguous_ones(x): - """ - Return the indices of the longest stretch of contiguous ones in *x*, - assuming *x* is a vector of zeros and ones. If there are two - equally long stretches, pick the first. 
+ If no rows are found, *None* is returned """ - x = np.ravel(x) - if len(x) == 0: - return np.array([]) - ind = (x == 0).nonzero()[0] - if len(ind) == 0: - return np.arange(len(x)) - if len(ind) == len(x): - return np.array([]) - - y = np.zeros((len(x)+2,), x.dtype) - y[1:-1] = x - dif = np.diff(y) - up = (dif == 1).nonzero()[0] - dn = (dif == -1).nonzero()[0] - i = (dn-up == max(dn - up)).nonzero()[0][0] - ind = np.arange(up[i], dn[i]) + if converterd is None: + converterd = dict() - return ind + if missingd is None: + missingd = {} + import dateutil.parser + import datetime -@cbook.deprecated('2.2') -def longest_ones(x): - '''Alias for longest_contiguous_ones.''' - return longest_contiguous_ones(x) + fh = cbook.to_filehandle(fname) + delimiter = str(delimiter) -@cbook.deprecated('2.2') -class PCA(object): - def __init__(self, a, standardize=True): + class FH: """ - compute the SVD of a and store data for PCA. Use project to - project the data onto a reduced set of dimensions - - Parameters - ---------- - a : np.ndarray - A numobservations x numdims array - standardize : bool - True if input data are to be standardized. If False, only centering - will be carried out. - - Attributes - ---------- - a - A centered unit sigma version of input ``a``. - - numrows, numcols - The dimensions of ``a``. - - mu - A numdims array of means of ``a``. This is the vector that points - to the origin of PCA space. + For space-delimited files, we want different behavior than + comma or tab. Generally, we want multiple spaces to be + treated as a single separator, whereas with comma and tab we + want multiple commas to return multiple (empty) fields. The + join/strip trick below effects this. + """ + def __init__(self, fh): + self.fh = fh - sigma - A numdims array of standard deviation of ``a``. + def close(self): + self.fh.close() - fracs - The proportion of variance of each of the principal components. + def seek(self, arg): + self.fh.seek(arg) - s - The actual eigenvalues of the decomposition. + def fix(self, s): + return ' '.join(s.split()) - Wt - The weight vector for projecting a numdims point or array into - PCA space. + def __next__(self): + return self.fix(next(self.fh)) - Y - A projected into PCA space. + def __iter__(self): + for line in self.fh: + yield self.fix(line) - Notes - ----- - The factor loadings are in the ``Wt`` factor, i.e., the factor loadings - for the first principal component are given by ``Wt[0]``. This row is - also the first eigenvector. + if delimiter == ' ': + fh = FH(fh) - """ - n, m = a.shape - if n < m: - raise RuntimeError('we assume data in a is organized with ' - 'numrows>numcols') - - self.numrows, self.numcols = n, m - self.mu = a.mean(axis=0) - self.sigma = a.std(axis=0) - self.standardize = standardize - - a = self.center(a) - - self.a = a - - U, s, Vh = np.linalg.svd(a, full_matrices=False) - - # Note: .H indicates the conjugate transposed / Hermitian. - - # The SVD is commonly written as a = U s V.H. - # If U is a unitary matrix, it means that it satisfies U.H = inv(U). - - # The rows of Vh are the eigenvectors of a.H a. - # The columns of U are the eigenvectors of a a.H. - # For row i in Vh and column i in U, the corresponding eigenvalue is - # s[i]**2. 
- - self.Wt = Vh - - # save the transposed coordinates - Y = np.dot(Vh, a.T).T - self.Y = Y - - # save the eigenvalues - self.s = s**2 - - # and now the contribution of the individual components - vars = self.s / len(s) - self.fracs = vars/vars.sum() - - def project(self, x, minfrac=0.): - ''' - project x onto the principle axes, dropping any axes where fraction - of variance= minfrac - if x.ndim == 2: - Yreduced = Y[:, mask] - else: - Yreduced = Y[mask] - return Yreduced - - def center(self, x): - ''' - center and optionally standardize the data using the mean and sigma - from training set a - ''' - if self.standardize: - return (x - self.mu)/self.sigma - else: - return (x - self.mu) - - @staticmethod - def _get_colinear(): - c0 = np.array([ - 0.19294738, 0.6202667, 0.45962655, 0.07608613, 0.135818, - 0.83580842, 0.07218851, 0.48318321, 0.84472463, 0.18348462, - 0.81585306, 0.96923926, 0.12835919, 0.35075355, 0.15807861, - 0.837437, 0.10824303, 0.1723387, 0.43926494, 0.83705486]) - - c1 = np.array([ - -1.17705601, -0.513883, -0.26614584, 0.88067144, 1.00474954, - -1.1616545, 0.0266109, 0.38227157, 1.80489433, 0.21472396, - -1.41920399, -2.08158544, -0.10559009, 1.68999268, 0.34847107, - -0.4685737, 1.23980423, -0.14638744, -0.35907697, 0.22442616]) - - c2 = c0 + 2*c1 - c3 = -3*c0 + 4*c1 - a = np.array([c3, c0, c1, c2]).T - return a - - -@cbook.deprecated('2.2', 'numpy.percentile') -def prctile(x, p=(0.0, 25.0, 50.0, 75.0, 100.0)): - """ - Return the percentiles of *x*. *p* can either be a sequence of - percentile values or a scalar. If *p* is a sequence, the ith - element of the return sequence is the *p*(i)-th percentile of *x*. - If *p* is a scalar, the largest value of *x* less than or equal to - the *p* percentage point in the sequence is returned. - """ + reader = csv.reader(fh, delimiter=delimiter) - # This implementation derived from scipy.stats.scoreatpercentile - def _interpolate(a, b, fraction): - """Returns the point at the given fraction between a and b, where - 'fraction' must be between 0 and 1. - """ - return a + (b - a) * fraction - - per = np.array(p) - values = np.sort(x, axis=None) - - idxs = per / 100 * (values.shape[0] - 1) - ai = idxs.astype(int) - bi = ai + 1 - frac = idxs % 1 - - # handle cases where attempting to interpolate past last index - cond = bi >= len(values) - if per.ndim: - ai[cond] -= 1 - bi[cond] -= 1 - frac[cond] += 1 - else: - if cond: - ai -= 1 - bi -= 1 - frac += 1 + def process_skiprows(reader): + if skiprows: + for i, row in enumerate(reader): + if i >= (skiprows-1): + break - return _interpolate(values[ai], values[bi], frac) + return fh, reader + process_skiprows(reader) -@cbook.deprecated('2.2') -def prctile_rank(x, p): - """ - Return the rank for each element in *x*, return the rank - 0..len(*p*). e.g., if *p* = (25, 50, 75), the return value will be a - len(*x*) array with values in [0,1,2,3] where 0 indicates the - value is less than the 25th percentile, 1 indicates the value is - >= the 25th and < 50th percentile, ... and 3 indicates the value - is above the 75th percentile cutoff. - - *p* is either an array of percentiles in [0..100] or a scalar which - indicates how many quantiles of data you want ranked. - """ + def ismissing(name, val): + "Should the value val in column name be masked?" 
+ return val == missing or val == missingd.get(name) or val == '' - if not np.iterable(p): - p = np.arange(100.0/p, 100.0, 100.0/p) - else: - p = np.asarray(p) + def with_default_value(func, default): + def newfunc(name, val): + if ismissing(name, val): + return default + else: + return func(val) + return newfunc - if p.max() <= 1 or p.min() < 0 or p.max() > 100: - raise ValueError('percentiles should be in range 0..100, not 0..1') + def mybool(x): + if x == 'True': + return True + elif x == 'False': + return False + else: + raise ValueError('invalid bool') - ptiles = prctile(x, p) - return np.searchsorted(ptiles, x) + dateparser = dateutil.parser.parse + def mydateparser(x): + # try and return a datetime object + d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) + return d -@cbook.deprecated('2.2') -def center_matrix(M, dim=0): - """ - Return the matrix *M* with each row having zero mean and unit std. + mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1)) - If *dim* = 1 operate on columns instead of rows. (*dim* is - opposite to the numpy axis kwarg.) - """ - M = np.asarray(M, float) - if dim: - M = (M - M.mean(axis=0)) / M.std(axis=0) - else: - M = (M - M.mean(axis=1)[:, np.newaxis]) - M = M / M.std(axis=1)[:, np.newaxis] - return M + myfloat = with_default_value(float, np.nan) + myint = with_default_value(int, -1) + mystr = with_default_value(str, '') + mybool = with_default_value(mybool, None) + def mydate(x): + # try and return a date object + d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) -@cbook.deprecated('2.2', 'scipy.integrate.ode') -def rk4(derivs, y0, t): - """ - Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta. - This is a toy implementation which may be useful if you find - yourself stranded on a system w/o scipy. Otherwise use - :func:`scipy.integrate`. + if d.hour > 0 or d.minute > 0 or d.second > 0: + raise ValueError('not a date') + return d.date() + mydate = with_default_value(mydate, datetime.date(1, 1, 1)) - Parameters - ---------- - y0 - initial state vector + def get_func(name, item, func): + # promote functions in this order + funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr] + for func in funcs[funcs.index(func):]: + try: + func(name, item) + except Exception: + continue + return func + raise ValueError('Could not find a working conversion function') - t - sample times + # map column names that clash with builtins -- TODO - extend this list + itemd = { + 'return': 'return_', + 'file': 'file_', + 'print': 'print_', + } - derivs - returns the derivative of the system and has the - signature ``dy = derivs(yi, ti)`` + def get_converters(reader, comments): - Examples - -------- + converters = None + i = 0 + for row in reader: + if (len(row) and comments is not None and + row[0].startswith(comments)): + continue + if i == 0: + converters = [mybool]*len(row) + if checkrows and i > checkrows: + break + i += 1 - A 2D system:: + for j, (name, item) in enumerate(zip(names, row)): + func = converterd.get(j) + if func is None: + func = converterd.get(name) + if func is None: + func = converters[j] + if len(item.strip()): + func = get_func(name, item, func) + else: + # how should we handle custom converters and defaults? 
+ func = with_default_value(func, None) + converters[j] = func + return converters - def derivs6(x,t): - d1 = x[0] + 2*x[1] - d2 = -3*x[0] + 4*x[1] - return (d1, d2) - dt = 0.0005 - t = arange(0.0, 2.0, dt) - y0 = (1,2) - yout = rk4(derivs6, y0, t) + # Get header and remove invalid characters + needheader = names is None - A 1D system:: + if needheader: + for row in reader: + if (len(row) and comments is not None and + row[0].startswith(comments)): + continue + headers = row + break - alpha = 2 - def derivs(x,t): - return -alpha*x + exp(-t) + # remove these chars + delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""") + delete.add('"') - y0 = 1 - yout = rk4(derivs, y0, t) + names = [] + seen = dict() + for i, item in enumerate(headers): + item = item.strip().lower().replace(' ', '_') + item = ''.join([c for c in item if c not in delete]) + if not len(item): + item = 'column%d' % i - If you have access to scipy, you should probably be using the - scipy.integrate tools rather than this function. - """ + item = itemd.get(item, item) + cnt = seen.get(item, 0) + if cnt > 0: + names.append(item + '_%d' % cnt) + else: + names.append(item) + seen[item] = cnt+1 - try: - Ny = len(y0) - except TypeError: - yout = np.zeros((len(t),), float) else: - yout = np.zeros((len(t), Ny), float) - - yout[0] = y0 - - for i in np.arange(len(t)-1): - - thist = t[i] - dt = t[i+1] - thist - dt2 = dt/2.0 - y0 = yout[i] - - k1 = np.asarray(derivs(y0, thist)) - k2 = np.asarray(derivs(y0 + dt2*k1, thist+dt2)) - k3 = np.asarray(derivs(y0 + dt2*k2, thist+dt2)) - k4 = np.asarray(derivs(y0 + dt*k3, thist+dt)) - yout[i+1] = y0 + dt/6.0*(k1 + 2*k2 + 2*k3 + k4) - return yout + if isinstance(names, str): + names = [n.strip() for n in names.split(',')] + # get the converter functions by inspecting checkrows + converters = get_converters(reader, comments) + if converters is None: + raise ValueError('Could not find any valid data in CSV file') -@cbook.deprecated('2.2') -def bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0, - mux=0.0, muy=0.0, sigmaxy=0.0): - """ - Bivariate Gaussian distribution for equal shape *X*, *Y*. - - See `bivariate normal - `_ - at mathworld. - """ - Xmu = X-mux - Ymu = Y-muy - - rho = sigmaxy/(sigmax*sigmay) - z = Xmu**2/sigmax**2 + Ymu**2/sigmay**2 - 2*rho*Xmu*Ymu/(sigmax*sigmay) - denom = 2*np.pi*sigmax*sigmay*np.sqrt(1-rho**2) - return np.exp(-z/(2*(1-rho**2))) / denom - - -@cbook.deprecated('2.2') -def get_xyz_where(Z, Cond): - """ - *Z* and *Cond* are *M* x *N* matrices. *Z* are data and *Cond* is - a boolean matrix where some condition is satisfied. Return value - is (*x*, *y*, *z*) where *x* and *y* are the indices into *Z* and - *z* are the values of *Z* at those indices. *x*, *y*, and *z* are - 1D arrays. - """ - X, Y = np.indices(Z.shape) - return X[Cond], Y[Cond], Z[Cond] - - -@cbook.deprecated('2.2') -def get_sparse_matrix(M, N, frac=0.1): - """ - Return a *M* x *N* sparse matrix with *frac* elements randomly - filled. - """ - data = np.zeros((M, N))*0. - for i in range(int(M*N*frac)): - x = np.random.randint(0, M-1) - y = np.random.randint(0, N-1) - data[x, y] = np.random.rand() - return data - - -@cbook.deprecated('2.2', 'numpy.hypot') -def dist(x, y): - """ - Return the distance between two points. - """ - d = x-y - return np.sqrt(np.dot(d, d)) - - -@cbook.deprecated('2.2') -def dist_point_to_segment(p, s0, s1): - """ - Get the distance of a point to a segment. 
- - *p*, *s0*, *s1* are *xy* sequences - - This algorithm from - http://geomalgorithms.com/a02-_lines.html - """ - p = np.asarray(p, float) - s0 = np.asarray(s0, float) - s1 = np.asarray(s1, float) - v = s1 - s0 - w = p - s0 - - c1 = np.dot(w, v) - if c1 <= 0: - return dist(p, s0) - - c2 = np.dot(v, v) - if c2 <= c1: - return dist(p, s1) - - b = c1 / c2 - pb = s0 + b * v - return dist(p, pb) - - -@cbook.deprecated('2.2') -def segments_intersect(s1, s2): - """ - Return *True* if *s1* and *s2* intersect. - *s1* and *s2* are defined as:: - - s1: (x1, y1), (x2, y2) - s2: (x3, y3), (x4, y4) - """ - (x1, y1), (x2, y2) = s1 - (x3, y3), (x4, y4) = s2 - - den = ((y4-y3) * (x2-x1)) - ((x4-x3)*(y2-y1)) - - n1 = ((x4-x3) * (y1-y3)) - ((y4-y3)*(x1-x3)) - n2 = ((x2-x1) * (y1-y3)) - ((y2-y1)*(x1-x3)) - - if den == 0: - # lines parallel - return False - - u1 = n1/den - u2 = n2/den - - return 0.0 <= u1 <= 1.0 and 0.0 <= u2 <= 1.0 - - -@cbook.deprecated('2.2') -def fftsurr(x, detrend=detrend_none, window=window_none): - """ - Compute an FFT phase randomized surrogate of *x*. - """ - if np.iterable(window): - x = window*detrend(x) - else: - x = window(detrend(x)) - z = np.fft.fft(x) - a = 2.*np.pi*1j - phase = a * np.random.rand(len(x)) - z = z*np.exp(phase) - return np.fft.ifft(z).real - - -@cbook.deprecated('2.2') -def movavg(x, n): - """ - Compute the len(*n*) moving average of *x*. - """ - w = np.empty((n,), dtype=float) - w[:] = 1.0/n - return np.convolve(x, w, mode='valid') - - -# the following code was written and submitted by Fernando Perez -# from the ipython numutils package under a BSD license -# begin fperez functions - -""" -A set of convenient utilities for numerical work. - -Most of this module requires numpy or is meant to be used with it. - -Copyright (c) 2001-2004, Fernando Perez. -All rights reserved. - -This license was generated from the BSD license template as found in: -http://www.opensource.org/licenses/bsd-license.php - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the IPython project nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -""" - - -# ***************************************************************************** -# Globals -# **************************************************************************** -# function definitions -exp_safe_MIN = math.log(2.2250738585072014e-308) -exp_safe_MAX = 1.7976931348623157e+308 - - -@cbook.deprecated("2.2", 'numpy.exp') -def exp_safe(x): - """ - Compute exponentials which safely underflow to zero. - - Slow, but convenient to use. Note that numpy provides proper - floating point exception handling with access to the underlying - hardware. - """ - - if type(x) is np.ndarray: - return np.exp(np.clip(x, exp_safe_MIN, exp_safe_MAX)) - else: - return math.exp(x) - - -@cbook.deprecated("2.2", alternative='numpy.array(list(map(...)))') -def amap(fn, *args): - """ - amap(function, sequence[, sequence, ...]) -> array. - - Works like :func:`map`, but it returns an array. This is just a - convenient shorthand for ``numpy.array(map(...))``. - """ - return np.array(list(map(fn, *args))) - - -@cbook.deprecated("2.2") -def rms_flat(a): - """ - Return the root mean square of all the elements of *a*, flattened out. - """ - return np.sqrt(np.mean(np.abs(a) ** 2)) - - -@cbook.deprecated("2.2", alternative='numpy.linalg.norm(a, ord=1)') -def l1norm(a): - """ - Return the *l1* norm of *a*, flattened out. - - Implemented as a separate function (not a call to :func:`norm` for speed). - """ - return np.sum(np.abs(a)) - - -@cbook.deprecated("2.2", alternative='numpy.linalg.norm(a, ord=2)') -def l2norm(a): - """ - Return the *l2* norm of *a*, flattened out. - - Implemented as a separate function (not a call to :func:`norm` for speed). - """ - return np.sqrt(np.sum(np.abs(a) ** 2)) - - -@cbook.deprecated("2.2", alternative='numpy.linalg.norm(a.flat, ord=p)') -def norm_flat(a, p=2): - """ - norm(a,p=2) -> l-p norm of a.flat - - Return the l-p norm of *a*, considered as a flat array. This is NOT a true - matrix norm, since arrays of arbitrary rank are always flattened. - - *p* can be a number or the string 'Infinity' to get the L-infinity norm. - """ - # This function was being masked by a more general norm later in - # the file. We may want to simply delete it. - if p == 'Infinity': - return np.max(np.abs(a)) - else: - return np.sum(np.abs(a) ** p) ** (1 / p) - - -@cbook.deprecated("2.2", 'numpy.arange') -def frange(xini, xfin=None, delta=None, **kw): - """ - frange([start,] stop[, step, keywords]) -> array of floats - - Return a numpy ndarray containing a progression of floats. Similar to - :func:`numpy.arange`, but defaults to a closed interval. - - ``frange(x0, x1)`` returns ``[x0, x0+1, x0+2, ..., x1]``; *start* - defaults to 0, and the endpoint *is included*. This behavior is - different from that of :func:`range` and - :func:`numpy.arange`. This is deliberate, since :func:`frange` - will probably be more useful for generating lists of points for - function evaluation, and endpoints are often desired in this - use. The usual behavior of :func:`range` can be obtained by - setting the keyword *closed* = 0, in this case, :func:`frange` - basically becomes :func:numpy.arange`. - - When *step* is given, it specifies the increment (or - decrement). All arguments can be floating point numbers. - - ``frange(x0,x1,d)`` returns ``[x0,x0+d,x0+2d,...,xfin]`` where - *xfin* <= *x1*. - - :func:`frange` can also be called with the keyword *npts*. This - sets the number of points the list should contain (and overrides - the value *step* might have been given). :func:`numpy.arange` - doesn't offer this option. 
- - Examples:: - - >>> frange(3) - array([ 0., 1., 2., 3.]) - >>> frange(3,closed=0) - array([ 0., 1., 2.]) - >>> frange(1,6,2) - array([1, 3, 5]) or 1,3,5,7, depending on floating point vagueries - >>> frange(1,6.5,npts=5) - array([ 1. , 2.375, 3.75 , 5.125, 6.5 ]) - """ - - # defaults - kw.setdefault('closed', 1) - endpoint = kw['closed'] != 0 - - # funny logic to allow the *first* argument to be optional (like range()) - # This was modified with a simpler version from a similar frange() found - # at http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66472 - if xfin is None: - xfin = xini + 0.0 - xini = 0.0 - - if delta is None: - delta = 1.0 - - # compute # of points, spacing and return final list - try: - npts = kw['npts'] - delta = (xfin-xini) / (npts-endpoint) - except KeyError: - npts = int(np.round((xfin-xini)/delta)) + endpoint - # round finds the nearest, so the endpoint can be up to - # delta/2 larger than xfin. - - return np.arange(npts)*delta+xini -# end frange() - - -@cbook.deprecated("2.2", 'numpy.identity') -def identity(n, rank=2, dtype='l', typecode=None): - """ - Returns the identity matrix of shape (*n*, *n*, ..., *n*) (rank *r*). - - For ranks higher than 2, this object is simply a multi-index Kronecker - delta:: - - / 1 if i0=i1=...=iR, - id[i0,i1,...,iR] = -| - \\ 0 otherwise. - - Optionally a *dtype* (or typecode) may be given (it defaults to 'l'). - - Since rank defaults to 2, this function behaves in the default case (when - only *n* is given) like ``numpy.identity(n)`` -- but surprisingly, it is - much faster. - """ - if typecode is not None: - dtype = typecode - iden = np.zeros((n,)*rank, dtype) - for i in range(n): - idx = (i,)*rank - iden[idx] = 1 - return iden - - -@cbook.deprecated("2.2") -def base_repr(number, base=2, padding=0): - """ - Return the representation of a *number* in any given *base*. - """ - chars = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ' - if number < base: - return (padding - 1) * chars[0] + chars[int(number)] - max_exponent = int(math.log(number)/math.log(base)) - max_power = int(base) ** max_exponent - lead_digit = int(number/max_power) - return (chars[lead_digit] + - base_repr(number - max_power * lead_digit, base, - max(padding - 1, max_exponent))) - - -@cbook.deprecated("2.2") -def binary_repr(number, max_length=1025): - """ - Return the binary representation of the input *number* as a - string. - - This is more efficient than using :func:`base_repr` with base 2. - - Increase the value of max_length for very large numbers. Note that - on 32-bit machines, 2**1023 is the largest integer power of 2 - which can be converted to a Python float. - """ - -# assert number < 2L << max_length - shifts = map(operator.rshift, max_length * [number], - range(max_length - 1, -1, -1)) - digits = list(map(operator.mod, shifts, max_length * [2])) - if not digits.count(1): - return 0 - digits = digits[digits.index(1):] - return ''.join(map(repr, digits)).replace('L', '') - - -@cbook.deprecated("2.2", 'numpy.log2') -def log2(x, ln2=math.log(2.0)): - """ - Return the log(*x*) in base 2. - - This is a _slow_ function but which is guaranteed to return the correct - integer value if the input is an integer exact power of 2. - """ - try: - bin_n = binary_repr(x)[1:] - except (AssertionError, TypeError): - return math.log(x)/ln2 - else: - if '1' in bin_n: - return math.log(x)/ln2 - else: - return len(bin_n) - - -@cbook.deprecated("2.2") -def ispower2(n): - """ - Returns the log base 2 of *n* if *n* is a power of 2, zero otherwise. 
- - Note the potential ambiguity if *n* == 1: 2**0 == 1, interpret accordingly. - """ - - bin_n = binary_repr(n)[1:] - if '1' in bin_n: - return 0 - else: - return len(bin_n) - - -@cbook.deprecated("2.2") -def isvector(X): - """ - Like the MATLAB function with the same name, returns *True* - if the supplied numpy array or matrix *X* looks like a vector, - meaning it has a one non-singleton axis (i.e., it can have - multiple axes, but all must have length 1, except for one of - them). - - If you just want to see if the array has 1 axis, use X.ndim == 1. - """ - return np.prod(X.shape) == np.max(X.shape) - -# end fperez numutils code - - -# helpers for loading, saving, manipulating and viewing numpy record arrays -@cbook.deprecated("2.2", 'numpy.isnan') -def safe_isnan(x): - ':func:`numpy.isnan` for arbitrary types' - if isinstance(x, str): - return False - try: - b = np.isnan(x) - except NotImplementedError: - return False - except TypeError: - return False - else: - return b - - -@cbook.deprecated("2.2", 'numpy.isinf') -def safe_isinf(x): - ':func:`numpy.isinf` for arbitrary types' - if isinstance(x, str): - return False - try: - b = np.isinf(x) - except NotImplementedError: - return False - except TypeError: - return False - else: - return b - - -@cbook.deprecated("2.2") -def rec_append_fields(rec, names, arrs, dtypes=None): - """ - Return a new record array with field names populated with data - from arrays in *arrs*. If appending a single field, then *names*, - *arrs* and *dtypes* do not have to be lists. They can just be the - values themselves. - """ - if (not isinstance(names, str) and np.iterable(names) - and len(names) and isinstance(names[0], str)): - if len(names) != len(arrs): - raise ValueError("number of arrays do not match number of names") - else: # we have only 1 name and 1 array - names = [names] - arrs = [arrs] - arrs = list(map(np.asarray, arrs)) - if dtypes is None: - dtypes = [a.dtype for a in arrs] - elif not np.iterable(dtypes): - dtypes = [dtypes] - if len(arrs) != len(dtypes): - if len(dtypes) == 1: - dtypes = dtypes * len(arrs) - else: - raise ValueError("dtypes must be None, a single dtype or a list") - old_dtypes = rec.dtype.descr - newdtype = np.dtype(old_dtypes + list(zip(names, dtypes))) - newrec = np.recarray(rec.shape, dtype=newdtype) - for field in rec.dtype.fields: - newrec[field] = rec[field] - for name, arr in zip(names, arrs): - newrec[name] = arr - return newrec - - -@cbook.deprecated("2.2") -def rec_drop_fields(rec, names): - """ - Return a new numpy record array with fields in *names* dropped. - """ - - names = set(names) - - newdtype = np.dtype([(name, rec.dtype[name]) for name in rec.dtype.names - if name not in names]) - - newrec = np.recarray(rec.shape, dtype=newdtype) - for field in newdtype.names: - newrec[field] = rec[field] - - return newrec - - -@cbook.deprecated("2.2") -def rec_keep_fields(rec, names): - """ - Return a new numpy record array with only fields listed in names - """ - - if isinstance(names, str): - names = names.split(',') - - arrays = [] - for name in names: - arrays.append(rec[name]) - - return np.rec.fromarrays(arrays, names=names) - - -@cbook.deprecated("2.2") -def rec_groupby(r, groupby, stats): - """ - *r* is a numpy record array - - *groupby* is a sequence of record array attribute names that - together form the grouping key. 
e.g., ('date', 'productcode') - - *stats* is a sequence of (*attr*, *func*, *outname*) tuples which - will call ``x = func(attr)`` and assign *x* to the record array - output with attribute *outname*. For example:: - - stats = ( ('sales', len, 'numsales'), ('sales', np.mean, 'avgsale') ) - - Return record array has *dtype* names for each attribute name in - the *groupby* argument, with the associated group values, and - for each outname name in the *stats* argument, with the associated - stat summary output. - """ - # build a dictionary from groupby keys-> list of indices into r with - # those keys - rowd = {} - for i, row in enumerate(r): - key = tuple([row[attr] for attr in groupby]) - rowd.setdefault(key, []).append(i) - - rows = [] - # sort the output by groupby keys - for key in sorted(rowd): - row = list(key) - # get the indices for this groupby key - ind = rowd[key] - thisr = r[ind] - # call each stat function for this groupby slice - row.extend([func(thisr[attr]) for attr, func, outname in stats]) - rows.append(row) - - # build the output record array with groupby and outname attributes - attrs, funcs, outnames = list(zip(*stats)) - names = list(groupby) - names.extend(outnames) - return np.rec.fromrecords(rows, names=names) - - -@cbook.deprecated("2.2") -def rec_summarize(r, summaryfuncs): - """ - *r* is a numpy record array - - *summaryfuncs* is a list of (*attr*, *func*, *outname*) tuples - which will apply *func* to the array *r*[attr] and assign the - output to a new attribute name *outname*. The returned record - array is identical to *r*, with extra arrays for each element in - *summaryfuncs*. - - """ - - names = list(r.dtype.names) - arrays = [r[name] for name in names] - - for attr, func, outname in summaryfuncs: - names.append(outname) - arrays.append(np.asarray(func(r[attr]))) - - return np.rec.fromarrays(arrays, names=names) - - -@cbook.deprecated("2.2") -def rec_join(key, r1, r2, jointype='inner', defaults=None, r1postfix='1', - r2postfix='2'): - """ - Join record arrays *r1* and *r2* on *key*; *key* is a tuple of - field names -- if *key* is a string it is assumed to be a single - attribute name. If *r1* and *r2* have equal values on all the keys - in the *key* tuple, then their fields will be merged into a new - record array containing the intersection of the fields of *r1* and - *r2*. - - *r1* (also *r2*) must not have any duplicate keys. - - The *jointype* keyword can be 'inner', 'outer', 'leftouter'. To - do a rightouter join just reverse *r1* and *r2*. - - The *defaults* keyword is a dictionary filled with - ``{column_name:default_value}`` pairs. - - The keywords *r1postfix* and *r2postfix* are postfixed to column names - (other than keys) that are both in *r1* and *r2*. 
- """ - - if isinstance(key, str): - key = (key, ) - - for name in key: - if name not in r1.dtype.names: - raise ValueError('r1 does not have key field %s' % name) - if name not in r2.dtype.names: - raise ValueError('r2 does not have key field %s' % name) - - def makekey(row): - return tuple([row[name] for name in key]) - - r1d = {makekey(row): i for i, row in enumerate(r1)} - r2d = {makekey(row): i for i, row in enumerate(r2)} - - r1keys = set(r1d) - r2keys = set(r2d) - - common_keys = r1keys & r2keys - - r1ind = np.array([r1d[k] for k in common_keys]) - r2ind = np.array([r2d[k] for k in common_keys]) - - common_len = len(common_keys) - left_len = right_len = 0 - if jointype == "outer" or jointype == "leftouter": - left_keys = r1keys.difference(r2keys) - left_ind = np.array([r1d[k] for k in left_keys]) - left_len = len(left_ind) - if jointype == "outer": - right_keys = r2keys.difference(r1keys) - right_ind = np.array([r2d[k] for k in right_keys]) - right_len = len(right_ind) - - def key_desc(name): - ''' - if name is a string key, use the larger size of r1 or r2 before - merging - ''' - dt1 = r1.dtype[name] - if dt1.type != np.string_: - return (name, dt1.descr[0][1]) - - dt2 = r2.dtype[name] - if dt1 != dt2: - raise ValueError("The '{}' fields in arrays 'r1' and 'r2' must " - "have the same dtype".format(name)) - if dt1.num > dt2.num: - return (name, dt1.descr[0][1]) - else: - return (name, dt2.descr[0][1]) - - keydesc = [key_desc(name) for name in key] - - def mapped_r1field(name): - """ - The column name in *newrec* that corresponds to the column in *r1*. - """ - if name in key or name not in r2.dtype.names: - return name - else: - return name + r1postfix - - def mapped_r2field(name): - """ - The column name in *newrec* that corresponds to the column in *r2*. - """ - if name in key or name not in r1.dtype.names: - return name - else: - return name + r2postfix - - r1desc = [(mapped_r1field(desc[0]), desc[1]) for desc in r1.dtype.descr - if desc[0] not in key] - r2desc = [(mapped_r2field(desc[0]), desc[1]) for desc in r2.dtype.descr - if desc[0] not in key] - all_dtypes = keydesc + r1desc + r2desc - newdtype = np.dtype(all_dtypes) - newrec = np.recarray((common_len + left_len + right_len,), dtype=newdtype) - - if defaults is not None: - for thiskey in defaults: - if thiskey not in newdtype.names: - warnings.warn('rec_join defaults key="%s" not in new dtype ' - 'names "%s"' % (thiskey, newdtype.names)) - - for name in newdtype.names: - dt = newdtype[name] - if dt.kind in ('f', 'i'): - newrec[name] = 0 - - if jointype != 'inner' and defaults is not None: - # fill in the defaults enmasse - newrec_fields = list(newrec.dtype.fields) - for k, v in defaults.items(): - if k in newrec_fields: - newrec[k] = v - - for field in r1.dtype.names: - newfield = mapped_r1field(field) - if common_len: - newrec[newfield][:common_len] = r1[field][r1ind] - if (jointype == "outer" or jointype == "leftouter") and left_len: - newrec[newfield][common_len:(common_len+left_len)] = ( - r1[field][left_ind] - ) - - for field in r2.dtype.names: - newfield = mapped_r2field(field) - if field not in key and common_len: - newrec[newfield][:common_len] = r2[field][r2ind] - if jointype == "outer" and right_len: - newrec[newfield][-right_len:] = r2[field][right_ind] - - newrec.sort(order=key) - - return newrec - - -@cbook.deprecated("2.2") -def recs_join(key, name, recs, jointype='outer', missing=0., postfixes=None): - """ - Join a sequence of record arrays on single column key. 
- - This function only joins a single column of the multiple record arrays - - *key* - is the column name that acts as a key - - *name* - is the name of the column that we want to join - - *recs* - is a list of record arrays to join - - *jointype* - is a string 'inner' or 'outer' - - *missing* - is what any missing field is replaced by - - *postfixes* - if not None, a len recs sequence of postfixes - - returns a record array with columns [rowkey, name0, name1, ... namen-1]. - or if postfixes [PF0, PF1, ..., PFN-1] are supplied, - [rowkey, namePF0, namePF1, ... namePFN-1]. - - Example:: - - r = recs_join("date", "close", recs=[r0, r1], missing=0.) - - """ - results = [] - aligned_iters = cbook.align_iterators(operator.attrgetter(key), - *[iter(r) for r in recs]) - - def extract(r): - if r is None: - return missing - else: - return r[name] - - if jointype == "outer": - for rowkey, row in aligned_iters: - results.append([rowkey] + list(map(extract, row))) - elif jointype == "inner": - for rowkey, row in aligned_iters: - if None not in row: # throw out any Nones - results.append([rowkey] + list(map(extract, row))) - - if postfixes is None: - postfixes = ['%d' % i for i in range(len(recs))] - names = ",".join([key] + ["%s%s" % (name, postfix) - for postfix in postfixes]) - return np.rec.fromrecords(results, names=names) - - -@cbook.deprecated("2.2") -def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',', - converterd=None, names=None, missing='', missingd=None, - use_mrecords=False, dayfirst=False, yearfirst=False): - """ - Load data from comma/space/tab delimited file in *fname* into a - numpy record array and return the record array. - - If *names* is *None*, a header row is required to automatically - assign the recarray names. The headers will be lower cased, - spaces will be converted to underscores, and illegal attribute - name characters removed. If *names* is not *None*, it is a - sequence of names to use for the column names. In this case, it - is assumed there is no header row. - - - - *fname*: can be a filename or a file handle. Support for gzipped - files is automatic, if the filename ends in '.gz' - - - *comments*: the character used to indicate the start of a comment - in the file, or *None* to switch off the removal of comments - - - *skiprows*: is the number of rows from the top to skip - - - *checkrows*: is the number of rows to check to validate the column - data type. When set to zero all rows are validated. - - - *converterd*: if not *None*, is a dictionary mapping column number or - munged column name to a converter function. - - - *names*: if not None, is a list of header names. In this case, no - header will be read from the file - - - *missingd* is a dictionary mapping munged column names to field values - which signify that the field does not contain actual data and should - be masked, e.g., '0000-00-00' or 'unused' - - - *missing*: a string whose value signals a missing field regardless of - the column it appears in - - - *use_mrecords*: if True, return an mrecords.fromrecords record array if - any of the data are missing - - - *dayfirst*: default is False so that MM-DD-YY has precedence over - DD-MM-YY. See - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 - for further information. - - - *yearfirst*: default is False so that MM-DD-YY has precedence over - YY-MM-DD. See - http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47 - for further information. 
- - If no rows are found, *None* is returned - """ - - if converterd is None: - converterd = dict() - - if missingd is None: - missingd = {} - - import dateutil.parser - import datetime - - fh = cbook.to_filehandle(fname) - - delimiter = str(delimiter) - - class FH: - """ - For space-delimited files, we want different behavior than - comma or tab. Generally, we want multiple spaces to be - treated as a single separator, whereas with comma and tab we - want multiple commas to return multiple (empty) fields. The - join/strip trick below effects this. - """ - def __init__(self, fh): - self.fh = fh - - def close(self): - self.fh.close() - - def seek(self, arg): - self.fh.seek(arg) - - def fix(self, s): - return ' '.join(s.split()) - - def __next__(self): - return self.fix(next(self.fh)) - - def __iter__(self): - for line in self.fh: - yield self.fix(line) - - if delimiter == ' ': - fh = FH(fh) - - reader = csv.reader(fh, delimiter=delimiter) - - def process_skiprows(reader): - if skiprows: - for i, row in enumerate(reader): - if i >= (skiprows-1): - break - - return fh, reader - - process_skiprows(reader) - - def ismissing(name, val): - "Should the value val in column name be masked?" - return val == missing or val == missingd.get(name) or val == '' - - def with_default_value(func, default): - def newfunc(name, val): - if ismissing(name, val): - return default - else: - return func(val) - return newfunc - - def mybool(x): - if x == 'True': - return True - elif x == 'False': - return False - else: - raise ValueError('invalid bool') - - dateparser = dateutil.parser.parse - - def mydateparser(x): - # try and return a datetime object - d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) - return d - - mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1)) - - myfloat = with_default_value(float, np.nan) - myint = with_default_value(int, -1) - mystr = with_default_value(str, '') - mybool = with_default_value(mybool, None) - - def mydate(x): - # try and return a date object - d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst) - - if d.hour > 0 or d.minute > 0 or d.second > 0: - raise ValueError('not a date') - return d.date() - mydate = with_default_value(mydate, datetime.date(1, 1, 1)) - - def get_func(name, item, func): - # promote functions in this order - funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr] - for func in funcs[funcs.index(func):]: - try: - func(name, item) - except Exception: - continue - return func - raise ValueError('Could not find a working conversion function') - - # map column names that clash with builtins -- TODO - extend this list - itemd = { - 'return': 'return_', - 'file': 'file_', - 'print': 'print_', - } - - def get_converters(reader, comments): - - converters = None - i = 0 - for row in reader: - if (len(row) and comments is not None and - row[0].startswith(comments)): - continue - if i == 0: - converters = [mybool]*len(row) - if checkrows and i > checkrows: - break - i += 1 - - for j, (name, item) in enumerate(zip(names, row)): - func = converterd.get(j) - if func is None: - func = converterd.get(name) - if func is None: - func = converters[j] - if len(item.strip()): - func = get_func(name, item, func) - else: - # how should we handle custom converters and defaults? 
- func = with_default_value(func, None) - converters[j] = func - return converters - - # Get header and remove invalid characters - needheader = names is None - - if needheader: - for row in reader: - if (len(row) and comments is not None and - row[0].startswith(comments)): - continue - headers = row - break - - # remove these chars - delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""") - delete.add('"') - - names = [] - seen = dict() - for i, item in enumerate(headers): - item = item.strip().lower().replace(' ', '_') - item = ''.join([c for c in item if c not in delete]) - if not len(item): - item = 'column%d' % i - - item = itemd.get(item, item) - cnt = seen.get(item, 0) - if cnt > 0: - names.append(item + '_%d' % cnt) - else: - names.append(item) - seen[item] = cnt+1 - - else: - if isinstance(names, str): - names = [n.strip() for n in names.split(',')] - - # get the converter functions by inspecting checkrows - converters = get_converters(reader, comments) - if converters is None: - raise ValueError('Could not find any valid data in CSV file') - - # reset the reader and start over - fh.seek(0) - reader = csv.reader(fh, delimiter=delimiter) - process_skiprows(reader) + # reset the reader and start over + fh.seek(0) + reader = csv.reader(fh, delimiter=delimiter) + process_skiprows(reader) if needheader: while True: @@ -2828,664 +1467,6 @@ def get_converters(reader, comments): return r -# a series of classes for describing the format intentions of various rec views -@cbook.deprecated("2.2") -class FormatObj(object): - def tostr(self, x): - return self.toval(x) - - def toval(self, x): - return str(x) - - def fromstr(self, s): - return s - - def __hash__(self): - """ - override the hash function of any of the formatters, so that we don't - create duplicate excel format styles - """ - return hash(self.__class__) - - -@cbook.deprecated("2.2") -class FormatString(FormatObj): - def tostr(self, x): - val = repr(x) - return val[1:-1] - - -@cbook.deprecated("2.2") -class FormatFormatStr(FormatObj): - def __init__(self, fmt): - self.fmt = fmt - - def tostr(self, x): - if x is None: - return 'None' - return self.fmt % self.toval(x) - - -@cbook.deprecated("2.2") -class FormatFloat(FormatFormatStr): - def __init__(self, precision=4, scale=1.): - FormatFormatStr.__init__(self, '%%1.%df' % precision) - self.precision = precision - self.scale = scale - - def __hash__(self): - return hash((self.__class__, self.precision, self.scale)) - - def toval(self, x): - if x is not None: - x = x * self.scale - return x - - def fromstr(self, s): - return float(s)/self.scale - - -@cbook.deprecated("2.2") -class FormatInt(FormatObj): - - def tostr(self, x): - return '%d' % int(x) - - def toval(self, x): - return int(x) - - def fromstr(self, s): - return int(s) - - -@cbook.deprecated("2.2") -class FormatBool(FormatObj): - def toval(self, x): - return str(x) - - def fromstr(self, s): - return bool(s) - - -@cbook.deprecated("2.2") -class FormatPercent(FormatFloat): - def __init__(self, precision=4): - FormatFloat.__init__(self, precision, scale=100.) 
- - -@cbook.deprecated("2.2") -class FormatThousands(FormatFloat): - def __init__(self, precision=4): - FormatFloat.__init__(self, precision, scale=1e-3) - - -@cbook.deprecated("2.2") -class FormatMillions(FormatFloat): - def __init__(self, precision=4): - FormatFloat.__init__(self, precision, scale=1e-6) - - -@cbook.deprecated("2.2", alternative='date.strftime') -class FormatDate(FormatObj): - def __init__(self, fmt): - self.fmt = fmt - - def __hash__(self): - return hash((self.__class__, self.fmt)) - - def toval(self, x): - if x is None: - return 'None' - return x.strftime(self.fmt) - - def fromstr(self, x): - import dateutil.parser - return dateutil.parser.parse(x).date() - - -@cbook.deprecated("2.2", alternative='datetime.strftime') -class FormatDatetime(FormatDate): - def __init__(self, fmt='%Y-%m-%d %H:%M:%S'): - FormatDate.__init__(self, fmt) - - def fromstr(self, x): - import dateutil.parser - return dateutil.parser.parse(x) - - -@cbook.deprecated("2.2") -def get_formatd(r, formatd=None): - 'build a formatd guaranteed to have a key for every dtype name' - defaultformatd = { - np.bool_: FormatBool(), - np.int16: FormatInt(), - np.int32: FormatInt(), - np.int64: FormatInt(), - np.float32: FormatFloat(), - np.float64: FormatFloat(), - np.object_: FormatObj(), - np.string_: FormatString()} - - if formatd is None: - formatd = dict() - - for i, name in enumerate(r.dtype.names): - dt = r.dtype[name] - format = formatd.get(name) - if format is None: - format = defaultformatd.get(dt.type, FormatObj()) - formatd[name] = format - return formatd - - -@cbook.deprecated("2.2") -def csvformat_factory(format): - format = copy.deepcopy(format) - if isinstance(format, FormatFloat): - format.scale = 1. # override scaling for storage - format.fmt = '%r' - return format - - -@cbook.deprecated("2.2", alternative='numpy.recarray.tofile') -def rec2txt(r, header=None, padding=3, precision=3, fields=None): - """ - Returns a textual representation of a record array. - - Parameters - ---------- - r: numpy recarray - - header: list - column headers - - padding: - space between each column - - precision: number of decimal places to use for floats. - Set to an integer to apply to all floats. Set to a - list of integers to apply precision individually. - Precision for non-floats is simply ignored. - - fields : list - If not None, a list of field names to print. fields - can be a list of strings like ['field1', 'field2'] or a single - comma separated string like 'field1,field2' - - Examples - -------- - - For ``precision=[0,2,3]``, the output is :: - - ID Price Return - ABC 12.54 0.234 - XYZ 6.32 -0.076 - """ - - if fields is not None: - r = rec_keep_fields(r, fields) - - if cbook.is_numlike(precision): - precision = [precision]*len(r.dtype) - - def get_type(item, atype=int): - tdict = {None: int, int: float, float: str} - try: - atype(str(item)) - except Exception: - return get_type(item, tdict[atype]) - return atype - - def get_justify(colname, column, precision): - ntype = column.dtype - - if np.issubdtype(ntype, np.character): - fixed_width = int(ntype.str[2:]) - length = max(len(colname), fixed_width) - return 0, length+padding, "%s" # left justify - - if np.issubdtype(ntype, np.integer): - length = max(len(colname), - np.max(list(map(len, list(map(str, column)))))) - return 1, length+padding, "%d" # right justify - - if np.issubdtype(ntype, np.floating): - fmt = "%." 
+ str(precision) + "f" - length = max( - len(colname), - np.max(list(map(len, list(map(lambda x: fmt % x, column))))) - ) - return 1, length+padding, fmt # right justify - - return (0, - max(len(colname), - np.max(list(map(len, list(map(str, column))))))+padding, - "%s") - - if header is None: - header = r.dtype.names - - justify_pad_prec = [get_justify(header[i], r.__getitem__(colname), - precision[i]) - for i, colname in enumerate(r.dtype.names)] - - justify_pad_prec_spacer = [] - for i in range(len(justify_pad_prec)): - just, pad, prec = justify_pad_prec[i] - if i == 0: - justify_pad_prec_spacer.append((just, pad, prec, 0)) - else: - pjust, ppad, pprec = justify_pad_prec[i-1] - if pjust == 0 and just == 1: - justify_pad_prec_spacer.append((just, pad-padding, prec, 0)) - elif pjust == 1 and just == 0: - justify_pad_prec_spacer.append((just, pad, prec, padding)) - else: - justify_pad_prec_spacer.append((just, pad, prec, 0)) - - def format(item, just_pad_prec_spacer): - just, pad, prec, spacer = just_pad_prec_spacer - if just == 0: - return spacer*' ' + str(item).ljust(pad) - else: - if get_type(item) == float: - item = (prec % float(item)) - elif get_type(item) == int: - item = (prec % int(item)) - - return item.rjust(pad) - - textl = [] - textl.append(''.join([format(colitem, justify_pad_prec_spacer[j]) - for j, colitem in enumerate(header)])) - for i, row in enumerate(r): - textl.append(''.join([format(colitem, justify_pad_prec_spacer[j]) - for j, colitem in enumerate(row)])) - if i == 0: - textl[0] = textl[0].rstrip() - - text = os.linesep.join(textl) - return text - - -@cbook.deprecated("2.2", alternative='numpy.recarray.tofile') -def rec2csv(r, fname, delimiter=',', formatd=None, missing='', - missingd=None, withheader=True): - """ - Save the data from numpy recarray *r* into a - comma-/space-/tab-delimited file. The record array dtype names - will be used for column headers. - - *fname*: can be a filename or a file handle. Support for gzipped - files is automatic, if the filename ends in '.gz' - - *withheader*: if withheader is False, do not write the attribute - names in the first row - - for formatd type FormatFloat, we override the precision to store - full precision floats in the CSV file - - See Also - -------- - :func:`csv2rec` - For information about *missing* and *missingd*, which can be used to - fill in masked values into your CSV file. 
- """ - - delimiter = str(delimiter) - - if missingd is None: - missingd = dict() - - def with_mask(func): - def newfunc(val, mask, mval): - if mask: - return mval - else: - return func(val) - return newfunc - - if r.ndim != 1: - raise ValueError('rec2csv only operates on 1 dimensional recarrays') - - formatd = get_formatd(r, formatd) - funcs = [] - for i, name in enumerate(r.dtype.names): - funcs.append(with_mask(csvformat_factory(formatd[name]).tostr)) - - fh, opened = cbook.to_filehandle(fname, 'wb', return_opened=True) - writer = csv.writer(fh, delimiter=delimiter) - header = r.dtype.names - if withheader: - writer.writerow(header) - - # Our list of specials for missing values - mvals = [] - for name in header: - mvals.append(missingd.get(name, missing)) - - ismasked = False - if len(r): - row = r[0] - ismasked = hasattr(row, '_fieldmask') - - for row in r: - if ismasked: - row, rowmask = row.item(), row._fieldmask.item() - else: - rowmask = [False] * len(row) - writer.writerow([func(val, mask, mval) for func, val, mask, mval - in zip(funcs, row, rowmask, mvals)]) - if opened: - fh.close() - - -@cbook.deprecated('2.2', alternative='scipy.interpolate.griddata') -def griddata(x, y, z, xi, yi, interp='nn'): - """ - Interpolates from a nonuniformly spaced grid to some other grid. - - Fits a surface of the form z = f(`x`, `y`) to the data in the - (usually) nonuniformly spaced vectors (`x`, `y`, `z`), then - interpolates this surface at the points specified by - (`xi`, `yi`) to produce `zi`. - - Parameters - ---------- - x, y, z : 1d array_like - Coordinates of grid points to interpolate from. - xi, yi : 1d or 2d array_like - Coordinates of grid points to interpolate to. - interp : string key from {'nn', 'linear'} - Interpolation algorithm, either 'nn' for natural neighbor, or - 'linear' for linear interpolation. - - Returns - ------- - 2d float array - Array of values interpolated at (`xi`, `yi`) points. Array - will be masked is any of (`xi`, `yi`) are outside the convex - hull of (`x`, `y`). - - Notes - ----- - If `interp` is 'nn' (the default), uses natural neighbor - interpolation based on Delaunay triangulation. This option is - only available if the mpl_toolkits.natgrid module is installed. - This can be downloaded from https://github.com/matplotlib/natgrid. - The (`xi`, `yi`) grid must be regular and monotonically increasing - in this case. - - If `interp` is 'linear', linear interpolation is used via - matplotlib.tri.LinearTriInterpolator. - - Instead of using `griddata`, more flexible functionality and other - interpolation options are available using a - matplotlib.tri.Triangulation and a matplotlib.tri.TriInterpolator. - """ - # Check input arguments. 
- x = np.asanyarray(x, dtype=np.float64) - y = np.asanyarray(y, dtype=np.float64) - z = np.asanyarray(z, dtype=np.float64) - if x.shape != y.shape or x.shape != z.shape or x.ndim != 1: - raise ValueError("x, y and z must be equal-length 1-D arrays") - - xi = np.asanyarray(xi, dtype=np.float64) - yi = np.asanyarray(yi, dtype=np.float64) - if xi.ndim != yi.ndim: - raise ValueError("xi and yi must be arrays with the same number of " - "dimensions (1 or 2)") - if xi.ndim == 2 and xi.shape != yi.shape: - raise ValueError("if xi and yi are 2D arrays, they must have the same " - "shape") - if xi.ndim == 1: - xi, yi = np.meshgrid(xi, yi) - - if interp == 'nn': - use_nn_interpolation = True - elif interp == 'linear': - use_nn_interpolation = False - else: - raise ValueError("interp keyword must be one of 'linear' (for linear " - "interpolation) or 'nn' (for natural neighbor " - "interpolation). Default is 'nn'.") - - # Remove masked points. - mask = np.ma.getmask(z) - if mask is not np.ma.nomask: - x = x.compress(~mask) - y = y.compress(~mask) - z = z.compressed() - - if use_nn_interpolation: - try: - from mpl_toolkits.natgrid import _natgrid - except ImportError: - raise RuntimeError( - "To use interp='nn' (Natural Neighbor interpolation) in " - "griddata, natgrid must be installed. Either install it " - "from http://github.com/matplotlib/natgrid or use " - "interp='linear' instead.") - - if xi.ndim == 2: - # natgrid expects 1D xi and yi arrays. - xi = xi[0, :] - yi = yi[:, 0] - - # Override default natgrid internal parameters. - _natgrid.seti(b'ext', 0) - _natgrid.setr(b'nul', np.nan) - - if np.min(np.diff(xi)) < 0 or np.min(np.diff(yi)) < 0: - raise ValueError("Output grid defined by xi,yi must be monotone " - "increasing") - - # Allocate array for output (buffer will be overwritten by natgridd) - zi = np.empty((yi.shape[0], xi.shape[0]), np.float64) - - # Natgrid requires each array to be contiguous rather than e.g. a view - # that is a non-contiguous slice of another array. Use numpy.require - # to deal with this, which will copy if necessary. - x = np.require(x, requirements=['C']) - y = np.require(y, requirements=['C']) - z = np.require(z, requirements=['C']) - xi = np.require(xi, requirements=['C']) - yi = np.require(yi, requirements=['C']) - _natgrid.natgridd(x, y, z, xi, yi, zi) - - # Mask points on grid outside convex hull of input data. - if np.any(np.isnan(zi)): - zi = np.ma.masked_where(np.isnan(zi), zi) - return zi - else: - # Linear interpolation performed using a matplotlib.tri.Triangulation - # and a matplotlib.tri.LinearTriInterpolator. - from .tri import Triangulation, LinearTriInterpolator - triang = Triangulation(x, y) - interpolator = LinearTriInterpolator(triang, z) - return interpolator(xi, yi) - - -################################################## -# Linear interpolation algorithms -################################################## -@cbook.deprecated("2.2", alternative="numpy.interp") -def less_simple_linear_interpolation(x, y, xi, extrap=False): - """ - This function provides simple (but somewhat less so than - :func:`cbook.simple_linear_interpolation`) linear interpolation. - :func:`simple_linear_interpolation` will give a list of point - between a start and an end, while this does true linear - interpolation at an arbitrary set of points. - - This is very inefficient linear interpolation meant to be used - only for a small number of points in relatively non-intensive use - cases. For real linear interpolation, use scipy. 
- """ - x = np.asarray(x) - y = np.asarray(y) - xi = np.atleast_1d(xi) - - s = list(y.shape) - s[0] = len(xi) - yi = np.tile(np.nan, s) - - for ii, xx in enumerate(xi): - bb = x == xx - if np.any(bb): - jj, = np.nonzero(bb) - yi[ii] = y[jj[0]] - elif xx < x[0]: - if extrap: - yi[ii] = y[0] - elif xx > x[-1]: - if extrap: - yi[ii] = y[-1] - else: - jj, = np.nonzero(x < xx) - jj = max(jj) - - yi[ii] = y[jj] + (xx-x[jj])/(x[jj+1]-x[jj]) * (y[jj+1]-y[jj]) - - return yi - - -@cbook.deprecated("2.2") -def slopes(x, y): - """ - :func:`slopes` calculates the slope *y*'(*x*) - - The slope is estimated using the slope obtained from that of a - parabola through any three consecutive points. - - This method should be superior to that described in the appendix - of A CONSISTENTLY WELL BEHAVED METHOD OF INTERPOLATION by Russel - W. Stineman (Creative Computing July 1980) in at least one aspect: - - Circles for interpolation demand a known aspect ratio between - *x*- and *y*-values. For many functions, however, the abscissa - are given in different dimensions, so an aspect ratio is - completely arbitrary. - - The parabola method gives very similar results to the circle - method for most regular cases but behaves much better in special - cases. - - Norbert Nemec, Institute of Theoretical Physics, University or - Regensburg, April 2006 Norbert.Nemec at physik.uni-regensburg.de - - (inspired by a original implementation by Halldor Bjornsson, - Icelandic Meteorological Office, March 2006 halldor at vedur.is) - """ - # Cast key variables as float. - x = np.asarray(x, float) - y = np.asarray(y, float) - - yp = np.zeros(y.shape, float) - - dx = x[1:] - x[:-1] - dy = y[1:] - y[:-1] - dydx = dy/dx - yp[1:-1] = (dydx[:-1] * dx[1:] + dydx[1:] * dx[:-1])/(dx[1:] + dx[:-1]) - yp[0] = 2.0 * dy[0]/dx[0] - yp[1] - yp[-1] = 2.0 * dy[-1]/dx[-1] - yp[-2] - return yp - - -@cbook.deprecated("2.2") -def stineman_interp(xi, x, y, yp=None): - """ - Given data vectors *x* and *y*, the slope vector *yp* and a new - abscissa vector *xi*, the function :func:`stineman_interp` uses - Stineman interpolation to calculate a vector *yi* corresponding to - *xi*. - - Here's an example that generates a coarse sine curve, then - interpolates over a finer abscissa:: - - x = linspace(0,2*pi,20); y = sin(x); yp = cos(x) - xi = linspace(0,2*pi,40); - yi = stineman_interp(xi,x,y,yp); - plot(x,y,'o',xi,yi) - - The interpolation method is described in the article A - CONSISTENTLY WELL BEHAVED METHOD OF INTERPOLATION by Russell - W. Stineman. The article appeared in the July 1980 issue of - Creative Computing with a note from the editor stating that while - they were: - - not an academic journal but once in a while something serious - and original comes in adding that this was - "apparently a real solution" to a well known problem. - - For *yp* = *None*, the routine automatically determines the slopes - using the :func:`slopes` routine. - - *x* is assumed to be sorted in increasing order. - - For values ``xi[j] < x[0]`` or ``xi[j] > x[-1]``, the routine - tries an extrapolation. The relevance of the data obtained from - this, of course, is questionable... - - Original implementation by Halldor Bjornsson, Icelandic - Meteorolocial Office, March 2006 halldor at vedur.is - - Completely reworked and optimized for Python by Norbert Nemec, - Institute of Theoretical Physics, University or Regensburg, April - 2006 Norbert.Nemec at physik.uni-regensburg.de - """ - - # Cast key variables as float. 
- x = np.asarray(x, float) - y = np.asarray(y, float) - if x.shape != y.shape: - raise ValueError("'x' and 'y' must be of same shape") - - if yp is None: - yp = slopes(x, y) - else: - yp = np.asarray(yp, float) - - xi = np.asarray(xi, float) - - # calculate linear slopes - dx = x[1:] - x[:-1] - dy = y[1:] - y[:-1] - s = dy/dx # note length of s is N-1 so last element is #N-2 - - # find the segment each xi is in - # this line actually is the key to the efficiency of this implementation - idx = np.searchsorted(x[1:-1], xi) - - # now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1] - # except at the boundaries, where it may be that xi[j] < x[0] or - # xi[j] > x[-1] - - # the y-values that would come out from a linear interpolation: - sidx = s.take(idx) - xidx = x.take(idx) - yidx = y.take(idx) - xidxp1 = x.take(idx+1) - yo = yidx + sidx * (xi - xidx) - - # the difference that comes when using the slopes given in yp - # using the yp slope of the left point - dy1 = (yp.take(idx) - sidx) * (xi - xidx) - # using the yp slope of the right point - dy2 = (yp.take(idx+1)-sidx) * (xi - xidxp1) - - dy1dy2 = dy1*dy2 - # The following is optimized for Python. The solution actually - # does more calculations than necessary but exploiting the power - # of numpy, this is far more efficient than coding a loop by hand - # in Python - yi = yo + dy1dy2 * np.choose(np.array(np.sign(dy1dy2), np.int32)+1, - ((2*xi-xidx-xidxp1)/((dy1-dy2)*(xidxp1-xidx)), - 0.0, - 1/(dy1+dy2),)) - return yi - - class GaussianKDE(object): """ Representation of a kernel-density estimate using Gaussian kernels. @@ -3640,260 +1621,3 @@ def evaluate(self, points): return result __call__ = evaluate - - -################################################## -# Code related to things in and around polygons -################################################## -@cbook.deprecated("2.2") -def inside_poly(points, verts): - """ - *points* is a sequence of *x*, *y* points. - *verts* is a sequence of *x*, *y* vertices of a polygon. - - Return value is a sequence of indices into points for the points - that are inside the polygon. - """ - # Make a closed polygon path - poly = Path(verts) - - # Check to see which points are contained within the Path - return [idx for idx, p in enumerate(points) if poly.contains_point(p)] - - -@cbook.deprecated("2.2") -def poly_below(xmin, xs, ys): - """ - Given a sequence of *xs* and *ys*, return the vertices of a - polygon that has a horizontal base at *xmin* and an upper bound at - the *ys*. *xmin* is a scalar. - - Intended for use with :meth:`matplotlib.axes.Axes.fill`, e.g.,:: - - xv, yv = poly_below(0, x, y) - ax.fill(xv, yv) - """ - if any(isinstance(var, np.ma.MaskedArray) for var in [xs, ys]): - numpy = np.ma - else: - numpy = np - - xs = numpy.asarray(xs) - ys = numpy.asarray(ys) - Nx = len(xs) - Ny = len(ys) - if Nx != Ny: - raise ValueError("'xs' and 'ys' must have the same length") - x = xmin*numpy.ones(2*Nx) - y = numpy.ones(2*Nx) - x[:Nx] = xs - y[:Nx] = ys - y[Nx:] = ys[::-1] - return x, y - - -@cbook.deprecated("2.2") -def poly_between(x, ylower, yupper): - """ - Given a sequence of *x*, *ylower* and *yupper*, return the polygon - that fills the regions between them. *ylower* or *yupper* can be - scalar or iterable. If they are iterable, they must be equal in - length to *x*. - - Return value is *x*, *y* arrays for use with - :meth:`matplotlib.axes.Axes.fill`. 
- """ - if any(isinstance(var, np.ma.MaskedArray) for var in [ylower, yupper, x]): - numpy = np.ma - else: - numpy = np - - Nx = len(x) - if not np.iterable(ylower): - ylower = ylower*numpy.ones(Nx) - - if not np.iterable(yupper): - yupper = yupper*numpy.ones(Nx) - - x = numpy.concatenate((x, x[::-1])) - y = numpy.concatenate((yupper, ylower[::-1])) - return x, y - - -@cbook.deprecated('2.2') -def is_closed_polygon(X): - """ - Tests whether first and last object in a sequence are the same. These are - presumably coordinates on a polygonal curve, in which case this function - tests if that curve is closed. - """ - return np.all(X[0] == X[-1]) - - -@cbook.deprecated("2.2", message='Moved to matplotlib.cbook') -def contiguous_regions(mask): - """ - return a list of (ind0, ind1) such that mask[ind0:ind1].all() is - True and we cover all such regions - """ - return cbook.contiguous_regions(mask) - - -@cbook.deprecated("2.2") -def cross_from_below(x, threshold): - """ - return the indices into *x* where *x* crosses some threshold from - below, e.g., the i's where:: - - x[i-1]=threshold - - Example code:: - - import matplotlib.pyplot as plt - - t = np.arange(0.0, 2.0, 0.1) - s = np.sin(2*np.pi*t) - - fig, ax = plt.subplots() - ax.plot(t, s, '-o') - ax.axhline(0.5) - ax.axhline(-0.5) - - ind = cross_from_below(s, 0.5) - ax.vlines(t[ind], -1, 1) - - ind = cross_from_above(s, -0.5) - ax.vlines(t[ind], -1, 1) - - plt.show() - - See Also - -------- - :func:`cross_from_above` and :func:`contiguous_regions` - - """ - x = np.asarray(x) - ind = np.nonzero((x[:-1] < threshold) & (x[1:] >= threshold))[0] - if len(ind): - return ind+1 - else: - return ind - - -@cbook.deprecated("2.2") -def cross_from_above(x, threshold): - """ - return the indices into *x* where *x* crosses some threshold from - below, e.g., the i's where:: - - x[i-1]>threshold and x[i]<=threshold - - See Also - -------- - :func:`cross_from_below` and :func:`contiguous_regions` - - """ - x = np.asarray(x) - ind = np.nonzero((x[:-1] >= threshold) & (x[1:] < threshold))[0] - if len(ind): - return ind+1 - else: - return ind - - -################################################## -# Vector and path length geometry calculations -################################################## -@cbook.deprecated('2.2') -def vector_lengths(X, P=2., axis=None): - """ - Finds the length of a set of vectors in *n* dimensions. This is - like the :func:`numpy.norm` function for vectors, but has the ability to - work over a particular axis of the supplied array or matrix. - - Computes ``(sum((x_i)^P))^(1/P)`` for each ``{x_i}`` being the - elements of *X* along the given axis. If *axis* is *None*, - compute over all elements of *X*. - """ - X = np.asarray(X) - return (np.sum(X**(P), axis=axis))**(1./P) - - -@cbook.deprecated('2.2') -def distances_along_curve(X): - """ - Computes the distance between a set of successive points in *N* dimensions. - - Where *X* is an *M* x *N* array or matrix. The distances between - successive rows is computed. Distance is the standard Euclidean - distance. - """ - X = np.diff(X, axis=0) - return vector_lengths(X, axis=1) - - -@cbook.deprecated('2.2') -def path_length(X): - """ - Computes the distance travelled along a polygonal curve in *N* dimensions. - - Where *X* is an *M* x *N* array or matrix. Returns an array of - length *M* consisting of the distance along the curve at each point - (i.e., the rows of *X*). 
- """ - X = distances_along_curve(X) - return np.concatenate((np.zeros(1), np.cumsum(X))) - - -@cbook.deprecated('2.2') -def quad2cubic(q0x, q0y, q1x, q1y, q2x, q2y): - """ - Converts a quadratic Bezier curve to a cubic approximation. - - The inputs are the *x* and *y* coordinates of the three control - points of a quadratic curve, and the output is a tuple of *x* and - *y* coordinates of the four control points of the cubic curve. - """ - # TODO: Candidate for deprecation -- no longer used internally - - # c0x, c0y = q0x, q0y - c1x, c1y = q0x + 2./3. * (q1x - q0x), q0y + 2./3. * (q1y - q0y) - c2x, c2y = c1x + 1./3. * (q2x - q0x), c1y + 1./3. * (q2y - q0y) - # c3x, c3y = q2x, q2y - return q0x, q0y, c1x, c1y, c2x, c2y, q2x, q2y - - -@cbook.deprecated("2.2") -def offset_line(y, yerr): - """ - Offsets an array *y* by +/- an error and returns a tuple - (y - err, y + err). - - The error term can be: - - * A scalar. In this case, the returned tuple is obvious. - * A vector of the same length as *y*. The quantities y +/- err are computed - component-wise. - * A tuple of length 2. In this case, yerr[0] is the error below *y* and - yerr[1] is error above *y*. For example:: - - import numpy as np - import matplotlib.pyplot as plt - - x = np.linspace(0, 2*np.pi, num=100, endpoint=True) - y = np.sin(x) - y_minus, y_plus = mlab.offset_line(y, 0.1) - plt.plot(x, y) - plt.fill_between(x, y_minus, y2=y_plus) - plt.show() - - """ - if cbook.is_numlike(yerr) or (np.iterable(yerr) and - len(yerr) == len(y)): - ymin = y - yerr - ymax = y + yerr - elif len(yerr) == 2: - ymin, ymax = y - yerr[0], y + yerr[1] - else: - raise ValueError("yerr must be scalar, 1xN or 2xN") - return ymin, ymax diff --git a/lib/matplotlib/pylab.py b/lib/matplotlib/pylab.py index 5c90445bfaec..a8f98cd80c5a 100644 --- a/lib/matplotlib/pylab.py +++ b/lib/matplotlib/pylab.py @@ -154,7 +154,6 @@ _Probability - normpdf - The Gaussian probability density function rand - random numbers from the uniform distribution randn - random numbers from the normal distribution @@ -196,7 +195,6 @@ _Other angle - the angle of a complex array - griddata - interpolate irregularly distributed data to a regular grid load - Deprecated--please use loadtxt. loadtxt - load ASCII data into array. polyfit - fit x, y to an n-th order polynomial @@ -231,15 +229,7 @@ ## We are still importing too many things from mlab; more cleanup is needed. 
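For the path-length helpers removed above (``vector_lengths``, ``distances_along_curve``, ``path_length``), the same computation is a few lines of plain numpy; a minimal sketch, assuming the Euclidean (``P=2``) norm that the deleted defaults used::

    import numpy as np

    def path_length(X):
        # Cumulative Euclidean distance along the rows of an (M, N) array,
        # matching what the removed distances_along_curve/path_length computed.
        seg = np.linalg.norm(np.diff(np.asarray(X, float), axis=0), axis=1)
        return np.concatenate(([0.0], np.cumsum(seg)))

    pts = np.array([[0.0, 0.0], [3.0, 4.0], [3.0, 8.0]])
    print(path_length(pts))  # [0. 5. 9.]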
from matplotlib.mlab import ( - amap, base_repr, binary_repr, bivariate_normal, center_matrix, csv2rec, - demean, detrend, detrend_linear, detrend_mean, detrend_none, dist, - dist_point_to_segment, distances_along_curve, entropy, exp_safe, - fftsurr, find, frange, get_sparse_matrix, get_xyz_where, griddata, - identity, inside_poly, is_closed_polygon, ispower2, isvector, l1norm, - l2norm, log2, longest_contiguous_ones, longest_ones, movavg, norm_flat, - normpdf, path_length, poly_below, poly_between, prctile, prctile_rank, - rec2csv, rec_append_fields, rec_drop_fields, rec_join, rk4, rms_flat, - segments_intersect, slopes, stineman_interp, vector_lengths, + demean, detrend, detrend_linear, detrend_mean, detrend_none, window_hanning, window_none) from matplotlib import cbook, mlab, pyplot as plt diff --git a/lib/matplotlib/pyplot.py b/lib/matplotlib/pyplot.py index ec15c5e8d28e..f673a69bec45 100644 --- a/lib/matplotlib/pyplot.py +++ b/lib/matplotlib/pyplot.py @@ -46,7 +46,7 @@ from matplotlib.artist import setp as _setp from matplotlib.axes import Axes, Subplot from matplotlib.projections import PolarAxes -from matplotlib import mlab # for csv2rec, detrend_none, window_hanning +from matplotlib import mlab # for _csv2rec, detrend_none, window_hanning from matplotlib.scale import get_scale_docs, get_scale_names from matplotlib import cm @@ -2230,9 +2230,18 @@ def plotfile(fname, cols=(0,), plotfuncs=None, that changes the axis scaling will set the scaling for all columns. - *comments*, *skiprows*, *checkrows*, *delimiter*, and *names* - are all passed on to :func:`matplotlib.mlab.csv2rec` to - load the data into a record array. + - *comments*: the character used to indicate the start of a comment + in the file, or *None* to switch off the removal of comments + + - *skiprows*: is the number of rows from the top to skip + + - *checkrows*: is the number of rows to check to validate the column + data type. When set to zero all rows are validated. + + - *delimiter*: is the character(s) separating row items + + - *names*: if not None, is a list of header names. 
In this case, no + header will be read from the file If *newfig* is *True*, the plot always will be made in a new figure; if *False*, it will be made in the current figure if one exists, @@ -2267,8 +2276,9 @@ def plotfile(fname, cols=(0,), plotfuncs=None, from matplotlib.cbook import MatplotlibDeprecationWarning with warnings.catch_warnings(): warnings.simplefilter('ignore', MatplotlibDeprecationWarning) - r = mlab.csv2rec(fname, comments=comments, skiprows=skiprows, - checkrows=checkrows, delimiter=delimiter, names=names) + r = mlab._csv2rec(fname, comments=comments, skiprows=skiprows, + checkrows=checkrows, delimiter=delimiter, + names=names) def getname_val(identifier): 'return the name and column data for identifier' diff --git a/lib/matplotlib/tests/test_cbook.py b/lib/matplotlib/tests/test_cbook.py index 8253b8a08da2..988fb2d83f85 100644 --- a/lib/matplotlib/tests/test_cbook.py +++ b/lib/matplotlib/tests/test_cbook.py @@ -503,3 +503,26 @@ class dummy(): x = np.random.rand(3, 5) xnew = cbook._reshape_2D(x, 'x') assert np.shape(xnew) == (5, 3) + + +def test_contiguous_regions(): + a, b, c = 3, 4, 5 + # Starts and ends with True + mask = [True]*a + [False]*b + [True]*c + expected = [(0, a), (a+b, a+b+c)] + assert cbook.contiguous_regions(mask) == expected + d, e = 6, 7 + # Starts with True ends with False + mask = mask + [False]*e + assert cbook.contiguous_regions(mask) == expected + # Starts with False ends with True + mask = [False]*d + mask[:-e] + expected = [(d, d+a), (d+a+b, d+a+b+c)] + assert cbook.contiguous_regions(mask) == expected + # Starts and ends with False + mask = mask + [False]*e + assert cbook.contiguous_regions(mask) == expected + # No True in mask + assert cbook.contiguous_regions([False]*5) == [] + # Empty mask + assert cbook.contiguous_regions([]) == [] diff --git a/lib/matplotlib/tests/test_mlab.py b/lib/matplotlib/tests/test_mlab.py index 827bd5d8745d..ddbba61d9637 100644 --- a/lib/matplotlib/tests/test_mlab.py +++ b/lib/matplotlib/tests/test_mlab.py @@ -20,60 +20,6 @@ ''' -def test_colinear_pca(): - with pytest.warns(MatplotlibDeprecationWarning): - a = mlab.PCA._get_colinear() - pca = mlab.PCA(a) - - assert_allclose(pca.fracs[2:], 0., atol=1e-8) - assert_allclose(pca.Y[:, 2:], 0., atol=1e-8) - - -@pytest.mark.parametrize('input', [ - # test odd lengths - [1, 2, 3], - # test even lengths - [1, 2, 3, 4], - # derived from email sent by jason-sage to MPL-user on 20090914 - [1, 1, 2, 2, 1, 2, 4, 3, 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 7, 6, 4, 5, 5], -], -ids=[ - 'odd length', - 'even length', - 'custom data', -]) -@pytest.mark.parametrize('percentile', [ - 0, - 50, - 75, - 100, - [0, 75, 100], -]) -def test_prctile(input, percentile): - with pytest.warns(MatplotlibDeprecationWarning): - assert_allclose(mlab.prctile(input, percentile), - np.percentile(input, percentile)) - - -@pytest.mark.parametrize('xmin, xmax, N', [ - (.01, 1000., 6), - (.03, 1313., 7), - (.03, 1313., 0), - (.03, 1313., 1), -], ids=[ - 'tens', - 'primes', - 'none', - 'single', -]) -def test_logspace(xmin, xmax, N): - with pytest.warns(MatplotlibDeprecationWarning): - res = mlab.logspace(xmin, xmax, N) - targ = np.logspace(np.log10(xmin), np.log10(xmax), N) - assert_allclose(targ, res) - assert res.size == N - - class TestStride(object): def get_base(self, x): y = x @@ -204,38 +150,10 @@ def tempcsv(): yield fd -def test_recarray_csv_roundtrip(tempcsv): - expected = np.recarray((99,), [('x', float), ('y', float), ('t', float)]) - # initialising all values: uninitialised memory sometimes produces - # 
floats that do not round-trip to string and back. - expected['x'][:] = np.linspace(-1e9, -1, 99) - expected['y'][:] = np.linspace(1, 1e9, 99) - expected['t'][:] = np.linspace(0, 0.01, 99) - with pytest.warns(MatplotlibDeprecationWarning): - mlab.rec2csv(expected, tempcsv) - tempcsv.seek(0) - actual = mlab.csv2rec(tempcsv) - - assert_allclose(expected['x'], actual['x']) - assert_allclose(expected['y'], actual['y']) - assert_allclose(expected['t'], actual['t']) - - -def test_rec2csv_bad_shape_ValueError(tempcsv): - bad = np.recarray((99, 4), [('x', float), ('y', float)]) - - # the bad recarray should trigger a ValueError for having ndim > 1. - with pytest.warns(MatplotlibDeprecationWarning): - with pytest.raises(ValueError): - mlab.rec2csv(bad, tempcsv) - - def test_csv2rec_names_with_comments(tempcsv): - tempcsv.write('# comment\n1,2,3\n4,5,6\n') tempcsv.seek(0) - with pytest.warns(MatplotlibDeprecationWarning): - array = mlab.csv2rec(tempcsv, names='a,b,c') + array = mlab._csv2rec(tempcsv, names='a,b,c') assert len(array) == 2 assert len(array.dtype) == 3 @@ -268,25 +186,10 @@ def test_csv2rec_dates(tempcsv, input, kwargs): datetime.datetime(2054, 6, 20, 14, 31, 45), datetime.datetime(2000, 10, 31, 11, 50, 23)] tempcsv.seek(0) - with pytest.warns(MatplotlibDeprecationWarning): - array = mlab.csv2rec(tempcsv, names='a', **kwargs) + array = mlab._csv2rec(tempcsv, names='a', **kwargs) assert_array_equal(array['a'].tolist(), expected) -def test_rec2txt_basic(): - a = np.array([(1.0, 2, 'foo', 'bing'), - (2.0, 3, 'bar', 'blah')], - dtype=np.dtype([('x', np.float32), - ('y', np.int8), - ('s', str, 3), - ('s2', str, 4)])) - truth = (' x y s s2\n' - ' 1.000 2 foo bing \n' - ' 2.000 3 bar blah ').splitlines() - with pytest.warns(MatplotlibDeprecationWarning): - assert mlab.rec2txt(a).splitlines() == truth - - class TestWindow(object): def setup(self): np.random.seed(0) @@ -2135,81 +2038,6 @@ def test_cohere(): assert np.isreal(np.mean(cohsq)) -def test_griddata_linear(): - # z is a linear function of x and y. - def get_z(x, y): - return 3.0*x - y - - # Passing 1D xi and yi arrays to griddata. - x = np.asarray([0.0, 1.0, 0.0, 1.0, 0.5]) - y = np.asarray([0.0, 0.0, 1.0, 1.0, 0.5]) - z = get_z(x, y) - xi = [0.2, 0.4, 0.6, 0.8] - yi = [0.1, 0.3, 0.7, 0.9] - with pytest.warns(MatplotlibDeprecationWarning): - zi = mlab.griddata(x, y, z, xi, yi, interp='linear') - xi, yi = np.meshgrid(xi, yi) - np.testing.assert_array_almost_equal(zi, get_z(xi, yi)) - - # Passing 2D xi and yi arrays to griddata. - with pytest.warns(MatplotlibDeprecationWarning): - zi = mlab.griddata(x, y, z, xi, yi, interp='linear') - np.testing.assert_array_almost_equal(zi, get_z(xi, yi)) - - # Masking z array. - z_masked = np.ma.array(z, mask=[False, False, False, True, False]) - correct_zi_masked = np.ma.masked_where(xi + yi > 1.0, get_z(xi, yi)) - with pytest.warns(MatplotlibDeprecationWarning): - zi = mlab.griddata(x, y, z_masked, xi, yi, interp='linear') - matest.assert_array_almost_equal(zi, correct_zi_masked) - np.testing.assert_array_equal(np.ma.getmask(zi), - np.ma.getmask(correct_zi_masked)) - - -def test_griddata_nn(): - pytest.importorskip('mpl_toolkits.natgrid') - - # z is a linear function of x and y. - def get_z(x, y): - return 3.0*x - y - - # Passing 1D xi and yi arrays to griddata. 
- x = np.asarray([0.0, 1.0, 0.0, 1.0, 0.5]) - y = np.asarray([0.0, 0.0, 1.0, 1.0, 0.5]) - z = get_z(x, y) - xi = [0.2, 0.4, 0.6, 0.8] - yi = [0.1, 0.3, 0.7, 0.9] - correct_zi = [[0.49999252, 1.0999978, 1.7000030, 2.3000080], - [0.29999208, 0.8999978, 1.5000029, 2.1000059], - [-0.1000099, 0.4999943, 1.0999964, 1.6999979], - [-0.3000128, 0.2999894, 0.8999913, 1.4999933]] - with pytest.warns(MatplotlibDeprecationWarning): - zi = mlab.griddata(x, y, z, xi, yi, interp='nn') - np.testing.assert_array_almost_equal(zi, correct_zi, 5) - - with pytest.warns(MatplotlibDeprecationWarning): - # Decreasing xi or yi should raise ValueError. - with pytest.raises(ValueError): - mlab.griddata(x, y, z, xi[::-1], yi, interp='nn') - with pytest.raises(ValueError): - mlab.griddata(x, y, z, xi, yi[::-1], interp='nn') - - # Passing 2D xi and yi arrays to griddata. - xi, yi = np.meshgrid(xi, yi) - with pytest.warns(MatplotlibDeprecationWarning): - zi = mlab.griddata(x, y, z, xi, yi, interp='nn') - np.testing.assert_array_almost_equal(zi, correct_zi, 5) - - # Masking z array. - z_masked = np.ma.array(z, mask=[False, False, False, True, False]) - correct_zi_masked = np.ma.masked_where(xi + yi > 1.0, correct_zi) - with pytest.warns(MatplotlibDeprecationWarning): - zi = mlab.griddata(x, y, z_masked, xi, yi, interp='nn') - np.testing.assert_array_almost_equal(zi, correct_zi_masked, 5) - np.testing.assert_array_equal(np.ma.getmask(zi), - np.ma.getmask(correct_zi_masked)) - - #***************************************************************** # These Tests where taken from SCIPY with some minor modifications # this can be retrieved from: @@ -2403,33 +2231,6 @@ def test_evaluate_equal_dim_and_num_lt(self): np.testing.assert_array_almost_equal(y, y_expected, 7) -def test_contiguous_regions(): - a, b, c = 3, 4, 5 - # Starts and ends with True - mask = [True]*a + [False]*b + [True]*c - expected = [(0, a), (a+b, a+b+c)] - with pytest.warns(MatplotlibDeprecationWarning): - assert mlab.contiguous_regions(mask) == expected - d, e = 6, 7 - # Starts with True ends with False - mask = mask + [False]*e - with pytest.warns(MatplotlibDeprecationWarning): - assert mlab.contiguous_regions(mask) == expected - # Starts with False ends with True - mask = [False]*d + mask[:-e] - expected = [(d, d+a), (d+a+b, d+a+b+c)] - with pytest.warns(MatplotlibDeprecationWarning): - assert mlab.contiguous_regions(mask) == expected - # Starts and ends with False - mask = mask + [False]*e - with pytest.warns(MatplotlibDeprecationWarning): - assert mlab.contiguous_regions(mask) == expected - # No True in mask - assert mlab.contiguous_regions([False]*5) == [] - # Empty mask - assert mlab.contiguous_regions([]) == [] - - def test_psd_onesided_norm(): u = np.array([0, 1, 2, 3, 1, 2, 1]) dt = 1.0
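With ``mlab.griddata`` and its tests gone, `scipy.interpolate.griddata` (the alternative named in the deprecation decorator) covers the linear case exercised by the removed ``test_griddata_linear``; a minimal sketch using the same scattered points::

    import numpy as np
    from scipy.interpolate import griddata

    # z is a linear function of x and y, so linear interpolation on the
    # Delaunay triangulation reproduces it exactly inside the convex hull.
    x = np.array([0.0, 1.0, 0.0, 1.0, 0.5])
    y = np.array([0.0, 0.0, 1.0, 1.0, 0.5])
    z = 3.0 * x - y

    xi, yi = np.meshgrid([0.2, 0.4, 0.6, 0.8], [0.1, 0.3, 0.7, 0.9])
    zi = griddata((x, y), z, (xi, yi), method='linear')
    print(np.allclose(zi, 3.0 * xi - yi))  # True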